Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Download to stdout #438

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,22 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
* Add `*_PART_SIZE` constants to public interface
* Add `*_PART_SIZE`, `BUCKET_NAME_*`, `STDOUT_FILEPATH` constants
* Add `points_to_fifo`, `points_to_stdout` functions

### Changed
* Mark `TempDir` as deprecated in favor of `tempfile.TemporaryDirectory`

### Fixed
* Fix downloading to a non-seekable file, such as /dev/stdout
* Fix ScanPoliciesManager support for compiled regexes

### Infrastructure
* Fix readthedocs build by updating to v2 configuration schema
* Fix spellcheck erroring out on LICENSE file
* Fix snyk reporting vulnerability due to tornado package use in docs generation
* Deduplicate test_base files in test suite
* Refactor integration tests for better pytest compatibility & eager bucket cleanup

## [1.24.1] - 2023-09-27

Expand Down
14 changes: 14 additions & 0 deletions b2sdk/_internal/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
######################################################################
#
# File: b2sdk/_internal/__init__.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
"""
b2sdk._internal package contains internal modules, and should not be used directly.

Please use chosen apiver package instead, e.g. b2sdk.v2
"""
9 changes: 9 additions & 0 deletions b2sdk/_internal/utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
######################################################################
#
# File: b2sdk/_internal/utils/__init__.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
36 changes: 36 additions & 0 deletions b2sdk/_internal/utils/filesystem.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
######################################################################
#
# File: b2sdk/_internal/utils/filesystem.py
#
# Copyright 2023 Backblaze Inc. All Rights Reserved.
#
# License https://www.backblaze.com/using_b2_code.html
#
######################################################################
import pathlib
import platform
import stat

_IS_WINDOWS = platform.system() == "Windows"


def points_to_fifo(path: pathlib.Path) -> bool:
    """
    Check if the path points to a fifo.

    :param path: filesystem path to inspect; it is resolved before being stat-ed
    :return: True if the resolved path exists and is a FIFO (named pipe),
             False if it is anything else or cannot be resolved/stat-ed
    """
    try:
        # resolve() is kept inside the try on purpose: it touches the
        # filesystem too and may fail (OSError, or RuntimeError for symlink
        # loops on Python < 3.13).  A path we cannot inspect is not a FIFO.
        return stat.S_ISFIFO(path.resolve().stat().st_mode)
    except (OSError, RuntimeError):
        return False


_STDOUT_FILENAME = "CON" if _IS_WINDOWS else "/dev/stdout"
STDOUT_FILEPATH = pathlib.Path(_STDOUT_FILENAME)


def points_to_stdout(path: pathlib.Path) -> bool:
    """
    Check if the path points to stdout.

    The path is compared with ``STDOUT_FILEPATH`` both as given and after
    resolving it.

    :param path: filesystem path to inspect
    :return: True if either form equals ``STDOUT_FILEPATH``; False otherwise,
             including when resolving the path raises ``OSError``
    """
    if path == STDOUT_FILEPATH:
        return True
    try:
        resolved = path.resolve()
    except OSError:
        return False
    return resolved == STDOUT_FILEPATH
8 changes: 8 additions & 0 deletions b2sdk/_v3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@
IncrementalHexDigester,
)

from b2sdk._internal.utils.filesystem import (
points_to_fifo,
points_to_stdout,
STDOUT_FILEPATH,
)
from b2sdk.utils import trace_call
from b2sdk.utils.docs import get_b2sdk_doc_urls

Expand Down Expand Up @@ -239,6 +244,9 @@
from b2sdk.cache import DummyCache
from b2sdk.cache import InMemoryCache
from b2sdk.http_constants import (
BUCKET_NAME_CHARS,
BUCKET_NAME_CHARS_UNIQ,
BUCKET_NAME_LENGTH_RANGE,
DEFAULT_MAX_PART_SIZE,
DEFAULT_MIN_PART_SIZE,
DEFAULT_RECOMMENDED_UPLOAD_PART_SIZE,
Expand Down
6 changes: 5 additions & 1 deletion b2sdk/exception.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,11 @@ class PotentialS3EndpointPassedAsRealm(InvalidJsonResponse):
pass


class DestinationDirectoryError(B2Error):
class DestinationError(B2Error):
pass


class DestinationDirectoryError(DestinationError):
pass


Expand Down
7 changes: 7 additions & 0 deletions b2sdk/http_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,16 @@
######################################################################
from __future__ import annotations

import string

# These constants are needed in different modules, so they are stored in this module, that
# imports nothing, thus avoiding circular imports

# https://www.backblaze.com/docs/cloud-storage-buckets#bucket-names
# Every character that may appear in a bucket name: letters of either case,
# digits and the hyphen.
BUCKET_NAME_CHARS = string.ascii_letters + string.digits + '-'
# The same alphabet folded to lower case.
# NOTE(review): bucket names are presumably compared case-insensitively, so
# only these characters yield distinct names - confirm against the B2 docs.
BUCKET_NAME_CHARS_UNIQ = string.ascii_lowercase + string.digits + '-'
# (minimum, maximum) bucket name length, in characters
BUCKET_NAME_LENGTH_RANGE = (6, 63)

LIST_FILE_NAMES_MAX_LIMIT = 10000 # https://www.backblaze.com/b2/docs/b2_list_file_names.html

FILE_INFO_HEADER_PREFIX = 'X-Bz-Info-'
Expand Down
90 changes: 79 additions & 11 deletions b2sdk/transfer/inbound/downloaded_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,32 @@
######################################################################
from __future__ import annotations

import contextlib
import io
import logging
import pathlib
from typing import TYPE_CHECKING
import sys
from typing import TYPE_CHECKING, BinaryIO

from requests.models import Response

from b2sdk._internal.utils.filesystem import _IS_WINDOWS, points_to_fifo, points_to_stdout
from b2sdk.exception import (
ChecksumMismatch,
DestinationDirectoryDoesntAllowOperation,
DestinationDirectoryDoesntExist,
DestinationError,
DestinationIsADirectory,
DestinationParentIsNotADirectory,
TruncatedOutput,
)
from b2sdk.utils import set_file_mtime

try:
from typing_extensions import Literal
except ImportError:
from typing import Literal

from ...encryption.setting import EncryptionSetting
from ...file_version import DownloadVersion
from ...progress import AbstractProgressListener
Expand All @@ -40,6 +49,9 @@
class MtimeUpdatedFile(io.IOBase):
"""
Helper class that facilitates updating a file's mod_time after closing.

Over time this class has grown, and it now also provides better exception handling.

Usage:

.. code-block: python
Expand All @@ -50,13 +62,27 @@ class MtimeUpdatedFile(io.IOBase):
# 'some_local_path' has the mod_time set according to metadata in B2
"""

def __init__(self, path_, mod_time_millis: int, mode='wb+', buffering=None):
self.path_ = path_
def __init__(
self,
path_: str | pathlib.Path,
mod_time_millis: int,
mode: Literal['wb', 'wb+'] = 'wb+',
buffering: int | None = None,
):
self.path = pathlib.Path(path_) if isinstance(path_, str) else path_
self.mode = mode
self.buffering = buffering if buffering is not None else -1
self.mod_time_to_set = mod_time_millis
self.file = None

@property
def path_(self) -> str:
return str(self.path)

@path_.setter
def path_(self, value: str) -> None:
self.path = pathlib.Path(value)

def write(self, value):
"""
This method is overwritten (monkey-patched) in __enter__ for performance reasons
Expand All @@ -69,6 +95,9 @@ def read(self, *a):
"""
raise NotImplementedError

def seekable(self) -> bool:
return self.file.seekable()

def seek(self, offset, whence=0):
return self.file.seek(offset, whence)

Expand All @@ -77,7 +106,7 @@ def tell(self):

def __enter__(self):
try:
path = pathlib.Path(self.path_)
path = self.path
if not path.parent.exists():
raise DestinationDirectoryDoesntExist()

Expand All @@ -91,22 +120,26 @@ def __enter__(self):
except PermissionError as ex:
raise DestinationDirectoryDoesntAllowOperation() from ex

# All remaining problems should be with permissions.
try:
self.file = open(self.path_, self.mode, buffering=self.buffering)
self.file = open(
self.path,
self.mode,
buffering=self.buffering,
)
except PermissionError as ex:
raise DestinationDirectoryDoesntAllowOperation() from ex

self.write = self.file.write
self.read = self.file.read
self.mode = self.file.mode
return self

def __exit__(self, exc_type, exc_val, exc_tb):
self.file.close()
set_file_mtime(self.path_, self.mod_time_to_set)

def __str__(self):
return str(self.path_)
return str(self.path)


class DownloadedFile:
Expand Down Expand Up @@ -157,14 +190,20 @@ def _validate_download(self, bytes_read, actual_sha1):
if bytes_read != desired_length:
raise TruncatedOutput(bytes_read, desired_length)

def save(self, file, allow_seeking=True):
def save(self, file: BinaryIO, allow_seeking: bool | None = None) -> None:
"""
Read data from B2 cloud and write it to a file-like object

:param file: a file-like object
:param allow_seeking: if False, download strategies that rely on seeking to write data
(parallel strategies) will be discarded.
"""
if allow_seeking is None:
allow_seeking = file.seekable()
elif allow_seeking and not file.seekable():
logger.warning('File is not seekable, disabling strategies that require seeking')
allow_seeking = False

if self.progress_listener:
file = WritingStreamWithProgress(file, self.progress_listener)
if self.range_ is not None:
Expand All @@ -187,7 +226,12 @@ def save(self, file, allow_seeking=True):
)
self._validate_download(bytes_read, actual_sha1)

def save_to(self, path_, mode='wb+', allow_seeking=True):
def save_to(
self,
path_: str | pathlib.Path,
mode: Literal['wb', 'wb+'] | None = None,
allow_seeking: bool | None = None,
) -> None:
"""
Open a local file and write data from B2 cloud to it, also update the mod_time.

Expand All @@ -196,10 +240,34 @@ def save_to(self, path_, mode='wb+', allow_seeking=True):
:param allow_seeking: if False, download strategies that rely on seeking to write data
(parallel strategies) will be discarded.
"""
path_ = pathlib.Path(path_)
is_stdout = points_to_stdout(path_)
if is_stdout or points_to_fifo(path_):
if mode not in (None, 'wb'):
raise DestinationError(f'invalid mode requested {mode!r} for FIFO file {path_!r}')

if is_stdout and _IS_WINDOWS:
if self.write_buffer_size and self.write_buffer_size not in (
-1, io.DEFAULT_BUFFER_SIZE
):
logger.warning(
'Unable to set arbitrary write_buffer_size for stdout on Windows'
)
context = contextlib.nullcontext(sys.stdout.buffer)
else:
context = open(path_, 'wb', buffering=self.write_buffer_size or -1)

try:
with context as file:
return self.save(file, allow_seeking=allow_seeking)
finally:
if not is_stdout:
set_file_mtime(path_, self.download_version.mod_time_millis)

with MtimeUpdatedFile(
path_,
mod_time_millis=self.download_version.mod_time_millis,
mode=mode,
mode=mode or 'wb+',
buffering=self.write_buffer_size,
) as file:
self.save(file, allow_seeking=allow_seeking)
return self.save(file, allow_seeking=allow_seeking)
28 changes: 24 additions & 4 deletions b2sdk/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@

import base64
import hashlib
import logging
import os
import pathlib
import platform
import re
import time
Expand All @@ -23,6 +25,8 @@

from logfury.v1 import DefaultTraceAbstractMeta, DefaultTraceMeta, limit_trace_arguments, disable_trace, trace_call

logger = logging.getLogger(__name__)

Sha1HexDigest = NewType('Sha1HexDigest', str)
T = TypeVar('T')
# TODO: When we drop Python 3.7 support, this should be replaced
Expand Down Expand Up @@ -277,14 +281,26 @@ def get_file_mtime(local_path):
return int(mod_time)


def set_file_mtime(local_path, mod_time_millis):
def is_special_file(path: str | pathlib.Path) -> bool:
    """
    Is the path a special file, such as /dev/null or stdout?

    The check is purely textual; the filesystem is not consulted.

    :param path: a "file" path
    :return: True if the path is a special file
    """
    path_str = str(path)
    # Compare the string form: a pathlib.Path never equals a str, so comparing
    # the raw argument with os.devnull would always be False for Path inputs.
    if path_str == os.devnull or path_str.startswith('/dev/'):
        return True
    # Windows console / null device names are matched case-insensitively.
    return platform.system() == 'Windows' and path_str.upper() in ('CON', 'NUL')


def set_file_mtime(local_path: str | pathlib.Path, mod_time_millis: int) -> None:
"""
Set modification time of a file in milliseconds.

:param local_path: a file path
:type local_path: str
:param mod_time_millis: time to be set
:type mod_time_millis: int
"""
mod_time = mod_time_millis / 1000.0

Expand All @@ -299,7 +315,11 @@ def set_file_mtime(local_path, mod_time_millis):
# See #617 for details.
mod_time = float(Decimal('%.3f5' % mod_time))

os.utime(local_path, (mod_time, mod_time))
try:
os.utime(local_path, (mod_time, mod_time))
except OSError:
if not is_special_file(local_path):
raise


def fix_windows_path_limit(path):
Expand Down
Loading