-
Notifications
You must be signed in to change notification settings - Fork 10
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix b2cli 604 #90
base: master
Are you sure you want to change the base?
Fix b2cli 604 #90
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -158,15 +158,26 @@ def _upload_part( | |
try: | ||
with part_upload_source.open() as part_stream: | ||
input_stream = ReadingStreamWithProgress(part_stream, part_progress_listener) | ||
hashing_stream = StreamWithHash( | ||
input_stream, stream_length=part_upload_source.get_content_length() | ||
) | ||
# it is important that `len()` works on `hashing_stream` | ||
|
||
if part_upload_source.is_sha1_known(): | ||
sha1_checksum = part_upload_source.content_sha1 | ||
logger.debug('hash for part %s is known: %s, use that', part_upload_source, sha1_checksum) | ||
else: | ||
sha1_checksum = HEX_DIGITS_AT_END | ||
# wrap it with a hasher | ||
input_stream = StreamWithHash( | ||
input_stream, | ||
stream_length=part_upload_source.get_content_length(), | ||
) | ||
logger.debug('hash for part %s is unknown, calculate it and provide it at the end of the stream', part_upload_source) | ||
response = self.services.session.upload_part( | ||
file_id, part_number, hashing_stream.length, HEX_DIGITS_AT_END, | ||
hashing_stream | ||
file_id, | ||
part_number, | ||
part_upload_source.get_content_length(), | ||
HEX_DIGITS_AT_END, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. sha1_checksum? |
||
input_stream, | ||
) | ||
assert hashing_stream.hash == response['contentSha1'] | ||
assert part_upload_source.get_content_sha1() == response['contentSha1'], 'part checksum mismatch! %s vs %s' % (part_upload_source.get_content_sha1(), response['contentSha1']) | ||
return response | ||
|
||
except B2Error as e: | ||
|
@@ -189,13 +200,24 @@ def _upload_small_file( | |
try: | ||
with upload_source.open() as file: | ||
input_stream = ReadingStreamWithProgress(file, progress_listener) | ||
hashing_stream = StreamWithHash(input_stream, stream_length=content_length) | ||
# it is important that `len()` works on `hashing_stream` | ||
if upload_source.is_sha1_known(): | ||
sha1_checksum = upload_source.content_sha1 | ||
logger.debug('hash for %s is known: %s, use that', upload_source, sha1_checksum) | ||
else: | ||
sha1_checksum = HEX_DIGITS_AT_END | ||
# wrap it with a hasher | ||
input_stream = StreamWithHash(input_stream, stream_length=content_length) | ||
logger.debug('hash for %s is unknown, calculate it and provide it at the end of the stream', upload_source) | ||
response = self.services.session.upload_file( | ||
bucket_id, file_name, hashing_stream.length, content_type, | ||
HEX_DIGITS_AT_END, file_info, hashing_stream | ||
bucket_id, | ||
file_name, | ||
content_length, | ||
content_type, | ||
sha1_checksum, # can be HEX_DIGITS_AT_END | ||
file_info, | ||
input_stream, # can be a hashing stream or a raw stream | ||
) | ||
assert hashing_stream.hash == response['contentSha1'] | ||
assert upload_source.get_content_sha1() == response['contentSha1'], 'small file checksum mismatch!' | ||
return FileVersionInfoFactory.from_api_response(response) | ||
|
||
except B2Error as e: | ||
|
@@ -204,4 +226,5 @@ def _upload_small_file( | |
exception_info_list.append(e) | ||
self.account_info.clear_bucket_upload_data(bucket_id) | ||
|
||
|
||
raise MaxRetriesExceeded(self.MAX_UPLOAD_ATTEMPTS, exception_info_list) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,19 +23,40 @@ | |
class AbstractUploadSource(OutboundTransferSource): | ||
""" | ||
The source of data for uploading to b2. | ||
|
||
`is_sha1_known()` is useful for medium-sized files where in the first upload attempt we'd like to | ||
stream-read-and-hash, but later on when retrying, the hash is already calculated, so | ||
there is no point in calculating it again. The caller may use :py:class:`b2sdk.v1.StreamWithHash` | ||
in the first attempt and then switch to passing the checksum explicitly to :meth:`b2sdk.v1.Session.upload_file` | ||
in order to avoid (cpu-intensive) re-streaming. | ||
|
||
:ivar ~.content_sha1: sha1 checksum of the entire file, can be ``None`` if unknown (yet) | ||
:vartype ~.content_sha1: str or None | ||
""" | ||
def __init__(self, content_sha1=None): | ||
self.content_sha1 = content_sha1 # NOTE: b2sdk.transfer.upload_manager *writes* to this field | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. it is better to create a setter... |
||
|
||
@abstractmethod | ||
def get_content_sha1(self): | ||
""" | ||
Return a 40-character string containing the hex SHA1 checksum of the data in the file. | ||
Return a 40-character string containing the hex sha1 checksum of the data in the file. | ||
The implementation of this method may cache the checksum value to avoid recalculating it. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't like this design - |
||
This method may not be thread-safe: if two threads are trying to get the checksum | ||
at the exact same moment, it may be calculated twice. | ||
""" | ||
|
||
def is_sha1_known(self): | ||
""" | ||
Tells whether the checksum is already known, i.e. whether calling `get_content_sha1()` would return it without having to calculate it. | ||
|
||
:rtype: bool | ||
""" | ||
return self.content_sha1 is not None | ||
|
||
@abstractmethod | ||
def open(self): | ||
""" | ||
Return a binary file-like object from which the | ||
data can be read. | ||
Return a binary file-like object from which the data can be read. | ||
:return: | ||
""" | ||
|
||
|
@@ -47,8 +68,9 @@ def is_copy(self): | |
|
||
|
||
class UploadSourceBytes(AbstractUploadSource): | ||
def __init__(self, data_bytes): | ||
def __init__(self, data_bytes, content_sha1=None): | ||
self.data_bytes = data_bytes | ||
super(UploadSourceBytes, self).__init__(content_sha1) | ||
|
||
def __repr__(self): | ||
return '<{classname} data={data} id={id}>'.format( | ||
|
@@ -62,7 +84,9 @@ def get_content_length(self): | |
return len(self.data_bytes) | ||
|
||
def get_content_sha1(self): | ||
return hashlib.sha1(self.data_bytes).hexdigest() | ||
if self.content_sha1 is None: | ||
self.content_sha1 = hashlib.sha1(self.data_bytes).hexdigest() | ||
return self.content_sha1 | ||
|
||
def open(self): | ||
return io.BytesIO(self.data_bytes) | ||
|
@@ -74,7 +98,7 @@ def __init__(self, local_path, content_sha1=None): | |
if not os.path.isfile(local_path): | ||
raise InvalidUploadSource(local_path) | ||
self.content_length = os.path.getsize(local_path) | ||
self.content_sha1 = content_sha1 | ||
super(UploadSourceLocalFile, self).__init__(content_sha1) | ||
|
||
def __repr__(self): | ||
return ( | ||
|
@@ -115,6 +139,7 @@ def __init__(self, local_path, content_sha1=None, offset=0, length=None): | |
if length + self.offset > self.file_size: | ||
raise ValueError('Range length overflow file size') | ||
self.content_length = length | ||
super(UploadSourceLocalFileRange, self).__init__(content_sha1) | ||
|
||
def __repr__(self): | ||
return ( | ||
|
@@ -138,7 +163,7 @@ class UploadSourceStream(AbstractUploadSource): | |
def __init__(self, stream_opener, stream_length=None, stream_sha1=None): | ||
self.stream_opener = stream_opener | ||
self._content_length = stream_length | ||
self._content_sha1 = stream_sha1 | ||
super(UploadSourceStream, self).__init__(content_sha1=stream_sha1) | ||
|
||
def __repr__(self): | ||
return ( | ||
|
@@ -148,7 +173,7 @@ def __repr__(self): | |
classname=self.__class__.__name__, | ||
stream_opener=repr(self.stream_opener), | ||
content_length=self._content_length, | ||
content_sha1=self._content_sha1, | ||
content_sha1=self.content_sha1, | ||
id=id(self), | ||
) | ||
|
||
|
@@ -158,17 +183,17 @@ def get_content_length(self): | |
return self._content_length | ||
|
||
def get_content_sha1(self): | ||
if self._content_sha1 is None: | ||
if self.content_sha1 is None: | ||
self._set_content_length_and_sha1() | ||
return self._content_sha1 | ||
return self.content_sha1 | ||
|
||
def open(self): | ||
return self.stream_opener() | ||
|
||
def _set_content_length_and_sha1(self): | ||
sha1, content_length = hex_sha1_of_unlimited_stream(self.open()) | ||
self._content_length = content_length | ||
self._content_sha1 = sha1 | ||
self.content_sha1 = sha1 | ||
|
||
|
||
class UploadSourceStreamRange(UploadSourceStream): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bleeeh