diff --git a/smart_open/s3.py b/smart_open/s3.py
index 8c2b3c2f..95138538 100644
--- a/smart_open/s3.py
+++ b/smart_open/s3.py
@@ -352,6 +352,11 @@ class SeekableBufferedInputBase(BufferedInputBase):
 
     def __init__(self, bucket, key, version_id=None, buffer_size=DEFAULT_BUFFER_SIZE,
                  line_terminator=BINARY_NEWLINE, session=None, resource_kwargs=None):
+
+        self._buffer_size = buffer_size
+        self._session = session
+        self._resource_kwargs = resource_kwargs
+
         if session is None:
             session = boto3.Session()
         if resource_kwargs is None:
@@ -412,6 +417,29 @@ def truncate(self, size=None):
         """Unsupported."""
         raise io.UnsupportedOperation
 
+    def __str__(self):
+        return "smart_open.s3.SeekableBufferedInputBase(%r, %r)" % (self._object.bucket_name, self._object.key)
+
+    def __repr__(self):
+        return (
+            "smart_open.s3.SeekableBufferedInputBase("
+            "bucket=%r, "
+            "key=%r, "
+            "version_id=%r, "
+            "buffer_size=%r, "
+            "line_terminator=%r, "
+            "session=%r, "
+            "resource_kwargs=%r)"
+        ) % (
+            self._object.bucket_name,
+            self._object.key,
+            self._version_id,
+            self._buffer_size,
+            self._line_terminator,
+            self._session,
+            self._resource_kwargs,
+        )
+
 
 class BufferedOutputBase(io.BufferedIOBase):
     """Writes bytes to S3.
@@ -427,6 +455,11 @@ def __init__(
             resource_kwargs=None,
             multipart_upload_kwargs=None,
             ):
+
+        self._session = session
+        self._resource_kwargs = resource_kwargs
+        self._multipart_upload_kwargs = multipart_upload_kwargs
+
         if min_part_size < MIN_MIN_PART_SIZE:
             logger.warning("S3 requires minimum part size >= 5MB; \
 multipart upload may fail")
@@ -451,6 +484,7 @@ def __init__(
         self._total_parts = 0
         self._parts = []
 
+
         #
         # This member is part of the io.BufferedIOBase interface.
         #
@@ -553,6 +587,27 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         else:
             self.close()
 
+    def __str__(self):
+        return "smart_open.s3.BufferedOutputBase(%r, %r)" % (self._object.bucket_name, self._object.key)
+
+    def __repr__(self):
+        return (
+            "smart_open.s3.BufferedOutputBase("
+            "bucket=%r, "
+            "key=%r, "
+            "min_part_size=%r, "
+            "session=%r, "
+            "resource_kwargs=%r, "
+            "multipart_upload_kwargs=%r)"
+        ) % (
+            self._object.bucket_name,
+            self._object.key,
+            self._min_part_size,
+            self._session,
+            self._resource_kwargs,
+            self._multipart_upload_kwargs,
+        )
+
 
 def iter_bucket(bucket_name, prefix='', accept_key=None,
                 key_limit=None, workers=16, retries=3):
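
Note (not part of the patch): a rough usage sketch of the new __str__/__repr__ methods. The bucket and key names below are hypothetical, and the snippet assumes boto3 can locate AWS credentials and that the referenced object exists, since constructing the reader may contact S3.

    from smart_open import s3

    # Hypothetical bucket/key; requires valid AWS credentials and an existing object.
    reader = s3.SeekableBufferedInputBase('my-bucket', 'my-key.txt')

    print(str(reader))
    # smart_open.s3.SeekableBufferedInputBase('my-bucket', 'my-key.txt')

    print(repr(reader))
    # smart_open.s3.SeekableBufferedInputBase(bucket='my-bucket', key='my-key.txt',
    # version_id=None, buffer_size=..., line_terminator=..., session=None,
    # resource_kwargs=None)  (exact values depend on the defaults in use)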