Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for S3 object version IDs #4

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
2d0260a
add support for S3 object version IDs
nlangellier May 20, 2023
0e01b38
remove __new__ from `PureS3Path`
nlangellier May 20, 2023
8c2308b
add `version_id` to `StatResult`
nlangellier May 22, 2023
3f950bf
change default for `version_id` in `StatResult` to `None`
nlangellier May 22, 2023
35462e4
remove unnecessary argument to `StatResult`
nlangellier May 22, 2023
6acb603
Revert "remove unnecessary argument to `StatResult`"
nlangellier May 22, 2023
e761c83
add `version_id` to `S3DirEntry`
nlangellier May 22, 2023
5e5e042
remove default value for `version_id` argument of `StatResult`
nlangellier May 22, 2023
100c81a
refactor
nlangellier May 22, 2023
71f5298
refactor
nlangellier May 22, 2023
fbc3d7f
add ?version_id=
chnpenny May 27, 2023
bd8b5c1
Merge pull request #5 from nlangellier/oneline_try
nlangellier May 27, 2023
582cf07
fix the space, tab problem
chnpenny May 29, 2023
30ec514
Merge pull request #6 from nlangellier/oneline_try2
chnpenny May 29, 2023
e7aa8b7
space-tab 2
chnpenny May 30, 2023
c9c5458
add from_bucket_key_versionid
chnpenny May 30, 2023
f461738
fix from_bucket_key_versionid
chnpenny May 30, 2023
bcdac7c
add test for versioned S3 objects
nlangellier May 30, 2023
dde618e
bug fix
nlangellier May 30, 2023
733048e
add test for `from_bucket_key_versionid`
nlangellier May 30, 2023
b153755
refactor versioned object test
nlangellier May 30, 2023
7d74371
refactor versioned object test and include check for latest version w…
nlangellier May 31, 2023
9f29739
bug fix
nlangellier May 31, 2023
8838dcf
add version_id to s3path
chnpenny Jun 3, 2023
89868ab
Merge branch 'feature/s3_object_version_id_support' of github.com:nla…
chnpenny Jun 3, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 63 additions & 5 deletions s3path.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,15 @@ def stat(self, path, *, follow_symlinks=True):
raise NotImplementedError(
f'Setting follow_symlinks to {follow_symlinks} is unsupported on S3 service.')
resource, _ = self.configuration_map.get_configuration(path)

if hasattr(path, 'version_id') and path.version_id is not None:
object_summary = resource.ObjectVersion(path.bucket, path.key, path.version_id).get()
return StatResult(
size=object_summary['ContentLength'],
last_modified=object_summary['LastModified'],
version_id=object_summary['VersionId'],
)

object_summary = resource.ObjectSummary(path.bucket, path.key)
return StatResult(
size=object_summary.size,
Expand Down Expand Up @@ -245,6 +254,8 @@ def open(self, path, *, mode='r', buffering=-1, encoding=None, errors=None, newl
'newline': newline,
}
transport_params = {'defer_seek': True}
if hasattr(path, 'version_id') and path.version_id is not None:
transport_params['version_id'] = path.version_id
dummy_object = resource.Object('bucket', 'key')
if smart_open.__version__ >= '5.1.0':
self._smart_open_new_version_kwargs(
Expand Down Expand Up @@ -764,7 +775,20 @@ def from_uri(cls, uri):
"""
if not uri.startswith('s3://'):
raise ValueError('Provided uri seems to be no S3 URI!')
return cls(uri[4:])

uri, *version_id = uri.split('?VersionID=')

if len(version_id) > 1:
raise ValueError('Do you use "?VersionID=" in your bucket or key?')
elif len(version_id) == 0:
version_id = None
else:
version_id = version_id[0]

self = cls(uri[4:])
self.version_id = version_id

return self

@property
def bucket(self):
Expand Down Expand Up @@ -808,7 +832,22 @@ def from_bucket_key(cls, bucket, key):
key = cls(key)
if key.is_absolute():
key = key.relative_to('/')
return bucket / key
self = bucket / key
self.version_id = None
return self

@classmethod
def from_bucket_key_versionid(cls, bucket, key, version_id):
    """
    Build a path from a bucket and a key via from_bucket_key, then store version_id on it

    >> from s3path import PureS3Path
    >> PureS3Path.from_bucket_key_versionid(bucket='<bucket>', key='<key>', version_id='<version_id>')
    << PureS3Path('/<bucket>/<key>')
    """
    # Delegate path construction, then overwrite the version the delegate left on the instance.
    versioned_path = cls.from_bucket_key(bucket, key)
    versioned_path.version_id = version_id
    return versioned_path

def as_uri(self):
"""
Expand All @@ -833,6 +872,21 @@ class S3Path(_PathNotSupportedMixin, Path, PureS3Path):
_accessor = _s3_accessor
__slots__ = ()

def __new__(cls, *args, **kwargs):
    """
    Create the path, accepting an optional '?VersionID=<version_id>' suffix on the last segment

    The suffix is stripped before the segments are passed to the parent constructor;
    the parsed id (None when no suffix is present) is stored on `version_id`.

    Raises ValueError when '?VersionID=' occurs more than once in that segment.
    """
    version_id = None
    # `args` is the tuple of path segments, so the marker has to be looked for inside
    # a segment, not on the tuple itself. Only a trailing string segment is inspected;
    # any other argument is forwarded untouched.
    if args and isinstance(args[-1], str):
        *leading, last = args
        last, *found = last.split('?VersionID=')
        if len(found) > 1:
            raise ValueError('Do you use "?VersionID=" in your bucket or key?')
        if found:
            version_id = found[0]
        args = (*leading, last)

    self = super().__new__(cls, *args, **kwargs)
    # NOTE(review): assumes instances can take a `version_id` attribute although this
    # class declares `__slots__ = ()` - confirm a base class provides `__dict__` or a slot.
    self.version_id = version_id
    return self

def _init(self, template=None):
super()._init(template)
if template is None:
Expand Down Expand Up @@ -1124,7 +1178,7 @@ def absolute(self):
raise ValueError("Absolute path can't be determined for relative S3Path objects")


class StatResult(namedtuple('BaseStatResult', 'size, last_modified')):
class StatResult(namedtuple('BaseStatResult', 'size, last_modified, version_id', defaults=(None,))):
"""
Base of os.stat_result but with boto3 s3 features
"""
Expand All @@ -1142,12 +1196,16 @@ def st_size(self):
def st_mtime(self):
return self.last_modified.timestamp()

@property
def st_version_id(self):
    """
    Value of the `version_id` field, exposed under an `st_`-prefixed name
    """
    return self.version_id


class S3DirEntry:
def __init__(self, name, is_dir, size=None, last_modified=None):
def __init__(self, name, is_dir, size=None, last_modified=None, version_id=None):
self.name = name
self._is_dir = is_dir
self._stat = StatResult(size=size, last_modified=last_modified)
self._stat = StatResult(size=size, last_modified=last_modified, version_id=version_id)

def __repr__(self):
return f'{type(self).__name__}(name={self.name}, is_dir={self._is_dir}, stat={self._stat})'
Expand Down
40 changes: 39 additions & 1 deletion tests/test_path_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,4 +704,42 @@ def test_absolute(s3_mock):

relative_path = S3Path('./Test.test')
with pytest.raises(ValueError):
relative_path.absolute()
relative_path.absolute()


def test_versioned_bucket(s3_mock):
    """
    Write several versions of one object, then read each version and the latest one through S3Path
    """
    bucket, key = 'test-versioned-bucket', 'versioned_file.txt'

    s3 = boto3.resource('s3')
    s3.create_bucket(Bucket=bucket)
    s3.BucketVersioning(bucket).enable()

    object_summary = s3.ObjectSummary(bucket, key)
    file_contents_by_version = (b'Test', b'Test updated', b'Test', b'Test final')

    # Upload the payloads in order, keyed by the version id returned for each put.
    version_id_to_file_content = {
        object_summary.put(Body=payload).get("VersionId"): payload
        for payload in file_contents_by_version
    }

    # Two payloads are identical, so four entries means four distinct version ids.
    assert len(version_id_to_file_content) == 4

    def read_all(s3_path):
        with s3_path.open(mode='rb') as file_pointer:
            return file_pointer.read()

    # Test that we can read specific versions of the file
    for version_id, expected in version_id_to_file_content.items():
        from_uri = S3Path.from_uri(f's3://{bucket}/{key}?VersionID={version_id}')
        from_parts = S3Path.from_bucket_key_versionid(bucket=bucket, key=key, version_id=version_id)
        assert read_all(from_uri) == expected
        assert read_all(from_parts) == expected

    # Test that we receive the latest version of the file when no version_id is specified
    latest = file_contents_by_version[-1]
    unversioned_from_uri = S3Path.from_uri(f's3://{bucket}/{key}')
    unversioned_from_parts = S3Path.from_bucket_key(bucket=bucket, key=key)
    assert read_all(unversioned_from_uri) == latest
    assert read_all(unversioned_from_parts) == latest