Skip to content

Commit a54d606

Browse files
authored
feat: S3 copy method (#125)
* feat: add AUDIT log level for upload
* chore: update outdated tests
* fix: allow empty str as RESULT_PATH_PREFIX & replace w/ default val
* fix: allow optional original STAC item
* feat: copy method for S3
1 parent 4d0eea8 commit a54d606

File tree

1 file changed

+108
-14
lines changed

1 file changed

+108
-14
lines changed

mdps_ds_lib/lib/aws/aws_s3.py

Lines changed: 108 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -223,20 +223,23 @@ def read_small_txt_file(self):
223223
bytestream = BytesIO(self.get_stream().read()) # get the bytes stream of zipped file
224224
return bytestream.read().decode('UTF-8')
225225

226-
def delete_one(self):
227-
response = self.__s3_client.delete_object(
228-
Bucket=self.__target_bucket,
229-
Key=self.__target_key,
230-
# MFA='string',
231-
# VersionId='string',
232-
# RequestPayer='requester',
233-
# BypassGovernanceRetention=True | False,
234-
# ExpectedBucketOwner='string',
235-
# IfMatch='string',
236-
# IfMatchLastModifiedTime=datetime(2015, 1, 1),
237-
# IfMatchSize=123
238-
)
239-
return response
226+
def delete_one(self, version_id: str = None):
    """
    Delete the object at the currently targeted bucket/key.

    :param version_id: optional S3 object version to delete; when omitted,
        delete_object removes the current version (or, on a versioned
        bucket, inserts a delete marker)
    :return: the raw ``delete_object`` response dict from boto3
    """
    params = {
        'Bucket': self.__target_bucket,
        'Key': self.__target_key,
    }
    # Other delete_object options (MFA, RequestPayer, BypassGovernanceRetention,
    # ExpectedBucketOwner, IfMatch / IfMatchLastModifiedTime / IfMatchSize) are
    # not currently needed; add them to `params` the same way if they ever are.
    if version_id is not None:
        params['VersionId'] = version_id
    return self.__s3_client.delete_object(**params)
240243

241244
def delete_multiple(self, s3_urls: list=[], s3_bucket: str='', s3_paths: list=[]):
242245
if len(s3_urls) < 1 and len(s3_paths) < 1:
@@ -274,3 +277,94 @@ def delete_multiple(self, s3_urls: list=[], s3_bucket: str='', s3_paths: list=[]
274277
)
275278
return response
276279

280+
def get_tags(self, version_id: str = None) -> Union[dict, None]:
    """
    Return all S3 object tags of the currently targeted bucket/key as a dict.

    :param version_id: optional object version whose tags to read; when
        omitted, the current version's tags are returned
    :return: ``{tag_key: tag_value}`` dict, or None if the response carries
        no ``TagSet`` (defensive; normally the key is always present)
    """
    params = {
        'Bucket': self.target_bucket,
        'Key': self.target_key,
    }
    if version_id is not None:
        params['VersionId'] = version_id
    response = self.__s3_client.get_object_tagging(**params)
    if 'TagSet' not in response:
        return None
    return {k['Key']: k['Value'] for k in response['TagSet']}
298+
299+
def copy_artifact(self, src_base_path: str, src_relative_path: str, dest_base_path: str, dest_relative_path: str,
                  copy_tags: bool = True, update_old_metadata_style: bool = True, delete_original: bool = False):
    """
    Copy one S3 object to another bucket/key, carrying its tags along.

    Side effect: retargets this instance (``target_bucket`` / ``target_key``)
    first to the source and finally to the destination (or back to the source
    when ``delete_original`` is True).

    :param src_base_path: source bucket
    :param src_relative_path: source key
    :param dest_base_path: destination bucket
    :param dest_relative_path: destination key
    :param copy_tags: when True, re-apply the source object's tags to the copy
    :param update_old_metadata_style: when True, fold legacy user metadata
        entries into the tag set (metadata is not copied separately)
    :param delete_original: when True, delete the source object after copying
    :return: ``s3://<dest_bucket>/<dest_key>`` URL of the copied object
    :raises ValueError: when the source object is missing or not readable
    """
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/head_object.html
    try:
        source_head = self.__s3_client.head_object(Bucket=src_base_path, Key=src_relative_path)
    except Exception as e:
        # chain the boto3 error so the real cause (403 vs 404, etc.) is not lost
        raise ValueError(f'missing source: {src_base_path} - {src_relative_path}') from e
    # S3 omits StorageClass from the response for STANDARD objects, so default it
    # rather than risking a KeyError. Currently unused by the live copy path;
    # retained for a future copy_object()-based implementation that would also
    # preserve storage class and metadata in a single request:
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/copy_object.html
    storage_class = self.__s3_client.get_object_attributes(
        Bucket=src_base_path, Key=src_relative_path,
        ObjectAttributes=['StorageClass']).get('StorageClass', 'STANDARD')
    src_metadata = source_head['Metadata']
    self.target_bucket, self.target_key = src_base_path, src_relative_path
    src_tagging = self.get_tags() if copy_tags else {}
    if update_old_metadata_style:
        # migrate legacy user-metadata entries into tags instead of copying them as metadata
        for k, v in src_metadata.items():
            src_tagging[k] = v

    # Managed transfer (handles multipart copy for large objects automatically):
    # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3/client/copy.html
    copy_source = {'Bucket': src_base_path, 'Key': src_relative_path}  # 'VersionId': 'string'
    self.__s3_client.copy(copy_source, dest_base_path, dest_relative_path, ExtraArgs={})
    self.target_bucket, self.target_key = dest_base_path, dest_relative_path
    self.add_tags_to_obj(src_tagging)
    if delete_original:
        # use the target_key property consistently (was the mangled private
        # attribute) so any setter-side handling is not bypassed
        self.target_bucket, self.target_key = src_base_path, src_relative_path
        self.delete_one()
    return f's3://{dest_base_path}/{dest_relative_path}'

0 commit comments

Comments
 (0)