From e43169e64eb81a7e7fa858e9d798eb555b4f6243 Mon Sep 17 00:00:00 2001 From: jackzhhuang Date: Wed, 5 Apr 2023 01:49:06 +0800 Subject: [PATCH 1/5] add upload file any size --- scripts/upload_block_on_s3.sh | 3 +- scripts/upload_file_any_size.py | 89 +++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 1 deletion(-) create mode 100644 scripts/upload_file_any_size.py diff --git a/scripts/upload_block_on_s3.sh b/scripts/upload_block_on_s3.sh index 070b80295a..c98e7244ef 100755 --- a/scripts/upload_block_on_s3.sh +++ b/scripts/upload_block_on_s3.sh @@ -6,4 +6,5 @@ end=$3 filename=block_"$start"_"$end".csv compress_name=$filename".tar.gz" tar czvf $compress_name $filename -aws s3api put-object --bucket main1.starcoin.org --key "$net"/"$compress_name" --body $compress_name +## aws s3api put-object --bucket main1.starcoin.org --key "$net"/"$compress_name" --body $compress_name +python3 upload_file_any_size.py main1.starcoin.org "$net"/"$compress_name" $compress_name diff --git a/scripts/upload_file_any_size.py b/scripts/upload_file_any_size.py new file mode 100644 index 0000000000..b7459f689b --- /dev/null +++ b/scripts/upload_file_any_size.py @@ -0,0 +1,89 @@ +import boto3 +from botocore.exceptions import ClientError +import os +import sys + +per_uploading = 104857600 ## 100 MB per uploading + +def upload_file(file_name, bucket, key): + object_name = os.path.basename(file_name) + + s3_client = boto3.client('s3') + try: + response = s3_client.upload_file(file_name, bucket, key) + except ClientError as e: + print(str(e)) + return False + return True + +def upload_file_big_file(file_name, bucket): + client = boto3.client('s3') + object_name = os.path.basename(file_name) + try: + create_bucket(bucket) + upload_file_big_file(file_name, bucket) + response = client.create_multipart_upload(Bucket = bucket, Key = object_name) + upload_id = response["UploadId"] + file_obj = open(file_name, "rb") + content = file_obj.read(per_uploading) + count = 1 + parts = [] + while content: + print(count) + response = client.upload_part(Body = content, Bucket = bucket, Key = object_name, PartNumber = count, UploadId = upload_id) + parts.append({'ETag': response["ETag"], 'PartNumber': count}) + count += 1 + content = file_obj.read(per_uploading) ## 100 MB per uploading + response = client.complete_multipart_upload(Bucket = bucket, Key = object_name, MultipartUpload = {'Parts': parts,}, UploadId = upload_id) + print(str(response)) + except ClientError as e: + print(str(e)) + return False + return True + +def upload_file_any_size(file_name, bucket, key): + file_size = os.path.getsize(file_name) + if file_size > per_uploading: + upload_file_big_file(file_name, bucket, key) + else: + upload_file(file_name, bucket, key) + +def create_bucket(bucket_name, region=None): + try: + if region is None: + s3_client = boto3.client('s3') + s3_client.create_bucket(Bucket=bucket_name) + else: + s3_client = boto3.client('s3', region_name=region) + location = {'LocationConstraint': region} + s3_client.create_bucket(Bucket=bucket_name, + CreateBucketConfiguration=location) + except ClientError as e: + print(str(e)) + return False + return True + +def list_bucket(): + s3 = boto3.client('s3') + response = s3.list_buckets() + print('Existing buckets:') + for bucket in response['Buckets']: + print(f' {bucket["Name"]}') + +def list_bucket(bucket_name, max_key): + s3 = boto3.client('s3') + response = s3.list_objects(Bucket = bucket_name, MaxKeys = max_key) + for item in response['Contents']: + print(f' {item["Key"]}') + + +def main(): + bucket_name = sys.argv[1] + key = sys.argv[2] + test_file = sys.argv[3] + + upload_file_any_size(test_file, bucket_name, key) + list_bucket(bucket_name, 10) + +if __name__ == '__main__': + main() \ No newline at end of file From 19fbef1cf5e4aee94a521c29dd7a9ffb0731d98a Mon Sep 17 00:00:00 2001 From: jackzhhuang Date: Wed, 5 Apr 2023 01:55:55 +0800 Subject: [PATCH 2/5] fix some type styles --- scripts/upload_file_any_size.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/upload_file_any_size.py b/scripts/upload_file_any_size.py index b7459f689b..5592e1578a 100644 --- a/scripts/upload_file_any_size.py +++ b/scripts/upload_file_any_size.py @@ -3,7 +3,7 @@ import os import sys -per_uploading = 104857600 ## 100 MB per uploading +PER_UPLOADING = 104857600 ## up to 100 MB per uploading action def upload_file(file_name, bucket, key): object_name = os.path.basename(file_name) @@ -25,7 +25,7 @@ def upload_file_big_file(file_name, bucket): response = client.create_multipart_upload(Bucket = bucket, Key = object_name) upload_id = response["UploadId"] file_obj = open(file_name, "rb") - content = file_obj.read(per_uploading) + content = file_obj.read(PER_UPLOADING) count = 1 parts = [] while content: @@ -33,7 +33,7 @@ def upload_file_big_file(file_name, bucket): response = client.upload_part(Body = content, Bucket = bucket, Key = object_name, PartNumber = count, UploadId = upload_id) parts.append({'ETag': response["ETag"], 'PartNumber': count}) count += 1 - content = file_obj.read(per_uploading) ## 100 MB per uploading + content = file_obj.read(PER_UPLOADING) response = client.complete_multipart_upload(Bucket = bucket, Key = object_name, MultipartUpload = {'Parts': parts,}, UploadId = upload_id) print(str(response)) except ClientError as e: @@ -43,7 +43,7 @@ def upload_file_big_file(file_name, bucket): def upload_file_any_size(file_name, bucket, key): file_size = os.path.getsize(file_name) - if file_size > per_uploading: + if file_size > PER_UPLOADING: upload_file_big_file(file_name, bucket, key) else: upload_file(file_name, bucket, key) @@ -52,11 +52,11 @@ def create_bucket(bucket_name, region=None): try: if region is None: s3_client = boto3.client('s3') - s3_client.create_bucket(Bucket=bucket_name) + s3_client.create_bucket(Bucket = bucket_name) else: - s3_client = boto3.client('s3', region_name=region) + s3_client = boto3.client('s3', region_name = region) location = {'LocationConstraint': region} - s3_client.create_bucket(Bucket=bucket_name, + s3_client.create_bucket(Bucket = bucket_name, CreateBucketConfiguration=location) except ClientError as e: print(str(e)) @@ -83,7 +83,7 @@ def main(): test_file = sys.argv[3] upload_file_any_size(test_file, bucket_name, key) - list_bucket(bucket_name, 10) + # list_bucket(bucket_name, 10) if __name__ == '__main__': main() \ No newline at end of file From 7a8199faab73f510d4c98e8c9b49259450438058 Mon Sep 17 00:00:00 2001 From: jackzhhuang Date: Wed, 5 Apr 2023 02:00:36 +0800 Subject: [PATCH 3/5] remove print count meaningless --- scripts/upload_file_any_size.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/upload_file_any_size.py b/scripts/upload_file_any_size.py index 5592e1578a..12ec526eb3 100644 --- a/scripts/upload_file_any_size.py +++ b/scripts/upload_file_any_size.py @@ -29,7 +29,7 @@ def upload_file_big_file(file_name, bucket): count = 1 parts = [] while content: - print(count) + # print(count) response = client.upload_part(Body = content, Bucket = bucket, Key = object_name, PartNumber = count, UploadId = upload_id) parts.append({'ETag': response["ETag"], 'PartNumber': count}) count += 1 From 241afdf4112e382091db7a3321803d5e4512395e Mon Sep 17 00:00:00 2001 From: jackzhhuang Date: Wed, 5 Apr 2023 10:46:15 +0800 Subject: [PATCH 4/5] add key in upload large file procedure --- scripts/upload_file_any_size.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/upload_file_any_size.py b/scripts/upload_file_any_size.py index 12ec526eb3..4d98d952d9 100644 --- a/scripts/upload_file_any_size.py +++ b/scripts/upload_file_any_size.py @@ -16,25 +16,22 @@ def upload_file(file_name, bucket, key): return False return True -def upload_file_big_file(file_name, bucket): +def upload_file_big_file(file_name, bucket, key): client = boto3.client('s3') - object_name = os.path.basename(file_name) try: create_bucket(bucket) - upload_file_big_file(file_name, bucket) - response = client.create_multipart_upload(Bucket = bucket, Key = object_name) + response = client.create_multipart_upload(Bucket = bucket, Key = key) upload_id = response["UploadId"] file_obj = open(file_name, "rb") content = file_obj.read(PER_UPLOADING) count = 1 parts = [] while content: - # print(count) - response = client.upload_part(Body = content, Bucket = bucket, Key = object_name, PartNumber = count, UploadId = upload_id) + response = client.upload_part(Body = content, Bucket = bucket, Key = key, PartNumber = count, UploadId = upload_id) parts.append({'ETag': response["ETag"], 'PartNumber': count}) count += 1 content = file_obj.read(PER_UPLOADING) - response = client.complete_multipart_upload(Bucket = bucket, Key = object_name, MultipartUpload = {'Parts': parts,}, UploadId = upload_id) + response = client.complete_multipart_upload(Bucket = bucket, Key = key, MultipartUpload = {'Parts': parts,}, UploadId = upload_id) print(str(response)) except ClientError as e: print(str(e)) @@ -82,6 +79,9 @@ def main(): key = sys.argv[2] test_file = sys.argv[3] + print(f"bucket_name: {bucket_name}") + print(f"key: {key}") + print(f"test_file: {test_file}") upload_file_any_size(test_file, bucket_name, key) # list_bucket(bucket_name, 10) From 5bdd9521ef75bae26a984b39afaf7d56b1233242 Mon Sep 17 00:00:00 2001 From: jackzhhuang Date: Wed, 5 Apr 2023 11:24:40 +0800 Subject: [PATCH 5/5] no need to create bucket --- scripts/upload_file_any_size.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/upload_file_any_size.py b/scripts/upload_file_any_size.py index 4d98d952d9..801aa834c6 100644 --- a/scripts/upload_file_any_size.py +++ b/scripts/upload_file_any_size.py @@ -19,7 +19,6 @@ def upload_file(file_name, bucket, key): def upload_file_big_file(file_name, bucket, key): client = boto3.client('s3') try: - create_bucket(bucket) response = client.create_multipart_upload(Bucket = bucket, Key = key) upload_id = response["UploadId"] file_obj = open(file_name, "rb")