#150: Used multipart upload for VM images #154

Merged: 16 commits, Jan 31, 2024
Changes from 6 commits
1 change: 1 addition & 0 deletions doc/changes/changes_0.1.0.md
@@ -32,6 +32,7 @@ Version: 0.1.0
* #75: Changed default port of Jupyter server to 49494
* #145: Add Docker Test Library to prepare Notebook tests
* #255: Renamed data science sandbox to exasol-ai-lab
* #150: Used multipart upload for VM images

## Bug Fixes

22 changes: 21 additions & 1 deletion exasol/ds/sandbox/lib/aws_access/aws_access.py
@@ -1,5 +1,5 @@
from functools import wraps
from typing import Optional, Any, List, Dict, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple

import boto3
import botocore
@@ -419,6 +419,26 @@ def copy_s3_object(self, bucket: str, source: str, dest: str):
        copy_source = {'Bucket': bucket, 'Key': source}
        cloud_client.copy_object(Bucket=bucket, CopySource=copy_source, Key=dest)

    @_log_function_start
    def transfer_to_s3(
            self,
            bucket: str,
            source: str,
            dest: str,
            callback: Optional[Callable[[int], None]] = None,
    ):
        """
        Transfer a file to an AWS S3 bucket using an AWS transfer object.
        The transfer object performs a multipart upload, which allows
        transferring even files larger than 5 GB.

        The optional parameter :callback: is a function accepting an int.
        It is called periodically during the upload with the number of
        bytes transferred since its previous invocation.
        """
        cloud_client = self._get_aws_client("s3")
        config = boto3.s3.transfer.TransferConfig()
        cloud_client.upload_file(source, bucket, dest, Config=config, Callback=callback)

    @_log_function_start
    def delete_s3_object(self, bucket: str, source: str):
        """
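The managed transfer used by transfer_to_s3 reports progress through the optional callback. Below is a minimal usage sketch; the bucket and file names are hypothetical, and the callback follows boto3's Callback contract, which passes the number of bytes transferred since the previous invocation, so a progress reporter has to accumulate the total itself.

from exasol.ds.sandbox.lib.aws_access.aws_access import AwsAccess

transferred = 0

def report_progress(num_bytes: int) -> None:
    # boto3 passes the bytes moved since the last call, not a running total.
    global transferred
    transferred += num_bytes
    print(f"\rTransferred {transferred / 1024 / 1024:.1f} MB", end="", flush=True)

aws = AwsAccess(None)  # instantiated the same way as in the CI test below
aws.transfer_to_s3(
    bucket="my-vm-bucket",        # hypothetical bucket name
    source="exasol-ai-lab.vmdk",  # hypothetical local file
    dest="vm-images/exasol-ai-lab.vmdk",
    callback=report_progress,
)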
2 changes: 1 addition & 1 deletion exasol/ds/sandbox/lib/export_vm/rename_s3_objects.py
@@ -39,5 +39,5 @@ def rename_image_in_s3(aws_access: AwsAccess, export_image_task: ExportImageTask
                                    vm_image_format=vm_image_format)
    dest = build_image_destination(prefix=export_image_task.s3_prefix, asset_id=asset_id,
                                   vm_image_format=vm_image_format)
    aws_access.copy_s3_object(bucket=export_image_task.s3_bucket, source=source, dest=dest)
    aws_access.transfer_to_s3(bucket=export_image_task.s3_bucket, source=source, dest=dest)
    aws_access.delete_s3_object(bucket=export_image_task.s3_bucket, source=source)
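S3 offers no rename operation, so rename_image_in_s3 emulates one with a transfer followed by a delete. The copy_s3_object call replaced here relies on the plain CopyObject API, which is limited to objects of at most 5 GB. For a purely server-side move of larger objects, boto3 also offers a managed copy that switches to multipart copy automatically; the sketch below shows that alternative with hypothetical bucket and key names, and is not the approach taken in this PR.

import boto3

s3 = boto3.client("s3")

# boto3's managed copy transparently performs a multipart copy for large
# objects, so the data never has to be downloaded to the local machine.
s3.copy(
    CopySource={"Bucket": "my-vm-bucket", "Key": "old/image.vmdk"},
    Bucket="my-vm-bucket",
    Key="new/image.vmdk",
)
s3.delete_object(Bucket="my-vm-bucket", Key="old/image.vmdk")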
49 changes: 49 additions & 0 deletions test/codebuild/test_ci_s3_transfer_multipart.py
@@ -0,0 +1,49 @@
import os
import pytest

from exasol.ds.sandbox.lib.asset_id import AssetId
from exasol.ds.sandbox.lib.aws_access.aws_access import AwsAccess
from exasol.ds.sandbox.lib.vm_bucket.vm_dss_bucket import find_vm_bucket

# from dataclasses import dataclass
#
# @dataclass
# class Progress:
#     bytes: int = 0
#
#     def report(self, bytes: int):
#         self.bytes += bytes
#         display = round(self.bytes / 1024 / 1024)
#         print(f'\rTransferred {display} MB ...', flush=True, end="")


@pytest.fixture
def sample_file(tmp_path):
    """
    Create a sample file of size 6 MB for transfer to S3 bucket.
    """
    file = tmp_path / "sample-file.txt"
    one_kb = "123456789 " * 102 + "1234"
    file.write_text(one_kb * 1024 * 6)
    yield file
    file.unlink()


@pytest.mark.skipif(os.environ.get('DSS_RUN_CI_TEST') != 'true',
                    reason="CI test needs to be activated by env variable DSS_RUN_CI_TEST")
def test_s3_transfer_multipart(sample_file):
    aws = AwsAccess(None)
    source = sample_file
    bucket = find_vm_bucket(aws)
    s3_key = f"{AssetId.BUCKET_PREFIX}-itest-sample-file"
    # progress = Progress()
    # print("")
    try:
        aws.transfer_to_s3(
            bucket=bucket,
            source=source,
            dest=s3_key,
            # callback=progress.report,
        )
    finally:
        aws.delete_s3_object(bucket, s3_key)
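Note that the test uploads a 6 MB file while boto3's default TransferConfig only switches to multipart above a threshold of 8 MB, so a file of this size is sent in a single PUT request by default. To force the multipart code path even for small test files, one could pass a tuned configuration; the following sketch uses direct boto3 calls and hypothetical names, since transfer_to_s3 currently sticks to the default config.

import boto3
from boto3.s3.transfer import TransferConfig

# 5 MB is the smallest part size S3 accepts for all parts but the last.
config = TransferConfig(
    multipart_threshold=5 * 1024 * 1024,
    multipart_chunksize=5 * 1024 * 1024,
)
s3 = boto3.client("s3")
s3.upload_file("sample-file.txt", "my-test-bucket", "itest-sample-file", Config=config)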