test_helper.py improvements (#442)
- Don't skip work if the bucket already exists; just upload everything every time you run `init`.
    - BECAUSE: This lets you add new objects to this script without obliterating all your buckets first.
- Don't try to print exceptions as strings; just let Python print the stack trace (see the sketch after this list).
    - BECAUSE: I was getting MemoryErrors because Python couldn't allocate a 2GiB buffer, but MemoryError prints as an empty string, so I had no idea what was going wrong.
- Always print() **before** doing an S3 call, instead of printing **after** the call.
    - BECAUSE: It makes the cause of an error, and the cause of long delays, more obvious.
- Remove the error handling that would delete all your buckets if anything went wrong during `init`.
    - BECAUSE: If you try to add a new file, and something goes wrong, and the script deletes ALL of the team's buckets in response, all kinds of CI will start failing. If you really want to clean up, just run the script again with `clean`.
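
To illustrate the exception-printing point: a bare MemoryError stringifies to an empty string, so interpolating it into a log message can hide the cause entirely, while re-raising lets Python print the exception type and the full traceback. A minimal sketch (illustrative, not part of this commit):

    try:
        raise MemoryError()  # e.g. boto3 failing to allocate a 2GiB buffer
    except MemoryError as e:
        print(f"failed with exception: {e}")  # prints "failed with exception: " -- the cause is invisible
        raise  # re-raising instead makes Python print "MemoryError" plus the traceback
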
graebm authored Jun 21, 2024
1 parent a549581 commit cb431ba
Showing 1 changed file with 117 additions and 127 deletions.
244 changes: 117 additions & 127 deletions tests/test_helper/test_helper.py
@@ -9,8 +9,6 @@
 import os
 import random
 
-print(boto3.__version__)
-
 REGION = 'us-west-2'
 REGION_EAST_1 = 'us-east-1'
 s3 = boto3.resource('s3')
@@ -70,9 +68,9 @@ def create_bytes(size):
 def put_pre_existing_objects(size_or_body, keyname, bucket=BUCKET_NAME_BASE,
                              sse=None, public_read=False, content_type=None,
                              client=s3_client):
+    print(f"s3://{bucket}/{keyname} - Uploading...")
     if size_or_body == 0:
         client.put_object(Bucket=bucket, Key=keyname)
-        print(f"Object {keyname} uploaded")
         return
 
     if isinstance(size_or_body, int):
@@ -100,159 +98,151 @@ def put_pre_existing_objects(size_or_body, keyname, bucket=BUCKET_NAME_BASE,
     try:
         client.put_object(**args)
     except botocore.exceptions.ClientError as e:
-        print(f"Object {keyname} failed to upload, with exception: {e}")
         if public_read and e.response['Error']['Code'] == 'AccessDenied':
             print("Check your account level S3 settings, public access may be blocked.")
-        exit(-1)
-    print(f"Object {keyname} uploaded")
+        raise
 
 
+# Calls client.create_bucket(**kwargs)
+# Suppresses "BucketAlreadyExists" errors
+def create_bucket(client, **kwargs):
+    try:
+        print(f"s3://{kwargs['Bucket']} - Creating bucket...")
+        client.create_bucket(**kwargs)
+
+    except botocore.exceptions.ClientError as e:
+        if e.response['Error']['Code'] in ('BucketAlreadyOwnedByYou', 'BucketAlreadyExists'):
+            return
+
+        raise e
+
+
 def create_bucket_with_lifecycle(availability_zone=None, client=s3_client):
-    try:
-        # Create the bucket. This returns an error if the bucket already exists.
-
-        if availability_zone is not None:
-            bucket_config = {
-                'Location': {
-                    'Type': 'AvailabilityZone',
-                    'Name': availability_zone
-                },
-                'Bucket': {
-                    'Type': 'Directory',
-                    'DataRedundancy': 'SingleAvailabilityZone'
-                }
-            }
-            bucket_name = BUCKET_NAME_BASE+f"--{availability_zone}--x-s3"
-        else:
-            bucket_config = {'LocationConstraint': REGION}
-            bucket_name = BUCKET_NAME_BASE
+    if availability_zone is not None:
+        bucket_config = {
+            'Location': {
+                'Type': 'AvailabilityZone',
+                'Name': availability_zone
+            },
+            'Bucket': {
+                'Type': 'Directory',
+                'DataRedundancy': 'SingleAvailabilityZone'
+            }
+        }
+        bucket_name = BUCKET_NAME_BASE+f"--{availability_zone}--x-s3"
+    else:
+        bucket_config = {'LocationConstraint': REGION}
+        bucket_name = BUCKET_NAME_BASE
 
-        client.create_bucket(
-            Bucket=bucket_name, CreateBucketConfiguration=bucket_config)
-        if availability_zone is None:
-            client.put_bucket_lifecycle_configuration(
-                Bucket=bucket_name,
-                LifecycleConfiguration={
-                    'Rules': [
-                        {
-                            'ID': 'clean up non-pre-existing objects',
-                            'Expiration': {
-                                'Days': 1,
-                            },
-                            'Filter': {
-                                'Prefix': 'upload/',
-                            },
-                            'Status': 'Enabled',
-                            'NoncurrentVersionExpiration': {
-                                'NoncurrentDays': 1,
-                            },
-                            'AbortIncompleteMultipartUpload': {
-                                'DaysAfterInitiation': 1,
-                            },
-                        }
-                    ],
-                },
-            )
-        print(f"Bucket {bucket_name} created", file=sys.stderr)
+    create_bucket(client,
+                  Bucket=bucket_name,
+                  CreateBucketConfiguration=bucket_config)
+    if availability_zone is None:
+        print(f"s3://{bucket_name} - Configuring bucket...")
+        client.put_bucket_lifecycle_configuration(
+            Bucket=bucket_name,
+            LifecycleConfiguration={
+                'Rules': [
+                    {
+                        'ID': 'clean up non-pre-existing objects',
+                        'Expiration': {
+                            'Days': 1,
+                        },
+                        'Filter': {
+                            'Prefix': 'upload/',
+                        },
+                        'Status': 'Enabled',
+                        'NoncurrentVersionExpiration': {
+                            'NoncurrentDays': 1,
+                        },
+                        'AbortIncompleteMultipartUpload': {
+                            'DaysAfterInitiation': 1,
+                        },
+                    },
+                ],
+            },
+        )
 
-        put_pre_existing_objects(
-            10*MB, 'pre-existing-10MB', bucket=bucket_name, client=client)
+    put_pre_existing_objects(
+        10*MB, 'pre-existing-10MB', bucket=bucket_name, client=client)
 
-        if availability_zone is None:
-            put_pre_existing_objects(
-                10*MB, 'pre-existing-10MB-aes256-c', sse='aes256-c', bucket=bucket_name)
-            put_pre_existing_objects(
-                10*MB, 'pre-existing-10MB-aes256', sse='aes256', bucket=bucket_name)
-            put_pre_existing_objects(
-                10*MB, 'pre-existing-10MB-kms', sse='kms', bucket=bucket_name)
-            put_pre_existing_objects(
-                1*MB, 'pre-existing-1MB', bucket=bucket_name)
-            put_pre_existing_objects(
-                1*MB, 'pre-existing-1MB-@', bucket=bucket_name)
-            put_pre_existing_objects(
-                0, 'pre-existing-empty', bucket=bucket_name)
-            put_pre_existing_objects(
-                ASYNC_ERROR_XML, 'pre-existing-async-error-xml', bucket=bucket_name, content_type='application/xml')
-            if args.large_objects:
-                put_pre_existing_objects(
-                    256*MB, 'pre-existing-256MB', bucket=bucket_name)
-                put_pre_existing_objects(
-                    256*MB, 'pre-existing-256MB-@', bucket=bucket_name)
-                put_pre_existing_objects(
-                    2*GB, 'pre-existing-2GB', bucket=bucket_name)
-                put_pre_existing_objects(
-                    2*GB, 'pre-existing-2GB-@', bucket=bucket_name)
-
-    except botocore.exceptions.ClientError as e:
-        # The bucket already exists. That's fine.
-        if e.response['Error']['Code'] == 'BucketAlreadyOwnedByYou' or e.response['Error']['Code'] == 'BucketAlreadyExists':
-            print(
-                f"Bucket {bucket_name} not created, skip initializing.", file=sys.stderr)
-            return
-        raise e
+    if availability_zone is None:
+        put_pre_existing_objects(
+            10*MB, 'pre-existing-10MB-aes256-c', sse='aes256-c', bucket=bucket_name)
+        put_pre_existing_objects(
+            10*MB, 'pre-existing-10MB-aes256', sse='aes256', bucket=bucket_name)
+        put_pre_existing_objects(
+            10*MB, 'pre-existing-10MB-kms', sse='kms', bucket=bucket_name)
+        put_pre_existing_objects(
+            1*MB, 'pre-existing-1MB', bucket=bucket_name)
+        put_pre_existing_objects(
+            1*MB, 'pre-existing-1MB-@', bucket=bucket_name)
+        put_pre_existing_objects(
+            0, 'pre-existing-empty', bucket=bucket_name)
+        put_pre_existing_objects(
+            ASYNC_ERROR_XML, 'pre-existing-async-error-xml', bucket=bucket_name, content_type='application/xml')
+        if args.large_objects:
+            put_pre_existing_objects(
+                256*MB, 'pre-existing-256MB', bucket=bucket_name)
+            put_pre_existing_objects(
+                256*MB, 'pre-existing-256MB-@', bucket=bucket_name)
+            put_pre_existing_objects(
+                2*GB, 'pre-existing-2GB', bucket=bucket_name)
+            put_pre_existing_objects(
+                2*GB, 'pre-existing-2GB-@', bucket=bucket_name)
+        else:
+            print("Skipping large objects, run with --large_objects if you need these.")


 def create_bucket_with_public_object():
-    try:
-        s3_client.create_bucket(Bucket=PUBLIC_BUCKET_NAME,
-                                CreateBucketConfiguration={
-                                    'LocationConstraint': REGION},
-                                ObjectOwnership='ObjectWriter'
-                                )
-        s3_client.put_public_access_block(
-            Bucket=PUBLIC_BUCKET_NAME,
-            PublicAccessBlockConfiguration={
-                'BlockPublicAcls': False,
-            }
-        )
-        print(f"Bucket {PUBLIC_BUCKET_NAME} created", file=sys.stderr)
-        put_pre_existing_objects(
-            1*MB, 'pre-existing-1MB', bucket=PUBLIC_BUCKET_NAME, public_read=True)
-    except botocore.exceptions.ClientError as e:
-        # The bucket already exists. That's fine.
-        if e.response['Error']['Code'] == 'BucketAlreadyOwnedByYou' or e.response['Error']['Code'] == 'BucketAlreadyExists':
-            print(
-                f"Bucket {PUBLIC_BUCKET_NAME} not created, skip initializing.", file=sys.stderr)
-            return
-        raise e
+    create_bucket(s3_client,
+                  Bucket=PUBLIC_BUCKET_NAME,
+                  CreateBucketConfiguration={'LocationConstraint': REGION},
+                  ObjectOwnership='ObjectWriter'
+                  )
+    print(f"s3://{PUBLIC_BUCKET_NAME} - Configuring bucket...")
+    s3_client.put_public_access_block(
+        Bucket=PUBLIC_BUCKET_NAME,
+        PublicAccessBlockConfiguration={
+            'BlockPublicAcls': False,
+        }
+    )
+
+    put_pre_existing_objects(
+        1*MB, 'pre-existing-1MB', bucket=PUBLIC_BUCKET_NAME, public_read=True)


 def cleanup(bucket_name, availability_zone=None, client=s3_client):
     if availability_zone is not None:
         bucket_name = bucket_name+f"--{availability_zone}--x-s3"
 
-    objects = client.list_objects_v2(Bucket=bucket_name)["Contents"]
+    print(f"s3://{bucket_name}/* - Listing objects...")
+    try:
+        objects = client.list_objects_v2(Bucket=bucket_name)["Contents"]
+    except botocore.exceptions.ClientError as e:
+        if e.response['Error']['Code'] == 'NoSuchBucket':
+            print(f"s3://{bucket_name} - Did not exist. Moving on...")
+            return
+
     objects = list(map(lambda x: {"Key": x["Key"]}, objects))
-    client.delete_objects(Bucket=bucket_name, Delete={"Objects": objects})
+    if objects:
+        print(f"s3://{bucket_name}/* - Deleting {len(objects)} objects...")
+        client.delete_objects(Bucket=bucket_name, Delete={"Objects": objects})
+    print(f"s3://{bucket_name} - Deleting bucket...")
     client.delete_bucket(Bucket=bucket_name)
-    print(f"Bucket {bucket_name} deleted", file=sys.stderr)


 if args.action == 'init':
-    try:
-        print(BUCKET_NAME_BASE + " " + PUBLIC_BUCKET_NAME + " initializing...")
-        create_bucket_with_lifecycle("use1-az4", s3_client_east1)
-        create_bucket_with_lifecycle("usw2-az1")
-        create_bucket_with_lifecycle()
-        create_bucket_with_public_object()
-        if os.environ.get('CRT_S3_TEST_BUCKET_NAME') != BUCKET_NAME_BASE:
-            print(
-                f"* Please set the environment variable $CRT_S3_TEST_BUCKET_NAME to {BUCKET_NAME_BASE} before running the tests.")
-    except Exception as e:
-        print(e)
-        try:
-            # Try to clean up the bucket created, when initialization failed.
-            cleanup(BUCKET_NAME_BASE, "use1-az4", s3_client_east1)
-            cleanup(BUCKET_NAME_BASE, "usw2-az1")
-            cleanup(BUCKET_NAME_BASE)
-            cleanup(PUBLIC_BUCKET_NAME)
-        except Exception as e2:
-            exit(-1)
-        exit(-1)
+    create_bucket_with_lifecycle("use1-az4", s3_client_east1)
+    create_bucket_with_lifecycle("usw2-az1")
+    create_bucket_with_lifecycle()
+    create_bucket_with_public_object()
+    if os.environ.get('CRT_S3_TEST_BUCKET_NAME') != BUCKET_NAME_BASE:
+        print(
+            f"*** Set the environment variable $CRT_S3_TEST_BUCKET_NAME to {BUCKET_NAME_BASE} before running the tests ***")
 
 elif args.action == 'clean':
     if "CRT_S3_TEST_BUCKET_NAME" not in os.environ and args.bucket_name is None:
-        print("Set the environment variable CRT_S3_TEST_BUCKET_NAME before clean up, or pass in bucket_name as argument.")
-        exit(-1)
+        exit("Set the environment variable CRT_S3_TEST_BUCKET_NAME before clean up, or pass in bucket_name as argument.")
     cleanup(BUCKET_NAME_BASE, "use1-az4", s3_client_east1)
     cleanup(BUCKET_NAME_BASE, "usw2-az1")
     cleanup(BUCKET_NAME_BASE)
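
Note: the diff references args.action, args.bucket_name, and args.large_objects, which come from the script's argument parser earlier in the file (outside this diff). A hedged sketch of what that parser plausibly looks like, with names inferred from usage rather than confirmed by this commit:

    import argparse

    parser = argparse.ArgumentParser(
        description="Create or clean up the S3 buckets/objects used by the tests.")
    parser.add_argument('action', choices=('init', 'clean'))  # drives the if/elif at the bottom of the script
    parser.add_argument('--bucket_name')  # inferred: overrides CRT_S3_TEST_BUCKET_NAME during 'clean'
    parser.add_argument('--large_objects', action='store_true')  # inferred: opt in to the 256MB/2GB uploads
    args = parser.parse_args()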
