Add integration test role in Environment stack + session in conftest + aws clients for dataset
dlpzx committed Jul 2, 2024
1 parent 45d1407 commit 54d5ff8
Showing 7 changed files with 284 additions and 21 deletions.
28 changes: 28 additions & 0 deletions backend/dataall/core/environment/cdk/environment_stack.py
@@ -138,6 +138,9 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
        self.environment_admins_group: EnvironmentGroup = self.get_environment_admins_group(
            self.engine, self._environment
        )
        # Create test role for integration tests
        if os.getenv('INTEGRATION_TESTS', None) == 'True':
            self.create_integration_tests_role()

        # Create or import Pivot role
        if self.create_pivot_role is True:
@@ -559,3 +562,28 @@ def create_topic(self, construct_id, central_account, environment, kms_key):
                )
            )
        return topic

    def create_integration_tests_role(self):
        self.test_role = iam.Role(
            self,
            'IntegrationTestRole',
            role_name='dataall-integration-tests-role',
            assumed_by=iam.AccountPrincipal(os.getenv('TOOLING_ACCOUNT')),
        )
        # add_to_policy takes a single statement, so each statement is attached separately
        self.test_role.add_to_policy(
            iam.PolicyStatement(
                actions=['s3:CreateBucket', 's3:DeleteBucket'],
                effect=iam.Effect.ALLOW,
                resources=['*'],
            )
        )
        self.test_role.add_to_policy(
            iam.PolicyStatement(
                actions=['glue:CreateDatabase', 'glue:DeleteDatabase'],
                effect=iam.Effect.ALLOW,
                resources=['*'],
            )
        )
        self.test_role.add_to_policy(
            iam.PolicyStatement(
                # kms:DeleteKey is not a valid KMS action; the test client schedules
                # key deletion and also needs alias and key-policy access
                actions=[
                    'kms:CreateKey',
                    'kms:CreateAlias',
                    'kms:ListAliases',
                    'kms:GetKeyPolicy',
                    'kms:PutKeyPolicy',
                    'kms:ScheduleKeyDeletion',
                ],
                effect=iam.Effect.ALLOW,
                resources=['*'],
            )
        )
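
For reference, iam.AccountPrincipal gives the role a trust policy that only lets principals in the tooling account assume it. A sketch of the statement CDK synthesizes (111122223333 stands in for the TOOLING_ACCOUNT value):

# Sketch of the synthesized trust policy; the account ID is a placeholder.
trust_policy = {
    'Version': '2012-10-17',
    'Statement': [
        {
            'Effect': 'Allow',
            'Principal': {'AWS': 'arn:aws:iam::111122223333:root'},
            'Action': 'sts:AssumeRole',
        }
    ],
}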
2 changes: 2 additions & 0 deletions deploy/stacks/pipeline.py
@@ -687,6 +687,8 @@ def set_approval_tests_stage(
                f'export TESTDATA=$(aws ssm get-parameter --name /dataall/{target_env["envname"]}/testdata --profile buildprofile --output text --query "Parameter.Value")',
                f'export ENVNAME={target_env["envname"]}',
                f'export AWS_REGION={target_env["region"]}',
                'export INTEGRATION_TESTS=True',
                f'export TOOLING_ACCOUNT={self.account}',
                f'aws codeartifact login --tool pip --repository {self.codeartifact.codeartifact_pip_repo_name} --domain {self.codeartifact.codeartifact_domain_name} --domain-owner {self.codeartifact.domain.attr_owner}',
                'python -m venv env',
                '. env/bin/activate',
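The two new exports feed the synth-time switch in environment_stack.py above. A minimal sketch of reproducing the switch locally before synthesizing the environment stack (the account ID is a placeholder):

import os

# Placeholders mirroring the pipeline's exports; with these set, the
# environment stack synthesizes the integration-tests role.
os.environ['INTEGRATION_TESTS'] = 'True'
os.environ['TOOLING_ACCOUNT'] = '111122223333'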
20 changes: 19 additions & 1 deletion tests_new/integration_tests/core/environment/global_conftest.py
@@ -1,6 +1,6 @@
import logging

import pytest
import boto3

from integration_tests.client import GqlError
from integration_tests.core.environment.queries import (
@@ -50,6 +50,24 @@ def session_env1(client1, group1, org1, session_id, testdata):
        delete_env(client1, env)


@pytest.fixture(scope='session')
def session_env1_aws_client(session_env1, session_id):
    try:
        base_session = boto3.Session()
        role_arn = f'arn:aws:iam::{session_env1.AwsAccountId}:role/dataall-integration-tests-role'
        response = base_session.client('sts', region_name=session_env1.region).assume_role(
            RoleArn=role_arn, RoleSessionName=role_arn.split('/')[1]
        )
        yield boto3.Session(
            aws_access_key_id=response['Credentials']['AccessKeyId'],
            aws_secret_access_key=response['Credentials']['SecretAccessKey'],
            aws_session_token=response['Credentials']['SessionToken'],
        )
    except Exception:
        log.exception('Failed to assume environment integration test role')
        raise


@pytest.fixture(scope='session')
def session_env2(client1, group1, org1, session_id, testdata):
    envdata = testdata.envs['session_env2']
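A hypothetical test consuming the new fixture could look like the sketch below (the test name and bucket prefix are illustrative, and it assumes create_bucket returns the bucket name as in aws_clients.py further down):

from integration_tests.modules.s3_datasets.aws_clients import S3Client


def test_assumed_role_can_manage_buckets(session_env1_aws_client, session_env1, session_id):
    # The yielded boto3.Session already carries the assumed-role credentials.
    s3 = S3Client(session=session_env1_aws_client, region=session_env1['region'])
    assert s3.create_bucket(f'dataall-it-{session_id}') is not None
    s3.delete_bucket(f'dataall-it-{session_id}')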
@@ -1,5 +1,4 @@
# TODO: This file will be replaced by using the SDK directly
from backend.dataall.modules.datasets_base.services.datasets_enums import ConfidentialityClassification

DATASET_BASE_TYPE = """
datasetUri
169 changes: 169 additions & 0 deletions tests_new/integration_tests/modules/s3_datasets/aws_clients.py
@@ -0,0 +1,169 @@
import logging
import json
from botocore.exceptions import ClientError

log = logging.getLogger(__name__)


class S3Client:
    def __init__(self, session, region):
        self._client = session.client('s3', region_name=region)
        self._resource = session.resource('s3', region_name=region)
        self._region = region

    def create_bucket(self, bucket_name, kms_key_id=None):
        """
        Create an S3 bucket with default encryption.
        :param bucket_name: Name of the S3 bucket to be created
        :param kms_key_id: KMS key ID to encrypt with; if omitted, SSE-S3 (AES256) is used
        :return: Name of the created bucket, or None on failure
        """
        encryption_type = 'aws:kms' if kms_key_id else 'AES256'
        encryption_config = (
            {'SSEAlgorithm': encryption_type, 'KMSMasterKeyID': kms_key_id}
            if encryption_type == 'aws:kms'
            else {'SSEAlgorithm': encryption_type}
        )

        try:
            # us-east-1 is the default region and rejects an explicit LocationConstraint
            if self._region == 'us-east-1':
                self._client.create_bucket(ACL='private', Bucket=bucket_name)
            else:
                create_bucket_config = {'LocationConstraint': self._region}
                self._client.create_bucket(
                    ACL='private', Bucket=bucket_name, CreateBucketConfiguration=create_bucket_config
                )

            self._client.put_bucket_encryption(
                Bucket=bucket_name,
                ServerSideEncryptionConfiguration={
                    'Rules': [
                        {'ApplyServerSideEncryptionByDefault': encryption_config, 'BucketKeyEnabled': False},
                    ]
                },
            )
            return bucket_name
        except ClientError as e:
            log.exception(f'Error creating S3 bucket: {e}')
            return None

    def delete_bucket(self, bucket_name):
        """
        Delete an S3 bucket after emptying it.
        :param bucket_name: Name of the S3 bucket to be deleted
        :return: None
        """
        try:
            # Delete all objects (and versions, if versioning is enabled) before deleting the bucket
            bucket = self._resource.Bucket(bucket_name)
            bucket_versioning = self._resource.BucketVersioning(bucket_name)
            if bucket_versioning.status == 'Enabled':
                bucket.object_versions.delete()
            else:
                bucket.objects.all().delete()
            self._client.delete_bucket(Bucket=bucket_name)
        except ClientError as e:
            log.exception(f'Error deleting S3 bucket: {e}')


class KMSClient:
    def __init__(self, session, account_id, region):
        self._client = session.client('kms', region_name=region)
        self._account_id = account_id

    def create_key_with_alias(self, alias_name):
        try:
            response = self._client.create_key()
            key_id = response['KeyMetadata']['KeyId']
            self._client.create_alias(AliasName=f'alias/{alias_name}', TargetKeyId=key_id)
            self._put_key_policy(key_id)
            return key_id
        except ClientError as e:
            log.exception(f'Error creating KMS key with alias: {e}')
            return None

    def _put_key_policy(self, key_id):
        response = self._client.get_key_policy(KeyId=key_id, PolicyName='default')
        policy = json.loads(response['Policy'])
        # The updated policy replaces the existing policy. Add a new statement to
        # the list along with the original policy statements.
        principal = f'arn:aws:iam::{self._account_id}:role/dataallPivotRole-cdk'
        policy['Statement'].append(
            {
                'Sid': 'Allow access for PivotRole',
                'Effect': 'Allow',
                'Principal': {'AWS': principal},
                'Action': [
                    'kms:Decrypt',
                    'kms:Encrypt',
                    'kms:GenerateDataKey*',
                    'kms:PutKeyPolicy',
                    'kms:GetKeyPolicy',
                    'kms:ReEncrypt*',
                    'kms:TagResource',
                    'kms:UntagResource',
                ],
                'Resource': '*',
            }
        )
        try:
            self._client.put_key_policy(KeyId=key_id, PolicyName='default', Policy=json.dumps(policy))
        except ClientError as err:
            log.exception(
                "Couldn't set policy for key %s. Here's why: %s",
                key_id,
                err.response['Error']['Message'],
            )

    def delete_key_by_alias(self, alias_name):
        """
        Delete a KMS key by its alias.
        :param alias_name: Alias of the KMS key to be deleted
        :return: None
        """
        try:
            key_id = self._get_key_by_alias(alias_name)
            if key_id:
                # Keys cannot be removed immediately; schedule the key for deletion
                self._client.schedule_key_deletion(KeyId=key_id)
        except ClientError as e:
            log.exception(f'Error deleting KMS key by alias: {e}')

    def _get_key_by_alias(self, alias_name):
        try:
            response = self._client.list_aliases()
            aliases = response.get('Aliases', [])
            for alias in aliases:
                if alias['AliasName'] == f'alias/{alias_name}':
                    return alias['TargetKeyId']
            return None
        except ClientError as e:
            log.exception(f'Error getting KMS key by alias: {e}')
            return None


class GlueClient:
    def __init__(self, session, region):
        self._client = session.client('glue', region_name=region)

    def create_database(self, database_name, bucket):
        try:
            self._client.create_database(DatabaseInput={'Name': database_name, 'LocationUri': f's3://{bucket}/'})
        except ClientError as e:
            log.exception(f'Error creating Glue database: {e}')

    def delete_database(self, database_name):
        """
        Delete a Glue database.
        :param database_name: Name of the Glue database to be deleted
        :return: None
        """
        try:
            self._client.delete_database(Name=database_name)
        except ClientError as e:
            if e.response['Error']['Code'] == 'EntityNotFoundException':
                # Not an error during cleanup: the database was never created or is already gone
                log.warning(f"Glue database '{database_name}' does not exist.")
            else:
                log.exception(f'Error deleting Glue database: {e}')
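
One caveat in _get_key_by_alias: list_aliases returns paged results (50 per page by default), so a key could be missed in accounts with many aliases. A paginator-based variant, sketched here rather than part of the commit, avoids that:

def _get_key_by_alias(self, alias_name):
    # Walk every page so aliases beyond the first page are found too.
    paginator = self._client.get_paginator('list_aliases')
    for page in paginator.paginate():
        for alias in page.get('Aliases', []):
            if alias['AliasName'] == f'alias/{alias_name}':
                return alias['TargetKeyId']
    return None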
42 changes: 34 additions & 8 deletions tests_new/integration_tests/modules/s3_datasets/global_conftest.py
@@ -13,6 +13,8 @@
)
from tests_new.integration_tests.modules.datasets_base.queries import list_datasets

from integration_tests.modules.s3_datasets.aws_clients import S3Client, KMSClient, GlueClient

log = logging.getLogger(__name__)


@@ -102,11 +104,17 @@ def session_s3_dataset1(client1, group1, org1, session_env1, session_id, testdat


@pytest.fixture(scope='session')
def session_imported_sse_s3_dataset1(
    client1, group1, org1, session_env1, session_id, testdata, session_env1_aws_client
):
    envdata = testdata.datasets['session_imported_sse_s3_dataset1']
    ds = None
    bucket = None
    try:
        bucket = S3Client(session=session_env1_aws_client, region=session_env1['region']).create_bucket(
            bucket_name=f'{envdata.bucket}{session_id}', kms_key_id=None
        )

        ds = import_s3_dataset(
            client1,
            owner='someone',
@@ -115,20 +123,31 @@ def session_imported_sse_s3_dataset1(client1, group1, org1, session_env1, sessio
            env_uri=session_env1['environmentUri'],
            dataset_name=envdata.name,
            tags=[session_id],
            bucket=f'{envdata.bucket}{session_id}',
        )
        yield ds
    finally:
        if ds:
            delete_s3_dataset(client1, session_env1['environmentUri'], ds)
        S3Client(session=session_env1_aws_client, region=session_env1['region']).delete_bucket(
            f'{envdata.bucket}{session_id}'
        )


@pytest.fixture(scope='session')
def session_imported_kms_s3_dataset1(
    client1, group1, org1, session_env1, session_id, testdata, session_env1_aws_client
):
    envdata = testdata.datasets['session_imported_kms_s3_dataset1']
    ds = None
    try:
        kms_key_id = KMSClient(
            session=session_env1_aws_client, account_id=session_env1['AwsAccountId'], region=session_env1['region']
        ).create_key_with_alias(f'{envdata.kmsAlias}{session_id}')
        S3Client(session=session_env1_aws_client, region=session_env1['region']).create_bucket(
            bucket_name=f'{envdata.bucket}{session_id}', kms_key_id=kms_key_id
        )
        GlueClient(session=session_env1_aws_client, region=session_env1['region']).create_database(
            database_name=f'{envdata.glueDatabaseName}{session_id}', bucket=f'{envdata.bucket}{session_id}'
        )
        ds = import_s3_dataset(
            client1,
            owner='someone',
@@ -137,14 +156,21 @@ def session_imported_kms_s3_dataset1(client1, group1, org1, session_env1, sessio
            env_uri=session_env1['environmentUri'],
            dataset_name=envdata.name,
            tags=[session_id],
            bucket=f'{envdata.bucket}{session_id}',
            kms_alias=f'{envdata.kmsAlias}{session_id}',
            glue_db_name=f'{envdata.glueDatabaseName}{session_id}',
        )
        yield ds
    finally:
        if ds:
            delete_s3_dataset(client1, session_env1['environmentUri'], ds)
        S3Client(session=session_env1_aws_client, region=session_env1['region']).delete_bucket(
            f'{envdata.bucket}{session_id}'
        )
        KMSClient(
            session=session_env1_aws_client, account_id=session_env1['AwsAccountId'], region=session_env1['region']
        ).delete_key_by_alias(f'{envdata.kmsAlias}{session_id}')
        GlueClient(session=session_env1_aws_client, region=session_env1['region']).delete_database(
            f'{envdata.glueDatabaseName}{session_id}'
        )


"""