Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Move custom resources to Dataset Environment extension #647

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
211 changes: 0 additions & 211 deletions backend/dataall/core/environment/cdk/environment_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@
custom_resources as cr,
aws_s3 as s3,
aws_iam as iam,
aws_lambda as _lambda,
aws_lambda_destinations as lambda_destination,
aws_ssm as ssm,
aws_sns as sns,
aws_sqs as sqs,
aws_sns_subscriptions as sns_subs,
Expand All @@ -20,7 +17,6 @@
CfnOutput,
Stack,
Duration,
CustomResource,
Tags,
)

Expand Down Expand Up @@ -50,8 +46,6 @@ def extent(setup: 'EnvironmentSetup'):
class EnvironmentSetup(Stack):
"""Deploy common environment resources:
- default environment S3 Bucket
- Lambda + Provider for dataset Glue Databases custom resource
- Lambda + Provider for dataset Data Lake location custom resource
- SSM parameters for the Lambdas and Providers
- pivotRole (if configured)
- SNS topic (if subscriptions are enabled)
Expand Down Expand Up @@ -226,133 +220,6 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
)
self.create_athena_workgroups(self.environment_groups, default_environment_bucket)

kms_key = self.set_cr_kms_key(self.group_roles, self.default_role)

# Lakeformation default settings custom resource
# Set PivotRole as Lake Formation data lake admin
entry_point = str(
pathlib.PosixPath(os.path.dirname(__file__), '../../../core/environment/cdk/assets/lakeformationdefaultsettings').resolve()
)

lakeformation_cr_dlq = self.set_dlq(
f'{self._environment.resourcePrefix}-lfcr-{self._environment.environmentUri}',
kms_key
)
lf_default_settings_custom_resource = _lambda.Function(
self,
'LakeformationDefaultSettingsHandler',
function_name=f'{self._environment.resourcePrefix}-lf-settings-handler-{self._environment.environmentUri}',
role=self.pivot_role,
handler='index.on_event',
code=_lambda.Code.from_asset(entry_point),
memory_size=1664,
description='This Lambda function is a cloudformation custom resource provider for Lakeformation default settings',
timeout=Duration.seconds(5 * 60),
environment={
'envname': self._environment.name,
'LOG_LEVEL': 'DEBUG',
'AWS_ACCOUNT': self._environment.AwsAccountId,
'DEFAULT_ENV_ROLE_ARN': self._environment.EnvironmentDefaultIAMRoleArn,
'DEFAULT_CDK_ROLE_ARN': self._environment.CDKRoleArn,
},
dead_letter_queue_enabled=True,
dead_letter_queue=lakeformation_cr_dlq,
on_failure=lambda_destination.SqsDestination(lakeformation_cr_dlq),
runtime=_lambda.Runtime.PYTHON_3_9,
)
LakeformationDefaultSettingsProvider = cr.Provider(
self,
f'{self._environment.resourcePrefix}LakeformationDefaultSettingsProvider',
on_event_handler=lf_default_settings_custom_resource,
)

default_lf_settings = CustomResource(
self,
f'{self._environment.resourcePrefix}DefaultLakeFormationSettings',
service_token=LakeformationDefaultSettingsProvider.service_token,
resource_type='Custom::LakeformationDefaultSettings',
properties={
'DataLakeAdmins': [
f'arn:aws:iam::{self._environment.AwsAccountId}:role/{self.pivot_role_name}',
]
},
)

ssm.StringParameter(
self,
'LakeformationDefaultSettingsCustomeResourceFunctionArn',
string_value=lf_default_settings_custom_resource.function_arn,
parameter_name=f'/dataall/{self._environment.environmentUri}/cfn/lf/defaultsettings/lambda/arn',
)

ssm.StringParameter(
self,
'LakeformationDefaultSettingsCustomeResourceFunctionName',
string_value=lf_default_settings_custom_resource.function_name,
parameter_name=f'/dataall/{self._environment.environmentUri}/cfn/lf/defaultsettings/lambda/name',
)

# Glue database custom resource - New
# This Lambda is triggered with the creation of each dataset, it is not executed when the environment is created
entry_point = str(
pathlib.PosixPath(os.path.dirname(__file__), '../../../core/environment/cdk/assets/gluedatabasecustomresource').resolve()
)

gluedb_lf_cr_dlq = self.set_dlq(
f'{self._environment.resourcePrefix}-gluedb-lf-cr-{self._environment.environmentUri}',
kms_key
)
gluedb_lf_custom_resource = _lambda.Function(
self,
'GlueDatabaseLFCustomResourceHandler',
function_name=f'{self._environment.resourcePrefix}-gluedb-lf-handler-{self._environment.environmentUri}',
role=self.pivot_role,
handler='index.on_event',
code=_lambda.Code.from_asset(entry_point),
memory_size=1664,
description='This Lambda function is a cloudformation custom resource provider for Glue database '
'as Cfn currently does not support the CreateTableDefaultPermissions parameter',
timeout=Duration.seconds(5 * 60),
environment={
'envname': self._environment.name,
'LOG_LEVEL': 'DEBUG',
'AWS_ACCOUNT': self._environment.AwsAccountId,
'DEFAULT_ENV_ROLE_ARN': self._environment.EnvironmentDefaultIAMRoleArn,
'DEFAULT_CDK_ROLE_ARN': self._environment.CDKRoleArn,
},
dead_letter_queue_enabled=True,
dead_letter_queue=gluedb_lf_cr_dlq,
on_failure=lambda_destination.SqsDestination(gluedb_lf_cr_dlq),
tracing=_lambda.Tracing.ACTIVE,
runtime=_lambda.Runtime.PYTHON_3_9,
)

glue_db_provider = cr.Provider(
self,
f'{self._environment.resourcePrefix}GlueDbCustomResourceProvider',
on_event_handler=gluedb_lf_custom_resource
)
ssm.StringParameter(
self,
'GlueLFCustomResourceFunctionArn',
string_value=gluedb_lf_custom_resource.function_arn,
parameter_name=f'/dataall/{self._environment.environmentUri}/cfn/custom-resources/gluehandler/lambda/arn',
)

ssm.StringParameter(
self,
'GlueLFCustomResourceFunctionName',
string_value=gluedb_lf_custom_resource.function_name,
parameter_name=f'/dataall/{self._environment.environmentUri}/cfn/custom-resources/gluehandler/lambda/name',
)

ssm.StringParameter(
self,
'GlueLFCustomResourceProviderServiceToken',
string_value=glue_db_provider.service_token,
parameter_name=f'/dataall/{self._environment.environmentUri}/cfn/custom-resources/gluehandler/provider/servicetoken',
)

# Create SNS topics for subscriptions
if self._environment.subscriptionsEnabled:
subscription_key_policy = iam.PolicyDocument(
Expand Down Expand Up @@ -671,81 +538,3 @@ def create_topic(self, construct_id, central_account, environment, kms_key):
)
)
return topic

def set_cr_kms_key(self, group_roles, default_role) -> kms.Key:
    """Create the KMS key used by the custom-resource queues of this environment.

    The key policy grants ``default_role`` and every role in ``group_roles``:
      - encrypt/decrypt/re-encrypt/data-key generation, restricted to requests
        made through SQS in the environment's region (``kms:ViaService``);
      - read-only key metadata actions (describe, list, get key policy).

    The environment's CDK role is registered as key administrator, key
    rotation is enabled and the removal policy is ``DESTROY``.

    :param group_roles: list of IAM principals for the environment groups
    :param default_role: IAM principal of the environment default role
    :return: the created ``kms.Key``
    """
    # Cryptographic usage is only allowed when the call is made via SQS.
    usage_statement = iam.PolicyStatement(
        actions=[
            "kms:Encrypt",
            "kms:Decrypt",
            "kms:ReEncrypt*",
            "kms:GenerateDataKey*",
        ],
        effect=iam.Effect.ALLOW,
        principals=[default_role] + group_roles,
        resources=["*"],
        conditions={
            "StringEquals": {
                "kms:ViaService": f"sqs.{self._environment.region}.amazonaws.com"
            }
        },
    )

    # Metadata access carries no service condition.
    metadata_statement = iam.PolicyStatement(
        actions=[
            "kms:DescribeKey",
            "kms:List*",
            "kms:GetKeyPolicy",
        ],
        effect=iam.Effect.ALLOW,
        principals=[default_role] + group_roles,
        resources=["*"],
    )

    cr_key_policy = iam.PolicyDocument(
        assign_sids=True,
        statements=[usage_statement, metadata_statement],
    )

    # The same identifier is used for both the construct id and the key alias.
    return kms.Key(
        self,
        f'dataall-environment-{self._environment.environmentUri}-cr-key',
        removal_policy=RemovalPolicy.DESTROY,
        alias=f'dataall-environment-{self._environment.environmentUri}-cr-key',
        enable_key_rotation=True,
        admins=[iam.ArnPrincipal(self._environment.CDKRoleArn)],
        policy=cr_key_policy,
    )

def set_dlq(self, queue_name, kms_key) -> sqs.Queue:
    """Create a KMS-encrypted SQS queue and deny non-TLS access to it.

    The queue keeps messages for 14 days, reuses data keys for 1 day and has
    removal policy ``DESTROY``. A resource policy statement denies every SQS
    action on the queue when ``aws:SecureTransport`` is ``false``.

    :param queue_name: physical queue name; ``<queue_name>-queue`` is used as
        the construct id
    :param kms_key: KMS key used as the queue's encryption master key
    :return: the created ``sqs.Queue``
    """
    queue = sqs.Queue(
        self,
        f'{queue_name}-queue',
        queue_name=f'{queue_name}',
        retention_period=Duration.days(14),
        encryption=sqs.QueueEncryption.KMS,
        encryption_master_key=kms_key,
        data_key_reuse=Duration.days(1),
        removal_policy=RemovalPolicy.DESTROY,
    )

    # Built after the queue exists because the statement targets its ARN.
    queue.add_to_resource_policy(
        iam.PolicyStatement(
            sid='Enforce TLS for all principals',
            effect=iam.Effect.DENY,
            principals=[iam.AnyPrincipal()],
            actions=['sqs:*'],
            resources=[queue.queue_arn],
            conditions={'Bool': {'aws:SecureTransport': 'false'}},
        )
    )
    return queue
2 changes: 2 additions & 0 deletions backend/dataall/modules/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,10 @@ def __init__(self):
import dataall.modules.datasets.cdk
from dataall.core.environment.cdk.environment_stack import EnvironmentSetup
from dataall.modules.datasets.cdk.dataset_glue_profiler_extension import DatasetGlueProfilerExtension
from dataall.modules.datasets.cdk.dataset_custom_resources_extension import DatasetCustomResourcesExtension

EnvironmentSetup.register(DatasetGlueProfilerExtension)
EnvironmentSetup.register(DatasetCustomResourcesExtension)

log.info("Dataset stacks have been imported")

Expand Down
1 change: 0 additions & 1 deletion backend/dataall/modules/datasets/api/dataset/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,3 @@ class Topic(GraphQLEnumMapper):
Energy = 'Energy'
Customers = 'Customers'
Misc = 'Misc'

Loading