diff --git a/backend/docker/prod/ecs/Dockerfile b/backend/docker/prod/ecs/Dockerfile index b272902af..1ab2be91d 100644 --- a/backend/docker/prod/ecs/Dockerfile +++ b/backend/docker/prod/ecs/Dockerfile @@ -49,12 +49,13 @@ RUN /bin/bash -c "pip3.8 install -r /dh.requirements.txt" RUN /bin/bash -c "pip3.8 install -r /cdk.requirements.txt" ADD backend/dataall /dataall +VOLUME ["/dataall"] ADD backend/blueprints /blueprints ADD backend/cdkproxymain.py /cdkproxymain.py RUN mkdir -p dataall/cdkproxy/assets/glueprofilingjob/jars RUN mkdir -p blueprints/ml_data_pipeline/engine/glue/jars -RUN curl https://repo1.maven.org/maven2/com/amazon/deequ/deequ/$DEEQU_VERSION/deequ-$DEEQU_VERSION.jar --output dataall/cdkproxy/assets/glueprofilingjob/jars/deequ-$DEEQU_VERSION.jar +ADD https://repo1.maven.org/maven2/com/amazon/deequ/deequ/$DEEQU_VERSION/deequ-$DEEQU_VERSION.jar /dataall/cdkproxy/assets/glueprofilingjob/jars/ RUN cp -f dataall/cdkproxy/assets/glueprofilingjob/jars/deequ-$DEEQU_VERSION.jar blueprints/ml_data_pipeline/engine/glue/jars/deequ-$DEEQU_VERSION.jar WORKDIR / diff --git a/deploy/stacks/backend_stack.py b/deploy/stacks/backend_stack.py index 78b920482..ee88405d4 100644 --- a/deploy/stacks/backend_stack.py +++ b/deploy/stacks/backend_stack.py @@ -277,7 +277,7 @@ def __init__( ], database=aurora_stack.cluster.cluster_identifier, ecs_cluster=self.ecs_stack.ecs_cluster, - ecs_task_definitions=self.ecs_stack.ecs_task_definitions, + ecs_task_definitions_families=self.ecs_stack.ecs_task_definitions_families, backend_api=self.lambda_api_stack.backend_api_name, queue_name=sqs_stack.queue.queue_name, **kwargs, diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py index 997ad5d76..544a72e18 100644 --- a/deploy/stacks/container.py +++ b/deploy/stacks/container.py @@ -49,48 +49,88 @@ def __init__( self.task_role = self.create_task_role(envname, resource_prefix, pivot_role_name) self.cicd_stacks_updater_role = self.create_cicd_stacks_updater_role(envname, resource_prefix, tooling_account_id) - cdkproxy_task_definition = ecs.FargateTaskDefinition( + cdkproxy_container_name = f'container' + cdkproxy_log_group = self.create_log_group( + envname, resource_prefix, log_group_name='cdkproxy' + ) + cdkproxy_image = ecs.ContainerImage.from_ecr_repository( + repository=ecr_repository, + tag=cdkproxy_image_tag + ) + + cdkproxy_task_definition = ecs.CfnTaskDefinition( self, f'{resource_prefix}-{envname}-cdkproxy', - cpu=1024, - memory_limit_mib=2048, - task_role=self.task_role, - execution_role=self.task_role, + container_definitions=[ecs.CfnTaskDefinition.ContainerDefinitionProperty( + image=cdkproxy_image.image_name, + name=cdkproxy_container_name, + command=['python3.8', '-m', 'dataall.tasks.cdkproxy'], + environment=[ + ecs.CfnTaskDefinition.KeyValuePairProperty( + name="AWS_REGION", + value=self.region + ), + ecs.CfnTaskDefinition.KeyValuePairProperty( + name="envname", + value=envname + ), + ecs.CfnTaskDefinition.KeyValuePairProperty( + name="LOGLEVEL", + value="DEBUG" + ), + ], + essential=True, + log_configuration=ecs.CfnTaskDefinition.LogConfigurationProperty( + log_driver="awslogs", + options={ + "awslogs-group": cdkproxy_log_group.log_group_name, + "awslogs-region": self.region, + "awslogs-stream-prefix": "task" + }, + ), + mount_points=[ + ecs.CfnTaskDefinition.MountPointProperty( + container_path="/dataall", + read_only=False, + source_volume="dataall_scratch" + ), + ecs.CfnTaskDefinition.MountPointProperty( + container_path="/tmp", + read_only=False, + source_volume="dataall_tmp_scratch" + ) + ], + readonly_root_filesystem=True, + )], + cpu="1024", + memory="2048", + execution_role_arn=self.task_role.role_arn, family=f'{resource_prefix}-{envname}-cdkproxy', - ) - - cdkproxy_container = cdkproxy_task_definition.add_container( - f'ShareManagementTaskContainer{envname}', - container_name=f'container', - image=ecs.ContainerImage.from_ecr_repository( - repository=ecr_repository, tag=cdkproxy_image_tag - ), - environment={ - 'AWS_REGION': self.region, - 'envname': envname, - 'LOGLEVEL': 'DEBUG', - }, - command=['python3.8', '-m', 'dataall.tasks.cdkproxy'], - logging=ecs.LogDriver.aws_logs( - stream_prefix='task', - log_group=self.create_log_group( - envname, resource_prefix, log_group_name='cdkproxy' + requires_compatibilities=[ecs.Compatibility.FARGATE.name], + task_role_arn=self.task_role.role_arn, + network_mode="awsvpc", + volumes=[ + ecs.CfnTaskDefinition.VolumeProperty( + name="dataall_scratch" ), - ), + ecs.CfnTaskDefinition.VolumeProperty( + name="dataall_tmp_scratch" + ) + ] ) ssm.StringParameter( self, f'CDKProxyTaskDefParam{envname}', parameter_name=f'/dataall/{envname}/ecs/task_def_arn/cdkproxy', - string_value=cdkproxy_task_definition.task_definition_arn, + string_value=cdkproxy_task_definition.attr_task_definition_arn, ) ssm.StringParameter( self, f'CDKProxyContainerParam{envname}', parameter_name=f'/dataall/{envname}/ecs/container/cdkproxy', - string_value=cdkproxy_container.container_name, + string_value=cdkproxy_container_name, ) scheduled_tasks_sg = self.create_task_sg( @@ -258,6 +298,7 @@ def __init__( envname, resource_prefix, log_group_name='share-manager' ), ), + readonly_root_filesystem=True, ) ssm.StringParameter( @@ -300,13 +341,13 @@ def __init__( ) self.ecs_cluster = cluster - self.ecs_task_definitions = [ - cdkproxy_task_definition, - sync_tables_task.task_definition, - update_bucket_policies_task.task_definition, - catalog_indexer_task.task_definition, - share_management_task_definition, - subscriptions_task.task_definition, + self.ecs_task_definitions_families = [ + cdkproxy_task_definition.family, + sync_tables_task.task_definition.family, + update_bucket_policies_task.task_definition.family, + catalog_indexer_task.task_definition.family, + share_management_task_definition.family, + subscriptions_task.task_definition.family, ] def create_cicd_stacks_updater_role(self, envname, resource_prefix, tooling_account_id): @@ -544,6 +585,7 @@ def set_scheduled_task( environment=environment, command=command, logging=ecs.LogDriver.aws_logs(stream_prefix='task', log_group=log_group), + readonly_root_filesystem=True, ) scheduled_task = ecs_patterns.ScheduledFargateTask( self, diff --git a/deploy/stacks/monitoring.py b/deploy/stacks/monitoring.py index f6cd4f9f7..001e4b361 100644 --- a/deploy/stacks/monitoring.py +++ b/deploy/stacks/monitoring.py @@ -27,7 +27,7 @@ def __init__( lambdas: [_lambda.Function] = None, database='dataalldevdb', ecs_cluster: ecs.Cluster = None, - ecs_task_definitions: [ecs.FargateTaskDefinition] = None, + ecs_task_definitions_families = None, backend_api=None, queue_name: str = None, **kwargs, @@ -51,7 +51,7 @@ def __init__( backend_api, database, ecs_cluster, - ecs_task_definitions, + ecs_task_definitions_families, envname, lambdas, resource_prefix, @@ -136,7 +136,7 @@ def create_cw_dashboard( backend_api, database, ecs_cluster, - ecs_task_definitions, + ecs_task_definitions_families, envname, lambdas, resource_prefix, @@ -173,19 +173,18 @@ def create_cw_dashboard( cf_ecs.build_ecs_cluster_task_count_widget(cluster_name), ) - if ecs_task_definitions: + if ecs_task_definitions_families: dashboard.add_widgets(cw.TextWidget(width=24, markdown='# ECS Tasks')) - task: ecs.FargateTaskDefinition - for task in ecs_task_definitions: + for task_family in ecs_task_definitions_families: dashboard.add_widgets( cf_ecs.build_ecs_task_container_insight_cpu_widget( - cluster_name, task.family + cluster_name, task_family ), cf_ecs.build_ecs_task_container_insight_memory_widget( - cluster_name, task.family + cluster_name, task_family ), cf_ecs.build_ecs_task_container_insight_storage_widget( - cluster_name, task.family + cluster_name, task_family ), ) if database: