From e9c64d79ea73d34c10c5dceeca45391c89c6a0c6 Mon Sep 17 00:00:00 2001
From: dlpzx <71252798+dlpzx@users.noreply.github.com>
Date: Fri, 26 May 2023 11:37:05 +0200
Subject: [PATCH 01/11] fix: Fix typo that destroys storage locations (#481)

### Feature or Bugfix
- Bugfix

### Detail
The constant that defines the dataallPivotRole name was missing an "a"; as a consequence, the storage location for the Dataset was not registered.

### Relates

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
---
 backend/dataall/aws/handlers/lakeformation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/dataall/aws/handlers/lakeformation.py b/backend/dataall/aws/handlers/lakeformation.py
index e1939c536..ba91a6e47 100644
--- a/backend/dataall/aws/handlers/lakeformation.py
+++ b/backend/dataall/aws/handlers/lakeformation.py
@@ -6,7 +6,7 @@ from .sts import SessionHelper
 
 log = logging.getLogger('aws:lakeformation')
 
-PIVOT_ROLE_NAME_PREFIX = "datallPivotRole"
+PIVOT_ROLE_NAME_PREFIX = "dataallPivotRole"
 
 
 class LakeFormation:

From 9fc84bf54a844deb42ab2a610ffd32deb628e5b9 Mon Sep 17 00:00:00 2001
From: Gezim Musliaj <102723839+gmuslia@users.noreply.github.com>
Date: Tue, 30 May 2023 10:04:51 +0200
Subject: [PATCH 02/11] Update CDK Version to v2.77.0 to fix issue with CDK
 Pipeline role (#484)

### Feature or Bugfix
- Bugfix
- Refactoring

### Detail
- The AWS Cloud Development Kit (CDK) team recently identified an issue with the CDK Pipelines construct library that may result in unintended permissions being granted to authenticated users within your account. As of April 4, 2023, the issue is fixed in version 1.200.0 [1] for CDK v1 and version 2.77.0 [2] for CDK v2. AWS strongly recommends upgrading to one of these versions as soon as possible; the Managing Dependencies documentation [3] in the CDK Developer Guide describes how to perform the upgrade.
- Starting with versions 1.158.0 and 2.26.0, released May 30, 2022, the library creates a role that every identity in the same account with sts:AssumeRole permissions on Resource: * is allowed to assume. This may grant authenticated users in your account privileges to take pipeline actions beyond what was intended.

### Relates
- N.A

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
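A quick way to confirm that a checkout picks up the patched library is to assert the installed version at build time. The sketch below is illustrative only and not part of this patch: the 2.77.0 floor and the use of the third-party `packaging` library are assumptions.

```python
# Sketch only: fail fast if the environment still resolves an aws-cdk-lib release
# that predates the CDK Pipelines fix (assumed floor: 2.77.0).
from importlib.metadata import PackageNotFoundError, version

from packaging.version import Version  # third-party "packaging" library

MINIMUM_SAFE_CDK = Version("2.77.0")


def assert_safe_cdk_version() -> None:
    try:
        installed = Version(version("aws-cdk-lib"))
    except PackageNotFoundError as exc:
        raise RuntimeError("aws-cdk-lib is not installed in this environment") from exc
    if installed < MINIMUM_SAFE_CDK:
        raise RuntimeError(
            f"aws-cdk-lib=={installed} is affected by the CDK Pipelines role issue; "
            f"upgrade to >= {MINIMUM_SAFE_CDK}"
        )


if __name__ == "__main__":
    assert_safe_cdk_version()
    print("aws-cdk-lib version looks safe")
```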
---
 backend/dataall/cdkproxy/requirements.txt | 2 +-
 deploy/requirements.txt                   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/dataall/cdkproxy/requirements.txt b/backend/dataall/cdkproxy/requirements.txt
index 8a3e6fb98..f85335573 100644
--- a/backend/dataall/cdkproxy/requirements.txt
+++ b/backend/dataall/cdkproxy/requirements.txt
@@ -1,4 +1,4 @@
-aws-cdk-lib==2.61.1
+aws-cdk-lib==2.77.0
 aws_cdk.aws_redshift_alpha==2.14.0a0
 boto3==1.24.85
 boto3-stubs==1.24.85

diff --git a/deploy/requirements.txt b/deploy/requirements.txt
index 5ea72abd5..4490b9d4a 100644
--- a/deploy/requirements.txt
+++ b/deploy/requirements.txt
@@ -1,4 +1,4 @@
-aws-cdk-lib==2.61.1
+aws-cdk-lib==2.77.0
 boto3-stubs==1.20.20
 boto3==1.24.85
 botocore==1.27.85

From 7844c1f7800f910da50c5deb6e624930e22870a0 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Thu, 1 Jun 2023 14:07:27 +0200
Subject: [PATCH 03/11] Clean-up dataset IAM role permissions - missing Glue
 crawling ones

---
 backend/dataall/cdkproxy/stacks/dataset.py | 38 +++-----------------
 1 file changed, 4 insertions(+), 34 deletions(-)

diff --git a/backend/dataall/cdkproxy/stacks/dataset.py b/backend/dataall/cdkproxy/stacks/dataset.py
index 410d4b79d..b950f8d28 100644
--- a/backend/dataall/cdkproxy/stacks/dataset.py
+++ b/backend/dataall/cdkproxy/stacks/dataset.py
@@ -196,12 +196,6 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                     iam.PolicyStatement(
                         actions=['s3:List*'], resources=['*'], effect=iam.Effect.ALLOW
                     ),
-                    iam.PolicyStatement(
-                        actions=['logs:*'], resources=['*'], effect=iam.Effect.ALLOW
-                    ),
-                    iam.PolicyStatement(
-                        actions=['tag:*'], resources=['*'], effect=iam.Effect.ALLOW
-                    ),
                     iam.PolicyStatement(
                         actions=['s3:List*', 's3:Get*'],
                         resources=[dataset_bucket.bucket_arn],
@@ -217,26 +211,22 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                             's3:GetAccessPoint',
                             's3:GetAccessPointPolicy',
                             's3:ListAccessPoints',
-                            's3:CreateAccessPoint',
-                            's3:DeleteAccessPoint',
                             's3:GetAccessPointPolicyStatus',
-                            's3:DeleteAccessPointPolicy',
-                            's3:PutAccessPointPolicy',
                         ],
                         effect=iam.Effect.ALLOW,
                         resources=[
-                            f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/*',
+                            f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{dataset.datasetUri}*',
                         ],
                     ),
                     iam.PolicyStatement(
-                        actions=['s3:List*', 's3:Get*'],
+                        actions=['s3:List*'],
                         resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}'],
                         effect=iam.Effect.ALLOW,
                     ),
                     iam.PolicyStatement(
-                        actions=['s3:*'],
+                        actions=['s3:List*', 's3:Get*'],
+                        resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}/profiling*'],
                         effect=iam.Effect.ALLOW,
-                        resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}/*'],
                     ),
                     iam.PolicyStatement(
                         effect=iam.Effect.ALLOW,
@@ -268,19 +258,6 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                         effect=iam.Effect.ALLOW,
                         resources=['arn:aws:logs:*:*:/aws-glue/*'],
                     ),
-                    iam.PolicyStatement(
-                        actions=['kms:*'], effect=iam.Effect.ALLOW, resources=['*']
-                    ),
-                    iam.PolicyStatement(
-                        actions=['glue:*', 'athena:*', 'lakeformation:*'],
-                        resources=['*'],
-                        effect=iam.Effect.ALLOW,
-                    ),
-                    iam.PolicyStatement(
-                        actions=['cloudformation:*'],
-                        resources=['*'],
-                        effect=iam.Effect.ALLOW,
-                    ),
                 ],
             )
             dataset_admin_policy.node.add_dependency(dataset_bucket)
@@ -291,13 +268,6 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                 role_name=dataset.IAMDatasetAdminRoleArn.split('/')[-1],
                 assumed_by=iam.CompositePrincipal(
                     iam.ServicePrincipal('glue.amazonaws.com'),
-                    iam.ServicePrincipal('lakeformation.amazonaws.com'),
-                    iam.ServicePrincipal('athena.amazonaws.com'),
-                    iam.ServicePrincipal('sagemaker.amazonaws.com'),
-                    iam.ServicePrincipal('lambda.amazonaws.com'),
-                    iam.ServicePrincipal('ec2.amazonaws.com'),
-                    iam.AccountPrincipal(os.environ.get('CURRENT_AWS_ACCOUNT')),
-                    iam.AccountPrincipal(dataset.AwsAccountId),
                     iam.ArnPrincipal(
                         f'arn:aws:iam::{dataset.AwsAccountId}:role/{self.pivot_role_name}'
                     ),

From 8ece7069b0789890acd9c333753a0b4d34667b71 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Thu, 1 Jun 2023 14:10:39 +0200
Subject: [PATCH 04/11] Clean-up dataset IAM role permissions - removed Glue
 crawling ones

---
 backend/dataall/cdkproxy/stacks/dataset.py | 31 ----------------------
 1 file changed, 31 deletions(-)

diff --git a/backend/dataall/cdkproxy/stacks/dataset.py b/backend/dataall/cdkproxy/stacks/dataset.py
index b950f8d28..7f6cf2b14 100644
--- a/backend/dataall/cdkproxy/stacks/dataset.py
+++ b/backend/dataall/cdkproxy/stacks/dataset.py
@@ -228,36 +228,6 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                         resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}/profiling*'],
                         effect=iam.Effect.ALLOW,
                     ),
-                    iam.PolicyStatement(
-                        effect=iam.Effect.ALLOW,
-                        resources=['arn:aws:s3:::aws-glue-*'],
-                        actions=['s3:CreateBucket'],
-                    ),
-                    iam.PolicyStatement(
-                        actions=['s3:GetObject', 's3:PutObject', 's3:DeleteObject'],
-                        effect=iam.Effect.ALLOW,
-                        resources=[
-                            'arn:aws:s3:::aws-glue-*/*',
-                            'arn:aws:s3:::*/*aws-glue-*/*',
-                        ],
-                    ),
-                    iam.PolicyStatement(
-                        actions=['s3:GetObject'],
-                        effect=iam.Effect.ALLOW,
-                        resources=[
-                            'arn:aws:s3:::crawler-public*',
-                            'arn:aws:s3:::aws-glue-*',
-                        ],
-                    ),
-                    iam.PolicyStatement(
-                        actions=[
-                            'logs:CreateLogGroup',
-                            'logs:CreateLogStream',
-                            'logs:PutLogEvents',
-                        ],
-                        effect=iam.Effect.ALLOW,
-                        resources=['arn:aws:logs:*:*:/aws-glue/*'],
-                    ),
                 ],
             )
             dataset_admin_policy.node.add_dependency(dataset_bucket)
@@ -267,7 +237,6 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                 'DatasetAdminRole',
                 role_name=dataset.IAMDatasetAdminRoleArn.split('/')[-1],
                 assumed_by=iam.CompositePrincipal(
-                    iam.ServicePrincipal('glue.amazonaws.com'),
                     iam.ArnPrincipal(
                         f'arn:aws:iam::{dataset.AwsAccountId}:role/{self.pivot_role_name}'
                     ),

From fa45abd0b2af1b97c4f677354c5eed4d3de3d8c9 Mon Sep 17 00:00:00 2001
From: dlpzx <71252798+dlpzx@users.noreply.github.com>
Date: Thu, 1 Jun 2023 17:11:10 +0200
Subject: [PATCH 05/11] fix: safe removal of consumption roles with open share
 requests (#485)

### Feature or Bugfix
- Bugfix

### Detail
- Added a check that raises an exception if there are open share requests on a consumption role or on a group that is being removed from an environment

### Relates
- #450

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
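In essence the change turns removal into "count, then delete". The sketch below distils the guard with plain SQLAlchemy before the actual diff; it is an illustration, not the project's code, and `RuntimeError` stands in for the `EnvironmentResourcesFound` exception that data.all raises.

```python
# Distilled guard: refuse to remove a principal that still has share requests attached.
from sqlalchemy import and_


def assert_no_open_shares(session, share_model, principal_id, principal_type, action):
    # Count share objects whose principal matches the group or consumption role being removed.
    shares_count = (
        session.query(share_model)
        .filter(
            and_(
                share_model.principalId == principal_id,
                share_model.principalType == principal_type,
            )
        )
        .count()
    )
    if shares_count > 0:
        # data.all raises exceptions.EnvironmentResourcesFound here; any domain exception works in this sketch.
        raise RuntimeError(
            f"{action}: principal {principal_id} still has {shares_count} share request(s) on this environment"
        )
```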
---
 backend/dataall/db/api/environment.py | 34 +++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/backend/dataall/db/api/environment.py b/backend/dataall/db/api/environment.py
index 1d8c0e68e..cac9f2bed 100644
--- a/backend/dataall/db/api/environment.py
+++ b/backend/dataall/db/api/environment.py
@@ -392,6 +392,23 @@ def remove_group(session, username, groups, uri, data=None, check_perm=None):
                 message=f'Team: {group} has created {group_env_objects_count} resources on this environment.',
             )
 
+        shares_count = (
+            session.query(models.ShareObject)
+            .filter(
+                and_(
+                    models.ShareObject.principalId == group,
+                    models.ShareObject.principalType == PrincipalType.Group.value
+                )
+            )
+            .count()
+        )
+
+        if shares_count > 0:
+            raise exceptions.EnvironmentResourcesFound(
+                action='Remove Team',
+                message=f'Team: {group} has created {shares_count} share requests on this environment.',
+            )
+
         group_membership = Environment.find_environment_group(
             session, group, environment.environmentUri
         )
@@ -529,6 +546,23 @@ def remove_consumption_role(session, username, groups, uri, data=None, check_perm=None):
 
         consumption_role = Environment.get_environment_consumption_role(session, uri, data.get('environmentUri'))
 
+        shares_count = (
+            session.query(models.ShareObject)
+            .filter(
+                and_(
+                    models.ShareObject.principalId == uri,
+                    models.ShareObject.principalType == PrincipalType.ConsumptionRole.value
+                )
+            )
+            .count()
+        )
+
+        if shares_count > 0:
+            raise exceptions.EnvironmentResourcesFound(
+                action='Remove Consumption Role',
+                message=f'Consumption role: {consumption_role.consumptionRoleName} has created {shares_count} share requests on this environment.',
+            )
+
         if consumption_role:
             session.delete(consumption_role)
             session.commit()

From 2d61beaa1234c307eb333753e1f0e3be6833b446 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Tue, 6 Jun 2023 18:16:49 +0200
Subject: [PATCH 06/11] Allowing glue crawler and restricting bucket
 permissions

---
 backend/dataall/cdkproxy/stacks/dataset.py | 24 +++++++++++++++++-----
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/backend/dataall/cdkproxy/stacks/dataset.py b/backend/dataall/cdkproxy/stacks/dataset.py
index 7f6cf2b14..674f9a809 100644
--- a/backend/dataall/cdkproxy/stacks/dataset.py
+++ b/backend/dataall/cdkproxy/stacks/dataset.py
@@ -194,15 +194,29 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                 policy_name=dataset.S3BucketName,
                 statements=[
                     iam.PolicyStatement(
-                        actions=['s3:List*'], resources=['*'], effect=iam.Effect.ALLOW
+                        actions=[
+                            "s3:ListAllMyBuckets",
+                            "s3:ListAccessPoints",
+                        ],
+                        resources=["*"],
+                        effect=iam.Effect.ALLOW
                     ),
                     iam.PolicyStatement(
-                        actions=['s3:List*', 's3:Get*'],
+                        actions=[
+                            "s3:ListBucket",
+                            "s3:GetBucketLocation"
+                        ],
                         resources=[dataset_bucket.bucket_arn],
                         effect=iam.Effect.ALLOW,
                     ),
                     iam.PolicyStatement(
-                        actions=['s3:*'],
+                        actions=[
+                            "s3:PutObject",
+                            "s3:PutObjectAcl",
+                            "s3:GetObject",
+                            "s3:GetObjectAcl",
+                            "s3:DeleteObject"
+                        ],
                         effect=iam.Effect.ALLOW,
                         resources=[dataset_bucket.bucket_arn + '/*'],
                     ),
                     iam.PolicyStatement(
                         actions=[
                             's3:GetAccessPoint',
                             's3:GetAccessPointPolicy',
-                            's3:ListAccessPoints',
                             's3:GetAccessPointPolicyStatus',
                         ],
                         effect=iam.Effect.ALLOW,
                         resources=[
                             f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{dataset.datasetUri}*',
                         ],
                     ),
                     iam.PolicyStatement(
-                        actions=['s3:List*'],
+                        actions=['s3:ListBucket'],
                         resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}'],
                         effect=iam.Effect.ALLOW,
                     ),
@@ -240,6 +253,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                 iam.ArnPrincipal(
                     f'arn:aws:iam::{dataset.AwsAccountId}:role/{self.pivot_role_name}'
                 ),
+                iam.ServicePrincipal('glue.amazonaws.com'),
             ),
         )
         dataset_admin_policy.attach_to_role(dataset_admin_role)

From 36e8e33092894df0860ee6a058fa4d34e1eef862 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Wed, 7 Jun 2023 10:18:51 +0200
Subject: [PATCH 07/11] Finish restriction for users in dataset roles + enforced
 crawler + changes in read environment bucket - missing: errors on profiling
 jobs and KMS key of environment bucket

---
 backend/dataall/aws/handlers/glue.py       |  5 ++-
 backend/dataall/cdkproxy/stacks/dataset.py | 41 +++++++++++++++++++---
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/backend/dataall/aws/handlers/glue.py b/backend/dataall/aws/handlers/glue.py
index bcfecbb8c..037fe3cc8 100644
--- a/backend/dataall/aws/handlers/glue.py
+++ b/backend/dataall/aws/handlers/glue.py
@@ -473,7 +473,7 @@ def start_glue_crawler(data):
             glue = session.client('glue', region_name=data.get('region', 'eu-west-1'))
             if data.get('location'):
                 Glue._update_existing_crawler(
-                    glue, accountid, crawler_name, targets, database
+                    glue, crawler_name, targets, database
                 )
             crawler = Glue._get_crawler(glue, crawler_name)
             glue.start_crawler(Name=crawler_name)
@@ -496,7 +496,7 @@ def _get_crawler(glue, crawler_name):
         return crawler.get('Crawler') if crawler else None
 
     @staticmethod
-    def _update_existing_crawler(glue, accountid, crawler_name, targets, database):
+    def _update_existing_crawler(glue, crawler_name, targets, database):
         try:
             glue.stop_crawler(Name=crawler_name)
         except ClientError as e:
@@ -508,7 +508,6 @@ def _update_existing_crawler(glue, accountid, crawler_name, targets, database):
         try:
             glue.update_crawler(
                 Name=crawler_name,
-                Role=SessionHelper.get_delegation_role_arn(accountid=accountid),
                 DatabaseName=database,
                 Targets=targets,
             )

diff --git a/backend/dataall/cdkproxy/stacks/dataset.py b/backend/dataall/cdkproxy/stacks/dataset.py
index 674f9a809..73331f89e 100644
--- a/backend/dataall/cdkproxy/stacks/dataset.py
+++ b/backend/dataall/cdkproxy/stacks/dataset.py
@@ -194,6 +194,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                 policy_name=dataset.S3BucketName,
                 statements=[
                     iam.PolicyStatement(
+                        sid="ListAll",
                         actions=[
                             "s3:ListAllMyBuckets",
                             "s3:ListAccessPoints",
                         ],
                         resources=["*"],
                         effect=iam.Effect.ALLOW
                     ),
                     iam.PolicyStatement(
+                        sid="ListDatasetBucket",
                         actions=[
                             "s3:ListBucket",
                             "s3:GetBucketLocation"
                         ],
                         resources=[dataset_bucket.bucket_arn],
                         effect=iam.Effect.ALLOW,
                     ),
                     iam.PolicyStatement(
+                        sid="ReadWriteDatasetBucket",
                         actions=[
                             "s3:PutObject",
                             "s3:PutObjectAcl",
                             "s3:GetObject",
                             "s3:GetObjectAcl",
+                            "s3:GetObjectVersion",
                             "s3:DeleteObject"
                         ],
                         effect=iam.Effect.ALLOW,
                         resources=[dataset_bucket.bucket_arn + '/*'],
                     ),
                     iam.PolicyStatement(
+                        sid="KMSAccess",
+                        actions=[
+                            "kms:Decrypt",
+                            "kms:Encrypt",
+                            "kms:GenerateDataKey"
+                        ],
+                        effect=iam.Effect.ALLOW,
+                        resources=[dataset_key.key_arn],
+                    ),
+                    iam.PolicyStatement(
+                        sid="ReadAccessPointsDatasetBucket",
                         actions=[
                             's3:GetAccessPoint',
                             's3:GetAccessPointPolicy',
                             's3:GetAccessPointPolicyStatus',
                         ],
                         effect=iam.Effect.ALLOW,
                         resources=[
                             f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{dataset.datasetUri}*',
                         ],
                     ),
                     iam.PolicyStatement(
                         actions=['s3:ListBucket'],
                         resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}'],
                         effect=iam.Effect.ALLOW,
                     ),
                     iam.PolicyStatement(
-                        actions=['s3:List*', 's3:Get*'],
+                        sid="ReadEnvironmentBucketProfiling",
+                        actions=[
+                            "s3:GetObject",
+                            "s3:GetObjectAcl",
+                            "s3:GetObjectVersion"
+                        ],
+                        effect=iam.Effect.ALLOW,
                         resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}/profiling*'],
-                        effect=iam.Effect.ALLOW,
                     ),
+                    iam.PolicyStatement(
+                        sid="ReadWriteEnvironmentBucketProfiling",
+                        actions=[
+                            "s3:PutObject",
+                            "s3:PutObjectAcl",
+                            "s3:GetObject",
+                            "s3:GetObjectAcl",
+                            "s3:GetObjectVersion",
+                            "s3:DeleteObject"
+                        ],
+                        resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}/profiling/results/{dataset.datasetUri}/*'],
+                        effect=iam.Effect.ALLOW,
+                    ),
                 ],
             )
             dataset_admin_policy.node.add_dependency(dataset_bucket)
@@ -394,9 +427,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
             self,
             'DatasetGlueProfilingJob',
             name=dataset.GlueProfilingJobName,
-            role=iam.ArnPrincipal(
-                f'arn:aws:iam::{env.AwsAccountId}:role/{self.pivot_role_name}'
-            ).arn,
+            role=dataset_admin_role.role_arn,
             allocated_capacity=10,
             execution_property=glue.CfnJob.ExecutionPropertyProperty(
                 max_concurrent_runs=100

From 4117f9473ea1d99e84acc7dc65e70970f75cf380 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Wed, 7 Jun 2023 11:29:39 +0200
Subject: [PATCH 08/11] Additional permissions for Glue crawler in dataset role

---
 backend/dataall/cdkproxy/stacks/dataset.py | 30 ++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/backend/dataall/cdkproxy/stacks/dataset.py b/backend/dataall/cdkproxy/stacks/dataset.py
index 73331f89e..ad51862f9 100644
--- a/backend/dataall/cdkproxy/stacks/dataset.py
+++ b/backend/dataall/cdkproxy/stacks/dataset.py
@@ -246,6 +246,36 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
                             f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{dataset.datasetUri}*',
                         ],
                     ),
+                    iam.PolicyStatement(
+                        sid="GlueAccessCrawler",
+                        actions=[
+                            "glue:GetDatabase",
+                            "glue:GetTableVersion",
+                            "glue:CreateTable",
+                            "glue:GetTables",
+                            "glue:GetTableVersions",
+                            "glue:UpdateTable",
+                            "glue:DeleteTableVersion",
+                            "glue:DeleteTable",
+                            "glue:GetTable"
+                        ],
+                        effect=iam.Effect.ALLOW,
+                        resources=[
+                            f"arn:aws:glue:*:{dataset.AwsAccountId}:catalog",
+                            f"arn:aws:glue:{dataset.region}:{dataset.AwsAccountId}:database/{dataset.S3BucketName}",
+                            f"arn:aws:glue:{dataset.region}:{dataset.AwsAccountId}:table/{dataset.S3BucketName}/*"
+                        ]
+                    ),
+                    iam.PolicyStatement(
+                        sid="LoggingGlueCrawler",
+                        actions=[
+                            'logs:PutLogEvents',
+                        ],
+                        effect=iam.Effect.ALLOW,
+                        resources=[
+                            f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/crawlers:log-stream:{dataset.GlueCrawlerName}',
+                        ],
+                    ),
                     iam.PolicyStatement(
                         actions=['s3:ListBucket'],
                         resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}'],
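One way to sanity-check a scoped-down statement such as `GlueAccessCrawler` without deploying the stack is IAM's policy simulator. The snippet below is a rough sketch that is not part of these patches; the account ID, region and database name are placeholders, and running it requires `iam:SimulateCustomPolicy` permissions.

```python
# Rough check of a narrowed statement with the IAM policy simulator (all values are placeholders).
import json

import boto3

policy = {
    "Version": "2012-10-17",
    "Statement": [{
        "Sid": "GlueAccessCrawler",
        "Effect": "Allow",
        "Action": ["glue:GetTable", "glue:CreateTable", "glue:UpdateTable"],
        "Resource": [
            "arn:aws:glue:eu-west-1:111122223333:catalog",
            "arn:aws:glue:eu-west-1:111122223333:database/exampledatabase",
            "arn:aws:glue:eu-west-1:111122223333:table/exampledatabase/*",
        ],
    }],
}

iam = boto3.client("iam")
response = iam.simulate_custom_policy(
    PolicyInputList=[json.dumps(policy)],
    ActionNames=["glue:GetTable"],
    ResourceArns=["arn:aws:glue:eu-west-1:111122223333:table/exampledatabase/sometable"],
)
for result in response["EvaluationResults"]:
    # Expect "allowed" for resources under the named database, "implicitDeny" elsewhere.
    print(result["EvalActionName"], result["EvalDecision"])
```

This kind of check also surfaces the mismatch that the next patch corrects: simulating against a table ARN built from the Glue database name fails if the policy was written with the S3 bucket name instead.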
f"arn:aws:glue:{dataset.region}:{dataset.AwsAccountId}:database/{dataset.GlueDatabaseName}", + f"arn:aws:glue:{dataset.region}:{dataset.AwsAccountId}:table/{dataset.GlueDatabaseName}/*" ] ), iam.PolicyStatement( From 2c5d36139ae07aa92b4d36171ea850d22121445e Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 7 Jun 2023 13:16:59 +0200 Subject: [PATCH 10/11] Linting --- backend/dataall/cdkproxy/stacks/dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/dataall/cdkproxy/stacks/dataset.py b/backend/dataall/cdkproxy/stacks/dataset.py index 3836c49de..3decb69e3 100644 --- a/backend/dataall/cdkproxy/stacks/dataset.py +++ b/backend/dataall/cdkproxy/stacks/dataset.py @@ -220,7 +220,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): "s3:GetObjectAcl", "s3:GetObjectVersion", "s3:DeleteObject" - ], + ], effect=iam.Effect.ALLOW, resources=[dataset_bucket.bucket_arn + '/*'], ), From bbd0fcc6a29991d48d0b127fbd0b0f84f6932d5a Mon Sep 17 00:00:00 2001 From: dlpzx Date: Fri, 9 Jun 2023 13:01:46 +0200 Subject: [PATCH 11/11] Added createlog permissions to dataset role --- backend/dataall/cdkproxy/stacks/dataset.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/backend/dataall/cdkproxy/stacks/dataset.py b/backend/dataall/cdkproxy/stacks/dataset.py index 3decb69e3..92e6fdd8b 100644 --- a/backend/dataall/cdkproxy/stacks/dataset.py +++ b/backend/dataall/cdkproxy/stacks/dataset.py @@ -266,6 +266,17 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): f"arn:aws:glue:{dataset.region}:{dataset.AwsAccountId}:table/{dataset.GlueDatabaseName}/*" ] ), + iam.PolicyStatement( + sid="CreateLoggingGlueCrawler", + actions=[ + 'logs:CreateLogGroup', + 'logs:CreateLogStream', + ], + effect=iam.Effect.ALLOW, + resources=[ + f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/crawlers*', + ], + ), iam.PolicyStatement( sid="LoggingGlueCrawler", actions=[