Skip to content

Commit

Permalink
Update modularization work with the latest main (#661)
Browse files Browse the repository at this point in the history
Merge latest changes from main into modularization-main

It includes changes from #626, #630, #648, #649, and #651

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.

---------

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: dlpzx <71252798+dlpzx@users.noreply.github.com>
Co-authored-by: wolanlu <101870655+wolanlu@users.noreply.github.com>
Co-authored-by: Amr Saber <amr.m.saber.mail@gmail.com>
Co-authored-by: Noah Paige <69586985+noah-paige@users.noreply.github.com>
Co-authored-by: kukushking <kukushkin.anton@gmail.com>
Co-authored-by: Dariusz Osiennik <osiend@amazon.com>
Co-authored-by: Dennis Goldner <107395339+degoldner@users.noreply.github.com>
Co-authored-by: Abdulrahman Kaitoua <abdulrahman.kaitoua@polimi.it>
Co-authored-by: akaitoua-sa <126820454+akaitoua-sa@users.noreply.github.com>
Co-authored-by: Gezim Musliaj <102723839+gmuslia@users.noreply.github.com>
Co-authored-by: Rick Bernotas <97474536+rbernotas@users.noreply.github.com>
Co-authored-by: David Mutune Kimengu <57294718+kimengu-david@users.noreply.github.com>
Co-authored-by: chamcca <40579012+chamcca@users.noreply.github.com>
Co-authored-by: Dhruba <117375130+marjet26@users.noreply.github.com>
Co-authored-by: dbalintx <132444646+dbalintx@users.noreply.github.com>
Co-authored-by: Srinivas Reddy <srinivasreddych@outlook.com>
Co-authored-by: mourya-33 <134511711+mourya-33@users.noreply.github.com>
Co-authored-by: Noah Paige <noahpaig@amazon.com>
Co-authored-by: dlpzx <dlpzx@amazon.com>
  • Loading branch information
21 people authored Aug 16, 2023
1 parent f182ee2 commit 90bc8e8
Show file tree
Hide file tree
Showing 12 changed files with 208 additions and 100 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ def on_create(event):
except ClientError as e:
pass

# Probe the Glue catalog for a database named "default". A ClientError from the
# lookup is treated as "not present"; the flag is only raised on success.
default_db_exists = False
try:
    glue_client.get_database(Name="default")
except ClientError:
    pass
else:
    default_db_exists = True

if not exists:
try:
db_input = props.get('DatabaseInput').copy()
Expand All @@ -63,7 +70,7 @@ def on_create(event):
raise Exception(f"Could not create Glue Database {props['DatabaseInput']['Name']} in aws://{AWS_ACCOUNT}/{AWS_REGION}, received {str(e)}")

Entries = []
for i, role_arn in enumerate(props.get('DatabaseAdministrators')):
for i, role_arn in enumerate(props.get('DatabaseAdministrators', [])):
Entries.append(
{
'Id': str(uuid.uuid4()),
Expand Down Expand Up @@ -103,6 +110,20 @@ def on_create(event):
'PermissionsWithGrantOption': ['SELECT', 'ALTER', 'DESCRIBE'],
}
)
if default_db_exists:
    # Also grant this principal DESCRIBE on the Glue "default" database, but only
    # when that database was found in the catalog. The permission is spelled as
    # an upper-case literal, matching the other permission lists in this function.
    Entries.append(
        {
            'Id': str(uuid.uuid4()),
            'Principal': {'DataLakePrincipalIdentifier': role_arn},
            'Resource': {
                'Database': {
                    'Name': 'default'
                }
            },
            'Permissions': ['DESCRIBE'],
        }
    )

lf_client.batch_grant_permissions(CatalogId=props['CatalogId'], Entries=Entries)
physical_id = props['DatabaseInput']['Imported'] + props['DatabaseInput']['Name']

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os
import logging
import pprint
import sys
Expand All @@ -8,7 +9,6 @@
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from pydeequ.profiles import *

sc = SparkContext.getOrCreate()
sc._jsc.hadoopConfiguration().set('fs.s3.canned.acl', 'BucketOwnerFullControl')
Expand All @@ -32,6 +32,7 @@
'environmentBucket',
'dataallRegion',
'table',
"SPARK_VERSION"
]
try:
args = getResolvedOptions(sys.argv, list_args)
Expand All @@ -43,6 +44,10 @@
list_args.remove('table')
args = getResolvedOptions(sys.argv, list_args)

# Export the Spark version to the environment before pydeequ is imported; the
# import is deliberately placed after this assignment instead of at the top of
# the file.
# NOTE(review): presumably pydeequ reads SPARK_VERSION from the environment at
# import time — confirm against the pydeequ version bundled with the job.
os.environ["SPARK_VERSION"] = args.get("SPARK_VERSION", "3.1")

from pydeequ.profiles import *

logger.info('Parsed Retrieved parameters')

logger.info('Parsed Args = %s', pprint.pformat(args))
Expand Down
10 changes: 7 additions & 3 deletions backend/dataall/modules/datasets/cdk/dataset_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,24 +300,26 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
]
),
iam.PolicyStatement(
sid="CreateLoggingGlueCrawler",
sid="CreateLoggingGlue",
actions=[
'logs:CreateLogGroup',
'logs:CreateLogStream',
],
effect=iam.Effect.ALLOW,
resources=[
f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/crawlers*',
f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/jobs/*',
],
),
iam.PolicyStatement(
sid="LoggingGlueCrawler",
sid="LoggingGlue",
actions=[
'logs:PutLogEvents',
],
effect=iam.Effect.ALLOW,
resources=[
f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/crawlers:log-stream:{dataset.GlueCrawlerName}',
f'arn:aws:logs:{dataset.region}:{dataset.AwsAccountId}:log-group:/aws-glue/jobs/*',
],
),
iam.PolicyStatement(
Expand Down Expand Up @@ -443,7 +445,8 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
'CreateTableDefaultPermissions': [],
'Imported': 'IMPORTED-' if dataset.imported else 'CREATED-'
},
'DatabaseAdministrators': dataset_admins
'DatabaseAdministrators': dataset_admins,
'TriggerUpdate': True
},
)

Expand Down Expand Up @@ -484,6 +487,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
'--enable-metrics': 'true',
'--enable-continuous-cloudwatch-log': 'true',
'--enable-glue-datacatalog': 'true',
'--SPARK_VERSION': '3.1',
}

job = glue.CfnJob(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,61 @@
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy import orm, Column, String
from sqlalchemy.dialects import postgresql
from sqlalchemy.ext.declarative import declarative_base

# revision identifiers, used by Alembic.
revision = 'b1cdc0dc987a'
down_revision = '4392a0c9747f'
branch_labels = None
depends_on = None

Base = declarative_base()


class DataPipeline(Base):
    """Minimal mapping of the ``datapipeline`` table for this migration.

    Declares only the primary key and the two columns that ``upgrade``
    backfills (``devStrategy`` and ``devStages``).
    """

    __tablename__ = 'datapipeline'

    DataPipelineUri = Column(String, primary_key=True, nullable=False)
    devStrategy = Column(String, nullable=True)
    devStages = Column(postgresql.ARRAY(String), nullable=True)


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
# Modify column types
print("Upgrade devStages and devStrategy column types. Updating nullable to True...")
op.add_column(
'datapipeline',
sa.Column('template', sa.String(), nullable=True)
)
op.alter_column(
'datapipeline',
'devStages',
existing_type=postgresql.ARRAY(sa.VARCHAR()),
nullable=True
)
op.alter_column(
'datapipeline',
'devStrategy',
existing_type=sa.VARCHAR(),
nullable=True
)
print("Backfilling values for devStages and devStrategy...")
# Backfill values
# The NULL checks must be built with .is_(None). Python's `is` operator cannot
# be overloaded, so `DataPipeline.devStrategy is None` evaluates to the constant
# False and the filter matches no rows. That would leave existing NULLs in
# place and make the later switch to nullable=False fail.
bind = op.get_bind()
session = orm.Session(bind=bind)
session.query(DataPipeline).filter(DataPipeline.devStrategy.is_(None)).update(
    {DataPipeline.devStrategy: 'gitflowBlueprint'}, synchronize_session=False)

session.query(DataPipeline).filter(DataPipeline.devStages.is_(None)).update(
    {DataPipeline.devStages: ['dev', 'test', 'prod']}, synchronize_session=False)
session.commit()

print("Backfilling values for devStages and devStrategy is done. Updating nullable to False...")
# Force nullable = False
op.alter_column(
'datapipeline',
'devStages',
Expand Down
37 changes: 17 additions & 20 deletions deploy/cdk_exec_policy/cdkExecPolicy.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
AWSTemplateFormatVersion: 2010-09-09
Description: Custom least privilege IAM policy for linking environments to dataall
Parameters:
AwsAccountId:
Description: AWS AccountId of the account that we wish to link.
Type: String
PolicyName:
Description: IAM policy name (The same name must be used during CDK bootstrapping. Default is DataAllCustomCDKPolicy.)
Type: String
Expand Down Expand Up @@ -48,14 +45,14 @@ Resources:
Effect: Allow
Action: 'athena:CreateWorkGroup'
Resource:
- !Sub 'arn:aws:athena:*:${AWS::AccountId}:workgroup/*'
- !Sub 'arn:${AWS::Partition}:athena:*:${AWS::AccountId}:workgroup/*'
- Sid: IAM
Action:
- 'iam:CreatePolicy'
- 'iam:GetPolicy'
Effect: Allow
Resource:
- !Sub 'arn:aws:iam::${AWS::AccountId}:policy/*'
- !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:policy/*'
- Sid: IAMRole
Action:
- 'iam:AttachRolePolicy'
Expand All @@ -82,7 +79,7 @@ Resources:
- 'iam:CreatePolicyVersion'
- 'iam:DeletePolicyVersion'
Resource:
- !Sub 'arn:aws:iam::${AWS::AccountId}:policy/service-role/AWSQuickSight*'
- !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:policy/service-role/AWSQuickSight*'
- Sid: QuickSight
Effect: Allow
Action:
Expand Down Expand Up @@ -114,14 +111,14 @@ Resources:
- 'kms:CreateAlias'
Effect: Allow
Resource:
- !Sub 'arn:aws:kms:*:${AWS::AccountId}:alias/*'
- !Sub 'arn:${AWS::Partition}:kms:*:${AWS::AccountId}:alias/*'
- Sid: KMSKey
Action:
- 's3:PutBucketAcl'
- 's3:PutBucketNotification'
Effect: Allow
Resource:
- !Sub 'arn:aws:s3:::${EnvironmentResourcePrefix}-logging-*'
- !Sub 'arn:${AWS::Partition}:s3:::${EnvironmentResourcePrefix}-logging-*'
- Sid: ReadBuckets
Action:
- 'kms:CreateAlias'
Expand All @@ -136,7 +133,7 @@ Resources:
- 'kms:PutKeyPolicy'
- 'kms:TagResource'
Effect: Allow
Resource: !Sub 'arn:aws:kms:*:${AWS::AccountId}:key/*'
Resource: !Sub 'arn:${AWS::Partition}:kms:*:${AWS::AccountId}:key/*'
- Sid: Lambda
Action:
- 'lambda:AddPermission'
Expand All @@ -154,7 +151,7 @@ Resources:
Action:
- 'lambda:PublishLayerVersion'
Resource:
- !Sub 'arn:aws:lambda:*:${AWS::AccountId}:layer:*'
- !Sub 'arn:${AWS::Partition}:lambda:*:${AWS::AccountId}:layer:*'
- Sid: S3
Action:
- 's3:CreateBucket'
Expand All @@ -170,13 +167,13 @@ Resources:
- 's3:DeleteBucketPolicy'
- 's3:DeleteBucket'
Effect: Allow
Resource: 'arn:aws:s3:::*'
Resource: !Sub 'arn:${AWS::Partition}:s3:::*'
- Sid: SQS
Effect: Allow
Action:
- 'sqs:CreateQueue'
- 'sqs:SetQueueAttributes'
Resource: !Sub 'arn:aws:sqs:*:${AWS::AccountId}:*'
Resource: !Sub 'arn:${AWS::Partition}:sqs:*:${AWS::AccountId}:*'
- Sid: SSM
Effect: Allow
Action:
Expand All @@ -190,18 +187,18 @@ Resources:
- 'logs:CreateLogStream'
- 'logs:PutLogEvents'
- 'logs:DescribeLogStreams'
Resource: 'arn:aws:logs:*:*:*'
Resource: !Sub 'arn:${AWS::Partition}:logs:*:*:*'
- Sid: STS
Effect: Allow
Action:
- 'sts:AssumeRole'
- 'iam:*Role*'
Resource: !Sub 'arn:aws:iam::${AWS::AccountId}:role/cdk-*'
Resource: !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-*'
- Sid: CloudFormation
Effect: Allow
Action:
- 'cloudformation:*'
Resource: !Sub 'arn:aws:cloudformation:*:${AWS::AccountId}:stack/CDKToolkit/*'
Resource: !Sub 'arn:${AWS::Partition}:cloudformation:*:${AWS::AccountId}:stack/CDKToolkit/*'
- Sid: ECR
Effect: Allow
Action:
Expand All @@ -211,14 +208,14 @@ Resources:
- 'ecr:DescribeRepositories'
- 'ecr:CreateRepository'
- 'ecr:DeleteRepository'
Resource: !Sub 'arn:aws:ecr:*:${AWS::AccountId}:repository/cdk-*'
Resource: !Sub 'arn:${AWS::Partition}:ecr:*:${AWS::AccountId}:repository/cdk-*'
- Sid: SSMTwo
Effect: Allow
Action:
- 'ssm:GetParameter'
- 'ssm:PutParameter'
- 'ssm:DeleteParameter'
Resource: !Sub 'arn:aws:ssm:*:${AWS::AccountId}:parameter/cdk-bootstrap/*'
Resource: !Sub 'arn:${AWS::Partition}:ssm:*:${AWS::AccountId}:parameter/cdk-bootstrap/*'
- Sid: CloudformationTwo
Effect: Allow
Action:
Expand All @@ -232,7 +229,7 @@ Resources:
Action:
- 's3:*'
Resource:
- !Sub 'arn:aws:s3:::cdktoolkit-stagingbucket-*'
- !Sub 'arn:${AWS::Partition}:s3:::cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}*'
- Sid: Pipelines
Effect: Allow
Action:
Expand Down Expand Up @@ -261,15 +258,15 @@ Resources:
- 's3:ListBucket'
- 's3:GetBucketPolicy'
Resource:
- 'arn:aws:s3::*:codepipeline-*'
- !Sub 'arn:${AWS::Partition}:s3::*:codepipeline-*'
- Sid: CodeStarNotificationsReadOnly
Effect: Allow
Action:
- 'codestar-notifications:DescribeNotificationRule'
Resource: '*'
Condition:
'StringLike':
'codestar-notifications:NotificationsForResource': 'arn:aws:codepipeline:*'
'codestar-notifications:NotificationsForResource': !Sub 'arn:${AWS::Partition}:codepipeline:*'
- Sid: Eventrules
Effect: Allow
Action:
Expand Down
2 changes: 2 additions & 0 deletions deploy/stacks/backend_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def __init__(
image_tag=None,
pipeline_bucket=None,
vpc_id=None,
vpc_restricted_nacls=False,
vpc_endpoints_sg=None,
internet_facing=True,
custom_domain=None,
Expand Down Expand Up @@ -64,6 +65,7 @@ def __init__(
resource_prefix=resource_prefix,
vpc_endpoints_sg=vpc_endpoints_sg,
vpc_id=vpc_id,
restricted_nacl=vpc_restricted_nacls,
**kwargs,
)
vpc = self.vpc_stack.vpc
Expand Down
2 changes: 2 additions & 0 deletions deploy/stacks/backend_stage.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ def __init__(
tooling_account_id=None,
pipeline_bucket=None,
vpc_id=None,
vpc_restricted_nacls=False,
vpc_endpoints_sg=None,
internet_facing=True,
custom_domain=None,
Expand Down Expand Up @@ -45,6 +46,7 @@ def __init__(
pipeline_bucket=pipeline_bucket,
image_tag=commit_id,
vpc_id=vpc_id,
vpc_restricted_nacls=vpc_restricted_nacls,
vpc_endpoints_sg=vpc_endpoints_sg,
internet_facing=internet_facing,
custom_domain=custom_domain,
Expand Down
Loading

0 comments on commit 90bc8e8

Please sign in to comment.