diff --git a/backend/dataall/base/cdkproxy/requirements.txt b/backend/dataall/base/cdkproxy/requirements.txt index 74eb975e2..d50cc92f3 100644 --- a/backend/dataall/base/cdkproxy/requirements.txt +++ b/backend/dataall/base/cdkproxy/requirements.txt @@ -15,6 +15,5 @@ jinja2==3.1.2 werkzeug==3.0.1 constructs>=10.0.0,<11.0.0 git-remote-codecommit==1.16 -aws-ddk==0.5.1 -aws-ddk-core==0.5.1 +aws-ddk-core==1.3.0 deprecated==1.2.13 \ No newline at end of file diff --git a/backend/dataall/modules/datapipelines/__init__.py b/backend/dataall/modules/datapipelines/__init__.py index a0865ea00..2775a701f 100644 --- a/backend/dataall/modules/datapipelines/__init__.py +++ b/backend/dataall/modules/datapipelines/__init__.py @@ -34,6 +34,7 @@ def __init__(self): FeedRegistry.register(FeedDefinition("DataPipeline", DataPipeline)) TargetType("pipeline", GET_PIPELINE, UPDATE_PIPELINE) + TargetType("cdkpipeline", GET_PIPELINE, UPDATE_PIPELINE) EnvironmentResourceManager.register(DatapipelinesRepository()) diff --git a/backend/dataall/modules/datapipelines/blueprints/cookiecutter_config.yaml b/backend/dataall/modules/datapipelines/blueprints/cookiecutter_config.yaml deleted file mode 100644 index 7b0c8b2e6..000000000 --- a/backend/dataall/modules/datapipelines/blueprints/cookiecutter_config.yaml +++ /dev/null @@ -1,2 +0,0 @@ -cookiecutters_dir: "/dataall" -replay_dir: "/dataall" \ No newline at end of file diff --git a/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/app_multiaccount.py b/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/app_multiaccount.py deleted file mode 100644 index c22b92f2f..000000000 --- a/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/app_multiaccount.py +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env python3 -import os -import aws_cdk as cdk -from aws_cdk import Tags -from ddk_app.ddk_app_stack import DdkApplicationStack - -from utils.config import MultiaccountConfig - -stage_id = os.environ.get('STAGE', None) -pipeline_name = os.environ.get('PIPELINE_NAME') - -app = cdk.App() - -config = MultiaccountConfig() -environment_id = config.get_stage_env_id(stage_id) -env_vars = config.get_env_var_config(environment_id)['env_vars'] - -Tags.of(app).add("dataall", "true") -Tags.of(app).add("Target", pipeline_name) -DdkApplicationStack(app, - f"{pipeline_name}-DdkApplicationStack", - environment_id, - env_vars) - -app.synth() diff --git a/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/ddk_app/ddk_app_stack_multiaccount.py b/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/ddk_app/ddk_app_stack_multiaccount.py deleted file mode 100644 index bdd275647..000000000 --- a/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/ddk_app/ddk_app_stack_multiaccount.py +++ /dev/null @@ -1,28 +0,0 @@ -from typing import Any, Optional - -from aws_cdk import Environment, Tags -from aws_ddk_core.base import BaseStack -from aws_ddk_core.config import Config -from constructs import Construct - - -class DdkApplicationStack(BaseStack): - - - def __init__(self, scope: Construct, - id: str, - environment_id: str, - env_vars: dict, - env: Optional[Environment] = None, - **kwargs: Any) -> None: - self._config = Config() - super().__init__( - scope, - id, - environment_id=environment_id, - env=env or self._config.get_env(environment_id), - **kwargs) - - Tags.of(self).add("Team", str(env_vars['Team'])) - - # The code that defines your stack goes here: diff --git 
a/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/utils/config.py b/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/utils/config.py deleted file mode 100644 index 2a11ff176..000000000 --- a/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/utils/config.py +++ /dev/null @@ -1,50 +0,0 @@ -from aws_ddk_core.config.config import Config -from typing import Dict - - -class MultiaccountConfig(Config): - def __int__(self, *args, **kwargs) -> None: - super.__init__(*args, **kwargs) - - def get_stage_env_id( - self, - stage_id: str, - ) -> str: - """ - Get environment id representing AWS account and region with specified stage_id. - Parameters - ---------- - stage_id : str - Identifier of the stage - Returns - ------- - environment_id : str - """ - environments = self._config_strategy.get_config(key="environments") - - for env_id, env in environments.items(): - if env.get('stage', {}) == stage_id: - environment_id = env_id - break - else: - raise ValueError(f'Environment id with stage_id {stage_id} was not found!') - - return environment_id - - def get_env_var_config( - self, - environment_id: str, - ) -> dict: - """ - Get environment specific variable from config for given environment id. - Parameters - ---------- - environment_id : str - Identifier of the environment - Returns - ------- - config : Dict[str, Any] - Dictionary that contains environmental variables for the given environment - """ - env_config = self.get_env_config(environment_id) - return env_config \ No newline at end of file diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/.gitignore b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/.gitignore new file mode 100644 index 000000000..37833f8be --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/.gitignore @@ -0,0 +1,10 @@ +*.swp +package-lock.json +__pycache__ +.pytest_cache +.venv +*.egg-info + +# CDK asset staging directory +.cdk.staging +cdk.out diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/README.md b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/README.md new file mode 100644 index 000000000..c53f0b50c --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/README.md @@ -0,0 +1,58 @@ + +# Welcome to your CDK Python project! + +This is a blank project for CDK development with Python. + +The `cdk.json` file tells the CDK Toolkit how to execute your app. + +This project is set up like a standard Python project. The initialization +process also creates a virtualenv within this project, stored under the `.venv` +directory. To create the virtualenv it assumes that there is a `python3` +(or `python` for Windows) executable in your path with access to the `venv` +package. If for any reason the automatic creation of the virtualenv fails, +you can create the virtualenv manually. + +To manually create a virtualenv on MacOS and Linux: + +``` +$ python3 -m venv .venv +``` + +After the init process completes and the virtualenv is created, you can use the following +step to activate your virtualenv. + +``` +$ source .venv/bin/activate +``` + +If you are a Windows platform, you would activate the virtualenv like this: + +``` +% .venv\Scripts\activate.bat +``` + +Once the virtualenv is activated, you can install the required dependencies. 
+ +``` +$ pip install -r requirements.txt +``` + +At this point you can now synthesize the CloudFormation template for this code. + +``` +$ cdk synth +``` + +To add additional dependencies, for example other CDK libraries, just add +them to your `setup.py` file and rerun the `pip install -r requirements.txt` +command. + +## Useful commands + + * `cdk ls` list all stacks in the app + * `cdk synth` emits the synthesized CloudFormation template + * `cdk deploy` deploy this stack to your default AWS account/region + * `cdk diff` compare deployed stack with current state + * `cdk docs` open CDK documentation + +Enjoy! diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/app.py b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/app.py new file mode 100644 index 000000000..21ce3aea2 --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/app.py @@ -0,0 +1,17 @@ +#!/usr/bin/env python3 +import os +import aws_cdk as cdk +from dataall_pipeline_app.dataall_pipeline_app_stack import DataallPipelineStack + +environment_id = os.environ.get('STAGE', "dev") +pipeline_name = os.environ.get('PIPELINE_NAME', "dataall-pipeline-stack") + +app = cdk.App() + +DataallPipelineStack( + app, + f"{pipeline_name}-{environment_id}-DataallPipelineStack", + environment_id +) + +app.synth() diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/cdk.json b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/cdk.json new file mode 100644 index 000000000..33ab988ba --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/cdk.json @@ -0,0 +1,61 @@ +{ + "app": "python3 app.py", + "watch": { + "include": [ + "**" + ], + "exclude": [ + "README.md", + "cdk*.json", + "requirements*.txt", + "source.bat", + "**/__init__.py", + "**/__pycache__", + "tests" + ] + }, + "context": { + "@aws-cdk/aws-lambda:recognizeLayerVersion": true, + "@aws-cdk/core:checkSecretUsage": true, + "@aws-cdk/core:target-partitions": [ + "aws", + "aws-cn" + ], + "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, + "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, + "@aws-cdk/aws-ecs:arnFormatIncludesClusterName": true, + "@aws-cdk/aws-iam:minimizePolicies": true, + "@aws-cdk/core:validateSnapshotRemovalPolicy": true, + "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, + "@aws-cdk/aws-s3:createDefaultLoggingPolicy": true, + "@aws-cdk/aws-sns-subscriptions:restrictSqsDescryption": true, + "@aws-cdk/aws-apigateway:disableCloudWatchRole": true, + "@aws-cdk/core:enablePartitionLiterals": true, + "@aws-cdk/aws-events:eventsTargetQueueSameAccount": true, + "@aws-cdk/aws-iam:standardizedServicePrincipals": true, + "@aws-cdk/aws-ecs:disableExplicitDeploymentControllerForCircuitBreaker": true, + "@aws-cdk/aws-iam:importedRoleStackSafeDefaultPolicyName": true, + "@aws-cdk/aws-s3:serverAccessLogsUseBucketPolicy": true, + "@aws-cdk/aws-route53-patters:useCertificate": true, + "@aws-cdk/customresources:installLatestAwsSdkDefault": false, + "@aws-cdk/aws-rds:databaseProxyUniqueResourceName": true, + "@aws-cdk/aws-codedeploy:removeAlarmsFromDeploymentGroup": true, + "@aws-cdk/aws-apigateway:authorizerChangeDeploymentLogicalId": true, + "@aws-cdk/aws-ec2:launchTemplateDefaultUserData": true, + "@aws-cdk/aws-secretsmanager:useAttachedSecretResourcePolicyForSecretTargetAttachments": true, + "@aws-cdk/aws-redshift:columnId": 
true, + "@aws-cdk/aws-stepfunctions-tasks:enableEmrServicePolicyV2": true, + "@aws-cdk/aws-ec2:restrictDefaultSecurityGroup": true, + "@aws-cdk/aws-apigateway:requestValidatorUniqueId": true, + "@aws-cdk/aws-kms:aliasNameRef": true, + "@aws-cdk/aws-autoscaling:generateLaunchTemplateInsteadOfLaunchConfig": true, + "@aws-cdk/core:includePrefixInUniqueNameGeneration": true, + "@aws-cdk/aws-efs:denyAnonymousAccess": true, + "@aws-cdk/aws-opensearchservice:enableOpensearchMultiAzWithStandby": true, + "@aws-cdk/aws-lambda-nodejs:useLatestRuntimeVersion": true, + "@aws-cdk/aws-efs:mountTargetOrderInsensitiveLogicalId": true, + "@aws-cdk/aws-rds:auroraClusterChangeScopeOfInstanceParameterGroupWithEachParameters": true, + "@aws-cdk/aws-appsync:useArnForSourceApiAssociationIdentifier": true, + "@aws-cdk/aws-rds:preventRenderingDeprecatedCredentials": true + } +} diff --git a/backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/ddk_app/__init__.py b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/dataall_pipeline_app/__init__.py similarity index 100% rename from backend/dataall/modules/datapipelines/blueprints/data_pipeline_blueprint/ddk_app/__init__.py rename to backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/dataall_pipeline_app/__init__.py diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/dataall_pipeline_app/dataall_pipeline_app_stack.py b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/dataall_pipeline_app/dataall_pipeline_app_stack.py new file mode 100644 index 000000000..19340bd44 --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/dataall_pipeline_app/dataall_pipeline_app_stack.py @@ -0,0 +1,26 @@ +from typing import Any, Optional + +from aws_cdk import Environment, Tags +from aws_ddk_core import BaseStack, Configurator +from constructs import Construct + + +class DataallPipelineStack(BaseStack): + def __init__( + self, + scope: Construct, + id: str, + environment_id: str, + env: Optional[Environment] = None, + **kwargs: Any + ) -> None: + super().__init__( + scope, + id, + environment_id=environment_id, + env=env or Configurator.get_environment(config_path="./ddk.json", environment_id=environment_id), + **kwargs + ) + Configurator(scope=self, config="./ddk.json", environment_id=environment_id) + + # The code that defines your stack goes here: diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements-dev.txt b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements-dev.txt new file mode 100644 index 000000000..927094516 --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements-dev.txt @@ -0,0 +1 @@ +pytest==6.2.5 diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt new file mode 100644 index 000000000..4067e0fd9 --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/requirements.txt @@ -0,0 +1,3 @@ +aws-cdk-lib==2.103.1 +constructs>=10.0.0,<11.0.0 +aws-ddk-core==1.3.0 diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/source.bat b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/source.bat new file mode 100644 index 
000000000..9e1a83442 --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/source.bat @@ -0,0 +1,13 @@ +@echo off + +rem The sole purpose of this script is to make the command +rem +rem source .venv/bin/activate +rem +rem (which activates a Python virtualenv on Linux or Mac OS X) work on Windows. +rem On Windows, this command just runs this batch file (the argument is ignored). +rem +rem Now we don't need to document a Windows command for activating a virtualenv. + +echo Executing .venv\Scripts\activate.bat for you +.venv\Scripts\activate.bat diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/tests/__init__.py b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/tests/unit/__init__.py b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/tests/unit/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/tests/unit/test_dataall_pipeline_app_stack.py b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/tests/unit/test_dataall_pipeline_app_stack.py new file mode 100644 index 000000000..d74726b32 --- /dev/null +++ b/backend/dataall/modules/datapipelines/cdk/blueprints/data_pipeline_blueprint/tests/unit/test_dataall_pipeline_app_stack.py @@ -0,0 +1,15 @@ +import aws_cdk as core +import aws_cdk.assertions as assertions + +from data_pipeline_blueprint.dataall_pipeline_app.dataall_pipeline_app_stack import DataallPipelineStack + +# example tests. To run these tests, uncomment this file along with the example +# resource in data_pipeline_blueprint/data_pipeline_blueprint_stack.py +def test_sqs_queue_created(): + app = core.App() + stack = DataallPipelineStack(app, "dataall-pipeline-stack", "test") + template = assertions.Template.from_stack(stack) + +# template.has_resource_properties("AWS::SQS::Queue", { +# "VisibilityTimeout": 300 +# }) diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_cli_wrapper_extension.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_cli_wrapper_extension.py index 082fc1dba..d8ef76423 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_cli_wrapper_extension.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_cli_wrapper_extension.py @@ -16,17 +16,17 @@ def __init__(self): def extend_deployment(self, stack, session, env): cdkpipeline = CDKPipelineStack(stack.targetUri) - venv_name = cdkpipeline.venv_name if cdkpipeline.venv_name else None + is_create = cdkpipeline.is_create if cdkpipeline.is_create else None self.pipeline = DatapipelinesRepository.get_pipeline_by_uri(session, stack.targetUri) - path = f'/dataall/modules/datapipelines/cdk/{self.pipeline.repo}/' + path = f'{cdkpipeline.code_dir_path}/{self.pipeline.repo}/' app_path = './app.py' - if not venv_name: + if not is_create: logger.info('Successfully Updated CDK Pipeline') meta = describe_stack(stack) stack.stackid = meta['StackId'] stack.status = meta['StackStatus'] update_stack_output(session, stack) - return True, path + return True, path, app_path aws = SessionHelper.remote_session(stack.accountid) creds = aws.get_credentials() @@ -45,4 +45,4 @@ def extend_deployment(self, stack, session, env): return False, path, app_path def 
post_deployment(self): - CDKPipelineStack.clean_up_repo(path=f'./{self.pipeline.repo}') + CDKPipelineStack.clean_up_repo(pipeline_dir=self.pipeline.repo) diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py index d82f7c2ca..75167db66 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_cdk_pipeline.py @@ -47,30 +47,35 @@ def __init__(self, target_uri): self.env, aws = CDKPipelineStack._set_env_vars(self.pipeline_environment) - self.code_dir_path = os.path.dirname(os.path.abspath(__file__)) - + self.code_dir_path = os.path.realpath( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "blueprints" + ) + ) + self.is_create = True try: codecommit_client = aws.client('codecommit', region_name=self.pipeline.region) repository = CDKPipelineStack._check_repository(codecommit_client, self.pipeline.repo) if repository: - self.venv_name = None + self.is_create = False self.code_dir_path = os.path.realpath( - os.path.abspath( - os.path.join( - __file__, "..", "..", "blueprints", "data_pipeline_blueprint" - ) + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "blueprints", + "data_pipeline_blueprint" ) ) - CDKPipelineStack.write_ddk_json_multienvironment(path=self.code_dir_path, output_file="ddk.json", pipeline_environment=self.pipeline_environment, development_environments=self.development_environments) - CDKPipelineStack.write_ddk_app_multienvironment(path=self.code_dir_path, output_file="app.py", pipeline=self.pipeline, development_environments=self.development_environments) + CDKPipelineStack.write_ddk_json_multienvironment(path=os.path.join(self.code_dir_path, self.pipeline.repo), output_file="ddk.json", pipeline_environment=self.pipeline_environment, development_environments=self.development_environments, pipeline_name=self.pipeline.name) + CDKPipelineStack.write_ddk_app_multienvironment(path=os.path.join(self.code_dir_path, self.pipeline.repo), output_file="app.py", pipeline=self.pipeline, development_environments=self.development_environments) logger.info(f"Pipeline Repo {self.pipeline.repo} Exists...Handling Update") update_cmds = [ f'REPO_NAME={self.pipeline.repo}', 'COMMITID=$(aws codecommit get-branch --repository-name ${REPO_NAME} --branch-name main --query branch.commitId --output text)', - 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://ddk.json --file-path ddk.json --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', + 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://${REPO_NAME}/ddk.json --file-path ddk.json --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', 'COMMITID=$(aws codecommit get-branch --repository-name ${REPO_NAME} --branch-name main --query branch.commitId --output text)', - 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://app.py --file-path app.py --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', + 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://${REPO_NAME}/app.py --file-path app.py --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', ] CommandSanitizer(args=[self.pipeline.repo]) @@ -88,18 +93,19 @@ def __init__(self, target_uri): else: raise Exception except Exception as e: - 
self.venv_name = self.initialize_repo() + self.initialize_repo() CDKPipelineStack.write_ddk_app_multienvironment(path=os.path.join(self.code_dir_path, self.pipeline.repo), output_file="app.py", pipeline=self.pipeline, development_environments=self.development_environments) - CDKPipelineStack.write_ddk_json_multienvironment(path=os.path.join(self.code_dir_path, self.pipeline.repo), output_file="ddk.json", pipeline_environment=self.pipeline_environment, development_environments=self.development_environments) + CDKPipelineStack.write_ddk_json_multienvironment(path=os.path.join(self.code_dir_path, self.pipeline.repo), output_file="ddk.json", pipeline_environment=self.pipeline_environment, development_environments=self.development_environments, pipeline_name=self.pipeline.name) self.git_push_repo() def initialize_repo(self): - venv_name = ".venv" cmd_init = [ - f"ddk init {self.pipeline.repo} --generate-only", + f"mkdir {self.pipeline.repo}", + f"cp -R data_pipeline_blueprint/* {self.pipeline.repo}/", f"cd {self.pipeline.repo}", "git init --initial-branch main", - f"ddk create-repository {self.pipeline.repo} -t application dataall -t team {self.pipeline.SamlGroupName}" + f"REPO_URL=$(aws codecommit create-repository --repository-name {self.pipeline.repo} --tags application=dataall,team={self.pipeline.SamlGroupName} --query 'repositoryMetadata.cloneUrlHttp' --output text)", + "git remote add origin ${REPO_URL}", ] logger.info(f"Running Commands: {'; '.join(cmd_init)}") @@ -120,23 +126,25 @@ def initialize_repo(self): if process.returncode == 0: logger.info("Successfully Initialized New CDK/DDK App") - return venv_name - @staticmethod - def write_ddk_json_multienvironment(path, output_file, pipeline_environment, development_environments): + def write_ddk_json_multienvironment(path, output_file, pipeline_environment, development_environments, pipeline_name): json_envs = "" for env in development_environments: json_env = f""", "{env.stage}": {{ "account": "{env.AwsAccountId}", "region": "{env.region}", - "resources": {{ - "ddk-bucket": {{"versioned": false, "removal_policy": "destroy"}} + "tags": {{ + "Team": "{env.samlGroupName}" }} }}""" json_envs = json_envs + json_env json = f"""{{ + "tags": {{ + "dataall": "true", + "Target": "{pipeline_name}" + }}, "environments": {{ "cicd": {{ "account": "{pipeline_environment.AwsAccountId}", @@ -144,7 +152,7 @@ def write_ddk_json_multienvironment(path, output_file, pipeline_environment, dev }}{json_envs} }} }}""" - + os.makedirs(path, exist_ok=True) with open(f'{path}/{output_file}', 'w') as text_file: print(json, file=text_file) @@ -154,9 +162,8 @@ def write_ddk_app_multienvironment(path, output_file, pipeline, development_envi # !/usr/bin/env python3 import aws_cdk as cdk -from aws_ddk_core.cicd import CICDPipelineStack -from ddk_app.ddk_app_stack import DdkApplicationStack -from aws_ddk_core.config import Config +import aws_ddk_core as ddk +from dataall_pipeline_app.dataall_pipeline_app_stack import DataallPipelineStack app = cdk.App() @@ -168,24 +175,27 @@ def __init__( **kwargs, ) -> None: super().__init__(scope, f"dataall-{{environment_id.title()}}", **kwargs) - DdkApplicationStack(self, "DataPipeline-{pipeline.label}-{pipeline.DataPipelineUri}", environment_id) + DataallPipelineStack(self, "{pipeline.name}-DataallPipelineStack", environment_id) id = f"dataall-cdkpipeline-{pipeline.DataPipelineUri}" -config = Config() -( - CICDPipelineStack( +cicd_pipeline = ( + ddk.CICDPipelineStack( app, id=id, - environment_id="cicd", - 
pipeline_name="{pipeline.label}", + pipeline_name="{pipeline.name}", + description="Cloud formation stack of PIPELINE: {pipeline.label}; URI: {pipeline.DataPipelineUri}; DESCRIPTION: {pipeline.description}", + cdk_language="python", + env=ddk.Configurator.get_environment( + config_path="./ddk.json", environment_id="cicd" + ), ) .add_source_action(repository_name="{pipeline.repo}") .add_synth_action() - .build()""" + .build_pipeline()""" stages = "" for env in sorted(development_environments, key=lambda env: env.order): - stage = f""".add_stage("{env.stage}", ApplicationStage(app, "{env.stage}", env=config.get_env("{env.stage}")))""" + stage = f""".add_stage(stage_id="{env.stage}", stage=ApplicationStage(app, "{env.stage}", env=ddk.Configurator.get_environment(config_path="./ddk.json", environment_id="{env.stage}")))""" stages = stages + stage footer = """ .synth() @@ -194,7 +204,7 @@ def __init__( app.synth() """ app = header + stages + footer - + os.makedirs(path, exist_ok=True) with open(f'{path}/{output_file}', 'w') as text_file: print(app, file=text_file) @@ -226,10 +236,16 @@ def git_push_repo(self): logger.info("Successfully Pushed DDK App Code") @staticmethod - def clean_up_repo(path): - if path: - cmd = ['rm', '-rf', f"{path}"] - cwd = os.path.dirname(os.path.abspath(__file__)) + def clean_up_repo(pipeline_dir): + if pipeline_dir: + code_dir_path = os.path.realpath( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "blueprints" + ) + ) + + cmd = ['rm', '-rf', f"./{pipeline_dir}"] logger.info(f"Running command : \n {' '.join(cmd)}") process = subprocess.run( @@ -238,17 +254,17 @@ def clean_up_repo(path): shell=False, encoding='utf-8', capture_output=True, - cwd=cwd + cwd=code_dir_path ) if process.returncode == 0: - print(f"Successfully cleaned cloned repo: {path}. {str(process.stdout)}") + print(f"Successfully cleaned cloned repo: {pipeline_dir}. 
{str(process.stdout)}") else: logger.error( - f'Failed clean cloned repo: {path} due to {str(process.stderr)}' + f'Failed clean cloned repo: {pipeline_dir} due to {str(process.stderr)}' ) else: - logger.info(f"Info:Path {path} not found") + logger.info(f"Info:Path {pipeline_dir} not found") return @staticmethod @@ -278,7 +294,6 @@ def _set_env_vars(pipeline_environment): 'PYTHONPATH': python_path, 'PATH': python_path, 'envname': os.environ.get('envname', 'local'), - 'COOKIECUTTER_CONFIG': "/dataall/modules/datapipelines/blueprints/cookiecutter_config.yaml", } if env_creds: env.update( diff --git a/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py b/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py index db5b97484..ea8d34f3e 100644 --- a/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py +++ b/backend/dataall/modules/datapipelines/cdk/datapipelines_pipeline.py @@ -167,10 +167,9 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): # Create CodeCommit repository and mirror blueprint code code_dir_path = os.path.realpath( - os.path.abspath( - os.path.join( - __file__, "..", "..", "blueprints", "data_pipeline_blueprint" - ) + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "blueprints" ) ) logger.info(f"code directory path = {code_dir_path}") @@ -178,13 +177,13 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): try: repository = PipelineStack._check_repository(aws, pipeline_environment.region, pipeline.repo) if repository: - PipelineStack.write_ddk_json_multienvironment(path=code_dir_path, output_file="ddk.json", pipeline_environment=pipeline_environment, development_environments=development_environments) + PipelineStack.write_ddk_json_multienvironment(path=os.path.join(code_dir_path, pipeline.repo), output_file="ddk.json", pipeline_environment=pipeline_environment, development_environments=development_environments, pipeline_name=pipeline.name) logger.info(f"Pipeline Repo {pipeline.repo} Exists...Handling Update") update_cmds = [ f'REPO_NAME={pipeline.repo}', 'COMMITID=$(aws codecommit get-branch --repository-name ${REPO_NAME} --branch-name main --query branch.commitId --output text)', - 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://ddk.json --file-path ddk.json --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', + 'aws codecommit put-file --repository-name ${REPO_NAME} --branch-name main --file-content file://${REPO_NAME}/ddk.json --file-path ddk.json --parent-commit-id ${COMMITID} --cli-binary-format raw-in-base64-out', ] CommandSanitizer(args=[pipeline.repo]) @@ -207,7 +206,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): PipelineStack.write_deploy_buildspec(path=code_dir_path, output_file=f"{pipeline.repo}/deploy_buildspec.yaml") - PipelineStack.write_ddk_json_multienvironment(path=code_dir_path, output_file=f"{pipeline.repo}/ddk.json", pipeline_environment=pipeline_environment, development_environments=development_environments) + PipelineStack.write_ddk_json_multienvironment(path=os.path.join(code_dir_path, pipeline.repo), output_file="ddk.json", pipeline_environment=pipeline_environment, development_environments=development_environments, pipeline_name=pipeline.name) logger.info(f"Pipeline Repo {pipeline.repo} Does Not Exists... 
Creating Repository") @@ -440,12 +439,11 @@ def write_deploy_buildspec(path, output_file): commands: - n 16.15.1 - npm install -g aws-cdk - - pip install aws-ddk - pip install -r requirements.txt build: commands: - aws sts get-caller-identity - - ddk deploy + - cdk deploy """ with open(f'{path}/{output_file}', 'x') as text_file: print(yaml, file=text_file) @@ -485,7 +483,7 @@ def make_codebuild_policy_statements( ] @staticmethod - def write_ddk_json_multienvironment(path, output_file, pipeline_environment, development_environments): + def write_ddk_json_multienvironment(path, output_file, pipeline_environment, development_environments, pipeline_name): json_envs = "" for env in development_environments: json_env = f""", @@ -493,14 +491,17 @@ def write_ddk_json_multienvironment(path, output_file, pipeline_environment, dev "account": "{env.AwsAccountId}", "region": "{env.region}", "stage": "{env.stage}", - "env_vars": {{ - "database": "example_database", + "tags": {{ "Team": "{env.samlGroupName}" }} }}""" json_envs = json_envs + json_env json = f"""{{ + "tags": {{ + "dataall": "true", + "Target": "{pipeline_name}" + }}, "environments": {{ "cicd": {{ "account": "{pipeline_environment.AwsAccountId}", @@ -509,21 +510,16 @@ def write_ddk_json_multienvironment(path, output_file, pipeline_environment, dev }}{json_envs} }} }}""" - + os.makedirs(path, exist_ok=True) with open(f'{path}/{output_file}', 'w') as text_file: print(json, file=text_file) @staticmethod def initialize_repo(pipeline, code_dir_path, env_vars): - venv_name = ".venv" - cmd_init = [ - f"ddk init {pipeline.repo} --generate-only", - f"cp app_multiaccount.py ./{pipeline.repo}/app.py", - f"cp ddk_app/ddk_app_stack_multiaccount.py ./{pipeline.repo}/ddk_app/ddk_app_stack.py", - f"mkdir ./{pipeline.repo}/utils", - f"cp -R utils/* ./{pipeline.repo}/utils/" + f"mkdir {pipeline.repo}", + f"cp -R data_pipeline_blueprint/* {pipeline.repo}/" ] logger.info(f"Running Commands: {'; '.join(cmd_init)}") @@ -554,8 +550,7 @@ def _set_env_vars(pipeline_environment): 'AWS_REGION': pipeline_environment.region, 'AWS_DEFAULT_REGION': pipeline_environment.region, 'CURRENT_AWS_ACCOUNT': pipeline_environment.AwsAccountId, - 'envname': os.environ.get('envname', 'local'), - 'COOKIECUTTER_CONFIG': "/dataall/modules/datapipelines/blueprints/cookiecutter_config.yaml", + 'envname': os.environ.get('envname', 'local') } if env_creds: env.update( diff --git a/backend/dataall/modules/datapipelines/cdk/pivot_role_datapipelines_policy.py b/backend/dataall/modules/datapipelines/cdk/pivot_role_datapipelines_policy.py index 0bed7e176..605568254 100644 --- a/backend/dataall/modules/datapipelines/cdk/pivot_role_datapipelines_policy.py +++ b/backend/dataall/modules/datapipelines/cdk/pivot_role_datapipelines_policy.py @@ -4,7 +4,7 @@ class PipelinesPivotRole(PivotRoleStatementSet): """ - Class including all permissions needed by the pivot role to work with AWS CodeCommit and STS assume for DDK pipelines + Class including all permissions needed by the pivot role to work with AWS CodeCommit and STS assume for CDK Pipelines It allows pivot role to: - .... 
""" @@ -37,7 +37,7 @@ def get_statements(self): effect=iam.Effect.ALLOW, actions=['sts:AssumeRole'], resources=[ - f'arn:aws:iam::{self.account}:role/ddk-*', + f'arn:aws:iam::{self.account}:role/cdk-*', ], ), iam.PolicyStatement( @@ -54,11 +54,11 @@ def get_statements(self): ], ), iam.PolicyStatement( - sid='ParameterStoreDDK', + sid='ParameterStorePipelines', effect=iam.Effect.ALLOW, actions=['ssm:GetParameter'], resources=[ - f'arn:aws:ssm:*:{self.account}:parameter/ddk/*', + f'arn:aws:ssm:*:{self.account}:parameter/cdk*', ], ), ] diff --git a/deploy/cdk_exec_policy/cdkExecPolicy.yaml b/deploy/cdk_exec_policy/cdkExecPolicy.yaml index 1bf9f7207..a5e7b3af2 100644 --- a/deploy/cdk_exec_policy/cdkExecPolicy.yaml +++ b/deploy/cdk_exec_policy/cdkExecPolicy.yaml @@ -53,7 +53,7 @@ Resources: Action: - 'sts:AssumeRole' - 'iam:*Role*' - Resource: !Sub 'arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-*' + Resource: !Sub 'arn:${AWS::Partition}:iam::*:role/cdk-*' - Sid: Quicksight Effect: Allow diff --git a/deploy/pivot_role/pivotRole.yaml b/deploy/pivot_role/pivotRole.yaml index 0ef6ff277..ced8b4af9 100644 --- a/deploy/pivot_role/pivotRole.yaml +++ b/deploy/pivot_role/pivotRole.yaml @@ -430,7 +430,7 @@ Resources: Resource: - !Sub 'arn:aws:ssm:*:${AWS::AccountId}:parameter/${EnvironmentResourcePrefix}/*' - !Sub 'arn:aws:ssm:*:${AWS::AccountId}:parameter/dataall/*' - - !Sub 'arn:aws:ssm:*:${AWS::AccountId}:parameter/ddk/*' + - !Sub 'arn:aws:ssm:*:${AWS::AccountId}:parameter/cdk*' - Sid: IAMListGet Action: - 'iam:Get*' @@ -464,7 +464,7 @@ Resources: Effect: Allow Resource: - !Sub 'arn:aws:iam::${AWS::AccountId}:role/${EnvironmentResourcePrefix}*' - - !Sub 'arn:aws:iam::${AWS::AccountId}:role/ddk-*' + - !Sub 'arn:aws:iam::${AWS::AccountId}:role/cdk-*' - Sid: CodeCommit Action: - 'codecommit:GetFile' diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py index 94026ad9b..25d1775e3 100644 --- a/deploy/stacks/container.py +++ b/deploy/stacks/container.py @@ -501,7 +501,6 @@ def create_task_role(self, envname, resource_prefix, pivot_role_name): resources=[ f'arn:aws:iam::*:role/{pivot_role_name}', f'arn:aws:iam::*:role/cdk*', - 'arn:aws:iam::*:role/ddk*', f'arn:aws:iam::{self.account}:role/{resource_prefix}-{envname}-ecs-tasks-role', ], ), diff --git a/documentation/userguide/docs/pictures/pipelines/pip_cp_init2.png b/documentation/userguide/docs/pictures/pipelines/pip_cp_init2.png index b9a4f3193..00d752f2a 100644 Binary files a/documentation/userguide/docs/pictures/pipelines/pip_cp_init2.png and b/documentation/userguide/docs/pictures/pipelines/pip_cp_init2.png differ diff --git a/documentation/userguide/docs/pipelines.md b/documentation/userguide/docs/pipelines.md index 478a467fa..9e18bdcad 100644 --- a/documentation/userguide/docs/pipelines.md +++ b/documentation/userguide/docs/pipelines.md @@ -24,14 +24,26 @@ in the Research-CICD account. The actual data pipeline is deployed in 2 data acc ### Pre-requisites -As a pre-requisite, Research-DEV and Research-PROD accounts need to be bootstrapped trusting the CICD account (`-a` parameter) and setting the stage of the AWS account, the environment id, with the `e` parameter. Assuming 111111111111 = CICD account the commands are as follows: +As a pre-requisite, Research-DEV and Research-PROD accounts need to be bootstrapped using AWS CDK, trusting the CICD account (`--trust` parameter). 
Assuming 111111111111 = CICD account the commands are as follows: -- In Research-CICD (111111111111): `ddk bootstrap -e cicd` -- In Research-DEV (222222222222): `ddk bootstrap -e dev -a 111111111111` -- In Research-PROD (333333333333): `ddk bootstrap -e prod -a 111111111111` +- In Research-CICD (111111111111): `cdk bootstrap` +- In Research-DEV (222222222222): `cdk bootstrap --trust 111111111111` +- In Research-PROD (333333333333): `cdk bootstrap --trust 111111111111` In data.all we need to link the AWS accounts to the platform by creating 3 data.all Environments: Research-CICD Environment, Research-DEV Environment and Research-PROD Environment. +NOTE: In practice, the cdk bootstrap command would already be run once when linking an environment. For example, if bootstrapping an environment with the default AdministratorAccess CDK execution policy, the command run before linking a new environment would look similar to: + +``` +cdk bootstrap --trust DATA.ALL_AWS_ACCOUNT_NUMBER -c @aws-cdk/core:newStyleStackSynthesis=true --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess aws://YOUR_ENVIRONMENT_AWS_ACCOUNT_NUMBER/ENVIRONMENT_REGION +``` + +In order for the DEV and PROD accounts to also trust the CICD account without impacting the initial bootstrap requirements, the Research-DEV and Research-PROD accounts need to edit the aforementioned bootstrap command similar to the following: + +``` +cdk bootstrap --trust DATA.ALL_AWS_ACCOUNT_NUMBER --trust Research-CICD_AWS_ACCOUNT_NUMBER -c @aws-cdk/core:newStyleStackSynthesis=true --cloudformation-execution-policies arn:aws:iam::aws:policy/AdministratorAccess aws://YOUR_ENVIRONMENT_AWS_ACCOUNT_NUMBER/ENVIRONMENT_REGION +``` + ### Creating a pipeline data.all pipelines are created from the UI, under Pipelines. We need to fill the creation form with the following information: @@ -46,11 +58,6 @@ data.all pipelines are created from the UI, under Pipelines. We need to fill the Finally, we need to add **Development environments**. These are the AWS accounts and regions where the infrastructure defined in the CICD pipeline is deployed. -!!! warning "environment ID = data.all environment stage" - When creating the pipeline and adding development environments, you define the stage of the environment. The ddk bootstrap `-e` parameter needs to match the one that you define in the data.all UI. - In our example, we bootstraped with the parameters "dev" and "prod" and then we defined the stages as "dev" and "prod" correspondingly. 
- - ![create_pipeline](pictures/pipelines/pip_create_form.png#zoom#shadow) --- @@ -66,6 +73,10 @@ In the deployed repository, data.all pushes a `ddk.json` file with the details o ```json { + "tags": { + "dataall": "true", + "Target": "PIPELINE_NAME" + }, "environments": { "cicd": { "account": "111111111111", @@ -74,15 +85,15 @@ In the deployed repository, data.all pushes a `ddk.json` file with the details o "dev": { "account": "222222222222", "region": "eu-west-1", - "resources": { - "ddk-bucket": {"versioned": false, "removal_policy": "destroy"} + "tags": { + "Team": "DATAALL_GROUP" } }, "prod": { "account": "333333333333", "region": "eu-west-1", - "resources": { - "ddk-bucket": {"versioned": true, "removal_policy": "retain"} + "tags": { + "Team": "DATAALL_GROUP" } } } @@ -95,9 +106,8 @@ In addition, the `app.py` file is also written accordingly to the development en # !/usr/bin/env python3 import aws_cdk as cdk -from aws_ddk_core.cicd import CICDPipelineStack -from ddk_app.ddk_app_stack import DDKApplicationStack -from aws_ddk_core.config import Config +import aws_ddk_core as ddk +from dataall_pipeline_app.dataall_pipeline_app_stack import DataallPipelineStack app = cdk.App() @@ -111,17 +121,37 @@ class ApplicationStage(cdk.Stage): super().__init__(scope, f"dataall-{environment_id.title()}", **kwargs) DDKApplicationStack(self, "DataPipeline-PIPELINENAME-PIPELINEURI", environment_id) -config = Config() -( - CICDPipelineStack( +id = f"dataall-cdkpipeline-PIPELINEURI" +cicd_pipeline = ( + ddk.CICDPipelineStack( app, id="dataall-pipeline-PIPELINENAME-PIPELINEURI", environment_id="cicd", pipeline_name="PIPELINENAME", + cdk_language="python", + env=ddk.Configurator.get_environment( + config_path="./ddk.json", environment_id="cicd" + ), ) .add_source_action(repository_name="dataall-PIPELINENAME-PIPELINEURI") .add_synth_action() - .build().add_stage("dev", ApplicationStage(app, "dev", env=config.get_env("dev"))).add_stage("prod", ApplicationStage(app, "prod", env=config.get_env("prod"))) + .build_pipeline() + .add_stage( + stage_id="dev", + stage=ApplicationStage( + app, + "dev", + env=ddk.Configurator.get_environment(config_path="./ddk.json", environment_id="dev") + ) + ) + .add_stage( + stage_id="prod", + stage=ApplicationStage( + app, + "prod", + env=ddk.Configurator.get_environment(config_path="./ddk.json", environment_id="prod") + ) + ) .synth() ) @@ -145,16 +175,13 @@ use CodePipeline CICD Strategy which leverages the [aws-codepipeline](https://do #### CodeCommit repository and CICD deployment When a pipeline is created, a CloudFormation stack is deployed in the CICD environment AWS account. It contains: -- an AWS CodeCommit repository with the code of an AWS DDK application (by running `ddk init`) with some modifications to allow cross-account deployments. +- an AWS CodeCommit repository with the code of an AWS CDK application (by running `cdk init`) with some modifications to allow cross-account deployments. - CICD CodePipeline(s) pipeline that deploy(s) the application The repository structure will look similar to: ![created_pipeline](pictures/pipelines/pip_cp_init2.png#zoom#shadow) -The added `Multiaccount` configuration class allows us to define the deployment environment based on the `ddk.json`. -Go ahead and customize this configuration further, for example you can set additional `env_vars`. 
- Trunk-based pipelines append one stage after the other and read from the main branch of our repository: ![created_pipeline](pictures/pipelines/pip_cp_trunk.png#zoom#shadow) @@ -163,7 +190,7 @@ Gitflow strategy uses multiple CodePipeline pipelines for each of the stages. Fo ![created_pipeline](pictures/pipelines/pip_cp_gitflow.png#zoom#shadow) -The `dev` pipeline reads from the `dev` branch of the repository: +Using the Gitflow strategy, the `dev` pipeline reads from the `dev` branch of the repository: ![created_pipeline](pictures/pipelines/pip_cp_gitflow2.png#zoom#shadow)