From 261016b92167157d3b413326b1dad891156b518b Mon Sep 17 00:00:00 2001 From: leunguu Date: Wed, 31 Aug 2022 17:27:16 +0800 Subject: [PATCH 01/19] Create folder sharing with S3 access point --- .../api/Objects/ShareObject/resolvers.py | 2 +- backend/dataall/aws/handlers/iam.py | 53 +++ backend/dataall/aws/handlers/kms.py | 71 ++++ backend/dataall/aws/handlers/s3.py | 146 +++++++- backend/dataall/cdkproxy/stacks/dataset.py | 40 +- .../cdkproxy/stacks/policies/data_policy.py | 16 + backend/dataall/db/api/dataset_location.py | 29 ++ backend/dataall/db/api/share_object.py | 4 +- backend/dataall/tasks/share_manager.py | 349 +++++++++++++++++- backend/dataall/utils/alarm_service.py | 27 ++ deploy/pivot_role/pivotRole.yaml | 19 + 11 files changed, 718 insertions(+), 38 deletions(-) create mode 100644 backend/dataall/aws/handlers/iam.py create mode 100644 backend/dataall/aws/handlers/kms.py diff --git a/backend/dataall/api/Objects/ShareObject/resolvers.py b/backend/dataall/api/Objects/ShareObject/resolvers.py index ce9223240..51726b1ff 100644 --- a/backend/dataall/api/Objects/ShareObject/resolvers.py +++ b/backend/dataall/api/Objects/ShareObject/resolvers.py @@ -84,7 +84,7 @@ def approve_share_object(context: Context, source, shareUri: str = None): session.add(approve_share_task) # call cdk to update bucket policy of the dataset for folder shares - stack_helper.deploy_stack(context, share.datasetUri) + # stack_helper.deploy_stack(context, share.datasetUri) Worker.queue(engine=context.engine, task_ids=[approve_share_task.taskUri]) diff --git a/backend/dataall/aws/handlers/iam.py b/backend/dataall/aws/handlers/iam.py new file mode 100644 index 000000000..414e50432 --- /dev/null +++ b/backend/dataall/aws/handlers/iam.py @@ -0,0 +1,53 @@ +import logging + +from .sts import SessionHelper + + +log = logging.getLogger(__name__) + + +class IAM: + @staticmethod + def client(account_id: str): + session = SessionHelper.remote_session(account_id) + return session.client('iam') + + 
@staticmethod + def update_role_policy( + account_id: str, + role_name: str, + policy_name: str, + policy: str, + ): + try: + iamcli = IAM.client(account_id) + iamcli.put_role_policy( + RoleName=role_name, + PolicyName=policy_name, + PolicyDocument=policy, + ) + except Exception as e: + log.error( + f'Failed to add S3 bucket access to target role {account_id}/{role_name} : {e}' + ) + raise e + + @staticmethod + def get_role_policy( + account_id: str, + role_name: str, + policy_name: str, + ): + try: + iamcli = IAM.client(account_id) + response = iamcli.get_role_policy( + RoleName=role_name, + PolicyName=policy_name, + ) + except Exception as e: + log.error( + f'Failed to get policy {policy_name} of role {role_name} : {e}' + ) + return None + else: + return response["PolicyDocument"] diff --git a/backend/dataall/aws/handlers/kms.py b/backend/dataall/aws/handlers/kms.py new file mode 100644 index 000000000..938fd5b01 --- /dev/null +++ b/backend/dataall/aws/handlers/kms.py @@ -0,0 +1,71 @@ +import logging + +from .sts import SessionHelper + +log = logging.getLogger(__name__) + + +class KMS: + + @staticmethod + def client(account_id: str): + session = SessionHelper.remote_session(accountid=account_id) + return session.client('kms') + + @staticmethod + def put_key_policy( + account_id: str, + key_id: str, + policy_name: str, + policy: str, + ): + try: + kms_client = KMS.client(account_id) + kms_client.put_key_policy( + KeyId=key_id, + PolicyName=policy_name, + Policy=policy, + ) + except Exception as e: + log.error( + f'Failed to attach policy to KMS key {key_id} on {account_id} : {e} ' + ) + raise e + + @staticmethod + def get_key_policy( + account_id: str, + key_id: str, + policy_name: str, + ): + try: + kms_client = KMS.client(account_id) + response = kms_client.get_key_policy( + KeyId=key_id, + PolicyName=policy_name, + ) + except Exception as e: + log.error( + f'Failed to get kms key policy of key {key_id} : {e}' + ) + return None + else: + return 
response['Policy'] + + @staticmethod + def get_key_id( + account_id: str, + key_alias: str, + ): + try: + kms_client = KMS.client(account_id) + response = kms_client.describe_key( + KeyId=key_alias, + ) + except Exception as e: + log.error( + f'Failed to get kms key id of {key_alias} : {e}' + ) + return None + else: + return response['KeyMetadata']['KeyId'] diff --git a/backend/dataall/aws/handlers/s3.py b/backend/dataall/aws/handlers/s3.py index 94e5dd29b..3b5c31b77 100755 --- a/backend/dataall/aws/handlers/s3.py +++ b/backend/dataall/aws/handlers/s3.py @@ -19,12 +19,16 @@ def create_dataset_location(engine, task: models.Task): S3.create_bucket_prefix(location) return location + @staticmethod + def client(account_id: str, client_type: str): + session = SessionHelper.remote_session(accountid=account_id) + return session.client(client_type) + @staticmethod def create_bucket_prefix(location): try: accountid = location.AWSAccountId - aws_session = SessionHelper.remote_session(accountid=accountid) - s3cli = aws_session.client('s3') + s3cli = S3.client(account_id=accountid, client_type='s3') response = s3cli.put_object( Bucket=location.S3BucketName, Body='', Key=location.S3Prefix + '/' ) @@ -39,3 +43,141 @@ def create_bucket_prefix(location): f'Dataset storage location creation failed on S3 for dataset location {location.locationUri} : {e}' ) raise e + + @staticmethod + def create_bucket_policy(account_id: str, bucket_name: str, policy: str): + try: + s3cli = S3.client(account_id=account_id, client_type='s3') + s3cli.put_bucket_policy( + Bucket=bucket_name, + Policy=policy, + ConfirmRemoveSelfBucketAccess=False, + ExpectedBucketOwner=account_id, + ) + log.info( + f'Created bucket policy of {bucket_name} on {account_id} successfully' + ) + except Exception as e: + log.error( + f'Bucket policy created failed on bucket {bucket_name} of {account_id} : {e}' + ) + raise e + + @staticmethod + def get_bucket_policy(account_id: str, bucket_name: str): + try: + s3cli = 
S3.client(account_id=account_id, client_type='s3') + response = s3cli.get_bucket_policy(Bucket=bucket_name, ExpectedBucketOwner=account_id) + except Exception as e: + log.warning( + f'Failed to get bucket policy of {bucket_name} : {e}' + ) + return None + else: + return response['Policy'] + + @staticmethod + def get_bucket_access_point(account_id: str, access_point_name: str): + try: + s3control = S3.client(account_id, 's3control') + s3control.get_access_point( + AccountId=account_id, + Name=access_point_name, + ) + return True + except Exception as e: + log.info( + f'Failed to get S3 bucket access point {access_point_name} on {account_id} : {e}' + ) + return False + + @staticmethod + def create_bucket_access_point(account_id: str, bucket_name: str, access_point_name: str): + try: + s3control = S3.client(account_id, 's3control') + access_point = s3control.create_access_point( + AccountId=account_id, + Name=access_point_name, + Bucket=bucket_name, + ) + except Exception as e: + log.error( + f'S3 bucket access point creation failed for location {bucket_name} : {e}' + ) + raise e + else: + return access_point + + @staticmethod + def get_access_point_policy(account_id: str, access_point_name: str): + try: + s3control = S3.client(account_id, 's3control') + response = s3control.get_access_point_policy( + AccountId=account_id, + Name=access_point_name, + ) + except Exception as e: + log.info( + f'Failed to get policy of access point {access_point_name} on {account_id} : {e}' + ) + return None + else: + return response['Policy'] + + @staticmethod + def attach_access_point_policy(account_id: str, access_point_name: str, policy: str): + try: + s3control = S3.client(account_id, 's3control') + s3control.put_access_point_policy( + AccountId=account_id, + Name=access_point_name, + Policy=policy + ) + except Exception as e: + log.error( + f'S3 bucket access point policy creation failed : {e}' + ) + raise e + + @staticmethod + def generate_access_point_policy_template( + 
principal_id: str, + access_point_arn: str, + s3_prefix: str, + ): + policy = { + 'Version': '2012-10-17', + "Statement": [ + { + "Sid": f"{principal_id}0", + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": "s3:ListBucket", + "Resource": f"{access_point_arn}", + "Condition": { + "StringLike": { + "s3:prefix": [f"{s3_prefix}/*"], + "aws:userId": [f"{principal_id}:*"] + } + } + }, + { + "Sid": f"{principal_id}1", + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": "s3:GetObject", + "Resource": [f"{access_point_arn}/object/{s3_prefix}/*"], + "Condition": { + "StringLike": { + "aws:userId": [f"{principal_id}:*"] + } + } + } + ] + + } + return policy diff --git a/backend/dataall/cdkproxy/stacks/dataset.py b/backend/dataall/cdkproxy/stacks/dataset.py index 33e9d14d7..99d035a3f 100644 --- a/backend/dataall/cdkproxy/stacks/dataset.py +++ b/backend/dataall/cdkproxy/stacks/dataset.py @@ -72,7 +72,6 @@ def get_target(self) -> models.Dataset: def get_shared_tables(self) -> typing.List[models.ShareObjectItem]: engine = self.get_engine() - dataset = self.get_target() with engine.scoped_session() as session: tables = ( session.query( @@ -261,13 +260,13 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): bucket_key_enabled=True, ) - dataset_bucket.add_to_resource_policy( - permission=iam.PolicyStatement( - actions=['s3:*'], - resources=[dataset_bucket.bucket_arn], - principals=[iam.AccountPrincipal(account_id=dataset.AwsAccountId)], - ) - ) + # dataset_bucket.add_to_resource_policy( + # permission=iam.PolicyStatement( + # actions=['s3:*'], + # resources=[dataset_bucket.bucket_arn], + # principals=[iam.AccountPrincipal(account_id=dataset.AwsAccountId)], + # ) + # ) dataset_bucket.add_lifecycle_rule( abort_incomplete_multipart_upload_after=Duration.days(7), @@ -293,15 +292,6 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): ], enabled=True, ) - shared_locations = self.get_shared_folders() - for location in 
shared_locations: - dataset_bucket.grant_read( - identity=iam.AccountPrincipal(location.AwsAccountId), - objects_key_pattern=f'{location.S3Prefix}/*', - ) - dataset_bucket.encryption_key.grant_decrypt( - grantee=iam.AccountPrincipal(location.AwsAccountId) - ) # Dataset Admin and ETL User dataset_admin_policy = iam.Policy( @@ -328,6 +318,22 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs): effect=iam.Effect.ALLOW, resources=[dataset_bucket.bucket_arn + '/*'], ), + iam.PolicyStatement( + actions=[ + 's3:GetAccessPoint', + 's3:GetAccessPointPolicy', + 's3:ListAccessPoints', + 's3:CreateAccessPoint', + 's3:DeleteAccessPoint', + 's3:GetAccessPointPolicyStatus', + 's3:DeleteAccessPointPolicy', + 's3:PutAccessPointPolicy', + ], + effect=iam.Effect.ALLOW, + resources=[ + f'arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/*', + ], + ), iam.PolicyStatement( actions=['s3:List*', 's3:Get*'], resources=[f'arn:aws:s3:::{env.EnvironmentDefaultBucketName}'], diff --git a/backend/dataall/cdkproxy/stacks/policies/data_policy.py b/backend/dataall/cdkproxy/stacks/policies/data_policy.py index cf3941dfc..be926b4ce 100644 --- a/backend/dataall/cdkproxy/stacks/policies/data_policy.py +++ b/backend/dataall/cdkproxy/stacks/policies/data_policy.py @@ -52,6 +52,14 @@ def generate_admins_data_access_policy(self) -> iam.Policy: 's3:PutAccountPublicAccessBlock', 's3:PutAccessPointPublicAccessBlock', 's3:PutStorageLensConfiguration', + 's3:GetAccessPoint', + 's3:GetAccessPointPolicy', + 's3:ListAccessPoints', + 's3:CreateAccessPoint', + 's3:DeleteAccessPoint', + 's3:GetAccessPointPolicyStatus', + 's3:DeleteAccessPointPolicy', + 's3:PutAccessPointPolicy', 's3:CreateJob', ], resources=['*'], @@ -104,6 +112,14 @@ def get_statements(self): 's3:PutAccessPointPublicAccessBlock', 's3:PutStorageLensConfiguration', 's3:CreateJob', + 's3:GetAccessPoint', + 's3:GetAccessPointPolicy', + 's3:ListAccessPoints', + 's3:CreateAccessPoint', + 's3:DeleteAccessPoint', + 
's3:GetAccessPointPolicyStatus', + 's3:DeleteAccessPointPolicy', + 's3:PutAccessPointPolicy', ], resources=['*'], ), diff --git a/backend/dataall/db/api/dataset_location.py b/backend/dataall/db/api/dataset_location.py index e6291964c..869af0bf8 100644 --- a/backend/dataall/db/api/dataset_location.py +++ b/backend/dataall/db/api/dataset_location.py @@ -1,4 +1,5 @@ import logging +from typing import List from sqlalchemy import and_ @@ -264,3 +265,31 @@ def get_location_by_s3_prefix(session, s3_prefix, accountid, region): else: logging.info(f'Found location {location.locationUri}|{location.S3Prefix}') return location + + @staticmethod + def get_dataset_locations_shared_with_env( + session, share_uri: str, dataset_uri: str, status: List[str] + ): + locations = ( + session.query(models.DatasetStorageLocation) + .join( + models.ShareObjectItem, + and_( + models.ShareObjectItem.itemUri + == models.DatasetStorageLocation.locationUri, + models.ShareObjectItem.shareUri + == share_uri, + ), + ) + .filter( + and_( + models.DatasetStorageLocation.datasetUri == dataset_uri, + models.DatasetStorageLocation.deleted.is_(None), + models.ShareObjectItem.status.in_(status), + ) + ) + .all() + ) + logging.info(f'found {len(locations)} shared locations') + + return locations diff --git a/backend/dataall/db/api/share_object.py b/backend/dataall/db/api/share_object.py index ba44c7ea5..54a0a9cf8 100644 --- a/backend/dataall/db/api/share_object.py +++ b/backend/dataall/db/api/share_object.py @@ -119,7 +119,7 @@ def create_share_object( GlueTableName=item.GlueTableName if itemType == ShareableType.Table.value else '', - S3AccessPointName=f'{share.datasetUri}{item.locationUri}{share.principalId}'.lower() + S3AccessPointName=f'{share.datasetUri}-{share.principalId}'.lower() if itemType == ShareableType.StorageLocation.value else '', ) @@ -424,7 +424,7 @@ def add_share_object_item( GlueTableName=item.GlueTableName if itemType == ShareableType.Table.value else '', - 
S3AccessPointName=f'{share.datasetUri}{item.locationUri}{share.environmentUri}'.lower() + S3AccessPointName=f'{share.datasetUri}-{share.principalId}'.lower() if itemType == ShareableType.StorageLocation.value else '', ) diff --git a/backend/dataall/tasks/share_manager.py b/backend/dataall/tasks/share_manager.py index 93483c737..e36ced250 100644 --- a/backend/dataall/tasks/share_manager.py +++ b/backend/dataall/tasks/share_manager.py @@ -3,6 +3,8 @@ import sys import time import uuid +import json +from typing import Any from botocore.exceptions import ClientError from sqlalchemy import and_ @@ -11,6 +13,9 @@ from ..aws.handlers.glue import Glue from ..aws.handlers.quicksight import Quicksight from ..aws.handlers.sts import SessionHelper +from ..aws.handlers.s3 import S3 +from ..aws.handlers.kms import KMS +from ..aws.handlers.iam import IAM from ..db import get_engine from ..db import models, exceptions from ..searchproxy import connect @@ -37,15 +42,17 @@ def approve_share(engine, share_uri): """ with engine.scoped_session() as session: ( - env_group, + source_env_group, + target_env_group, dataset, share, shared_tables, + shared_folders, source_environment, target_environment, ) = ShareManager.get_share_data(session, share_uri, ['Approved']) - principals = [env_group.environmentIAMRoleArn] + principals = [target_env_group.environmentIAMRoleArn] if target_environment.dashboardsEnabled: ShareManager.add_quicksight_group_to_shared_with_principals( @@ -65,6 +72,16 @@ def approve_share(engine, share_uri): session, dataset, shared_tables, target_environment ) + ShareManager.share_folders( + session, + share, + source_env_group, + target_env_group, + target_environment, + shared_folders, + dataset, + ) + return True @staticmethod @@ -129,6 +146,90 @@ def share_tables( table, share, target_environment ) + @staticmethod + def share_folders( + session, + share: models.ShareObject, + source_env_group: models.EnvironmentGroup, + target_env_group: models.EnvironmentGroup, + 
target_environment: models.Environment, + shared_folders: [models.DatasetStorageLocation], + dataset: models.Dataset, + ): + for folder in shared_folders: + share_item = ShareManager.get_share_item(session, share, folder) + + ShareManager.update_share_item_status( + session, + share_item, + models.ShareObjectStatus.Share_In_Progress.value + ) + + source_account_id = folder.AWSAccountId + access_point_name = share_item.S3AccessPointName + bucket_name = folder.S3BucketName + target_account_id = target_environment.AwsAccountId + source_env_admin = source_env_group.environmentIAMRoleArn + dataset_admin = dataset.IAMDatasetAdminRoleArn + target_env_admin = target_env_group.environmentIAMRoleName + s3_prefix = folder.S3Prefix + + try: + ShareManager.manage_access_point_and_bucket_policy( + dataset_admin, + source_account_id, + bucket_name, + access_point_name, + target_account_id, + source_env_admin, + ) + + ShareManager.manage_target_role_access_policy( + bucket_name, + access_point_name, + target_account_id, + target_env_admin, + dataset, + ) + ShareManager.manage_access_point_policy( + dataset_admin, + source_account_id, + access_point_name, + target_account_id, + source_env_admin, + target_env_admin, + s3_prefix, + dataset, + ) + + ShareManager.update_dataset_bucket_key_policy( + source_account_id, + target_account_id, + target_env_admin, + dataset + ) + + ShareManager.update_share_item_status( + session, + share_item, + models.ShareObjectStatus.Share_Succeeded.value, + ) + except Exception as e: + logging.error( + f'Failed to share folder {folder.S3Prefix} ' + f'from source account {folder.AWSAccountId}//{folder.region} ' + f'with target account {target_environment.AwsAccountId}//{target_environment.region} ' + f'due to: {e}' + ) + ShareManager.update_share_item_status( + session, + share_item, + models.ShareObjectStatus.Share_Failed.value, + ) + AlarmService().trigger_folder_sharing_failure_alarm( + folder, share, target_environment + ) + @staticmethod def 
add_quicksight_group_to_shared_with_principals(target_environment, principals): try: @@ -793,26 +894,35 @@ def get_share_data(session, share_uri, status): environment_uri=target_environment.environmentUri, status=status, ) - env_group: models.EnvironmentGroup = ( - session.query(models.EnvironmentGroup) - .filter( - and_( - models.EnvironmentGroup.environmentUri == share.environmentUri, - models.EnvironmentGroup.groupUri == share.principalId, - ) - ) - .first() + shared_folders = db.api.DatasetStorageLocation.get_dataset_locations_shared_with_env( + session, + dataset_uri=dataset.datasetUri, + share_uri=share_uri, + status=status, + ) + source_env_group = db.api.Environment.get_environment_group( + session, + dataset.SamlAdminGroupName, + dataset.environmentUri + ) + target_env_group = db.api.Environment.get_environment_group( + session, + share.principalId, + share.environmentUri ) - if not env_group: + if not target_env_group: raise Exception( f'Share object Team {share.principalId} is not a member of the ' f'environment {target_environment.name}/{target_environment.AwsAccountId}' ) + return ( - env_group, + source_env_group, + target_env_group, dataset, share, shared_tables, + shared_folders, source_environment, target_environment, ) @@ -835,13 +945,23 @@ def other_approved_share_object_exists(session, environment_uri): def get_share_item( session, share: models.ShareObject, - table: models.DatasetTable, + share_category: Any, ) -> models.ShareObjectItem: + if isinstance(share_category, models.DatasetTable): + category_uri = share_category.tableUri + elif isinstance(share_category, models.DatasetStorageLocation): + category_uri = share_category.locationUri + else: + raise exceptions.InvalidInput( + 'share_category', + share_category, + 'DatasetTable or DatasetStorageLocation' + ) share_item: models.ShareObjectItem = ( session.query(models.ShareObjectItem) .filter( and_( - models.ShareObjectItem.itemUri == table.tableUri, + models.ShareObjectItem.itemUri == 
category_uri, models.ShareObjectItem.shareUri == share.shareUri, ) ) @@ -849,7 +969,7 @@ def get_share_item( ) if not share_item: - raise exceptions.ObjectNotFound('ShareObjectItem', table.tableUri) + raise exceptions.ObjectNotFound('ShareObjectItem', category_uri) return share_item @@ -865,6 +985,203 @@ def update_share_item_status( session.commit() return share_item + @staticmethod + def manage_access_point_and_bucket_policy( + dataset_admin: str, + source_account_id: str, + bucket_name: str, + access_point_name: str, + target_account_id: str, + source_env_admin: str, + ): + if not S3.get_bucket_access_point(source_account_id, access_point_name): + S3.create_bucket_access_point(source_account_id, bucket_name, access_point_name) + bucket_policy = json.loads(S3.get_bucket_policy(source_account_id, bucket_name)) + exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( + source_account_id, + [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] + )] + allow_owner_access = { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{bucket_name}", + f"arn:aws:s3:::{bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": exceptions_roleId + } + } + } + delegated_to_accesspoint = { + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{bucket_name}", + f"arn:aws:s3:::{bucket_name}/*" + ], + "Condition": { + "StringEquals": { + "s3:DataAccessPointAccount": f"{source_account_id}" + } + } + } + bucket_policy['Statement'].append(allow_owner_access) + bucket_policy['Statement'].append(delegated_to_accesspoint) + S3.create_bucket_policy(source_account_id, bucket_name, json.dumps(bucket_policy)) + + @staticmethod + def manage_target_role_access_policy( + bucket_name: str, + access_point_name: str, + target_account_id: str, + target_env_admin: str, + dataset: models.Dataset, + ): + # target_env_admin = 
SessionHelper.extract_name_from_role_arn(target_env_admin) + existing_policy = IAM.get_role_policy( + target_account_id, + target_env_admin, + "targetDatasetAccessControlPolicy", + ) + if existing_policy: # type dict + if bucket_name not in ",".join(existing_policy["Statement"][0]["Resource"]): + target_resources = [ + f"arn:aws:s3:::{bucket_name}", + f"arn:aws:s3:::{bucket_name}/*", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" + ] + policy = existing_policy["Statement"][0]["Resource"].extend(target_resources) + else: + return + else: + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{bucket_name}", + f"arn:aws:s3:::{bucket_name}/*", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" + ] + } + ] + } + IAM.update_role_policy( + target_account_id, + target_env_admin, + "targetDatasetAccessControlPolicy", + json.dumps(policy) + ) + + @staticmethod + def manage_access_point_policy( + dataset_admin: str, + source_account_id: str, + access_point_name: str, + target_account_id: str, + source_env_admin: str, + target_env_admin: str, + s3_prefix: str, + dataset: models.Dataset, + ): + access_point_arn = f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}" + exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( + source_account_id, + [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] + )] + admin_statement = { + "Sid": "AllowAllToAdmin", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": f"{access_point_arn}", + "Condition": { + "StringLike": { + "aws:userId": exceptions_roleId + } + } + } + existing_policy = 
S3.get_access_point_policy(source_account_id, access_point_name) + # requester will use this role to access resources + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + print(target_env_admin_id) + if existing_policy: + existing_policy = json.loads(existing_policy) + # sid_list = [item["Sid"] for item in existing_policy["Statement"]] + statements = {item["Sid"]: item for item in existing_policy["Statement"]} + if target_env_admin_id in statements.keys(): + prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] + if s3_prefix not in prefix_list: + prefix_list.append(f"{s3_prefix}/*") + statements[f"{target_env_admin_id}1"]["Resource"].append(f"{access_point_arn}/object/{s3_prefix}/*") + else: + additional_policy = S3.generate_access_point_policy_template( + target_env_admin_id, + access_point_arn, + s3_prefix, + ) + existing_policy["Statement"].extend(additional_policy["Statement"]) + access_point_policy = existing_policy + else: + print(target_env_admin_id) + print(access_point_arn) + print(s3_prefix) + access_point_policy = S3.generate_access_point_policy_template( + target_env_admin_id, + access_point_arn, + s3_prefix, + ) + access_point_policy["Statement"].append(admin_statement) + print("attaching policy to access point") + print(access_point_policy) + S3.attach_access_point_policy(source_account_id, access_point_name, json.dumps(access_point_policy)) + + @staticmethod + def update_dataset_bucket_key_policy( + source_account_id: str, + target_account_id: str, + target_env_admin: str, + dataset: models.Dataset, + ): + key_alias = f"alias/{dataset.KmsAlias}" + kms_keyId = KMS.get_key_id(source_account_id, key_alias) + existing_policy = KMS.get_key_policy(source_account_id, kms_keyId, "default") + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + if existing_policy and f'{target_env_admin_id}:*' not in existing_policy: + policy = 
json.loads(existing_policy) + policy["Statement"].append( + { + "Sid": f"{target_env_admin_id}", + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": { + "StringLike": { + "aws:userId": f"{target_env_admin_id}:*" + } + } + } + ) + KMS.put_key_policy( + source_account_id, + kms_keyId, + "default", + json.dumps(policy) + ) + if __name__ == '__main__': diff --git a/backend/dataall/utils/alarm_service.py b/backend/dataall/utils/alarm_service.py index fe317aa44..00a750c61 100644 --- a/backend/dataall/utils/alarm_service.py +++ b/backend/dataall/utils/alarm_service.py @@ -72,6 +72,33 @@ def trigger_table_sharing_failure_alarm( """ return self.publish_message_to_alarms_topic(subject, message) + def trigger_folder_sharing_failure_alarm( + self, + folder: models.DatasetStorageLocation, + share: models.ShareObject, + target_environment: models.Environment, + ): + logger.info('Triggering share failure alarm...') + subject = ( + f'ALARM: DATAALL Folder {folder.S3Prefix} Sharing Failure Notification' + ) + message = f""" +You are receiving this email because your DATAALL {self.envname} environment in the {self.region} region has entered the ALARM state, because it failed to share the folder {folder.S3Prefix} with S3 Access Point. 
+Alarm Details: + - State Change: OK -> ALARM + - Reason for State Change: S3 Folder sharing failure + - Timestamp: {datetime.now()} + Share Source + - Dataset URI: {share.datasetUri} + - AWS Account: {folder.AWSAccountId} + - Region: {folder.region} + - S3 Bucket: {folder.S3BucketName} + - S3 Folder: {folder.S3Prefix} + Share Target + - AWS Account: {target_environment.AwsAccountId} + - Region: {target_environment.region} +""" + def trigger_revoke_sharing_failure_alarm( self, table: models.DatasetTable, diff --git a/deploy/pivot_role/pivotRole.yaml b/deploy/pivot_role/pivotRole.yaml index fbe840dc4..75ad127bd 100644 --- a/deploy/pivot_role/pivotRole.yaml +++ b/deploy/pivot_role/pivotRole.yaml @@ -69,6 +69,19 @@ Resources: Action: 's3:GetObject' Resource: - 'arn:aws:s3:::crawler-public*' + - Sid: ManagedAccessPoints + Action: + - 's3:GetAccessPoint' + - 's3:GetAccessPointPolicy' + - 's3:ListAccessPoints' + - 's3:CreateAccessPoint' + - 's3:DeleteAccessPoint' + - 's3:GetAccessPointPolicyStatus' + - 's3:DeleteAccessPointPolicy' + - 's3:PutAccessPointPolicy' + Effect: Allow + Resource: + - !Sub 'arn:aws:s3:*:${AWS::AccountId}:accesspoint/*' - Sid: ManagedBuckets Action: - 's3:List*' @@ -165,6 +178,7 @@ Resources: - 'glue:UpdateTrigger' - 'glue:UpdateJob' - 'glue:TagResource' + - 'glue:UpdateCrawler' Effect: Allow Resource: '*' - Sid: KMS @@ -618,6 +632,11 @@ Resources: - 'iam:ListRoles' Effect: Allow Resource: '*' + - Sid: IAMPutRolePolicy + Action: + - 'iam:PutRolePolicy' + Effect: Allow + Resource: '*' - Sid: IAMPassRole Action: - 'iam:Get*' From e4f4ccbf6041fb9134b6f1728d777f83cbda598e Mon Sep 17 00:00:00 2001 From: leunguu Date: Wed, 7 Sep 2022 21:30:51 +0800 Subject: [PATCH 02/19] Add revoke folder sharing functions --- .../api/Objects/ShareObject/resolvers.py | 2 +- backend/dataall/aws/handlers/iam.py | 17 + backend/dataall/aws/handlers/s3.py | 26 +- backend/dataall/tasks/share_manager.py | 417 ++++++++++++++---- backend/dataall/utils/alarm_service.py | 27 ++ 
deploy/pivot_role/pivotRole.yaml | 3 +- 6 files changed, 404 insertions(+), 88 deletions(-) diff --git a/backend/dataall/api/Objects/ShareObject/resolvers.py b/backend/dataall/api/Objects/ShareObject/resolvers.py index 51726b1ff..ec6477b1a 100644 --- a/backend/dataall/api/Objects/ShareObject/resolvers.py +++ b/backend/dataall/api/Objects/ShareObject/resolvers.py @@ -109,7 +109,7 @@ def reject_share_object(context: Context, source, shareUri: str = None): ) session.add(reject_share_task) - stack_helper.deploy_stack(context, share.datasetUri) + # stack_helper.deploy_stack(context, share.datasetUri) Worker.queue(engine=context.engine, task_ids=[reject_share_task.taskUri]) diff --git a/backend/dataall/aws/handlers/iam.py b/backend/dataall/aws/handlers/iam.py index 414e50432..53fa2bfba 100644 --- a/backend/dataall/aws/handlers/iam.py +++ b/backend/dataall/aws/handlers/iam.py @@ -51,3 +51,20 @@ def get_role_policy( return None else: return response["PolicyDocument"] + + @staticmethod + def delete_role_policy( + account_id: str, + role_name: str, + policy_name: str, + ): + try: + iamcli = IAM.client(account_id) + iamcli.delete_role_policy( + RoleName=role_name, + PolicyName=policy_name, + ) + except Exception as e: + log.error( + f'Failed to delete policy {policy_name} of role {role_name} : {e}' + ) diff --git a/backend/dataall/aws/handlers/s3.py b/backend/dataall/aws/handlers/s3.py index 3b5c31b77..9e38911ef 100755 --- a/backend/dataall/aws/handlers/s3.py +++ b/backend/dataall/aws/handlers/s3.py @@ -77,19 +77,20 @@ def get_bucket_policy(account_id: str, bucket_name: str): return response['Policy'] @staticmethod - def get_bucket_access_point(account_id: str, access_point_name: str): + def get_bucket_access_point_arn(account_id: str, access_point_name: str): try: s3control = S3.client(account_id, 's3control') - s3control.get_access_point( + access_point = s3control.get_access_point( AccountId=account_id, Name=access_point_name, ) - return True except Exception as e: 
log.info( f'Failed to get S3 bucket access point {access_point_name} on {account_id} : {e}' ) - return False + return None + else: + return access_point["AccessPointArn"] @staticmethod def create_bucket_access_point(account_id: str, bucket_name: str, access_point_name: str): @@ -106,7 +107,21 @@ def create_bucket_access_point(account_id: str, bucket_name: str, access_point_n ) raise e else: - return access_point + return access_point["AccessPointArn"] + + @staticmethod + def delete_bucket_access_point(account_id: str, access_point_name: str): + try: + s3control = S3.client(account_id, 's3control') + s3control.delete_access_point( + AccountId=account_id, + Name=access_point_name, + ) + except Exception as e: + log.error( + f'Failed to delete S3 bucket access point {access_point_name}/{account_id} : {e}' + ) + raise e @staticmethod def get_access_point_policy(account_id: str, access_point_name: str): @@ -178,6 +193,5 @@ def generate_access_point_policy_template( } } ] - } return policy diff --git a/backend/dataall/tasks/share_manager.py b/backend/dataall/tasks/share_manager.py index e36ced250..be8d68217 100644 --- a/backend/dataall/tasks/share_manager.py +++ b/backend/dataall/tasks/share_manager.py @@ -82,6 +82,16 @@ def approve_share(engine, share_uri): dataset, ) + ShareManager.clean_shared_folders( + session, + share, + source_env_group, + target_env_group, + target_environment, + dataset, + shared_folders, + ) + return True @staticmethod @@ -175,31 +185,29 @@ def share_folders( s3_prefix = folder.S3Prefix try: - ShareManager.manage_access_point_and_bucket_policy( + ShareManager.manage_bucket_policy( dataset_admin, source_account_id, bucket_name, - access_point_name, - target_account_id, source_env_admin, ) - ShareManager.manage_target_role_access_policy( + ShareManager.grant_target_role_access_policy( bucket_name, access_point_name, target_account_id, target_env_admin, dataset, ) - ShareManager.manage_access_point_policy( + 
ShareManager.manage_access_point_and_policy( dataset_admin, source_account_id, - access_point_name, target_account_id, source_env_admin, target_env_admin, + bucket_name, s3_prefix, - dataset, + access_point_name, ) ShareManager.update_dataset_bucket_key_policy( @@ -541,6 +549,78 @@ def accept_ram_invitation(**data): time.sleep(5) return True + @staticmethod + def revoke_shared_folders( + session, + share: models.ShareObject, + source_env_group: models.EnvironmentGroup, + target_env_group: models.EnvironmentGroup, + target_environment: models.Environment, + rejected_folders: [models.DatasetStorageLocation], + dataset: models.Dataset, + ): + for folder in rejected_folders: + rejected_item = ShareManager.get_share_item(session, share, folder) + + ShareManager.update_share_item_status( + session, + rejected_item, + models.ShareObjectStatus.Revoke_In_Progress.value + ) + + source_account_id = folder.AWSAccountId + access_point_name = rejected_item.S3AccessPointName + bucket_name = folder.S3BucketName + target_account_id = target_environment.AwsAccountId + # source_env_admin = source_env_group.environmentIAMRoleArn + # dataset_admin = dataset.IAMDatasetAdminRoleArn + target_env_admin = target_env_group.environmentIAMRoleName + s3_prefix = folder.S3Prefix + + try: + ShareManager.delete_access_point_policy( + source_account_id, + target_account_id, + access_point_name, + target_env_admin, + s3_prefix, + ) + cleanup = ShareManager.delete_access_point(source_account_id, access_point_name) + if cleanup: + ShareManager.delete_target_role_access_policy( + target_account_id, + target_env_admin, + bucket_name, + access_point_name, + dataset, + ) + ShareManager.delete_dataset_bucket_key_policy( + source_account_id, + target_account_id, + target_env_admin, + dataset, + ) + ShareManager.update_share_item_status( + session, + rejected_item, + models.ShareObjectStatus.Revoke_Share_Succeeded.value, + ) + except Exception as e: + log.error( + f'Failed to revoke folder {folder.S3Prefix} 
' + f'from source account {folder.AWSAccountId}//{folder.region} ' + f'with target account {target_environment.AwsAccountId}//{target_environment.region} ' + f'due to: {e}' + ) + ShareManager.update_share_item_status( + session, + rejected_item, + models.ShareObjectStatus.Revoke_Share_Failed.value, + ) + AlarmService().trigger_revoke_folder_sharing_failure_alarm( + folder, share, target_environment + ) + @staticmethod def revoke_iamallowedgroups_super_permission_from_table( client, accountid, database, table @@ -727,10 +807,12 @@ def reject_share(engine, share_uri): with engine.scoped_session() as session: ( - env_group, + source_env_group, + target_env_group, dataset, share, shared_tables, + shared_folders, source_environment, target_environment, ) = ShareManager.get_share_data(session, share_uri, ['Rejected']) @@ -738,7 +820,7 @@ def reject_share(engine, share_uri): log.info(f'Revoking permissions for tables : {shared_tables}') ShareManager.revoke_resource_links_access_on_target_account( - session, env_group, share, shared_tables, target_environment + session, source_env_group, share, shared_tables, target_environment ) ShareManager.delete_resource_links_on_target_account( @@ -756,6 +838,16 @@ def reject_share(engine, share_uri): shared_tables, source_environment, target_environment ) + ShareManager.revoke_shared_folders( + session, + share, + source_env_group, + target_env_group, + target_environment, + shared_folders, + dataset, + ) + return True @staticmethod @@ -986,62 +1078,66 @@ def update_share_item_status( return share_item @staticmethod - def manage_access_point_and_bucket_policy( + def manage_bucket_policy( dataset_admin: str, source_account_id: str, bucket_name: str, - access_point_name: str, - target_account_id: str, source_env_admin: str, ): - if not S3.get_bucket_access_point(source_account_id, access_point_name): - S3.create_bucket_access_point(source_account_id, bucket_name, access_point_name) - bucket_policy = 
json.loads(S3.get_bucket_policy(source_account_id, bucket_name)) - exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( - source_account_id, - [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] - )] - allow_owner_access = { - "Effect": "Allow", - "Principal": "*", - "Action": "s3:*", - "Resource": [ - f"arn:aws:s3:::{bucket_name}", - f"arn:aws:s3:::{bucket_name}/*" - ], - "Condition": { - "StringLike": { - "aws:userId": exceptions_roleId - } + ''' + This function will manage bucket policy by grant admin access to dataset admin, pivot role + and environment admin. All of the policies will only be added once. + ''' + bucket_policy = json.loads(S3.get_bucket_policy(source_account_id, bucket_name)) + for statement in bucket_policy["Statement"]: + if statement.get("Sid") in ["AllowAllToAdmin", "DelegateAccessToAccessPoint"]: + return + exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( + source_account_id, + [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] + )] + allow_owner_access = { + "Sid": "AllowAllToAdmin", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{bucket_name}", + f"arn:aws:s3:::{bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": exceptions_roleId } } - delegated_to_accesspoint = { - "Effect": "Allow", - "Principal": "*", - "Action": "s3:*", - "Resource": [ - f"arn:aws:s3:::{bucket_name}", - f"arn:aws:s3:::{bucket_name}/*" - ], - "Condition": { - "StringEquals": { - "s3:DataAccessPointAccount": f"{source_account_id}" - } + } + delegated_to_accesspoint = { + "Sid": "DelegateAccessToAccessPoint", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{bucket_name}", + f"arn:aws:s3:::{bucket_name}/*" + ], + "Condition": { + "StringEquals": { + "s3:DataAccessPointAccount": f"{source_account_id}" } } - 
bucket_policy['Statement'].append(allow_owner_access) - bucket_policy['Statement'].append(delegated_to_accesspoint) - S3.create_bucket_policy(source_account_id, bucket_name, json.dumps(bucket_policy)) + } + bucket_policy["Statement"].append(allow_owner_access) + bucket_policy["Statement"].append(delegated_to_accesspoint) + S3.create_bucket_policy(source_account_id, bucket_name, json.dumps(bucket_policy)) @staticmethod - def manage_target_role_access_policy( + def grant_target_role_access_policy( bucket_name: str, access_point_name: str, target_account_id: str, target_env_admin: str, dataset: models.Dataset, ): - # target_env_admin = SessionHelper.extract_name_from_role_arn(target_env_admin) existing_policy = IAM.get_role_policy( target_account_id, target_env_admin, @@ -1080,50 +1176,44 @@ def manage_target_role_access_policy( target_account_id, target_env_admin, "targetDatasetAccessControlPolicy", - json.dumps(policy) + json.dumps(policy), ) @staticmethod - def manage_access_point_policy( + def manage_access_point_and_policy( dataset_admin: str, source_account_id: str, - access_point_name: str, target_account_id: str, source_env_admin: str, target_env_admin: str, + bucket_name: str, s3_prefix: str, - dataset: models.Dataset, + access_point_name: str, ): - access_point_arn = f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}" - exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( - source_account_id, - [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] - )] - admin_statement = { - "Sid": "AllowAllToAdmin", - "Effect": "Allow", - "Principal": "*", - "Action": "s3:*", - "Resource": f"{access_point_arn}", - "Condition": { - "StringLike": { - "aws:userId": exceptions_roleId - } - } - } + access_point_arn = S3.get_bucket_access_point_arn(source_account_id, access_point_name) + if not access_point_arn: + access_point_arn = S3.create_bucket_access_point(source_account_id, 
bucket_name, access_point_name) existing_policy = S3.get_access_point_policy(source_account_id, access_point_name) # requester will use this role to access resources target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) - print(target_env_admin_id) if existing_policy: + # Update existing access point policy existing_policy = json.loads(existing_policy) - # sid_list = [item["Sid"] for item in existing_policy["Statement"]] statements = {item["Sid"]: item for item in existing_policy["Statement"]} - if target_env_admin_id in statements.keys(): + if f"{target_env_admin_id}0" in statements.keys(): prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] - if s3_prefix not in prefix_list: + if isinstance(prefix_list, str): + prefix_list = [prefix_list] + if f"{s3_prefix}/*" not in prefix_list: prefix_list.append(f"{s3_prefix}/*") - statements[f"{target_env_admin_id}1"]["Resource"].append(f"{access_point_arn}/object/{s3_prefix}/*") + statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] = prefix_list + resource_list = statements[f"{target_env_admin_id}1"]["Resource"] + if isinstance(resource_list, str): + resource_list = [resource_list] + if f"{access_point_arn}/object/{s3_prefix}/*" not in resource_list: + resource_list.append(f"{access_point_arn}/object/{s3_prefix}/*") + statements[f"{target_env_admin_id}1"]["Resource"] = resource_list + existing_policy["Statement"] = list(statements.values()) else: additional_policy = S3.generate_access_point_policy_template( target_env_admin_id, @@ -1133,17 +1223,29 @@ def manage_access_point_policy( existing_policy["Statement"].extend(additional_policy["Statement"]) access_point_policy = existing_policy else: - print(target_env_admin_id) - print(access_point_arn) - print(s3_prefix) + # First time to create access point policy access_point_policy = S3.generate_access_point_policy_template( target_env_admin_id, access_point_arn, s3_prefix, ) - 
access_point_policy["Statement"].append(admin_statement) - print("attaching policy to access point") - print(access_point_policy) + exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( + source_account_id, + [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] + )] + admin_statement = { + "Sid": "AllowAllToAdmin", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": f"{access_point_arn}", + "Condition": { + "StringLike": { + "aws:userId": exceptions_roleId + } + } + } + access_point_policy["Statement"].append(admin_statement) S3.attach_access_point_policy(source_account_id, access_point_name, json.dumps(access_point_policy)) @staticmethod @@ -1182,6 +1284,161 @@ def update_dataset_bucket_key_policy( json.dumps(policy) ) + @staticmethod + def delete_access_point_policy( + source_account_id: str, + target_account_id: str, + access_point_name: str, + target_env_admin: str, + s3_prefix: str, + ): + access_point_policy = json.loads(S3.get_access_point_policy(source_account_id, access_point_name)) + access_point_arn = S3.get_bucket_access_point_arn(source_account_id, access_point_name) + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + statements = {item["Sid"]: item for item in access_point_policy["Statement"]} + if f"{target_env_admin_id}0" in statements.keys(): + prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] + if isinstance(prefix_list, list) and f"{s3_prefix}/*" in prefix_list: + prefix_list.remove(f"{s3_prefix}/*") + statements[f"{target_env_admin_id}1"]["Resource"].remove(f"{access_point_arn}/object/{s3_prefix}/*") + access_point_policy["Statement"] = list(statements.values()) + else: + access_point_policy["Statement"].remove(statements[f"{target_env_admin_id}0"]) + access_point_policy["Statement"].remove(statements[f"{target_env_admin_id}1"]) + S3.attach_access_point_policy(source_account_id, 
access_point_name, json.dumps(access_point_policy)) + + @staticmethod + def delete_access_point(source_account_id: str, access_point_name: str): + access_point_policy = json.loads(S3.get_access_point_policy(source_account_id, access_point_name)) + if len(access_point_policy["Statement"]) <= 1: + # At least we have the 'AllowAllToAdmin' statement + S3.delete_bucket_access_point(source_account_id, access_point_name) + return True + else: + return False + + @staticmethod + def delete_target_role_access_policy( + target_account_id: str, + target_env_admin: str, + bucket_name: str, + access_point_name: str, + dataset: models.Dataset, + ): + existing_policy = IAM.get_role_policy( + target_account_id, + target_env_admin, + "targetDatasetAccessControlPolicy", + ) + if existing_policy: + if bucket_name in ",".join(existing_policy["Statement"][0]["Resource"]): + target_resources = [ + f"arn:aws:s3:::{bucket_name}", + f"arn:aws:s3:::{bucket_name}/*", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" + ] + for item in target_resources: + existing_policy["Statement"][0]["Resource"].remove(item) + if not existing_policy["Statement"][0]["Resource"]: + IAM.delete_role_policy(target_account_id, target_env_admin, "targetDatasetAccessControlPolicy") + else: + IAM.update_role_policy( + target_account_id, + target_env_admin, + "targetDatasetAccessControlPolicy", + json.dumps(existing_policy), + ) + + @staticmethod + def delete_dataset_bucket_key_policy( + source_account_id: str, + target_account_id: str, + target_env_admin: str, + dataset: models.Dataset, + ): + key_alias = f"alias/{dataset.KmsAlias}" + kms_keyId = KMS.get_key_id(source_account_id, key_alias) + existing_policy = KMS.get_key_policy(source_account_id, kms_keyId, "default") + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + if existing_policy and 
f'{target_env_admin_id}:*' in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"] = [item for item in policy["Statement"] if item["Sid"] != f"{target_env_admin_id}"] + KMS.put_key_policy( + source_account_id, + kms_keyId, + "default", + json.dumps(policy) + ) + + @staticmethod + def clean_shared_folders( + session, + share: models.ShareObject, + source_env_group: models.EnvironmentGroup, + target_env_group: models.EnvironmentGroup, + target_environment: models.Environment, + dataset: models.Dataset, + shared_folders: [models.DatasetStorageLocation], + ): + source_account_id = dataset.AwsAccountId + access_point_name = f"{dataset.datasetUri}-{share.principalId}".lower() + target_account_id = target_environment.AwsAccountId + target_env_admin = target_env_group.environmentIAMRoleName + access_point_policy = S3.get_access_point_policy(source_account_id, access_point_name) + if access_point_policy: + policy = json.loads(access_point_policy) + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + statements = {item["Sid"]: item for item in policy["Statement"]} + if f"{target_env_admin_id}0" in statements.keys(): + prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] + if isinstance(prefix_list, str): + prefix_list = [prefix_list] + prefix_list = [prefix[:-2] for prefix in prefix_list] + shared_prefix = [folder.S3Prefix for folder in shared_folders] + removed_prefixes = [prefix for prefix in prefix_list if prefix not in shared_prefix] + for prefix in removed_prefixes: + bucket_name = dataset.S3BucketName + try: + ShareManager.delete_access_point_policy( + source_account_id, + target_account_id, + access_point_name, + target_env_admin, + prefix, + ) + cleanup = ShareManager.delete_access_point(source_account_id, access_point_name) + if cleanup: + ShareManager.delete_target_role_access_policy( + target_account_id, + target_env_admin, + bucket_name, + access_point_name, + 
dataset, + ) + ShareManager.delete_dataset_bucket_key_policy( + source_account_id, + target_account_id, + target_env_admin, + dataset, + ) + except Exception as e: + log.error( + f'Failed to revoke folder {prefix} ' + f'from source account {dataset.AwsAccountId}//{dataset.region} ' + f'with target account {target_account_id}//{target_environment.region} ' + f'due to: {e}' + ) + location = db.api.DatasetStorageLocation.get_location_by_s3_prefix( + session, + prefix, + dataset.AwsAccountId, + dataset.region, + ) + AlarmService().trigger_revoke_folder_sharing_failure_alarm( + location, share, target_environment + ) + if __name__ == '__main__': diff --git a/backend/dataall/utils/alarm_service.py b/backend/dataall/utils/alarm_service.py index 00a750c61..3837b2413 100644 --- a/backend/dataall/utils/alarm_service.py +++ b/backend/dataall/utils/alarm_service.py @@ -99,6 +99,33 @@ def trigger_folder_sharing_failure_alarm( - Region: {target_environment.region} """ + def trigger_revoke_folder_sharing_failure_alarm( + self, + folder: models.DatasetStorageLocation, + share: models.ShareObject, + target_environment: models.Environment, + ): + logger.info('Triggering share failure alarm...') + subject = ( + f'ALARM: DATAALL Folder {folder.S3Prefix} Sharing Revoke Failure Notification' + ) + message = f""" +You are receiving this email because your DATAALL {self.envname} environment in the {self.region} region has entered the ALARM state, because it failed to share the folder {folder.S3Prefix} with S3 Access Point. 
+Alarm Details: + - State Change: OK -> ALARM + - Reason for State Change: S3 Folder sharing Revoke failure + - Timestamp: {datetime.now()} + Share Source + - Dataset URI: {share.datasetUri} + - AWS Account: {folder.AWSAccountId} + - Region: {folder.region} + - S3 Bucket: {folder.S3BucketName} + - S3 Folder: {folder.S3Prefix} + Share Target + - AWS Account: {target_environment.AwsAccountId} + - Region: {target_environment.region} +""" + def trigger_revoke_sharing_failure_alarm( self, table: models.DatasetTable, diff --git a/deploy/pivot_role/pivotRole.yaml b/deploy/pivot_role/pivotRole.yaml index 75ad127bd..069b5fa27 100644 --- a/deploy/pivot_role/pivotRole.yaml +++ b/deploy/pivot_role/pivotRole.yaml @@ -632,9 +632,10 @@ Resources: - 'iam:ListRoles' Effect: Allow Resource: '*' - - Sid: IAMPutRolePolicy + - Sid: IAMRolePolicy Action: - 'iam:PutRolePolicy' + - 'iam:DeleteRolePolicy' Effect: Allow Resource: '*' - Sid: IAMPassRole From f518e75afa2e45b92a4dd9cfb8e2caf97c0128bb Mon Sep 17 00:00:00 2001 From: dlpzx Date: Thu, 15 Sep 2022 17:14:57 +0200 Subject: [PATCH 03/19] Cleaned migration versions and initialization of the database --- backend/migrations/README | 16 +- .../versions/033c3d6c1849_init_permissions.py | 39 - .../versions/166af5c0355b_release_3_7_1.py | 113 -- .../versions/2b40221043f1_release_3_7_0.py | 42 - ...ea02fe85af6_redshift_copy_data_location.py | 31 - .../versions/3ae3eeca475c_release_3_6_1.py | 75 - .../4392a0c9747f_pipeline_input_output.py | 31 - .../46e5a33450b1_vpc_default_env_flag.py | 28 - .../4ab27e3b3d54_stack_events_column.py | 31 - .../5d5102986ce5_add_subnet_ids_columns.py | 40 - .../5e5c84138af7_backfill_confidentiality.py | 89 -- .../versions/5e722995fa0b_release_3_8_1.py | 820 ---------- .../versions/74b89c64f330_vpc_group.py | 28 - ...94697ee46c0c_sagemaker_notebooks_update.py | 40 - .../versions/967fa9c0a147_add_ecs_task_arn.py | 28 - .../versions/97050ec09354_release_3_7_8.py | 92 -- .../9b589bf91485_dashboard_sharing.py | 31 - 
.../b6e0ac8f6d3f_add_env_feature_flags.py | 103 -- .../bc77fef9d0b2_new_permissions_model.py | 198 --- .../versions/bd271a2780b2_init_database.py | 1349 ----------------- .../versions/bd4bea86de30_release_3_6_0.py | 38 - .../be22468d7342_dataset_column_type.py | 30 - .../versions/c5c6bbbc5de7_release_3_5_0.py | 104 -- .../decc96c5670f_organization_groups.py | 38 - .../versions/e177eb044b31_init_tenant.py | 42 - .../e72009ab3b9a_updating_pipelines.py | 48 - 26 files changed, 13 insertions(+), 3511 deletions(-) delete mode 100644 backend/migrations/versions/033c3d6c1849_init_permissions.py delete mode 100644 backend/migrations/versions/166af5c0355b_release_3_7_1.py delete mode 100644 backend/migrations/versions/2b40221043f1_release_3_7_0.py delete mode 100644 backend/migrations/versions/2ea02fe85af6_redshift_copy_data_location.py delete mode 100644 backend/migrations/versions/3ae3eeca475c_release_3_6_1.py delete mode 100644 backend/migrations/versions/4392a0c9747f_pipeline_input_output.py delete mode 100644 backend/migrations/versions/46e5a33450b1_vpc_default_env_flag.py delete mode 100644 backend/migrations/versions/4ab27e3b3d54_stack_events_column.py delete mode 100644 backend/migrations/versions/5d5102986ce5_add_subnet_ids_columns.py delete mode 100644 backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py delete mode 100644 backend/migrations/versions/5e722995fa0b_release_3_8_1.py delete mode 100644 backend/migrations/versions/74b89c64f330_vpc_group.py delete mode 100644 backend/migrations/versions/94697ee46c0c_sagemaker_notebooks_update.py delete mode 100644 backend/migrations/versions/967fa9c0a147_add_ecs_task_arn.py delete mode 100644 backend/migrations/versions/97050ec09354_release_3_7_8.py delete mode 100644 backend/migrations/versions/9b589bf91485_dashboard_sharing.py delete mode 100644 backend/migrations/versions/b6e0ac8f6d3f_add_env_feature_flags.py delete mode 100644 backend/migrations/versions/bc77fef9d0b2_new_permissions_model.py delete 
mode 100644 backend/migrations/versions/bd271a2780b2_init_database.py delete mode 100644 backend/migrations/versions/bd4bea86de30_release_3_6_0.py delete mode 100644 backend/migrations/versions/be22468d7342_dataset_column_type.py delete mode 100644 backend/migrations/versions/c5c6bbbc5de7_release_3_5_0.py delete mode 100644 backend/migrations/versions/decc96c5670f_organization_groups.py delete mode 100644 backend/migrations/versions/e177eb044b31_init_tenant.py delete mode 100644 backend/migrations/versions/e72009ab3b9a_updating_pipelines.py diff --git a/backend/migrations/README b/backend/migrations/README index 755569166..1bcc2ce5f 100644 --- a/backend/migrations/README +++ b/backend/migrations/README @@ -1,9 +1,19 @@ -To Generate alembic migration during development +To Generate alembic migration during development: + +``` +export PYTHONPATH=backend +export envname=local +alembic -c backend/alembic.ini revision -m "_release_vX.X.X" +``` + +To run the upgrade (this is part of the deployment pipeline) ```bash -envname=local alembic revision autogenerate -m"my migration" +alembic -c backend/alembic.ini upgrade head ``` To run migrations locally ```bash -envname=local alembic revision upgrade head +envname=local +alembic revision upgrade head ``` +https://alembic.sqlalchemy.org/en/latest/ \ No newline at end of file diff --git a/backend/migrations/versions/033c3d6c1849_init_permissions.py b/backend/migrations/versions/033c3d6c1849_init_permissions.py deleted file mode 100644 index bc48176f9..000000000 --- a/backend/migrations/versions/033c3d6c1849_init_permissions.py +++ /dev/null @@ -1,39 +0,0 @@ -"""init permissions - -Revision ID: 033c3d6c1849 -Revises: bc77fef9d0b2 -Create Date: 2021-08-03 07:53:28.164238 - -""" -import os - -from alembic import op -import sqlalchemy as sa -from sqlalchemy import orm - -from dataall.db import api, get_engine, has_table - -# revision identifiers, used by Alembic. 
-revision = '033c3d6c1849' -down_revision = 'bc77fef9d0b2' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - try: - bind = op.get_bind() - session = orm.Session(bind=bind) - print('Initializing permissions...') - api.Permission.init_permissions(session) - print('Permissions initialized successfully') - except Exception as e: - print(f'Failed to init permissions due to: {e}') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - pass - # ### end Alembic commands ### diff --git a/backend/migrations/versions/166af5c0355b_release_3_7_1.py b/backend/migrations/versions/166af5c0355b_release_3_7_1.py deleted file mode 100644 index 46277fc3c..000000000 --- a/backend/migrations/versions/166af5c0355b_release_3_7_1.py +++ /dev/null @@ -1,113 +0,0 @@ -"""release 3.7.1 - -Revision ID: 166af5c0355b -Revises: 2b40221043f1 -Create Date: 2021-12-02 19:22:27.714326 - -""" -import datetime - -from alembic import op -from sqlalchemy import Boolean, Column, String, DateTime, orm -from sqlalchemy.dialects import postgresql -from sqlalchemy.ext.declarative import declarative_base - -from dataall.db import utils, Resource - -# revision identifiers, used by Alembic. 
-from dataall.utils.naming_convention import ( - NamingConventionService, - NamingConventionPattern, -) - -revision = '166af5c0355b' -down_revision = '2b40221043f1' -branch_labels = None -depends_on = None - -Base = declarative_base() - - -class Environment(Resource, Base): - __tablename__ = 'environment' - organizationUri = Column(String, nullable=False) - environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) - AwsAccountId = Column(String, nullable=False) - region = Column(String, nullable=False, default='eu-west-1') - cognitoGroupName = Column(String, nullable=True) - resourcePrefix = Column(String, nullable=False, default='dh') - validated = Column(Boolean, default=False) - environmentType = Column(String, nullable=False, default='Data') - isOrganizationDefaultEnvironment = Column(Boolean, default=False) - EnvironmentDefaultIAMRoleName = Column(String, nullable=False) - EnvironmentDefaultIAMRoleImported = Column(Boolean, default=False) - EnvironmentDefaultIAMRoleArn = Column(String, nullable=False) - EnvironmentDefaultBucketName = Column(String) - EnvironmentDefaultAthenaWorkGroup = Column(String) - roleCreated = Column(Boolean, nullable=False, default=False) - dashboardsEnabled = Column(Boolean, default=False) - notebooksEnabled = Column(Boolean, default=True) - mlStudiosEnabled = Column(Boolean, default=True) - pipelinesEnabled = Column(Boolean, default=True) - warehousesEnabled = Column(Boolean, default=True) - SamlGroupName = Column(String, nullable=True) - CDKRoleArn = Column(String, nullable=False) - subscriptionsEnabled = Column(Boolean, default=False) - subscriptionsProducersTopicName = Column(String) - subscriptionsProducersTopicImported = Column(Boolean, default=False) - subscriptionsConsumersTopicName = Column(String) - subscriptionsConsumersTopicImported = Column(Boolean, default=False) - - -class EnvironmentGroup(Base): - __tablename__ = 'environment_group_permission' - groupUri = Column(String, primary_key=True) - 
environmentUri = Column(String, primary_key=True) - invitedBy = Column(String, nullable=True) - environmentIAMRoleArn = Column(String, nullable=True) - environmentIAMRoleName = Column(String, nullable=True) - environmentIAMRoleImported = Column(Boolean, default=False) - environmentAthenaWorkGroup = Column(String, nullable=True) - description = Column(String, default='No description provided') - created = Column(DateTime, default=datetime.datetime.now) - updated = Column(DateTime, onupdate=datetime.datetime.now) - deleted = Column(DateTime) - groupRoleInEnvironment = Column(String, nullable=False, default='Invited') - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - bind = op.get_bind() - session = orm.Session(bind=bind) - print('Back filling environments athena workgroup...') - envs: [Environment] = session.query(Environment).all() - env: Environment - for env in envs: - print(f'Back fill environment athena workgroup {env.label}...') - env.EnvironmentDefaultAthenaWorkGroup = NamingConventionService( - target_uri=env.environmentUri, - target_label=env.label, - pattern=NamingConventionPattern.DEFAULT, - resource_prefix=env.resourcePrefix, - ).build_compliant_name() - session.commit() - env_groups = session.query(EnvironmentGroup).all() - env_group: EnvironmentGroup - for env_group in env_groups: - print(f'Back fill group athena workgroup {env_group.groupUri}...') - env_group.environmentAthenaWorkGroup = NamingConventionService( - target_uri=env.environmentUri, - target_label=env_group.groupUri, - pattern=NamingConventionPattern.DEFAULT, - resource_prefix=env.resourcePrefix, - ).build_compliant_name() - - session.commit() - print('Successfully back filled athena workgroup names ') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - pass - # ### end Alembic commands ### diff --git a/backend/migrations/versions/2b40221043f1_release_3_7_0.py b/backend/migrations/versions/2b40221043f1_release_3_7_0.py deleted file mode 100644 index 95db2590c..000000000 --- a/backend/migrations/versions/2b40221043f1_release_3_7_0.py +++ /dev/null @@ -1,42 +0,0 @@ -"""release 3.7.0 - -Revision ID: 2b40221043f1 -Revises: 3ae3eeca475c -Create Date: 2021-12-02 11:12:57.959968 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '2b40221043f1' -down_revision = '3ae3eeca475c' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('dataset', sa.Column('GlueCrawlerName', sa.String(), nullable=True)) - op.add_column( - 'dataset', sa.Column('GlueCrawlerSchedule', sa.String(), nullable=True) - ) - op.add_column( - 'environment', - sa.Column('EnvironmentDefaultAthenaWorkGroup', sa.String(), nullable=True), - ) - op.add_column( - 'environment_group_permission', - sa.Column('environmentAthenaWorkGroup', sa.String(), nullable=True), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_column('environment_group_permission', 'environmentAthenaWorkGroup') - op.drop_column('environment', 'EnvironmentDefaultAthenaWorkGroup') - op.drop_column('dataset', 'GlueCrawlerSchedule') - op.drop_column('dataset', 'GlueCrawlerName') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/2ea02fe85af6_redshift_copy_data_location.py b/backend/migrations/versions/2ea02fe85af6_redshift_copy_data_location.py deleted file mode 100644 index 7185795f4..000000000 --- a/backend/migrations/versions/2ea02fe85af6_redshift_copy_data_location.py +++ /dev/null @@ -1,31 +0,0 @@ -"""redshift copy data location - -Revision ID: 2ea02fe85af6 -Revises: 4ab27e3b3d54 -Create Date: 2021-07-15 07:17:38.392707 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '2ea02fe85af6' -down_revision = '4ab27e3b3d54' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - 'redshiftcluster_datasettable', - sa.Column('dataLocation', sa.String(), nullable=True), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_column('redshiftcluster_datasettable', 'dataLocation') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/3ae3eeca475c_release_3_6_1.py b/backend/migrations/versions/3ae3eeca475c_release_3_6_1.py deleted file mode 100644 index 657434e00..000000000 --- a/backend/migrations/versions/3ae3eeca475c_release_3_6_1.py +++ /dev/null @@ -1,75 +0,0 @@ -"""release 3.6.1 - -Revision ID: 3ae3eeca475c -Revises: bd4bea86de30 -Create Date: 2021-11-29 07:30:10.790084 - -""" -import datetime - -import sqlalchemy as sa -from alembic import op -from sqlalchemy import Boolean, Column, String, orm, DateTime -from sqlalchemy.dialects import postgresql -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import query_expression - -from dataall.db import utils - - -# revision identifiers, used by Alembic. -revision = '3ae3eeca475c' -down_revision = 'bd4bea86de30' -branch_labels = None -depends_on = None - -Base = declarative_base() - - -class Stack(Base): - __tablename__ = 'stack' - stackUri = Column( - String, nullable=False, default=utils.uuid('stack'), primary_key=True - ) - name = Column(String, nullable=True) - targetUri = Column(String, nullable=False) - accountid = Column(String, nullable=False) - region = Column(String, nullable=False) - cronexpr = Column(String, nullable=True) - status = Column(String, nullable=False, default='pending') - stack = Column(String, nullable=False) - payload = Column(postgresql.JSON, nullable=True) - created = Column(DateTime, default=datetime.datetime.now()) - updated = Column(DateTime, onupdate=datetime.datetime.now()) - stackid = Column(String) - outputs = Column(postgresql.JSON) - resources = Column(postgresql.JSON) - error = Column(postgresql.JSON) - events = Column(postgresql.JSON) - lastSeen = Column( - DateTime, default=lambda: datetime.datetime(year=1900, month=1, day=1) - ) - EcsTaskArn = Column(String, nullable=True) - - -def upgrade(): - # ### commands auto generated by Alembic - 
please adjust! ### - bind = op.get_bind() - session = orm.Session(bind=bind) - op.add_column('stack', sa.Column('name', sa.String(), nullable=True)) - print('Back filling stack names...') - stacks: [Stack] = session.query(Stack).all() - stack: Stack - for stack in stacks: - print(f'Back fill stack {stack.stackUri}...') - stack.name = f'stack-{stack.stackUri}' - session.commit() - - print('Successfully back filled stack names ') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('stack', 'name') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/4392a0c9747f_pipeline_input_output.py b/backend/migrations/versions/4392a0c9747f_pipeline_input_output.py deleted file mode 100644 index cb17a7789..000000000 --- a/backend/migrations/versions/4392a0c9747f_pipeline_input_output.py +++ /dev/null @@ -1,31 +0,0 @@ -"""pipeline input output - -Revision ID: 4392a0c9747f -Revises: e72009ab3b9a -Create Date: 2022-06-10 15:27:40.777295 - -""" -from alembic import op -import sqlalchemy as sa - -# revision identifiers, used by Alembic. 
-revision = '4392a0c9747f' -down_revision = 'e72009ab3b9a' -branch_labels = None -depends_on = None - - -def upgrade(): - op.add_column( - 'datapipeline', sa.Column('inputDatasetUri', sa.String(), nullable=True) - ) - op.add_column( - 'datapipeline', sa.Column('outputDatasetUri', sa.String(), nullable=True) - ) - pass - - -def downgrade(): - op.drop_column('datapipeline', 'inputDatasetUri') - op.drop_column('datapipeline', 'outputDatasetUri') - pass diff --git a/backend/migrations/versions/46e5a33450b1_vpc_default_env_flag.py b/backend/migrations/versions/46e5a33450b1_vpc_default_env_flag.py deleted file mode 100644 index 95aed8e9e..000000000 --- a/backend/migrations/versions/46e5a33450b1_vpc_default_env_flag.py +++ /dev/null @@ -1,28 +0,0 @@ -"""vpc default env flag - -Revision ID: 46e5a33450b1 -Revises: be22468d7342 -Create Date: 2021-07-12 19:36:20.588492 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '46e5a33450b1' -down_revision = 'be22468d7342' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('vpc', sa.Column('default', sa.Boolean(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('vpc', 'default') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/4ab27e3b3d54_stack_events_column.py b/backend/migrations/versions/4ab27e3b3d54_stack_events_column.py deleted file mode 100644 index c5405d9b0..000000000 --- a/backend/migrations/versions/4ab27e3b3d54_stack_events_column.py +++ /dev/null @@ -1,31 +0,0 @@ -"""stack events column - -Revision ID: 4ab27e3b3d54 -Revises: 46e5a33450b1 -Create Date: 2021-07-13 06:56:48.350712 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. 
-revision = '4ab27e3b3d54' -down_revision = '46e5a33450b1' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - 'stack', - sa.Column('events', postgresql.JSON(astext_type=sa.Text()), nullable=True), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('stack', 'events') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/5d5102986ce5_add_subnet_ids_columns.py b/backend/migrations/versions/5d5102986ce5_add_subnet_ids_columns.py deleted file mode 100644 index 76fdc2bf4..000000000 --- a/backend/migrations/versions/5d5102986ce5_add_subnet_ids_columns.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Add subnet ids columns - -Revision ID: 5d5102986ce5 -Revises: bd271a2780b2 -Create Date: 2021-05-19 16:07:48.221086 - -""" -import os - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = '5d5102986ce5' -down_revision = 'bd271a2780b2' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - envname = os.getenv('envname', 'local') - print('ENVNAME', envname) - op.add_column( - 'vpc', - sa.Column('privateSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), - ) - op.add_column( - 'vpc', - sa.Column('publicSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_column('vpc', 'publicSubnetIds') - op.drop_column('vpc', 'privateSubnetIds') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py b/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py deleted file mode 100644 index 123151d99..000000000 --- a/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py +++ /dev/null @@ -1,89 +0,0 @@ -"""backfill confidentiality - -Revision ID: 5e5c84138af7 -Revises: 94697ee46c0c -Create Date: 2021-09-15 13:41:44.102866 - -""" -from alembic import op - -# revision identifiers, used by Alembic. -from sqlalchemy import orm, Column, String, Boolean -from sqlalchemy.dialects import postgresql -from sqlalchemy.ext.declarative import declarative_base - -from dataall.db import utils, Resource - -revision = '5e5c84138af7' -down_revision = '94697ee46c0c' -branch_labels = None -depends_on = None - -Base = declarative_base() - - -class Dataset(Resource, Base): - __tablename__ = 'dataset' - environmentUri = Column(String, nullable=False) - organizationUri = Column(String, nullable=False) - datasetUri = Column(String, primary_key=True, default=utils.uuid('dataset')) - region = Column(String, default='eu-west-1') - AwsAccountId = Column(String, nullable=False) - S3BucketName = Column(String, nullable=False) - GlueDatabaseName = Column(String, nullable=False) - GlueProfilingJobName = Column(String) - GlueProfilingTriggerSchedule = Column(String) - GlueProfilingTriggerName = Column(String) - GlueDataQualityJobName = Column(String) - GlueDataQualitySchedule = Column(String) - GlueDataQualityTriggerName = Column(String) - IAMDatasetAdminRoleArn = Column(String, nullable=False) - IAMDatasetAdminUserArn = Column(String, nullable=False) - KmsAlias = Column(String, nullable=False) - language = Column(String, nullable=False, default='English') - topics = Column(postgresql.ARRAY(String), nullable=True) - confidentiality = Column(String, nullable=False, 
default='Unclassified') - tags = Column(postgresql.ARRAY(String)) - - bucketCreated = Column(Boolean, default=False) - glueDatabaseCreated = Column(Boolean, default=False) - iamAdminRoleCreated = Column(Boolean, default=False) - iamAdminUserCreated = Column(Boolean, default=False) - kmsAliasCreated = Column(Boolean, default=False) - lakeformationLocationCreated = Column(Boolean, default=False) - bucketPolicyCreated = Column(Boolean, default=False) - - businessOwnerEmail = Column(String, nullable=True) - businessOwnerDelegationEmails = Column(postgresql.ARRAY(String), nullable=True) - stewards = Column(String, nullable=True) - - SamlAdminGroupName = Column(String, nullable=True) - - importedS3Bucket = Column(Boolean, default=False) - importedGlueDatabase = Column(Boolean, default=False) - importedKmsKey = Column(Boolean, default=False) - importedAdminRole = Column(Boolean, default=False) - imported = Column(Boolean, default=False) - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - try: - bind = op.get_bind() - session = orm.Session(bind=bind) - print('Updating datasets...') - datasets: [Dataset] = session.query(Dataset).all() - for dataset in datasets: - if dataset.confidentiality not in ['Unclassified', 'Official', 'Secret']: - dataset.confidentiality = 'Unclassified' - session.commit() - print('Datasets updated successfully') - except Exception as e: - print(f'Failed to init permissions due to: {e}') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - pass - # ### end Alembic commands ### diff --git a/backend/migrations/versions/5e722995fa0b_release_3_8_1.py b/backend/migrations/versions/5e722995fa0b_release_3_8_1.py deleted file mode 100644 index a080ad22e..000000000 --- a/backend/migrations/versions/5e722995fa0b_release_3_8_1.py +++ /dev/null @@ -1,820 +0,0 @@ -"""release 3.8.1 - -Revision ID: 5e722995fa0b -Revises: 97050ec09354 -Create Date: 2021-12-22 12:56:28.698754 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = '5e722995fa0b' -down_revision = '97050ec09354' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('scheduled_query') - op.drop_table('share_object_v2') - op.drop_table('share_object_item_v2') - op.drop_table('dataset_query') - op.drop_table('key_value_pair') - op.drop_table('environment_permission') - op.drop_table('metadata_facet') - op.drop_table('EnvironmentRedshiftCluster') - op.drop_table('organization_topic') - op.drop_table('dataset_loader') - op.drop_table('dataset_storage_location_permission') - op.drop_table('athena_query_execution') - op.drop_table('airflow_project') - op.drop_table('environment_user_permission') - op.drop_table('data_access_request') - op.drop_table('organization_user') - op.drop_table('apikey') - op.drop_table('airflow_cluster_user_permission') - op.drop_table('dataset_user_permission') - op.drop_table('redshift_cluster_user_permission') - op.drop_table('airflowcluster') - op.drop_table('document') - op.drop_table('lineage_store') - op.drop_table('share_object_history') - op.drop_table('saved_query') - op.drop_table('dataset_table_permission') - op.drop_table('metadata_tag') - op.drop_table('dataset_access_point') - op.drop_table('search_index') - op.drop_table('userprofile') - op.drop_table('metric') - op.drop_table('all_permissions') - op.drop_table('dataset_topic') 
- # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'dataset_topic', - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('topicUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.PrimaryKeyConstraint('datasetUri', 'topicUri', name='dataset_topic_pkey'), - ) - op.create_table( - 'all_permissions', - sa.Column('objectUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('permission', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'scope', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.PrimaryKeyConstraint('objectUri', 'userName', name='all_permissions_pkey'), - ) - op.create_table( - 'metric', - sa.Column('metricUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('metricName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'metricValue', - postgresql.DOUBLE_PRECISION(precision=53), - autoincrement=False, - nullable=False, - ), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False - ), - sa.Column('emitter', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), 
autoincrement=False, nullable=False), - sa.Column('target', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('metricUri', name='metric_pkey'), - ) - op.create_table( - 'userprofile', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('username', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('bio', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('b64EncodedAvatar', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('username', name='userprofile_pkey'), - ) - op.create_table( - 'search_index', - sa.Column('objectUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('objectType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('searcAttribute1', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('searcAttribute2', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('searcAttribute3', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('searcAttribute4', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, 
nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.PrimaryKeyConstraint('objectUri', name='search_index_pkey'), - ) - op.create_table( - 'dataset_access_point', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('projectUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('locationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('accessPointUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('S3BucketName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('S3Prefix', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'S3AccessPointName', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.Column( - 'accessPointCreated', sa.BOOLEAN(), autoincrement=False, nullable=False - ), - sa.PrimaryKeyConstraint('accessPointUri', name='dataset_access_point_pkey'), - ) - op.create_table( - 'metadata_tag', - sa.Column('tagId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('nodeId', 
sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('nodeKind', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('Key', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('Value', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('tagId', name='metadata_tag_pkey'), - ) - op.create_table( - 'dataset_table_permission', - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'userRoleForTable', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.PrimaryKeyConstraint( - 'userName', 'tableUri', name='dataset_table_permission_pkey' - ), - ) - op.create_table( - 'saved_query', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column( - 'scheduledQueryUri', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.Column('savedQueryUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('queryOrder', sa.INTEGER(), autoincrement=False, nullable=False), - sa.Column('sqlBody', sa.VARCHAR(), 
autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('savedQueryUri', name='saved_query_pkey'), - ) - op.create_table( - 'share_object_history', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('historyUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('actionName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'actionPayload', - postgresql.JSON(astext_type=sa.Text()), - autoincrement=False, - nullable=True, - ), - sa.PrimaryKeyConstraint('historyUri', name='share_object_history_pkey'), - ) - op.create_table( - 'lineage_store', - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('version', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('guid', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'kind', - postgresql.ENUM( - 'dataset', - 'table', - 'folder', - 'job', - 'run', - 'datasource', - name='datanodetype', - ), - autoincrement=False, - nullable=False, - ), - sa.Column('parent', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('ref', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('location', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('created', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 
'inputs', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column( - 'outputs', - postgresql.ARRAY(sa.VARCHAR()), - autoincrement=False, - nullable=True, - ), - sa.PrimaryKeyConstraint('name', 'version', 'ref', name='lineage_store_pkey'), - ) - op.create_table( - 'document', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('parentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('md', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('parentUri', name='document_pkey'), - ) - op.create_table( - 'airflowcluster', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterUri', 
sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'subnetIds', - postgresql.ARRAY(sa.VARCHAR()), - autoincrement=False, - nullable=True, - ), - sa.Column( - 'securityGroupIds', - postgresql.ARRAY(sa.VARCHAR()), - autoincrement=False, - nullable=True, - ), - sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('IAMRoleArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('presignedUrl', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), - sa.Column( - 'configurationOptions', - postgresql.ARRAY(sa.VARCHAR()), - autoincrement=False, - nullable=True, - ), - sa.Column('airflowVersion', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('dagS3Path', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('pluginsS3Path', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'requirementsS3Path', sa.VARCHAR(), autoincrement=False, nullable=True - ), - sa.Column('environmentClass', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'loggingConfiguration', - postgresql.ARRAY(sa.VARCHAR()), - autoincrement=False, - nullable=True, - ), - 
sa.Column('sourceBucketArn', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'webServerAccessMode', sa.VARCHAR(), autoincrement=False, nullable=True - ), - sa.Column('maxWorkers', sa.INTEGER(), autoincrement=False, nullable=True), - sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('webServerUrl', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('clusterUri', name='airflowcluster_pkey'), - ) - op.create_table( - 'redshift_cluster_user_permission', - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'redshiftClusterUri', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'userRoleForRedshiftCluster', - sa.VARCHAR(), - autoincrement=False, - nullable=False, - ), - sa.PrimaryKeyConstraint( - 'userName', - 'redshiftClusterUri', - name='redshift_cluster_user_permission_pkey', - ), - ) - op.create_table( - 'dataset_user_permission', - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'userRoleForDataset', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.PrimaryKeyConstraint( - 'userName', 'datasetUri', name='dataset_user_permission_pkey' - ), - ) - op.create_table( - 'airflow_cluster_user_permission', - sa.Column('userName', sa.VARCHAR(), autoincrement=False, 
nullable=False), - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'userRoleForAirflowCluster', - sa.VARCHAR(), - autoincrement=False, - nullable=False, - ), - sa.PrimaryKeyConstraint( - 'userName', 'clusterUri', name='airflow_cluster_user_permission_pkey' - ), - ) - op.create_table( - 'apikey', - sa.Column('ApiKeyId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'ApiKeySecretHash', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'SamlGroups', - postgresql.ARRAY(sa.VARCHAR()), - autoincrement=False, - nullable=True, - ), - sa.Column( - 'expires', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.PrimaryKeyConstraint('ApiKeyId', name='apikey_pkey'), - ) - op.create_table( - 'organization_user', - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'userRoleInOrganization', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.PrimaryKeyConstraint( - 'userName', 'organizationUri', name='organization_user_pkey' - ), - ) - op.create_table( - 'data_access_request', - sa.Column('requestUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('principalId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('principalType', 
sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('principalName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('requester', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('message', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('requestUri', name='data_access_request_pkey'), - ) - op.create_table( - 'environment_user_permission', - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'userRoleInEnvironment', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.PrimaryKeyConstraint( - 'userName', 'environmentUri', name='environment_user_permission_pkey' - ), - ) - op.create_table( - 'airflow_project', - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('projectUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('cfnStackName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('cfnStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('cfnStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'codeRepositoryName', sa.VARCHAR(), autoincrement=False, nullable=True - ), - sa.Column( - 'codeRepositoryLink', sa.VARCHAR(), autoincrement=False, nullable=True - ), - 
sa.Column( - 'codeRepositoryStatus', sa.VARCHAR(), autoincrement=False, nullable=True - ), - sa.Column( - 'codePipelineStatus', sa.VARCHAR(), autoincrement=False, nullable=True - ), - sa.Column('codePipelineName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('codePipelineLink', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('codePipelineArn', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('packageName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('projectUri', name='airflow_project_pkey'), - ) - op.create_table( - 'athena_query_execution', - sa.Column('parentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'QueryExecutionId', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('queryid', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'completed', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.PrimaryKeyConstraint('QueryExecutionId', name='athena_query_execution_pkey'), - ) - op.create_table( - 'dataset_storage_location_permission', - sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('locationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'userRoleForDatasetStorageLocation', - sa.VARCHAR(), - autoincrement=False, - nullable=False, - ), - sa.PrimaryKeyConstraint( - 
'userName', 'locationUri', name='dataset_storage_location_permission_pkey' - ), - ) - op.create_table( - 'dataset_loader', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('loaderUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('IAMPrincipalArn', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('IAMRoleId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('loaderUri', name='dataset_loader_pkey'), - ) - op.create_table( - 'organization_topic', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('topicUri', sa.VARCHAR(), 
autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('topicUri', name='organization_topic_pkey'), - ) - op.create_table( - 'EnvironmentRedshiftCluster', - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.PrimaryKeyConstraint( - 'environmentUri', 'clusterUri', name='EnvironmentRedshiftCluster_pkey' - ), - ) - op.create_table( - 'metadata_facet', - sa.Column('facetId', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('guid', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - '_schema', - postgresql.JSON(astext_type=sa.Text()), - autoincrement=False, - nullable=False, - ), - sa.Column( - 'doc', - postgresql.JSON(astext_type=sa.Text()), - autoincrement=False, - nullable=False, - ), - sa.PrimaryKeyConstraint('facetId', name='metadata_facet_pkey'), - ) - op.create_table( - 'environment_permission', - sa.Column('entityUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('entityType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'entityRoleInEnvironment', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.PrimaryKeyConstraint( - 'entityUri', 'environmentUri', name='environment_permission_pkey' - ), - ) - op.create_table( - 'key_value_pair', - sa.Column('kvId', 
sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('objectUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('key', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('value', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('kvId', name='key_value_pair_pkey'), - ) - op.create_table( - 'dataset_query', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('queryUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('body', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('queryUri', name='dataset_query_pkey'), - ) - op.create_table( - 'share_object_item_v2', - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('version', sa.INTEGER(), autoincrement=False, nullable=False), - sa.Column('shareItemUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemType', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('itemName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, 
nullable=True - ), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'S3AccessPointName', sa.VARCHAR(), autoincrement=False, nullable=True - ), - sa.PrimaryKeyConstraint('shareItemUri', name='share_object_item_v2_pkey'), - ) - op.create_table( - 'share_object_v2', - sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('version', sa.INTEGER(), autoincrement=False, nullable=False), - sa.Column('latest', sa.BOOLEAN(), autoincrement=False, nullable=False), - sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('datasetName', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('principalId', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('principalType', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.PrimaryKeyConstraint('shareUri', 'version', name='share_object_v2_pkey'), - ) - op.create_table( - 'scheduled_query', - sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True - ), - sa.Column( - 'deleted', postgresql.TIMESTAMP(), 
autoincrement=False, nullable=True - ), - sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.Column( - 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True - ), - sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), - sa.Column( - 'scheduledQueryUri', sa.VARCHAR(), autoincrement=False, nullable=False - ), - sa.Column( - 'SamlAdminGroupName', sa.VARCHAR(), autoincrement=False, nullable=True - ), - sa.Column('cronexpr', sa.VARCHAR(), autoincrement=False, nullable=True), - sa.PrimaryKeyConstraint('scheduledQueryUri', name='scheduled_query_pkey'), - ) - # ### end Alembic commands ### diff --git a/backend/migrations/versions/74b89c64f330_vpc_group.py b/backend/migrations/versions/74b89c64f330_vpc_group.py deleted file mode 100644 index d19fb6230..000000000 --- a/backend/migrations/versions/74b89c64f330_vpc_group.py +++ /dev/null @@ -1,28 +0,0 @@ -"""vpc group - -Revision ID: 74b89c64f330 -Revises: e177eb044b31 -Create Date: 2021-08-08 10:39:15.991280 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '74b89c64f330' -down_revision = 'e177eb044b31' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('vpc', sa.Column('SamlGroupName', sa.String(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_column('vpc', 'SamlGroupName') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/94697ee46c0c_sagemaker_notebooks_update.py b/backend/migrations/versions/94697ee46c0c_sagemaker_notebooks_update.py deleted file mode 100644 index 8015e677f..000000000 --- a/backend/migrations/versions/94697ee46c0c_sagemaker_notebooks_update.py +++ /dev/null @@ -1,40 +0,0 @@ -"""sagemaker notebooks update - -Revision ID: 94697ee46c0c -Revises: 9b589bf91485 -Create Date: 2021-09-12 18:55:03.301399 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '94697ee46c0c' -down_revision = '9b589bf91485' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('sagemaker_notebook', sa.Column('VpcId', sa.String(), nullable=True)) - op.add_column( - 'sagemaker_notebook', sa.Column('SubnetId', sa.String(), nullable=True) - ) - op.add_column( - 'sagemaker_notebook', sa.Column('VolumeSizeInGB', sa.Integer(), nullable=True) - ) - op.add_column( - 'sagemaker_notebook', sa.Column('InstanceType', sa.String(), nullable=True) - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_column('sagemaker_notebook', 'InstanceType') - op.drop_column('sagemaker_notebook', 'VolumeSizeInGB') - op.drop_column('sagemaker_notebook', 'SubnetId') - op.drop_column('sagemaker_notebook', 'VpcId') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/967fa9c0a147_add_ecs_task_arn.py b/backend/migrations/versions/967fa9c0a147_add_ecs_task_arn.py deleted file mode 100644 index 73e860d5a..000000000 --- a/backend/migrations/versions/967fa9c0a147_add_ecs_task_arn.py +++ /dev/null @@ -1,28 +0,0 @@ -"""add ecs task arn - -Revision ID: 967fa9c0a147 -Revises: 5e5c84138af7 -Create Date: 2021-10-06 07:48:30.726242 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = '967fa9c0a147' -down_revision = '5e5c84138af7' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('stack', sa.Column('EcsTaskArn', sa.String(), nullable=True)) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('stack', 'EcsTaskArn') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/97050ec09354_release_3_7_8.py b/backend/migrations/versions/97050ec09354_release_3_7_8.py deleted file mode 100644 index fd0fdce87..000000000 --- a/backend/migrations/versions/97050ec09354_release_3_7_8.py +++ /dev/null @@ -1,92 +0,0 @@ -"""release 3.7.8 - -Revision ID: 97050ec09354 -Revises: 166af5c0355b -Create Date: 2021-12-08 12:54:33.828838 - -""" -import datetime - -from alembic import op -from sqlalchemy import Boolean, Column, String, DateTime, orm -from sqlalchemy.dialects import postgresql -from sqlalchemy.ext.declarative import declarative_base - -from dataall.db import utils, Resource - -# revision identifiers, used by Alembic. 
-from dataall.utils.naming_convention import ( - NamingConventionService, - NamingConventionPattern, -) - -# revision identifiers, used by Alembic. -revision = '97050ec09354' -down_revision = '166af5c0355b' -branch_labels = None -depends_on = None - -Base = declarative_base() - - -class Dataset(Resource, Base): - __tablename__ = 'dataset' - environmentUri = Column(String, nullable=False) - organizationUri = Column(String, nullable=False) - datasetUri = Column(String, primary_key=True, default=utils.uuid('dataset')) - region = Column(String, default='eu-west-1') - AwsAccountId = Column(String, nullable=False) - S3BucketName = Column(String, nullable=False) - GlueDatabaseName = Column(String, nullable=False) - GlueCrawlerName = Column(String) - GlueCrawlerSchedule = Column(String) - GlueProfilingJobName = Column(String) - GlueProfilingTriggerSchedule = Column(String) - GlueProfilingTriggerName = Column(String) - GlueDataQualityJobName = Column(String) - GlueDataQualitySchedule = Column(String) - GlueDataQualityTriggerName = Column(String) - IAMDatasetAdminRoleArn = Column(String, nullable=False) - IAMDatasetAdminUserArn = Column(String, nullable=False) - KmsAlias = Column(String, nullable=False) - language = Column(String, nullable=False, default='English') - topics = Column(postgresql.ARRAY(String), nullable=True) - confidentiality = Column(String, nullable=False, default='Unclassified') - tags = Column(postgresql.ARRAY(String)) - bucketCreated = Column(Boolean, default=False) - glueDatabaseCreated = Column(Boolean, default=False) - iamAdminRoleCreated = Column(Boolean, default=False) - iamAdminUserCreated = Column(Boolean, default=False) - kmsAliasCreated = Column(Boolean, default=False) - lakeformationLocationCreated = Column(Boolean, default=False) - bucketPolicyCreated = Column(Boolean, default=False) - businessOwnerEmail = Column(String, nullable=True) - businessOwnerDelegationEmails = Column(postgresql.ARRAY(String), nullable=True) - stewards = Column(String, 
nullable=True) - SamlAdminGroupName = Column(String, nullable=True) - importedS3Bucket = Column(Boolean, default=False) - importedGlueDatabase = Column(Boolean, default=False) - importedKmsKey = Column(Boolean, default=False) - importedAdminRole = Column(Boolean, default=False) - imported = Column(Boolean, default=False) - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - bind = op.get_bind() - session = orm.Session(bind=bind) - print('Back filling datasets crawler names...') - datasets: [Dataset] = session.query(Dataset).all() - dataset: Dataset - for dataset in datasets: - print(f'Back fill dataset crawler name {dataset.label}...') - dataset.GlueCrawlerName = f'{dataset.S3BucketName}-crawler' - session.commit() - print('Successfully back filled glue crawler names ') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - pass - # ### end Alembic commands ### diff --git a/backend/migrations/versions/9b589bf91485_dashboard_sharing.py b/backend/migrations/versions/9b589bf91485_dashboard_sharing.py deleted file mode 100644 index f023b8393..000000000 --- a/backend/migrations/versions/9b589bf91485_dashboard_sharing.py +++ /dev/null @@ -1,31 +0,0 @@ -"""dashboard sharing - -Revision ID: 9b589bf91485 -Revises: decc96c5670f -Create Date: 2021-09-10 10:24:37.018830 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = '9b589bf91485' -down_revision = 'decc96c5670f' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column('dashboardshare', sa.Column('status', sa.String(), nullable=False)) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.alter_column( - 'share_object', 'environmentUri', existing_type=sa.VARCHAR(), nullable=True - ) - op.drop_column('dashboardshare', 'status') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/b6e0ac8f6d3f_add_env_feature_flags.py b/backend/migrations/versions/b6e0ac8f6d3f_add_env_feature_flags.py deleted file mode 100644 index df90288ca..000000000 --- a/backend/migrations/versions/b6e0ac8f6d3f_add_env_feature_flags.py +++ /dev/null @@ -1,103 +0,0 @@ -"""add env feature flags - -Revision ID: b6e0ac8f6d3f -Revises: 967fa9c0a147 -Create Date: 2021-10-25 09:00:40.925964 - -""" -import sqlalchemy as sa -from alembic import op -from sqlalchemy import Boolean, Column, String -from sqlalchemy import orm -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import query_expression - -from dataall.db import Resource, utils - -revision = 'b6e0ac8f6d3f' -down_revision = '967fa9c0a147' -branch_labels = None -depends_on = None -Base = declarative_base() - - -class Environment(Resource, Base): - __tablename__ = 'environment' - organizationUri = Column(String, nullable=False) - environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) - AwsAccountId = Column(String, nullable=False) - region = Column(String, nullable=False, default='eu-west-1') - cognitoGroupName = Column(String, nullable=True) - - validated = Column(Boolean, default=False) - environmentType = Column(String, nullable=False, default='Data') - isOrganizationDefaultEnvironment = Column(Boolean, default=False) - EnvironmentDefaultIAMRoleName = Column(String, nullable=False) - EnvironmentDefaultIAMRoleArn = Column(String, nullable=False) - EnvironmentDefaultBucketName = Column(String) - roleCreated = Column(Boolean, nullable=False, default=False) - - dashboardsEnabled = Column(Boolean, default=False) - notebooksEnabled = Column(Boolean, default=True) - mlStudiosEnabled = Column(Boolean, default=True) - pipelinesEnabled = Column(Boolean, 
default=True) - warehousesEnabled = Column(Boolean, default=True) - - userRoleInEnvironment = query_expression() - - SamlGroupName = Column(String, nullable=True) - CDKRoleArn = Column(String, nullable=False) - - subscriptionsEnabled = Column(Boolean, default=False) - subscriptionsProducersTopicName = Column(String) - subscriptionsProducersTopicImported = Column(Boolean, default=False) - subscriptionsConsumersTopicName = Column(String) - subscriptionsConsumersTopicImported = Column(Boolean, default=False) - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - try: - bind = op.get_bind() - session = orm.Session(bind=bind) - print('Adding environment flags...') - op.add_column('environment', sa.Column('dashboardsEnabled', sa.Boolean())) - op.add_column('environment', sa.Column('notebooksEnabled', sa.Boolean())) - op.add_column('environment', sa.Column('mlStudiosEnabled', sa.Boolean())) - op.add_column('environment', sa.Column('pipelinesEnabled', sa.Boolean())) - op.add_column('environment', sa.Column('warehousesEnabled', sa.Boolean())) - environments: [Environment] = session.query(Environment).all() - for environment in environments: - print('Back filling quicksight flag') - environment.dashboardsEnabled = environment.quicksight_enabled - environment.notebooksEnabled = True - environment.mlStudiosEnabled = True - environment.pipelinesEnabled = True - environment.warehousesEnabled = True - session.commit() - - print('Dropping quicksight_enabled column...') - op.drop_column('environment', 'quicksight_enabled') - - print('Environment feature flags successfully set up') - - except Exception as e: - print(f'Failed to init permissions due to: {e}') - - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.add_column( - 'environment', - sa.Column( - 'quicksight_enabled', sa.BOOLEAN(), autoincrement=False, nullable=False - ), - ) - op.drop_column('environment', 'warehousesEnabled') - op.drop_column('environment', 'pipelinesEnabled') - op.drop_column('environment', 'mlStudiosEnabled') - op.drop_column('environment', 'notebooksEnabled') - op.drop_column('environment', 'dashboardsEnabled') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/bc77fef9d0b2_new_permissions_model.py b/backend/migrations/versions/bc77fef9d0b2_new_permissions_model.py deleted file mode 100644 index e820e4317..000000000 --- a/backend/migrations/versions/bc77fef9d0b2_new_permissions_model.py +++ /dev/null @@ -1,198 +0,0 @@ -"""new permissions model - -Revision ID: bc77fef9d0b2 -Revises: 2ea02fe85af6 -Create Date: 2021-08-03 07:51:18.202980 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = 'bc77fef9d0b2' -down_revision = '2ea02fe85af6' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table( - 'permission', - sa.Column('permissionUri', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column( - 'type', sa.Enum('TENANT', 'RESOURCE', name='permissiontype'), nullable=False - ), - sa.Column('description', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('permissionUri'), - ) - op.create_index(op.f('ix_permission_name'), 'permission', ['name'], unique=False) - op.create_table( - 'resource_policy', - sa.Column('sid', sa.String(), nullable=False), - sa.Column('resourceUri', sa.String(), nullable=False), - sa.Column('resourceType', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=False), - sa.Column( - 'principalType', - sa.Enum('USER', 'GROUP', 'SERVICE', name='rp_principal_type'), - nullable=True, - ), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('sid'), - ) - op.create_index( - op.f('ix_resource_policy_principalId'), - 'resource_policy', - ['principalId'], - unique=False, - ) - op.create_index( - op.f('ix_resource_policy_resourceType'), - 'resource_policy', - ['resourceType'], - unique=False, - ) - op.create_index( - op.f('ix_resource_policy_resourceUri'), - 'resource_policy', - ['resourceUri'], - unique=False, - ) - op.create_table( - 'tenant', - sa.Column('tenantUri', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('description', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('tenantUri'), - ) - op.create_index(op.f('ix_tenant_name'), 'tenant', ['name'], unique=True) - op.create_table( - 'resource_policy_permission', - sa.Column('sid', sa.String(), nullable=False), - sa.Column('permissionUri', sa.String(), 
nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint( - ['permissionUri'], - ['permission.permissionUri'], - ), - sa.ForeignKeyConstraint( - ['sid'], - ['resource_policy.sid'], - ), - sa.PrimaryKeyConstraint('sid', 'permissionUri'), - ) - op.create_table( - 'tenant_policy', - sa.Column('sid', sa.String(), nullable=False), - sa.Column('tenantUri', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=False), - sa.Column( - 'principalType', - sa.Enum('USER', 'GROUP', 'SERVICE', name='tenant_principal_type'), - nullable=True, - ), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint( - ['tenantUri'], - ['tenant.tenantUri'], - ), - sa.PrimaryKeyConstraint('sid'), - ) - op.create_index( - op.f('ix_tenant_policy_principalId'), - 'tenant_policy', - ['principalId'], - unique=False, - ) - op.create_table( - 'tenant_policy_permission', - sa.Column('sid', sa.String(), nullable=False), - sa.Column('permissionUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint( - ['permissionUri'], - ['permission.permissionUri'], - ), - sa.ForeignKeyConstraint( - ['sid'], - ['tenant_policy.sid'], - ), - sa.PrimaryKeyConstraint('sid', 'permissionUri'), - ) - op.add_column('dashboardshare', sa.Column('owner', sa.String(), nullable=True)) - op.add_column('dataset', sa.Column('stewards', sa.String(), nullable=True)) - op.add_column( - 'environment_group_permission', - sa.Column('invitedBy', sa.String(), nullable=True), - ) - op.add_column( - 'environment_group_permission', - sa.Column('environmentIAMRoleArn', sa.String(), nullable=True), - ) - op.add_column( - 'environment_group_permission', - sa.Column('environmentIAMRoleName', sa.String(), nullable=True), - ) - op.add_column( - 
'environment_group_permission', - sa.Column('description', sa.String(), nullable=True), - ) - op.drop_column('group', 'organizationUri') - op.drop_column('group', 'groupRoleInOrganization') - op.add_column( - 'share_object', sa.Column('environmentUri', sa.String(), nullable=True) - ) - op.add_column( - 'tenant_administrator', sa.Column('tenantUri', sa.String(), nullable=False) - ) - op.create_foreign_key( - None, 'tenant_administrator', 'tenant', ['tenantUri'], ['tenantUri'] - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_constraint(None, 'tenant_administrator', type_='foreignkey') - op.drop_column('tenant_administrator', 'tenantUri') - op.drop_column('share_object', 'environmentUri') - op.add_column( - 'group', - sa.Column( - 'groupRoleInOrganization', sa.VARCHAR(), autoincrement=False, nullable=False - ), - ) - op.add_column( - 'group', - sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), - ) - op.drop_column('environment_group_permission', 'description') - op.drop_column('environment_group_permission', 'invitedBy') - op.drop_column('environment_group_permission', 'environmentIAMRoleArn') - op.drop_column('environment_group_permission', 'environmentIAMRoleName') - op.drop_column('dataset', 'stewards') - op.drop_column('dashboardshare', 'owner') - op.drop_table('tenant_policy_permission') - op.drop_index(op.f('ix_tenant_policy_principalId'), table_name='tenant_policy') - op.drop_table('tenant_policy') - op.drop_table('resource_policy_permission') - op.drop_index(op.f('ix_tenant_name'), table_name='tenant') - op.drop_table('tenant') - op.drop_index(op.f('ix_resource_policy_resourceUri'), table_name='resource_policy') - op.drop_index(op.f('ix_resource_policy_resourceType'), table_name='resource_policy') - op.drop_index(op.f('ix_resource_policy_principalId'), table_name='resource_policy') - op.drop_table('resource_policy') - 
op.drop_index(op.f('ix_permission_name'), table_name='permission') - op.drop_table('permission') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/bd271a2780b2_init_database.py b/backend/migrations/versions/bd271a2780b2_init_database.py deleted file mode 100644 index e84ce2062..000000000 --- a/backend/migrations/versions/bd271a2780b2_init_database.py +++ /dev/null @@ -1,1349 +0,0 @@ -"""Init database - -Revision ID: bd271a2780b2 -Revises: -Create Date: 2021-05-19 15:10:53.506962 - -""" -import os - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -from dataall.db import get_engine, has_table, create_schema_if_not_exists - -revision = 'bd271a2780b2' -down_revision = None -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - envname = os.getenv('envname', 'local') - print('ENVNAME', envname) - engine = get_engine(envname=envname).engine - create_schema_if_not_exists(engine, envname) - try: - if not has_table('EnvironmentRedshiftCluster', engine): - op.create_table( - 'EnvironmentRedshiftCluster', - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('environmentUri', 'clusterUri'), - ) - if not has_table('activity', engine): - op.create_table( - 'activity', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), 
nullable=True), - sa.Column('activityUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.Column('action', sa.String(), nullable=False), - sa.Column('summary', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('activityUri'), - ) - if not has_table('airflow_cluster_user_permission', engine): - op.create_table( - 'airflow_cluster_user_permission', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('userRoleForAirflowCluster', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userName', 'clusterUri'), - ) - if not has_table('airflow_project', engine): - op.create_table( - 'airflow_project', - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('projectUri', sa.String(), nullable=False), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=False), - sa.Column('cfnStackName', sa.String(), nullable=True), - sa.Column('cfnStackArn', sa.String(), nullable=True), - sa.Column('cfnStackStatus', sa.String(), nullable=True), - sa.Column('codeRepositoryName', sa.String(), nullable=True), - sa.Column('codeRepositoryLink', sa.String(), nullable=True), - sa.Column('codeRepositoryStatus', sa.String(), nullable=True), - sa.Column('codePipelineStatus', sa.String(), nullable=True), - sa.Column('codePipelineName', sa.String(), nullable=True), - sa.Column('codePipelineLink', sa.String(), nullable=True), - sa.Column('codePipelineArn', sa.String(), nullable=True), - sa.Column('packageName', sa.String(), nullable=False), - sa.Column('status', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('projectUri'), - ) - if not has_table('airflowcluster', engine): - op.create_table( - 
'airflowcluster', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('clusterArn', sa.String(), nullable=True), - sa.Column('clusterName', sa.String(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=True), - sa.Column('kmsAlias', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=True), - sa.Column('vpc', sa.String(), nullable=True), - sa.Column('subnetIds', sa.ARRAY(sa.String()), nullable=True), - sa.Column('securityGroupIds', sa.ARRAY(sa.String()), nullable=True), - sa.Column('CFNStackName', sa.String(), nullable=True), - sa.Column('CFNStackStatus', sa.String(), nullable=True), - sa.Column('CFNStackArn', sa.String(), nullable=True), - sa.Column('IAMRoleArn', sa.String(), nullable=True), - sa.Column('presignedUrl', sa.String(), nullable=True), - sa.Column('imported', sa.Boolean(), nullable=True), - sa.Column('configurationOptions', sa.ARRAY(sa.String()), nullable=True), - sa.Column('airflowVersion', sa.String(), nullable=True), - sa.Column('dagS3Path', sa.String(), nullable=True), - sa.Column('pluginsS3Path', sa.String(), nullable=True), - sa.Column('requirementsS3Path', sa.String(), nullable=True), - sa.Column('environmentClass', sa.String(), nullable=True), - sa.Column('loggingConfiguration', sa.ARRAY(sa.String()), nullable=True), - sa.Column('sourceBucketArn', sa.String(), nullable=False), - 
sa.Column('webServerAccessMode', sa.String(), nullable=True), - sa.Column('maxWorkers', sa.Integer(), nullable=True), - sa.Column('SamlGroupName', sa.String(), nullable=True), - sa.Column('webServerUrl', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('clusterUri'), - ) - if not has_table('all_permissions', engine): - op.create_table( - 'all_permissions', - sa.Column('objectUri', sa.String(), nullable=False), - sa.Column('userName', sa.String(), nullable=False), - sa.Column('permission', sa.String(), nullable=False), - sa.Column('scope', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('objectUri', 'userName'), - ) - if not has_table('apikey', engine): - op.create_table( - 'apikey', - sa.Column('ApiKeyId', sa.String(), nullable=False), - sa.Column('ApiKeySecretHash', sa.String(), nullable=False), - sa.Column('userName', sa.String(), nullable=False), - sa.Column('SamlGroups', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('expires', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('ApiKeyId'), - ) - if not has_table('athena_query_execution', engine): - op.create_table( - 'athena_query_execution', - sa.Column('parentUri', sa.String(), nullable=False), - sa.Column('QueryExecutionId', sa.String(), nullable=False), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('queryid', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('completed', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('QueryExecutionId'), - ) - if not has_table('dashboard', engine): - op.create_table( - 'dashboard', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - 
sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('dashboardUri', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('namespace', sa.String(), nullable=False), - sa.Column('DashboardId', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('dashboardUri'), - ) - if not has_table('dashboardshare', engine): - op.create_table( - 'dashboardshare', - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('dashboardUri', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('shareUri', 'dashboardUri'), - ) - if not has_table('data_access_request', engine): - op.create_table( - 'data_access_request', - sa.Column('requestUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=False), - sa.Column('principalType', sa.String(), nullable=False), - sa.Column('principalName', sa.String(), nullable=False), - sa.Column('requester', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('message', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('requestUri'), - ) - if not has_table('dataset', engine): - op.create_table( - 'dataset', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', 
sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('S3BucketName', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=False), - sa.Column('GlueProfilingJobName', sa.String(), nullable=True), - sa.Column('GlueProfilingTriggerSchedule', sa.String(), nullable=True), - sa.Column('GlueProfilingTriggerName', sa.String(), nullable=True), - sa.Column('GlueDataQualityJobName', sa.String(), nullable=True), - sa.Column('GlueDataQualitySchedule', sa.String(), nullable=True), - sa.Column('GlueDataQualityTriggerName', sa.String(), nullable=True), - sa.Column('IAMDatasetAdminRoleArn', sa.String(), nullable=False), - sa.Column('IAMDatasetAdminUserArn', sa.String(), nullable=False), - sa.Column('KmsAlias', sa.String(), nullable=False), - sa.Column('language', sa.String(), nullable=False), - sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('confidentiality', sa.String(), nullable=False), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('bucketCreated', sa.Boolean(), nullable=True), - sa.Column('glueDatabaseCreated', sa.Boolean(), nullable=True), - sa.Column('iamAdminRoleCreated', sa.Boolean(), nullable=True), - sa.Column('iamAdminUserCreated', sa.Boolean(), nullable=True), - sa.Column('kmsAliasCreated', sa.Boolean(), nullable=True), - sa.Column('lakeformationLocationCreated', sa.Boolean(), nullable=True), - sa.Column('bucketPolicyCreated', sa.Boolean(), nullable=True), - sa.Column('businessOwnerEmail', sa.String(), nullable=True), - sa.Column( - 'businessOwnerDelegationEmails', - postgresql.ARRAY(sa.String()), 
- nullable=True, - ), - sa.Column('SamlAdminGroupName', sa.String(), nullable=True), - sa.Column('importedS3Bucket', sa.Boolean(), nullable=True), - sa.Column('importedGlueDatabase', sa.Boolean(), nullable=True), - sa.Column('importedKmsKey', sa.Boolean(), nullable=True), - sa.Column('importedAdminRole', sa.Boolean(), nullable=True), - sa.Column('imported', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('datasetUri'), - ) - if not has_table('dataset_access_point', engine): - op.create_table( - 'dataset_access_point', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('projectUri', sa.String(), nullable=False), - sa.Column('locationUri', sa.String(), nullable=False), - sa.Column('accessPointUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('S3BucketName', sa.String(), nullable=False), - sa.Column('S3Prefix', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('S3AccessPointName', sa.String(), nullable=False), - sa.Column('accessPointCreated', sa.Boolean(), nullable=False), - sa.PrimaryKeyConstraint('accessPointUri'), - ) - if not has_table('dataset_loader', engine): - op.create_table( - 'dataset_loader', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - 
sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('loaderUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('IAMPrincipalArn', sa.String(), nullable=False), - sa.Column('IAMRoleId', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('loaderUri'), - ) - if not has_table('dataset_profiling_run', engine): - op.create_table( - 'dataset_profiling_run', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('profilingRunUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('GlueJobName', sa.String(), nullable=True), - sa.Column('GlueJobRunId', sa.String(), nullable=True), - sa.Column('GlueTriggerSchedule', sa.String(), nullable=True), - sa.Column('GlueTriggerName', sa.String(), nullable=True), - sa.Column('GlueTableName', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=True), - sa.Column( - 'results', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('status', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('profilingRunUri'), - ) - if not has_table('dataset_quality_rule', engine): - op.create_table( - 'dataset_quality_rule', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - 
sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('ruleUri', sa.String(), nullable=False), - sa.Column('query', sa.String(), nullable=False), - sa.Column('status', sa.String(), nullable=False), - sa.Column( - 'logs', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.PrimaryKeyConstraint('ruleUri'), - ) - if not has_table('dataset_query', engine): - op.create_table( - 'dataset_query', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('queryUri', sa.String(), nullable=False), - sa.Column('body', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('queryUri'), - ) - if not has_table('dataset_storage_location', engine): - op.create_table( - 'dataset_storage_location', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('locationUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('S3BucketName', sa.String(), nullable=False), - sa.Column('S3Prefix', sa.String(), 
nullable=False), - sa.Column('S3AccessPoint', sa.String(), nullable=True), - sa.Column('region', sa.String(), nullable=True), - sa.Column('locationCreated', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('locationUri'), - ) - if not has_table('dataset_storage_location_permission', engine): - op.create_table( - 'dataset_storage_location_permission', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('locationUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column( - 'userRoleForDatasetStorageLocation', sa.String(), nullable=False - ), - sa.PrimaryKeyConstraint('userName', 'locationUri'), - ) - if not has_table('dataset_table', engine): - op.create_table( - 'dataset_table', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('S3BucketName', sa.String(), nullable=False), - sa.Column('S3Prefix', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=False), - sa.Column('GlueTableName', sa.String(), nullable=False), - sa.Column('GlueTableConfig', sa.Text(), nullable=True), - sa.Column( - 'GlueTableProperties', - postgresql.JSON(astext_type=sa.Text()), - nullable=True, - ), - sa.Column('LastGlueTableStatus', sa.String(), nullable=True), - sa.Column('region', sa.String(), nullable=True), - sa.Column('stage', 
sa.String(), nullable=True), - sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('confidentiality', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('tableUri'), - ) - if not has_table('dataset_table_column', engine): - op.create_table( - 'dataset_table_column', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('columnUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=False), - sa.Column('GlueTableName', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('typeName', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('columnUri'), - ) - if not has_table('dataset_table_permission', engine): - op.create_table( - 'dataset_table_permission', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('userRoleForTable', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userName', 'tableUri'), - ) - if not has_table('dataset_table_profiling_job', engine): - op.create_table( - 'dataset_table_profiling_job', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - 
sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('jobUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('RunCommandId', sa.String(), nullable=True), - sa.Column('GlueDatabaseName', sa.String(), nullable=False), - sa.Column('GlueTableName', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('jobUri'), - ) - if not has_table('dataset_topic', engine): - op.create_table( - 'dataset_topic', - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('topicUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('datasetUri', 'topicUri'), - ) - if not has_table('dataset_user_permission', engine): - op.create_table( - 'dataset_user_permission', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('userRoleForDataset', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userName', 'datasetUri'), - ) - if not has_table('document', engine): - op.create_table( - 'document', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), 
nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('parentUri', sa.String(), nullable=False), - sa.Column('md', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('parentUri'), - ) - if not has_table('environment', engine): - op.create_table( - 'environment', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=False), - sa.Column('cognitoGroupName', sa.String(), nullable=True), - sa.Column('validated', sa.Boolean(), nullable=True), - sa.Column('environmentType', sa.String(), nullable=False), - sa.Column( - 'isOrganizationDefaultEnvironment', sa.Boolean(), nullable=True - ), - sa.Column('EnvironmentDefaultIAMRoleName', sa.String(), nullable=False), - sa.Column('EnvironmentDefaultIAMRoleArn', sa.String(), nullable=False), - sa.Column('EnvironmentDefaultBucketName', sa.String(), nullable=True), - sa.Column('roleCreated', sa.Boolean(), nullable=False), - sa.Column('quicksight_enabled', sa.Boolean(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=True), - sa.Column('CDKRoleArn', sa.String(), nullable=False), - sa.Column('subscriptionsEnabled', sa.Boolean(), nullable=True), - sa.Column( - 'subscriptionsProducersTopicName', sa.String(), nullable=True - ), - sa.Column( - 
'subscriptionsProducersTopicImported', sa.Boolean(), nullable=True - ), - sa.Column( - 'subscriptionsConsumersTopicName', sa.String(), nullable=True - ), - sa.Column( - 'subscriptionsConsumersTopicImported', sa.Boolean(), nullable=True - ), - sa.PrimaryKeyConstraint('environmentUri'), - ) - op.create_table( - 'environment_group_permission', - sa.Column('groupUri', sa.String(), nullable=False), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('groupRoleInEnvironment', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'environmentUri'), - ) - op.create_table( - 'environment_permission', - sa.Column('entityUri', sa.String(), nullable=False), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('entityType', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('entityRoleInEnvironment', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('entityUri', 'environmentUri'), - ) - op.create_table( - 'environment_user_permission', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('userRoleInEnvironment', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userName', 'environmentUri'), - ) - if not has_table('feed_message', engine): - op.create_table( - 'feed_message', - sa.Column('feedMessageUri', sa.String(), nullable=False), - sa.Column('creator', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=False), - sa.Column('content', sa.String(), 
nullable=True), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('feedMessageUri'), - ) - if not has_table('glossary_node', engine): - op.create_table( - 'glossary_node', - sa.Column('nodeUri', sa.String(), nullable=False), - sa.Column('parentUri', sa.String(), nullable=True), - sa.Column('nodeType', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=True), - sa.Column('path', sa.String(), nullable=False), - sa.Column('label', sa.String(), nullable=False), - sa.Column('readme', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('admin', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('nodeUri'), - ) - if not has_table('group', engine): - op.create_table( - 'group', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('groupUri', sa.String(), nullable=False), - sa.Column('groupRoleInOrganization', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('groupUri'), - ) - if not has_table('group_member', engine): - op.create_table( - 'group_member', - sa.Column('groupUri', sa.String(), nullable=False), - sa.Column('userName', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), 
nullable=True), - sa.Column('userRoleInGroup', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName'), - ) - if not has_table('item_tags', engine): - op.create_table( - 'item_tags', - sa.Column('tagid', sa.String(), nullable=False), - sa.Column('itemid', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid'), - ) - if not has_table('key_value_pair', engine): - op.create_table( - 'key_value_pair', - sa.Column('kvId', sa.String(), nullable=False), - sa.Column('objectUri', sa.String(), nullable=False), - sa.Column('key', sa.String(), nullable=False), - sa.Column('value', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('kvId'), - ) - if not has_table('lineage_store', engine): - op.create_table( - 'lineage_store', - sa.Column('name', sa.String(), nullable=False), - sa.Column('version', sa.String(), nullable=False), - sa.Column('guid', sa.String(), nullable=True), - sa.Column( - 'kind', - sa.Enum( - 'dataset', - 'table', - 'folder', - 'job', - 'run', - 'datasource', - name='datanodetype', - ), - nullable=False, - ), - sa.Column('parent', sa.String(), nullable=True), - sa.Column('ref', sa.String(), nullable=False), - sa.Column('location', sa.String(), nullable=True), - sa.Column('created', sa.String(), nullable=True), - sa.Column('inputs', sa.ARRAY(sa.String()), nullable=True), - sa.Column('outputs', sa.ARRAY(sa.String()), nullable=True), - sa.PrimaryKeyConstraint('name', 'version', 'ref'), - ) - if not has_table('metadata_facet', engine): - op.create_table( - 'metadata_facet', - sa.Column('facetId', sa.String(), nullable=False), - sa.Column('guid', sa.String(), nullable=False), - sa.Column( - '_schema', postgresql.JSON(astext_type=sa.Text()), nullable=False - ), - sa.Column( - 'doc', postgresql.JSON(astext_type=sa.Text()), nullable=False - ), - sa.PrimaryKeyConstraint('facetId'), - ) - if not has_table('metadata_tag', engine): - op.create_table( - 'metadata_tag', - sa.Column('tagId', sa.String(), nullable=False), - 
sa.Column('nodeId', sa.String(), nullable=False), - sa.Column('nodeKind', sa.String(), nullable=False), - sa.Column('Key', sa.String(), nullable=False), - sa.Column('Value', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('tagId'), - ) - if not has_table('metric', engine): - op.create_table( - 'metric', - sa.Column('metricUri', sa.String(), nullable=False), - sa.Column('metricName', sa.String(), nullable=False), - sa.Column('metricValue', sa.Float(), nullable=False), - sa.Column('tags', sa.ARRAY(sa.String()), nullable=True), - sa.Column('created', sa.DateTime(), nullable=False), - sa.Column('emitter', sa.String(), nullable=False), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=False), - sa.Column('target', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('metricUri'), - ) - if not has_table('notification', engine): - op.create_table( - 'notification', - sa.Column('notificationUri', sa.String(), nullable=False), - sa.Column( - 'type', - sa.Enum( - 'SHARE_OBJECT_SUBMITTED', - 'SHARE_ITEM_REQUEST', - 'SHARE_OBJECT_APPROVED', - 'SHARE_OBJECT_REJECTED', - 'SHARE_OBJECT_PENDING_APPROVAL', - 'DATASET_VERSION', - name='notificationtype', - ), - nullable=True, - ), - sa.Column('message', sa.String(), nullable=False), - sa.Column('username', sa.String(), nullable=False), - sa.Column('is_read', sa.Boolean(), nullable=False), - sa.Column('target_uri', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('notificationUri'), - ) - if not has_table('organization', engine): - op.create_table( - 'organization', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), 
nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('organizationUri'), - ) - op.create_table( - 'organization_topic', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('topicUri', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('topicUri'), - ) - op.create_table( - 'organization_user', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('userRoleInOrganization', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userName', 'organizationUri'), - ) - if not has_table('redshiftcluster', engine): - op.create_table( - 'redshift_cluster_user_permission', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('redshiftClusterUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('userRoleForRedshiftCluster', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userName', 'redshiftClusterUri'), - ) - op.create_table( - 'redshiftcluster', - sa.Column('label', sa.String(), 
nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('clusterArn', sa.String(), nullable=True), - sa.Column('clusterName', sa.String(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('databaseName', sa.String(), nullable=True), - sa.Column('databaseUser', sa.String(), nullable=True), - sa.Column('masterUsername', sa.String(), nullable=True), - sa.Column('masterDatabaseName', sa.String(), nullable=True), - sa.Column('nodeType', sa.String(), nullable=True), - sa.Column('numberOfNodes', sa.Integer(), nullable=True), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=True), - sa.Column('kmsAlias', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=True), - sa.Column('vpc', sa.String(), nullable=True), - sa.Column('subnetGroupName', sa.String(), nullable=True), - sa.Column('subnetIds', sa.ARRAY(sa.String()), nullable=True), - sa.Column('securityGroupIds', sa.ARRAY(sa.String()), nullable=True), - sa.Column('CFNStackName', sa.String(), nullable=True), - sa.Column('CFNStackStatus', sa.String(), nullable=True), - sa.Column('CFNStackArn', sa.String(), nullable=True), - sa.Column('IAMRoles', sa.ARRAY(sa.String()), nullable=True), - sa.Column('endpoint', sa.String(), nullable=True), - sa.Column('port', sa.Integer(), nullable=True), - sa.Column('datahubSecret', sa.String(), nullable=True), - sa.Column('masterSecret', sa.String(), nullable=True), - sa.Column('external_schema_created', sa.Boolean(), 
nullable=True), - sa.Column('SamlGroupName', sa.String(), nullable=True), - sa.Column('imported', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('clusterUri'), - ) - op.create_table( - 'redshiftcluster_dataset', - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('datasetCopyEnabled', sa.Boolean(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri'), - ) - op.create_table( - 'redshiftcluster_datasettable', - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('shareUri', sa.String(), nullable=True), - sa.Column('enabled', sa.Boolean(), nullable=True), - sa.Column('schema', sa.String(), nullable=False), - sa.Column('databaseName', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri'), - ) - if not has_table('sagemaker_notebook', engine): - op.create_table( - 'sagemaker_notebook', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('notebookUri', sa.String(), nullable=False), - sa.Column('NotebookInstanceName', sa.String(), nullable=False), 
- sa.Column('NotebookInstanceStatus', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('RoleArn', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('SamlAdminGroupName', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('notebookUri'), - ) - if not has_table('sagemaker_studio_domain', engine): - op.create_table( - 'sagemaker_studio_domain', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('sagemakerStudioUri', sa.String(), nullable=False), - sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), - sa.Column('SagemakerStudioStatus', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('RoleArn', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('sagemakerStudioUri'), - ) - op.create_table( - 'sagemaker_studio_user_profile', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('sagemakerStudioUserProfileUri', sa.String(), nullable=False), - sa.Column( - 
'sagemakerStudioUserProfileStatus', sa.String(), nullable=False - ), - sa.Column( - 'sagemakerStudioUserProfileName', sa.String(), nullable=False - ), - sa.Column( - 'sagemakerStudioUserProfileNameSlugify', sa.String(), nullable=False - ), - sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('RoleArn', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('SamlAdminGroupName', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('sagemakerStudioUserProfileUri'), - ) - if not has_table('saved_query', engine): - op.create_table( - 'saved_query', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('scheduledQueryUri', sa.String(), nullable=False), - sa.Column('savedQueryUri', sa.String(), nullable=False), - sa.Column('queryOrder', sa.Integer(), nullable=False), - sa.Column('sqlBody', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('savedQueryUri'), - ) - if not has_table('scheduled_query', engine): - op.create_table( - 'scheduled_query', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - 
sa.Column('scheduledQueryUri', sa.String(), nullable=False), - sa.Column('SamlAdminGroupName', sa.String(), nullable=True), - sa.Column('cronexpr', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('scheduledQueryUri'), - ) - if not has_table('search_index', engine): - op.create_table( - 'search_index', - sa.Column('objectUri', sa.String(), nullable=False), - sa.Column('objectType', sa.String(), nullable=False), - sa.Column('label', sa.String(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('searcAttribute1', sa.String(), nullable=True), - sa.Column('searcAttribute2', sa.String(), nullable=True), - sa.Column('searcAttribute3', sa.String(), nullable=True), - sa.Column('searcAttribute4', sa.String(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('objectUri'), - ) - if not has_table('share_object', engine): - op.create_table( - 'share_object', - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=True), - sa.Column('principalType', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('confirmed', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('shareUri'), - ) - if not has_table('share_object_history', engine): - op.create_table( - 'share_object_history', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), 
nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('historyUri', sa.String(), nullable=False), - sa.Column('actionName', sa.String(), nullable=False), - sa.Column( - 'actionPayload', - postgresql.JSON(astext_type=sa.Text()), - nullable=True, - ), - sa.PrimaryKeyConstraint('historyUri'), - ) - if not has_table('share_object_item', engine): - op.create_table( - 'share_object_item', - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('shareItemUri', sa.String(), nullable=False), - sa.Column('itemType', sa.String(), nullable=False), - sa.Column('itemUri', sa.String(), nullable=False), - sa.Column('itemName', sa.String(), nullable=False), - sa.Column('permission', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=False), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=True), - sa.Column('GlueTableName', sa.String(), nullable=True), - sa.Column('S3AccessPointName', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('action', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('shareItemUri'), - ) - if not has_table('share_object_item_v2', engine): - op.create_table( - 'share_object_item_v2', - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('version', sa.Integer(), nullable=False), - sa.Column('shareItemUri', sa.String(), nullable=False), - sa.Column('itemType', sa.String(), nullable=False), - sa.Column('itemUri', sa.String(), nullable=False), - sa.Column('itemName', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=False), - 
sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=True), - sa.Column('GlueTableName', sa.String(), nullable=True), - sa.Column('S3AccessPointName', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('shareItemUri'), - ) - if not has_table('share_object_v2', engine): - op.create_table( - 'share_object_v2', - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('version', sa.Integer(), nullable=False), - sa.Column('latest', sa.Boolean(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('datasetName', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=True), - sa.Column('principalType', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('shareUri', 'version'), - ) - if not has_table('sqlpipeline', engine): - op.create_table( - 'sqlpipeline', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('sqlPipelineUri', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=False), - sa.Column('repo', sa.String(), nullable=False), - 
sa.PrimaryKeyConstraint('sqlPipelineUri'), - ) - if not has_table('stack', engine): - op.create_table( - 'stack', - sa.Column('stackUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('accountid', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=False), - sa.Column('cronexpr', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('stack', sa.String(), nullable=False), - sa.Column( - 'payload', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('stackid', sa.String(), nullable=True), - sa.Column( - 'outputs', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column( - 'resources', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column( - 'error', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('lastSeen', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('stackUri'), - ) - if not has_table('tag', engine): - op.create_table( - 'tag', - sa.Column('id', sa.String(), nullable=False), - sa.Column('tag', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id'), - ) - if not has_table('task', engine): - op.create_table( - 'task', - sa.Column('taskUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('cronexpr', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('action', sa.String(), nullable=False), - sa.Column( - 'payload', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column( - 'response', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - 
sa.Column( - 'error', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('lastSeen', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('taskUri'), - ) - if not has_table('tenant_administrator', engine): - op.create_table( - 'tenant_administrator', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('userRoleInTenant', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userName'), - ) - if not has_table('term_link', engine): - op.create_table( - 'term_link', - sa.Column('linkUri', sa.String(), nullable=False), - sa.Column('nodeUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.Column('approvedBySteward', sa.Boolean(), nullable=True), - sa.Column('approvedByOwner', sa.Boolean(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('linkUri'), - ) - if not has_table('user', engine): - op.create_table( - 'user', - sa.Column('userId', sa.String(), nullable=False), - sa.Column('userName', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userId'), - ) - if not has_table('userprofile', engine): - op.create_table( - 'userprofile', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('username', sa.String(), nullable=False), - sa.Column('bio', sa.String(), nullable=True), - sa.Column('b64EncodedAvatar', sa.String(), nullable=True), - 
sa.PrimaryKeyConstraint('username'), - ) - if not has_table('vpc', engine): - op.create_table( - 'vpc', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('vpcUri', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('VpcId', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('vpcUri'), - ) - if not has_table('worksheet', engine): - op.create_table( - 'worksheet', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('worksheetUri', sa.String(), nullable=False), - sa.Column('SamlAdminGroupName', sa.String(), nullable=False), - sa.Column('sqlBody', sa.String(), nullable=True), - sa.Column( - 'chartConfig', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('lastSavedAthenaQueryIdForQuery', sa.String(), nullable=True), - sa.Column('lastSavedAthenaQueryIdForChart', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('worksheetUri'), - ) - op.create_table( - 'worksheet_query_result', - sa.Column('worksheetUri', sa.String(), nullable=False), - sa.Column('AthenaQueryId', sa.String(), nullable=False), - 
sa.Column('status', sa.String(), nullable=False), - sa.Column( - 'queryType', - sa.Enum('chart', 'data', name='querytype'), - nullable=False, - ), - sa.Column('sqlBody', sa.String(), nullable=False), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=False), - sa.Column('OutputLocation', sa.String(), nullable=False), - sa.Column('error', sa.String(), nullable=True), - sa.Column('ElapsedTimeInMs', sa.Integer(), nullable=True), - sa.Column('DataScannedInBytes', sa.Integer(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('AthenaQueryId'), - ) - op.create_table( - 'worksheet_share', - sa.Column('worksheetShareUri', sa.String(), nullable=False), - sa.Column('worksheetUri', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=False), - sa.Column('principalType', sa.String(), nullable=False), - sa.Column('canEdit', sa.Boolean(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('worksheetShareUri'), - ) - except Exception as e: - print('Failed to init database due to:', e) - pass - - -# ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('worksheet_share') - op.drop_table('worksheet_query_result') - op.drop_table('worksheet') - op.drop_table('vpc') - op.drop_table('userprofile') - op.drop_table('user') - op.drop_table('term_link') - op.drop_table('tenant_administrator') - op.drop_table('task') - op.drop_table('tag') - op.drop_table('stack') - op.drop_table('sqlpipeline') - op.drop_table('share_object_v2') - op.drop_table('share_object_item_v2') - op.drop_table('share_object_item') - op.drop_table('share_object_history') - op.drop_table('share_object') - op.drop_table('search_index') - op.drop_table('scheduled_query') - op.drop_table('saved_query') - op.drop_table('sagemaker_studio_user_profile') - op.drop_table('sagemaker_studio_domain') - op.drop_table('sagemaker_notebook') - op.drop_table('redshiftcluster_datasettable') - op.drop_table('redshiftcluster_dataset') - op.drop_table('redshiftcluster') - op.drop_table('redshift_cluster_user_permission') - op.drop_table('organization_user') - op.drop_table('organization_topic') - op.drop_table('organization') - op.drop_table('notification') - op.drop_table('metric') - op.drop_table('metadata_tag') - op.drop_table('metadata_facet') - op.drop_table('lineage_store') - op.drop_table('key_value_pair') - op.drop_table('item_tags') - op.drop_table('group_member') - op.drop_table('group') - op.drop_table('glossary_node') - op.drop_table('feed_message') - op.drop_table('environment_user_permission') - op.drop_table('environment_permission') - op.drop_table('environment_group_permission') - op.drop_table('environment') - op.drop_table('document') - op.drop_table('dataset_user_permission') - op.drop_table('dataset_topic') - op.drop_table('dataset_table_profiling_job') - op.drop_table('dataset_table_permission') - op.drop_table('dataset_table_column') - op.drop_table('dataset_table') - op.drop_table('dataset_storage_location_permission') - op.drop_table('dataset_storage_location') - op.drop_table('dataset_query') - 
op.drop_table('dataset_quality_rule') - op.drop_table('dataset_profiling_run') - op.drop_table('dataset_loader') - op.drop_table('dataset_access_point') - op.drop_table('dataset') - op.drop_table('data_access_request') - op.drop_table('dashboardshare') - op.drop_table('dashboard') - op.drop_table('athena_query_execution') - op.drop_table('apikey') - op.drop_table('all_permissions') - op.drop_table('airflowcluster') - op.drop_table('airflow_project') - op.drop_table('airflow_cluster_user_permission') - op.drop_table('activity') - op.drop_table('EnvironmentRedshiftCluster') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/bd4bea86de30_release_3_6_0.py b/backend/migrations/versions/bd4bea86de30_release_3_6_0.py deleted file mode 100644 index 5bef57808..000000000 --- a/backend/migrations/versions/bd4bea86de30_release_3_6_0.py +++ /dev/null @@ -1,38 +0,0 @@ -"""release 3.6.0 - -Revision ID: bd4bea86de30 -Revises: c5c6bbbc5de7 -Create Date: 2021-11-29 06:10:27.519546 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = 'bd4bea86de30' -down_revision = 'c5c6bbbc5de7' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'vote', - sa.Column('voteUri', sa.String(), nullable=False), - sa.Column('username', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.Column('upvote', sa.Boolean(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('voteUri'), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('vote') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/be22468d7342_dataset_column_type.py b/backend/migrations/versions/be22468d7342_dataset_column_type.py deleted file mode 100644 index d76b7b0a6..000000000 --- a/backend/migrations/versions/be22468d7342_dataset_column_type.py +++ /dev/null @@ -1,30 +0,0 @@ -"""dataset column type - -Revision ID: be22468d7342 -Revises: 5d5102986ce5 -Create Date: 2021-07-02 07:39:46.442637 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. -revision = 'be22468d7342' -down_revision = '5d5102986ce5' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.add_column( - 'dataset_table_column', sa.Column('columnType', sa.String(), nullable=True) - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('dataset_table_column', 'columnType') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/c5c6bbbc5de7_release_3_5_0.py b/backend/migrations/versions/c5c6bbbc5de7_release_3_5_0.py deleted file mode 100644 index 34aabdf2c..000000000 --- a/backend/migrations/versions/c5c6bbbc5de7_release_3_5_0.py +++ /dev/null @@ -1,104 +0,0 @@ -"""release 3.5.0 - -Revision ID: c5c6bbbc5de7 -Revises: b6e0ac8f6d3f -Create Date: 2021-11-15 08:47:40.128047 - -""" -import sqlalchemy as sa -from alembic import op -from sqlalchemy import Boolean, Column, String, orm -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import query_expression - -from dataall.db import Resource, utils - -# revision identifiers, used by Alembic. 
-revision = 'c5c6bbbc5de7' -down_revision = 'b6e0ac8f6d3f' -branch_labels = None -depends_on = None - -Base = declarative_base() - - -class Environment(Resource, Base): - __tablename__ = 'environment' - organizationUri = Column(String, nullable=False) - environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) - AwsAccountId = Column(String, nullable=False) - region = Column(String, nullable=False, default='eu-west-1') - cognitoGroupName = Column(String, nullable=True) - - validated = Column(Boolean, default=False) - environmentType = Column(String, nullable=False, default='Data') - isOrganizationDefaultEnvironment = Column(Boolean, default=False) - EnvironmentDefaultIAMRoleName = Column(String, nullable=False) - EnvironmentDefaultIAMRoleArn = Column(String, nullable=False) - EnvironmentDefaultBucketName = Column(String) - roleCreated = Column(Boolean, nullable=False, default=False) - - EnvironmentDefaultIAMRoleImported = Column(Boolean, default=False) - resourcePrefix = Column(String, nullable=False, default='dh') - - dashboardsEnabled = Column(Boolean, default=False) - notebooksEnabled = Column(Boolean, default=True) - mlStudiosEnabled = Column(Boolean, default=True) - pipelinesEnabled = Column(Boolean, default=True) - warehousesEnabled = Column(Boolean, default=True) - - userRoleInEnvironment = query_expression() - - SamlGroupName = Column(String, nullable=True) - CDKRoleArn = Column(String, nullable=False) - - subscriptionsEnabled = Column(Boolean, default=False) - subscriptionsProducersTopicName = Column(String) - subscriptionsProducersTopicImported = Column(Boolean, default=False) - subscriptionsConsumersTopicName = Column(String) - subscriptionsConsumersTopicImported = Column(Boolean, default=False) - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.create_table( - 'keyvaluetag', - sa.Column('tagUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.Column('key', sa.String(), nullable=False), - sa.Column('value', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('tagUri'), - ) - bind = op.get_bind() - session = orm.Session(bind=bind) - print('Adding environment resourcePrefix...') - op.add_column( - 'environment', sa.Column('resourcePrefix', sa.String(), nullable=False) - ) - op.add_column( - 'environment', - sa.Column('EnvironmentDefaultIAMRoleImported', sa.Boolean(), nullable=True), - ) - op.add_column( - 'environment_group_permission', - sa.Column('environmentIAMRoleImported', sa.Boolean(), nullable=True), - ) - - environments: [Environment] = session.query(Environment).all() - for environment in environments: - print(f'Back filling resourcePrefix to environment {environment.label}') - environment.resourcePrefix = 'dh' - session.commit() - - print('Successfully back filled resourcePrefix ') - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('environment_group_permission', 'environmentIAMRoleImported') - op.drop_column('environment', 'EnvironmentDefaultIAMRoleImported') - op.drop_column('environment', 'resourcePrefix') - op.drop_table('keyvaluetag') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/decc96c5670f_organization_groups.py b/backend/migrations/versions/decc96c5670f_organization_groups.py deleted file mode 100644 index 643522142..000000000 --- a/backend/migrations/versions/decc96c5670f_organization_groups.py +++ /dev/null @@ -1,38 +0,0 @@ -"""organization groups - -Revision ID: decc96c5670f -Revises: 74b89c64f330 -Create Date: 2021-08-13 08:17:02.257680 - -""" -from alembic import op -import sqlalchemy as sa - - -# revision identifiers, used by Alembic. 
-revision = 'decc96c5670f' -down_revision = '74b89c64f330' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.create_table( - 'organization_group', - sa.Column('groupUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('invitedBy', sa.String(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('groupUri', 'organizationUri'), - ) - # ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_table('organization_group') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/e177eb044b31_init_tenant.py b/backend/migrations/versions/e177eb044b31_init_tenant.py deleted file mode 100644 index 8cff870d2..000000000 --- a/backend/migrations/versions/e177eb044b31_init_tenant.py +++ /dev/null @@ -1,42 +0,0 @@ -"""init tenant - -Revision ID: e177eb044b31 -Revises: 033c3d6c1849 -Create Date: 2021-08-07 16:47:19.443969 - -""" -from alembic import op - -# revision identifiers, used by Alembic. 
-from sqlalchemy import orm - -from dataall import db -from dataall.db import api - -revision = 'e177eb044b31' -down_revision = '033c3d6c1849' -branch_labels = None -depends_on = None - - -def upgrade(): - try: - bind = op.get_bind() - session = orm.Session(bind=bind) - print('Initializing permissions...') - db.api.Tenant.save_tenant(session, name='dataall', description='Tenant dataall') - print('Tenant initialized successfully') - print('Attaching superusers group DHAdmins...') - api.TenantPolicy.attach_group_tenant_policy( - session=session, - group='DHAdmins', - permissions=db.permissions.TENANT_ALL, - tenant_name='dataall', - ) - print('Attaching superusers groups DHAdmins') - except Exception as e: - print(f'Failed to init permissions due to: {e}') - - -def downgrade(): - pass diff --git a/backend/migrations/versions/e72009ab3b9a_updating_pipelines.py b/backend/migrations/versions/e72009ab3b9a_updating_pipelines.py deleted file mode 100644 index d9144e1fb..000000000 --- a/backend/migrations/versions/e72009ab3b9a_updating_pipelines.py +++ /dev/null @@ -1,48 +0,0 @@ -"""create account table - -Revision ID: e72009ab3b9a -Revises: 5e722995fa0b -Create Date: 2022-05-16 14:52:40.347079 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. -revision = 'e72009ab3b9a' -down_revision = '5e722995fa0b' -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.rename_table('sqlpipeline', 'datapipeline') - op.add_column( - 'datapipeline', sa.Column('devStrategy', sa.String(), nullable=True) - ) - op.add_column( - 'datapipeline', sa.Column('devStages', postgresql.ARRAY(sa.String()), nullable=True) - ) - op.add_column( - 'datapipeline', sa.Column('template', sa.String(), nullable=True) - ) - op.alter_column( - 'datapipeline', 'sqlPipelineUri', new_column_name='DataPipelineUri' - ) - # ### end Alembic commands ### - pass - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! ### - op.drop_column('datapipeline', 'devStrategy') - op.drop_column('datapipeline', 'devStages') - op.drop_column('datapipeline', 'template') - op.alter_column( - 'datapipeline', 'DataPipelineUri', new_column_name='sqlPipelineUri' - ) - op.rename_table('datapipeline', 'sqlpipeline') - # ### end Alembic commands ### - pass From 8cd89f388c03e4982f82b58061fe06826491b4fd Mon Sep 17 00:00:00 2001 From: dlpzx Date: Thu, 15 Sep 2022 17:27:15 +0200 Subject: [PATCH 04/19] Cleaned migration versions and initialization of the database --- .../versions/4392a0c9747f_init_database.py | 1025 +++++++++++++++++ .../versions/652b83e1065c__release_v1_1_0.py | 54 + .../versions/fb240cf070d0__release_v1_2_0.py | 54 + 3 files changed, 1133 insertions(+) create mode 100644 backend/migrations/versions/4392a0c9747f_init_database.py create mode 100644 backend/migrations/versions/652b83e1065c__release_v1_1_0.py create mode 100644 backend/migrations/versions/fb240cf070d0__release_v1_2_0.py diff --git a/backend/migrations/versions/4392a0c9747f_init_database.py b/backend/migrations/versions/4392a0c9747f_init_database.py new file mode 100644 index 000000000..cfe796073 --- /dev/null +++ b/backend/migrations/versions/4392a0c9747f_init_database.py @@ -0,0 +1,1025 @@ +"""Init database + +Revision ID: bd271a2780b2, for '4392a0c9747f' +Revises: +Create Date: 2022-09-15 15:10:53.506962 + +""" +import os + +import sqlalchemy as sa +from alembic import op 
+from sqlalchemy.dialects import postgresql + +from dataall.db import get_engine, has_table, create_schema_if_not_exists + +revision = '4392a0c9747f' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + envname = os.getenv('envname', 'local') + print('ENVNAME', envname) + engine = get_engine(envname=envname).engine + create_schema_if_not_exists(engine, envname) + try: + if not has_table('activity', engine): + op.create_table( + 'activity', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('activityUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=False), + sa.Column('summary', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('activityUri'), + ) + if not has_table('dashboard', engine): + op.create_table( + 'dashboard', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('dashboardUri', sa.String(), nullable=False), + 
sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('namespace', sa.String(), nullable=False), + sa.Column('DashboardId', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('dashboardUri'), + ) + if not has_table('dashboardshare', engine): + op.create_table( + 'dashboardshare', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('dashboardUri', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('shareUri', 'dashboardUri'), + ) + if not has_table('dataset', engine): + op.create_table( + 'dataset', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueCrawlerName', sa.String(), nullable=True), + sa.Column('GlueCrawlerSchedule', sa.String(), nullable=True), + sa.Column('GlueProfilingJobName', sa.String(), nullable=True), + sa.Column('GlueProfilingTriggerSchedule', sa.String(), nullable=True), + sa.Column('GlueProfilingTriggerName', sa.String(), nullable=True), + sa.Column('GlueDataQualityJobName', sa.String(), nullable=True), + 
sa.Column('GlueDataQualitySchedule', sa.String(), nullable=True), + sa.Column('GlueDataQualityTriggerName', sa.String(), nullable=True), + sa.Column('IAMDatasetAdminRoleArn', sa.String(), nullable=False), + sa.Column('IAMDatasetAdminUserArn', sa.String(), nullable=False), + sa.Column('KmsAlias', sa.String(), nullable=False), + sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('confidentiality', sa.String(), nullable=False), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('bucketCreated', sa.Boolean(), nullable=True), + sa.Column('glueDatabaseCreated', sa.Boolean(), nullable=True), + sa.Column('iamAdminRoleCreated', sa.Boolean(), nullable=True), + sa.Column('iamAdminUserCreated', sa.Boolean(), nullable=True), + sa.Column('kmsAliasCreated', sa.Boolean(), nullable=True), + sa.Column('lakeformationLocationCreated', sa.Boolean(), nullable=True), + sa.Column('bucketPolicyCreated', sa.Boolean(), nullable=True), + sa.Column('businessOwnerEmail', sa.String(), nullable=True), + sa.Column( + 'businessOwnerDelegationEmails', + postgresql.ARRAY(sa.String()), + nullable=True, + ), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + sa.Column('importedS3Bucket', sa.Boolean(), nullable=True), + sa.Column('importedGlueDatabase', sa.Boolean(), nullable=True), + sa.Column('importedKmsKey', sa.Boolean(), nullable=True), + sa.Column('importedAdminRole', sa.Boolean(), nullable=True), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('datasetUri'), + ) + if not has_table('dataset_profiling_run', engine): + op.create_table( + 'dataset_profiling_run', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', 
sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('profilingRunUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('GlueJobName', sa.String(), nullable=True), + sa.Column('GlueJobRunId', sa.String(), nullable=True), + sa.Column('GlueTriggerSchedule', sa.String(), nullable=True), + sa.Column('GlueTriggerName', sa.String(), nullable=True), + sa.Column('GlueTableName', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=True), + sa.Column( + 'results', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('status', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('profilingRunUri'), + ) + if not has_table('dataset_quality_rule', engine): + op.create_table( + 'dataset_quality_rule', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('ruleUri', sa.String(), nullable=False), + sa.Column('query', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column( + 'logs', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.PrimaryKeyConstraint('ruleUri'), + ) + if not has_table('dataset_storage_location', engine): + op.create_table( + 'dataset_storage_location', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + 
sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('locationUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('S3Prefix', sa.String(), nullable=False), + sa.Column('S3AccessPoint', sa.String(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('locationCreated', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('locationUri'), + ) + if not has_table('dataset_table', engine): + op.create_table( + 'dataset_table', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('S3Prefix', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueTableName', sa.String(), nullable=False), + sa.Column('GlueTableConfig', sa.Text(), nullable=True), + sa.Column( + 'GlueTableProperties', + postgresql.JSON(astext_type=sa.Text()), + nullable=True, + ), + sa.Column('LastGlueTableStatus', sa.String(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('stage', sa.String(), nullable=True), + sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), + 
sa.Column('confidentiality', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tableUri'), + ) + if not has_table('dataset_table_column', engine): + op.create_table( + 'dataset_table_column', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('columnUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueTableName', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('typeName', sa.String(), nullable=False), + sa.Column('columnType', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('columnUri'), + ) + if not has_table('dataset_table_profiling_job', engine): + op.create_table( + 'dataset_table_profiling_job', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('jobUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RunCommandId', sa.String(), nullable=True), + sa.Column('GlueDatabaseName', sa.String(), 
nullable=False), + sa.Column('GlueTableName', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('jobUri'), + ) + if not has_table('environment', engine): + op.create_table( + 'environment', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('cognitoGroupName', sa.String(), nullable=True), + sa.Column('resourcePrefix', sa.String(), nullable=False), + sa.Column('validated', sa.Boolean(), nullable=True), + sa.Column('environmentType', sa.String(), nullable=False), + sa.Column( + 'isOrganizationDefaultEnvironment', sa.Boolean(), nullable=True + ), + sa.Column('EnvironmentDefaultIAMRoleName', sa.String(), nullable=False), + sa.Column('EnvironmentDefaultIAMRoleImported', sa.String(), nullable=True), + sa.Column('EnvironmentDefaultIAMRoleArn', sa.String(), nullable=False), + sa.Column('EnvironmentDefaultBucketName', sa.String(), nullable=True), + sa.Column('EnvironmentDefaultAthenaWorkGroup', sa.String(), nullable=True), + sa.Column('roleCreated', sa.Boolean(), nullable=False), + sa.Column('dashboardsEnabled', sa.Boolean(), nullable=True), + sa.Column('notebooksEnabled', sa.Boolean(), nullable=True), + sa.Column('mlStudiosEnabled', sa.Boolean(), nullable=True), + sa.Column('pipelinesEnabled', sa.Boolean(), nullable=True), + 
sa.Column('warehousesEnabled', sa.Boolean(), nullable=True), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('CDKRoleArn', sa.String(), nullable=False), + sa.Column('subscriptionsEnabled', sa.Boolean(), nullable=True), + sa.Column( + 'subscriptionsProducersTopicName', sa.String(), nullable=True + ), + sa.Column( + 'subscriptionsProducersTopicImported', sa.Boolean(), nullable=True + ), + sa.Column( + 'subscriptionsConsumersTopicName', sa.String(), nullable=True + ), + sa.Column( + 'subscriptionsConsumersTopicImported', sa.Boolean(), nullable=True + ), + sa.PrimaryKeyConstraint('environmentUri'), + ) + if not has_table('environment_group_permission', engine): + op.create_table( + 'environment_group_permission', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('invitedBy', sa.String(), nullable=True), + sa.Column('environmentIAMRoleArn', sa.String(), nullable=True), + sa.Column('environmentIAMRoleName', sa.String(), nullable=True), + sa.Column('environmentIAMRoleImported', sa.Boolean(), nullable=True), + sa.Column('environmentAthenaWorkGroup', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('groupRoleInEnvironment', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'environmentUri'), + ) + if not has_table('feed_message', engine): + op.create_table( + 'feed_message', + sa.Column('feedMessageUri', sa.String(), nullable=False), + sa.Column('creator', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=False), + sa.Column('content', sa.String(), nullable=True), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('feedMessageUri'), + ) + if 
not has_table('glossary_node', engine): + op.create_table( + 'glossary_node', + sa.Column('nodeUri', sa.String(), nullable=False), + sa.Column('parentUri', sa.String(), nullable=True), + sa.Column('nodeType', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.Column('path', sa.String(), nullable=False), + sa.Column('label', sa.String(), nullable=False), + sa.Column('readme', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('admin', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('nodeUri'), + ) + if not has_table('glossary_schema', engine): + op.create_table( + 'glossary_schema', + sa.Column('schemaUri', sa.String(), nullable=False), + sa.Column('json_schema', postgresql.JSON(astext_type=sa.Text()), nullable=False), + sa.PrimaryKeyConstraint('schemaUri'), + ) + if not has_table('glossary_schema_map', engine): + op.create_table( + 'glossary_schema_map', + sa.Column('schemaUri', sa.String(), nullable=False), + sa.Column('nodeUri', sa.String(), nullable=False), + sa.Column('schema', postgresql.JSON(astext_type=sa.Text()), nullable=False), + sa.PrimaryKeyConstraint('schemaUri', 'nodeUri'), + ) + if not has_table('term_link', engine): + op.create_table( + 'term_link', + sa.Column('linkUri', sa.String(), nullable=False), + sa.Column('nodeUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('approvedBySteward', sa.Boolean(), nullable=True), + sa.Column('approvedByOwner', sa.Boolean(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + 
sa.PrimaryKeyConstraint('linkUri'), + ) + if not has_table('group', engine): + op.create_table( + 'group', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('groupUri', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri'), + ) + if not has_table('group_member', engine): + op.create_table( + 'group_member', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('userName', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleInGroup', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName'), + ) + if not has_table('keyvaluetag', engine): + op.create_table( + 'keyvaluetag', + sa.Column('tagUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('key', sa.String(), nullable=False), + sa.Column('value', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tagUri'), + ) + if not has_table('notification', engine): + op.create_table( + 'notification', + sa.Column('notificationUri', sa.String(), nullable=False), + sa.Column( + 'type', + sa.Enum( + 'SHARE_OBJECT_SUBMITTED', + 'SHARE_ITEM_REQUEST', + 'SHARE_OBJECT_APPROVED', + 'SHARE_OBJECT_REJECTED', + 'SHARE_OBJECT_PENDING_APPROVAL', + 'DATASET_VERSION', + name='notificationtype', + ), + nullable=True, + ), + sa.Column('message', sa.String(), nullable=False), + sa.Column('username', sa.String(), nullable=False), + 
sa.Column('is_read', sa.Boolean(), nullable=False), + sa.Column('target_uri', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('notificationUri'), + ) + if not has_table('organization', engine): + op.create_table( + 'organization', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('organizationUri'), + ) + if not has_table('organization_group', engine): + op.create_table( + 'organization_group', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('invitedBy', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('groupUri', 'organizationUri'), + ) + if not has_table('permission', engine): + op.create_table( + 'permission', + sa.Column('permissionUri', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column( + 'type', sa.Enum('TENANT', 'RESOURCE', name='permissiontype'), nullable=False + ), + sa.Column('description', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + 
sa.PrimaryKeyConstraint('permissionUri'), + ) + op.create_index(op.f('ix_permission_name'), 'permission', ['name'], unique=False) + if not has_table('redshiftcluster', engine): + op.create_table( + 'redshiftcluster', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('clusterArn', sa.String(), nullable=True), + sa.Column('clusterName', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('databaseName', sa.String(), nullable=True), + sa.Column('databaseUser', sa.String(), nullable=True), + sa.Column('masterUsername', sa.String(), nullable=True), + sa.Column('masterDatabaseName', sa.String(), nullable=True), + sa.Column('nodeType', sa.String(), nullable=True), + sa.Column('numberOfNodes', sa.Integer(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=True), + sa.Column('kmsAlias', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.Column('vpc', sa.String(), nullable=True), + sa.Column('subnetGroupName', sa.String(), nullable=True), + sa.Column('subnetIds', sa.ARRAY(sa.String()), nullable=True), + sa.Column('securityGroupIds', sa.ARRAY(sa.String()), nullable=True), + sa.Column('CFNStackName', sa.String(), nullable=True), + sa.Column('CFNStackStatus', sa.String(), nullable=True), + sa.Column('CFNStackArn', sa.String(), nullable=True), + sa.Column('IAMRoles', sa.ARRAY(sa.String()), nullable=True), + 
sa.Column('endpoint', sa.String(), nullable=True), + sa.Column('port', sa.Integer(), nullable=True), + sa.Column('datahubSecret', sa.String(), nullable=True), + sa.Column('masterSecret', sa.String(), nullable=True), + sa.Column('external_schema_created', sa.Boolean(), nullable=True), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri'), + ) + if not has_table('redshiftcluster_dataset', engine): + op.create_table( + 'redshiftcluster_dataset', + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('datasetCopyEnabled', sa.Boolean(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri'), + ) + if not has_table('redshiftcluster_datasettable', engine): + op.create_table( + 'redshiftcluster_datasettable', + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('shareUri', sa.String(), nullable=True), + sa.Column('enabled', sa.Boolean(), nullable=True), + sa.Column('schema', sa.String(), nullable=False), + sa.Column('databaseName', sa.String(), nullable=False), + sa.Column('dataLocation', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri'), + ) + if not has_table('resource_policy', engine): + op.create_table( + 'resource_policy', + sa.Column('sid', sa.String(), nullable=False), + sa.Column('resourceUri', sa.String(), nullable=False), + sa.Column('resourceType', sa.String(), nullable=False), + 
sa.Column('principalId', sa.String(), nullable=False), + sa.Column( + 'principalType', + sa.Enum('USER', 'GROUP', 'SERVICE', name='rp_principal_type'), + nullable=True, + ), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('sid'), + ) + op.create_index( + op.f('ix_resource_policy_principalId'), + 'resource_policy', + ['principalId'], + unique=False, + ) + op.create_index( + op.f('ix_resource_policy_resourceType'), + 'resource_policy', + ['resourceType'], + unique=False, + ) + op.create_index( + op.f('ix_resource_policy_resourceUri'), + 'resource_policy', + ['resourceUri'], + unique=False, + ) + if not has_table('resource_policy_permission', engine): + op.create_table( + 'resource_policy_permission', + sa.Column('sid', sa.String(), nullable=False), + sa.Column('permissionUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint( + ['permissionUri'], + ['permission.permissionUri'], + ), + sa.ForeignKeyConstraint( + ['sid'], + ['resource_policy.sid'], + ), + sa.PrimaryKeyConstraint('sid', 'permissionUri'), + ) + if not has_table('sagemaker_notebook', engine): + op.create_table( + 'sagemaker_notebook', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('notebookUri', sa.String(), nullable=False), + sa.Column('NotebookInstanceName', sa.String(), nullable=False), + sa.Column('NotebookInstanceStatus', sa.String(), 
nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + sa.Column('VpcId', sa.String(), nullable=True), + sa.Column('SubnetId', sa.String(), nullable=True), + sa.Column('VolumeSizeInGB', sa.Integer(), nullable=True), + sa.Column('InstanceType', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('notebookUri'), + ) + if not has_table('sagemaker_studio_domain', engine): + op.create_table( + 'sagemaker_studio_domain', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), + sa.Column('SagemakerStudioStatus', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('sagemakerStudioUri'), + ) + if not has_table('sagemaker_studio_user_profile', engine): + op.create_table( + 'sagemaker_studio_user_profile', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), 
nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioUserProfileUri', sa.String(), nullable=False), + sa.Column( + 'sagemakerStudioUserProfileStatus', sa.String(), nullable=False + ), + sa.Column( + 'sagemakerStudioUserProfileName', sa.String(), nullable=False + ), + sa.Column( + 'sagemakerStudioUserProfileNameSlugify', sa.String(), nullable=False + ), + sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('sagemakerStudioUserProfileUri'), + ) + if not has_table('share_object', engine): + op.create_table( + 'share_object', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=True), + sa.Column('principalType', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('confirmed', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('shareUri'), + ) + if not has_table('share_object_item', engine): + op.create_table( + 'share_object_item', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('shareItemUri', sa.String(), nullable=False), + sa.Column('itemType', sa.String(), nullable=False), + sa.Column('itemUri', sa.String(), nullable=False), + sa.Column('itemName', sa.String(), nullable=False), + sa.Column('permission', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=False), + sa.Column('updated', 
sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=True), + sa.Column('GlueTableName', sa.String(), nullable=True), + sa.Column('S3AccessPointName', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('shareItemUri'), + ) + if not has_table('sqlpipeline', engine): + op.create_table( + 'sqlpipeline', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sqlPipelineUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.Column('repo', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('sqlPipelineUri'), + ) + if not has_table('stack', engine): + op.create_table( + 'stack', + sa.Column('stackUri', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=True), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('accountid', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('cronexpr', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('stack', sa.String(), nullable=False), + sa.Column( + 'payload', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('created', sa.DateTime(), nullable=True), + 
sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('stackid', sa.String(), nullable=True), + sa.Column( + 'outputs', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column( + 'resources', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column( + 'error', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('events', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('lastSeen', sa.DateTime(), nullable=True), + sa.Column('EcsTaskArn', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('stackUri'), + ) + if not has_table('tag', engine): + op.create_table( + 'tag', + sa.Column('id', sa.String(), nullable=False), + sa.Column('tag', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id'), + ) + if not has_table('item_tags', engine): + op.create_table( + 'item_tags', + sa.Column('tagid', sa.String(), nullable=False), + sa.Column('itemid', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid'), + ) + if not has_table('task', engine): + op.create_table( + 'task', + sa.Column('taskUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('cronexpr', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=False), + sa.Column('payload', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('response', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('error', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('lastSeen', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('taskUri'), + ) + if not has_table('tenant', engine): + op.create_table( + 'tenant', + sa.Column('tenantUri', sa.String(), 
nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('description', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('tenantUri'), + ) + op.create_index(op.f('ix_tenant_name'), 'tenant', ['name'], unique=True) + if not has_table('tenant_administrator', engine): + op.create_table( + 'tenant_administrator', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('tenantUri', sa.String(), nullable=False), + sa.Column('userRoleInTenant', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName'), + sa.ForeignKeyConstraint( + ['tenantUri'], + ), + ) + if not has_table('tenant_policy', engine): + op.create_table( + 'tenant_policy', + sa.Column('sid', sa.String(), nullable=False), + sa.Column('tenantUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=False), + sa.Column( + 'principalType', + sa.Enum('USER', 'GROUP', 'SERVICE', name='tenant_principal_type'), + nullable=True, + ), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint( + ['tenantUri'], + ['tenant.tenantUri'], + ), + sa.PrimaryKeyConstraint('sid'), + ) + op.create_index( + op.f('ix_tenant_policy_principalId'), + 'tenant_policy', + ['principalId'], + unique=False, + ) + if not has_table('tenant_policy_permission', engine): + op.create_table( + 'tenant_policy_permission', + sa.Column('sid', sa.String(), nullable=False), + sa.Column('permissionUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint( + ['permissionUri'], + ['permission.permissionUri'], + ), + sa.ForeignKeyConstraint( + ['sid'], + ['tenant_policy.sid'], + ), + sa.PrimaryKeyConstraint('sid', 'permissionUri'), + ) + if not has_table('user', engine): + op.create_table( + 
'user', + sa.Column('userId', sa.String(), nullable=False), + sa.Column('userName', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userId'), + ) + if not has_table('vote', engine): + op.create_table( + 'vote', + sa.Column('voteUri', sa.String(), nullable=False), + sa.Column('username', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('upvote', sa.Boolean(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('voteUri'), + ) + if not has_table('vpc', engine): + op.create_table( + 'vpc', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('vpcUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('VpcId', sa.String(), nullable=False), + sa.Column('privateSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('publicSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('default', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('vpcUri'), + ) + if not has_table('worksheet', engine): + op.create_table( + 'worksheet', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), 
nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('SamlAdminGroupName', sa.String(), nullable=False), + sa.Column('sqlBody', sa.String(), nullable=True), + sa.Column( + 'chartConfig', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('lastSavedAthenaQueryIdForQuery', sa.String(), nullable=True), + sa.Column('lastSavedAthenaQueryIdForChart', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('worksheetUri'), + ) + if not has_table('worksheet_query_result', engine): + op.create_table( + 'worksheet_query_result', + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('AthenaQueryId', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column( + 'queryType', + sa.Enum('chart', 'data', name='querytype'), + nullable=False, + ), + sa.Column('sqlBody', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('OutputLocation', sa.String(), nullable=False), + sa.Column('error', sa.String(), nullable=True), + sa.Column('ElapsedTimeInMs', sa.Integer(), nullable=True), + sa.Column('DataScannedInBytes', sa.Integer(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('AthenaQueryId'), + ) + if not has_table('worksheet_share', engine): + op.create_table( + 'worksheet_share', + sa.Column('worksheetShareUri', sa.String(), nullable=False), + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=False), + sa.Column('principalType', sa.String(), nullable=False), + sa.Column('canEdit', sa.Boolean(), nullable=True), + sa.Column('owner', sa.String(), 
nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('worksheetShareUri'), + ) + except Exception as e: + print('Failed to init database due to:', e) + pass +# ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('worksheet_share') + op.drop_table('worksheet_query_result') + op.drop_table('worksheet') + op.drop_table('vpc') + op.drop_table('user') + op.drop_table('tenant_policy_permission') + op.drop_table('tenant_policy') + op.drop_table('tenant_administrator') + op.drop_table('tenant') + op.drop_table('task') + op.drop_table('item_tags') + op.drop_table('tag') + op.drop_table('stack') + op.drop_table('sqlpipeline') + op.drop_table('share_object_item') + op.drop_table('share_object') + op.drop_table('sagemaker_studio_user_profile') + op.drop_table('sagemaker_studio_domain') + op.drop_table('sagemaker_notebook') + op.drop_table('resource_policy_permission') + op.drop_table('resource_policy') + op.drop_table('redshiftcluster_datasettable') + op.drop_table('redshiftcluster_dataset') + op.drop_table('redshiftcluster') + op.drop_table('permissions') + op.drop_table('organization_group') + op.drop_table('organization') + op.drop_table('notification') + op.drop_table('keyvaluetag') + op.drop_table('group_member') + op.drop_table('group') + op.drop_table('term_link') + op.drop_table('glossary_schema_map') + op.drop_table('glossary_schema') + op.drop_table('glossary_node') + op.drop_table('feed_message') + op.drop_table('environment_group_permission') + op.drop_table('environment') + op.drop_table('dataset_table_profiling_job') + op.drop_table('dataset_table_column') + op.drop_table('dataset_table') + op.drop_table('dataset_storage_location') + op.drop_table('dataset_quality_rule') + op.drop_table('dataset_profiling_run') + op.drop_table('dataset') + op.drop_table('dashboardshare') + 
op.drop_table('dashboard') + op.drop_table('activity') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/652b83e1065c__release_v1_1_0.py b/backend/migrations/versions/652b83e1065c__release_v1_1_0.py new file mode 100644 index 000000000..596b04ebe --- /dev/null +++ b/backend/migrations/versions/652b83e1065c__release_v1_1_0.py @@ -0,0 +1,54 @@ +"""_release_v1.1.0 + +Revision ID: 652b83e1065c +Revises: ada02a56cd32 +Create Date: 2022-09-15 15:10:53.506962 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '652b83e1065c' +down_revision = '4392a0c9747f' +branch_labels = None +depends_on = None + + +def upgrade(): + op.rename_table('sqlpipeline', 'datapipeline') + op.add_column( + 'datapipeline', sa.Column('devStrategy', sa.String(), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('devStages', postgresql.ARRAY(sa.String()), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('template', sa.String(), nullable=True) + ) + op.alter_column( + 'datapipeline', 'sqlPipelineUri', new_column_name='DataPipelineUri' + ) + op.add_column( + 'datapipeline', sa.Column('inputDatasetUri', sa.String(), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('outputDatasetUri', sa.String(), nullable=True) + ) + + pass + + +def downgrade(): + op.drop_column('datapipeline', 'inputDatasetUri') + op.drop_column('datapipeline', 'outputDatasetUri') + op.drop_column('datapipeline', 'devStrategy') + op.drop_column('datapipeline', 'devStages') + op.drop_column('datapipeline', 'template') + op.alter_column( + 'datapipeline', 'DataPipelineUri', new_column_name='sqlPipelineUri' + ) + op.rename_table('datapipeline', 'sqlpipeline') + + pass diff --git a/backend/migrations/versions/fb240cf070d0__release_v1_2_0.py b/backend/migrations/versions/fb240cf070d0__release_v1_2_0.py new file mode 100644 index 000000000..50458a37f --- /dev/null +++ 
b/backend/migrations/versions/fb240cf070d0__release_v1_2_0.py @@ -0,0 +1,54 @@ +"""_release_v1.2.0 + +Revision ID: fb240cf070d0 +Revises: 652b83e1065c +Create Date: 2022-09-15 15:10:53.506962 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy import Column, TIMESTAMP, INTEGER, VARCHAR, NVARCHAR + + +# revision identifiers, used by Alembic. +revision = 'fb240cf070d0' +down_revision = '652b83e1065c' +branch_labels = None +depends_on = None + + +def upgrade(): + op.drop_column('datapipeline', 'devStages') + op.drop_column('datapipeline', 'inputDatasetUri') + op.drop_column('datapipeline', 'outputDatasetUri') + + op.create_table( + 'datapipelineenvironments', + Column('envPipelineUri', VARCHAR(50), primary_key=True), + Column('environmentUri', VARCHAR(50), nullable=False), + Column('environmentLabel', VARCHAR(50), nullable=False), + Column('pipelineUri', VARCHAR(50), nullable=False), + Column('pipelineLabel', VARCHAR(50), nullable=False), + Column('stage', VARCHAR(50), nullable=False), + Column('order', INTEGER, nullable=False), + Column('region', VARCHAR(50), nullable=False), + Column('AwsAccountId', VARCHAR(50), nullable=False), + Column('samlGroupName', VARCHAR(50), nullable=False), + sa.PrimaryKeyConstraint('envPipelineUri'), + ) + pass + + +def downgrade(): + op.add_column( + 'datapipeline', sa.Column('inputDatasetUri', sa.String(), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('outputDatasetUri', sa.String(), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('devStages', postgresql.ARRAY(sa.String()), nullable=True) + ) + op.drop_table('datapipelineenvironments') + pass From 43d85ad38332b4641343158b65d305d62e299cac Mon Sep 17 00:00:00 2001 From: dlpzx Date: Thu, 15 Sep 2022 17:28:04 +0200 Subject: [PATCH 05/19] Cleaned migration versions and initialization of the database --- .../versions/4392a0c9747f_init_database.py | 1025 ----------------- 
.../versions/652b83e1065c__release_v1_1_0.py | 2 +- 2 files changed, 1 insertion(+), 1026 deletions(-) delete mode 100644 backend/migrations/versions/4392a0c9747f_init_database.py diff --git a/backend/migrations/versions/4392a0c9747f_init_database.py b/backend/migrations/versions/4392a0c9747f_init_database.py deleted file mode 100644 index cfe796073..000000000 --- a/backend/migrations/versions/4392a0c9747f_init_database.py +++ /dev/null @@ -1,1025 +0,0 @@ -"""Init database - -Revision ID: bd271a2780b2, for '4392a0c9747f' -Revises: -Create Date: 2022-09-15 15:10:53.506962 - -""" -import os - -import sqlalchemy as sa -from alembic import op -from sqlalchemy.dialects import postgresql - -from dataall.db import get_engine, has_table, create_schema_if_not_exists - -revision = '4392a0c9747f' -down_revision = None -branch_labels = None -depends_on = None - - -def upgrade(): - # ### commands auto generated by Alembic - please adjust! ### - envname = os.getenv('envname', 'local') - print('ENVNAME', envname) - engine = get_engine(envname=envname).engine - create_schema_if_not_exists(engine, envname) - try: - if not has_table('activity', engine): - op.create_table( - 'activity', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('activityUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.Column('action', sa.String(), nullable=False), - sa.Column('summary', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('activityUri'), - ) - if not has_table('dashboard', engine): - 
op.create_table( - 'dashboard', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('dashboardUri', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('namespace', sa.String(), nullable=False), - sa.Column('DashboardId', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('dashboardUri'), - ) - if not has_table('dashboardshare', engine): - op.create_table( - 'dashboardshare', - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('dashboardUri', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('status', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('shareUri', 'dashboardUri'), - ) - if not has_table('dataset', engine): - op.create_table( - 'dataset', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), 
nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('S3BucketName', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=False), - sa.Column('GlueCrawlerName', sa.String(), nullable=True), - sa.Column('GlueCrawlerSchedule', sa.String(), nullable=True), - sa.Column('GlueProfilingJobName', sa.String(), nullable=True), - sa.Column('GlueProfilingTriggerSchedule', sa.String(), nullable=True), - sa.Column('GlueProfilingTriggerName', sa.String(), nullable=True), - sa.Column('GlueDataQualityJobName', sa.String(), nullable=True), - sa.Column('GlueDataQualitySchedule', sa.String(), nullable=True), - sa.Column('GlueDataQualityTriggerName', sa.String(), nullable=True), - sa.Column('IAMDatasetAdminRoleArn', sa.String(), nullable=False), - sa.Column('IAMDatasetAdminUserArn', sa.String(), nullable=False), - sa.Column('KmsAlias', sa.String(), nullable=False), - sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('confidentiality', sa.String(), nullable=False), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('bucketCreated', sa.Boolean(), nullable=True), - sa.Column('glueDatabaseCreated', sa.Boolean(), nullable=True), - sa.Column('iamAdminRoleCreated', sa.Boolean(), nullable=True), - sa.Column('iamAdminUserCreated', sa.Boolean(), nullable=True), - sa.Column('kmsAliasCreated', sa.Boolean(), nullable=True), - sa.Column('lakeformationLocationCreated', sa.Boolean(), nullable=True), - sa.Column('bucketPolicyCreated', sa.Boolean(), nullable=True), - sa.Column('businessOwnerEmail', sa.String(), nullable=True), - sa.Column( - 'businessOwnerDelegationEmails', - postgresql.ARRAY(sa.String()), - nullable=True, - ), - sa.Column('SamlAdminGroupName', sa.String(), nullable=True), - sa.Column('importedS3Bucket', sa.Boolean(), nullable=True), - sa.Column('importedGlueDatabase', sa.Boolean(), nullable=True), - 
sa.Column('importedKmsKey', sa.Boolean(), nullable=True), - sa.Column('importedAdminRole', sa.Boolean(), nullable=True), - sa.Column('imported', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('datasetUri'), - ) - if not has_table('dataset_profiling_run', engine): - op.create_table( - 'dataset_profiling_run', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('profilingRunUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('GlueJobName', sa.String(), nullable=True), - sa.Column('GlueJobRunId', sa.String(), nullable=True), - sa.Column('GlueTriggerSchedule', sa.String(), nullable=True), - sa.Column('GlueTriggerName', sa.String(), nullable=True), - sa.Column('GlueTableName', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=True), - sa.Column( - 'results', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('status', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('profilingRunUri'), - ) - if not has_table('dataset_quality_rule', engine): - op.create_table( - 'dataset_quality_rule', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), 
nullable=False), - sa.Column('ruleUri', sa.String(), nullable=False), - sa.Column('query', sa.String(), nullable=False), - sa.Column('status', sa.String(), nullable=False), - sa.Column( - 'logs', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.PrimaryKeyConstraint('ruleUri'), - ) - if not has_table('dataset_storage_location', engine): - op.create_table( - 'dataset_storage_location', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('locationUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('S3BucketName', sa.String(), nullable=False), - sa.Column('S3Prefix', sa.String(), nullable=False), - sa.Column('S3AccessPoint', sa.String(), nullable=True), - sa.Column('region', sa.String(), nullable=True), - sa.Column('locationCreated', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('locationUri'), - ) - if not has_table('dataset_table', engine): - op.create_table( - 'dataset_table', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', 
sa.String(), nullable=False), - sa.Column('S3BucketName', sa.String(), nullable=False), - sa.Column('S3Prefix', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=False), - sa.Column('GlueTableName', sa.String(), nullable=False), - sa.Column('GlueTableConfig', sa.Text(), nullable=True), - sa.Column( - 'GlueTableProperties', - postgresql.JSON(astext_type=sa.Text()), - nullable=True, - ), - sa.Column('LastGlueTableStatus', sa.String(), nullable=True), - sa.Column('region', sa.String(), nullable=True), - sa.Column('stage', sa.String(), nullable=True), - sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('confidentiality', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('tableUri'), - ) - if not has_table('dataset_table_column', engine): - op.create_table( - 'dataset_table_column', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('columnUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=False), - sa.Column('GlueTableName', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('typeName', sa.String(), nullable=False), - sa.Column('columnType', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('columnUri'), - ) - if not has_table('dataset_table_profiling_job', engine): - op.create_table( - 'dataset_table_profiling_job', - sa.Column('label', sa.String(), 
nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('jobUri', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('RunCommandId', sa.String(), nullable=True), - sa.Column('GlueDatabaseName', sa.String(), nullable=False), - sa.Column('GlueTableName', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('jobUri'), - ) - if not has_table('environment', engine): - op.create_table( - 'environment', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=False), - sa.Column('cognitoGroupName', sa.String(), nullable=True), - sa.Column('resourcePrefix', sa.String(), nullable=False), - sa.Column('validated', sa.Boolean(), nullable=True), - sa.Column('environmentType', sa.String(), nullable=False), - sa.Column( - 'isOrganizationDefaultEnvironment', sa.Boolean(), nullable=True - ), - 
sa.Column('EnvironmentDefaultIAMRoleName', sa.String(), nullable=False), - sa.Column('EnvironmentDefaultIAMRoleImported', sa.String(), nullable=True), - sa.Column('EnvironmentDefaultIAMRoleArn', sa.String(), nullable=False), - sa.Column('EnvironmentDefaultBucketName', sa.String(), nullable=True), - sa.Column('EnvironmentDefaultAthenaWorkGroup', sa.String(), nullable=True), - sa.Column('roleCreated', sa.Boolean(), nullable=False), - sa.Column('dashboardsEnabled', sa.Boolean(), nullable=True), - sa.Column('notebooksEnabled', sa.Boolean(), nullable=True), - sa.Column('mlStudiosEnabled', sa.Boolean(), nullable=True), - sa.Column('pipelinesEnabled', sa.Boolean(), nullable=True), - sa.Column('warehousesEnabled', sa.Boolean(), nullable=True), - sa.Column('SamlGroupName', sa.String(), nullable=True), - sa.Column('CDKRoleArn', sa.String(), nullable=False), - sa.Column('subscriptionsEnabled', sa.Boolean(), nullable=True), - sa.Column( - 'subscriptionsProducersTopicName', sa.String(), nullable=True - ), - sa.Column( - 'subscriptionsProducersTopicImported', sa.Boolean(), nullable=True - ), - sa.Column( - 'subscriptionsConsumersTopicName', sa.String(), nullable=True - ), - sa.Column( - 'subscriptionsConsumersTopicImported', sa.Boolean(), nullable=True - ), - sa.PrimaryKeyConstraint('environmentUri'), - ) - if not has_table('environment_group_permission', engine): - op.create_table( - 'environment_group_permission', - sa.Column('groupUri', sa.String(), nullable=False), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('invitedBy', sa.String(), nullable=True), - sa.Column('environmentIAMRoleArn', sa.String(), nullable=True), - sa.Column('environmentIAMRoleName', sa.String(), nullable=True), - sa.Column('environmentIAMRoleImported', sa.Boolean(), nullable=True), - sa.Column('environmentAthenaWorkGroup', sa.String(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - 
sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('groupRoleInEnvironment', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'environmentUri'), - ) - if not has_table('feed_message', engine): - op.create_table( - 'feed_message', - sa.Column('feedMessageUri', sa.String(), nullable=False), - sa.Column('creator', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=False), - sa.Column('content', sa.String(), nullable=True), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('feedMessageUri'), - ) - if not has_table('glossary_node', engine): - op.create_table( - 'glossary_node', - sa.Column('nodeUri', sa.String(), nullable=False), - sa.Column('parentUri', sa.String(), nullable=True), - sa.Column('nodeType', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=True), - sa.Column('path', sa.String(), nullable=False), - sa.Column('label', sa.String(), nullable=False), - sa.Column('readme', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('admin', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('nodeUri'), - ) - if not has_table('glossary_schema', engine): - op.create_table( - 'glossary_schema', - sa.Column('schemaUri', sa.String(), nullable=False), - sa.Column('json_schema', postgresql.JSON(astext_type=sa.Text()), nullable=False), - sa.PrimaryKeyConstraint('schemaUri'), - ) - if not has_table('glossary_schema_map', engine): - op.create_table( - 'glossary_schema_map', - sa.Column('schemaUri', sa.String(), nullable=False), - sa.Column('nodeUri', sa.String(), nullable=False), - sa.Column('schema', postgresql.JSON(astext_type=sa.Text()), nullable=False), 
- sa.PrimaryKeyConstraint('schemaUri', 'nodeUri'), - ) - if not has_table('term_link', engine): - op.create_table( - 'term_link', - sa.Column('linkUri', sa.String(), nullable=False), - sa.Column('nodeUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.Column('approvedBySteward', sa.Boolean(), nullable=True), - sa.Column('approvedByOwner', sa.Boolean(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('linkUri'), - ) - if not has_table('group', engine): - op.create_table( - 'group', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('groupUri', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('groupUri'), - ) - if not has_table('group_member', engine): - op.create_table( - 'group_member', - sa.Column('groupUri', sa.String(), nullable=False), - sa.Column('userName', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('userRoleInGroup', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('groupUri', 'userName'), - ) - if not has_table('keyvaluetag', engine): - op.create_table( - 'keyvaluetag', - sa.Column('tagUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - 
sa.Column('targetType', sa.String(), nullable=False), - sa.Column('key', sa.String(), nullable=False), - sa.Column('value', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('tagUri'), - ) - if not has_table('notification', engine): - op.create_table( - 'notification', - sa.Column('notificationUri', sa.String(), nullable=False), - sa.Column( - 'type', - sa.Enum( - 'SHARE_OBJECT_SUBMITTED', - 'SHARE_ITEM_REQUEST', - 'SHARE_OBJECT_APPROVED', - 'SHARE_OBJECT_REJECTED', - 'SHARE_OBJECT_PENDING_APPROVAL', - 'DATASET_VERSION', - name='notificationtype', - ), - nullable=True, - ), - sa.Column('message', sa.String(), nullable=False), - sa.Column('username', sa.String(), nullable=False), - sa.Column('is_read', sa.Boolean(), nullable=False), - sa.Column('target_uri', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('notificationUri'), - ) - if not has_table('organization', engine): - op.create_table( - 'organization', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('organizationUri'), - ) - if not has_table('organization_group', engine): - op.create_table( - 'organization_group', - sa.Column('groupUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('invitedBy', sa.String(), nullable=True), - sa.Column('description', 
sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('groupUri', 'organizationUri'), - ) - if not has_table('permissions', engine): - op.create_table( - 'permission', - sa.Column('permissionUri', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column( - 'type', sa.Enum('TENANT', 'RESOURCE', name='permissiontype'), nullable=False - ), - sa.Column('description', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('permissionUri'), - ) - op.create_index(op.f('ix_permission_name'), 'permission', ['name'], unique=False) - if not has_table('redshiftcluster', engine): - op.create_table( - 'redshiftcluster', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('organizationUri', sa.String(), nullable=False), - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('clusterArn', sa.String(), nullable=True), - sa.Column('clusterName', sa.String(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('databaseName', sa.String(), nullable=True), - sa.Column('databaseUser', sa.String(), nullable=True), - sa.Column('masterUsername', sa.String(), nullable=True), - sa.Column('masterDatabaseName', sa.String(), nullable=True), - sa.Column('nodeType', sa.String(), nullable=True), - sa.Column('numberOfNodes', sa.Integer(), nullable=True), - 
sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=True), - sa.Column('kmsAlias', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=True), - sa.Column('vpc', sa.String(), nullable=True), - sa.Column('subnetGroupName', sa.String(), nullable=True), - sa.Column('subnetIds', sa.ARRAY(sa.String()), nullable=True), - sa.Column('securityGroupIds', sa.ARRAY(sa.String()), nullable=True), - sa.Column('CFNStackName', sa.String(), nullable=True), - sa.Column('CFNStackStatus', sa.String(), nullable=True), - sa.Column('CFNStackArn', sa.String(), nullable=True), - sa.Column('IAMRoles', sa.ARRAY(sa.String()), nullable=True), - sa.Column('endpoint', sa.String(), nullable=True), - sa.Column('port', sa.Integer(), nullable=True), - sa.Column('datahubSecret', sa.String(), nullable=True), - sa.Column('masterSecret', sa.String(), nullable=True), - sa.Column('external_schema_created', sa.Boolean(), nullable=True), - sa.Column('SamlGroupName', sa.String(), nullable=True), - sa.Column('imported', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('clusterUri'), - ) - if not has_table('redshiftcluster_dataset', engine): - op.create_table( - 'redshiftcluster_dataset', - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('datasetCopyEnabled', sa.Boolean(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri'), - ) - if not has_table('redshiftcluster_datasettable', engine): - op.create_table( - 'redshiftcluster_datasettable', - sa.Column('clusterUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('tableUri', sa.String(), nullable=False), - sa.Column('shareUri', sa.String(), nullable=True), - sa.Column('enabled', 
sa.Boolean(), nullable=True), - sa.Column('schema', sa.String(), nullable=False), - sa.Column('databaseName', sa.String(), nullable=False), - sa.Column('dataLocation', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri'), - ) - if not has_table('resource_policy', engine): - op.create_table( - 'resource_policy', - sa.Column('sid', sa.String(), nullable=False), - sa.Column('resourceUri', sa.String(), nullable=False), - sa.Column('resourceType', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=False), - sa.Column( - 'principalType', - sa.Enum('USER', 'GROUP', 'SERVICE', name='rp_principal_type'), - nullable=True, - ), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('sid'), - ) - op.create_index( - op.f('ix_resource_policy_principalId'), - 'resource_policy', - ['principalId'], - unique=False, - ) - op.create_index( - op.f('ix_resource_policy_resourceType'), - 'resource_policy', - ['resourceType'], - unique=False, - ) - op.create_index( - op.f('ix_resource_policy_resourceUri'), - 'resource_policy', - ['resourceUri'], - unique=False, - ) - if not has_table('resource_policy_permission', engine): - op.create_table( - 'resource_policy_permission', - sa.Column('sid', sa.String(), nullable=False), - sa.Column('permissionUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint( - ['permissionUri'], - ['permission.permissionUri'], - ), - sa.ForeignKeyConstraint( - ['sid'], - ['resource_policy.sid'], - ), - sa.PrimaryKeyConstraint('sid', 'permissionUri'), - ) - if not has_table('sagemaker_notebook', engine): - op.create_table( - 'sagemaker_notebook', - 
sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('notebookUri', sa.String(), nullable=False), - sa.Column('NotebookInstanceName', sa.String(), nullable=False), - sa.Column('NotebookInstanceStatus', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('RoleArn', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('SamlAdminGroupName', sa.String(), nullable=True), - sa.Column('VpcId', sa.String(), nullable=True), - sa.Column('SubnetId', sa.String(), nullable=True), - sa.Column('VolumeSizeInGB', sa.Integer(), nullable=True), - sa.Column('InstanceType', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('notebookUri'), - ) - if not has_table('sagemaker_studio_domain', engine): - op.create_table( - 'sagemaker_studio_domain', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('sagemakerStudioUri', sa.String(), nullable=False), - sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), - sa.Column('SagemakerStudioStatus', sa.String(), nullable=False), - 
sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('RoleArn', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('sagemakerStudioUri'), - ) - if not has_table('sagemaker_studio_user_profile', engine): - op.create_table( - 'sagemaker_studio_user_profile', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('sagemakerStudioUserProfileUri', sa.String(), nullable=False), - sa.Column( - 'sagemakerStudioUserProfileStatus', sa.String(), nullable=False - ), - sa.Column( - 'sagemakerStudioUserProfileName', sa.String(), nullable=False - ), - sa.Column( - 'sagemakerStudioUserProfileNameSlugify', sa.String(), nullable=False - ), - sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), - sa.Column('AWSAccountId', sa.String(), nullable=False), - sa.Column('RoleArn', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('SamlAdminGroupName', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('sagemakerStudioUserProfileUri'), - ) - if not has_table('share_object', engine): - op.create_table( - 'share_object', - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('datasetUri', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=True), - sa.Column('principalType', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', 
sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('confirmed', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('shareUri'), - ) - if not has_table('share_object_item', engine): - op.create_table( - 'share_object_item', - sa.Column('shareUri', sa.String(), nullable=False), - sa.Column('shareItemUri', sa.String(), nullable=False), - sa.Column('itemType', sa.String(), nullable=False), - sa.Column('itemUri', sa.String(), nullable=False), - sa.Column('itemName', sa.String(), nullable=False), - sa.Column('permission', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=False), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('GlueDatabaseName', sa.String(), nullable=True), - sa.Column('GlueTableName', sa.String(), nullable=True), - sa.Column('S3AccessPointName', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('action', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('shareItemUri'), - ) - if not has_table('sqlpipeline', engine): - op.create_table( - 'sqlpipeline', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('sqlPipelineUri', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=False), - sa.Column('repo', sa.String(), 
nullable=False), - sa.PrimaryKeyConstraint('sqlPipelineUri'), - ) - if not has_table('stack', engine): - op.create_table( - 'stack', - sa.Column('stackUri', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=True), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('accountid', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=False), - sa.Column('cronexpr', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('stack', sa.String(), nullable=False), - sa.Column( - 'payload', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('stackid', sa.String(), nullable=True), - sa.Column( - 'outputs', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column( - 'resources', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column( - 'error', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('events', postgresql.JSON(astext_type=sa.Text()), nullable=True), - sa.Column('lastSeen', sa.DateTime(), nullable=True), - sa.Column('EcsTaskArn', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('stackUri'), - ) - if not has_table('tag', engine): - op.create_table( - 'tag', - sa.Column('id', sa.String(), nullable=False), - sa.Column('tag', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('id'), - ) - if not has_table('item_tags', engine): - op.create_table( - 'item_tags', - sa.Column('tagid', sa.String(), nullable=False), - sa.Column('itemid', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('tagid', 'itemid'), - ) - if not has_table('task', engine): - op.create_table( - 'task', - sa.Column('taskUri', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - 
sa.Column('cronexpr', sa.String(), nullable=True), - sa.Column('status', sa.String(), nullable=False), - sa.Column('action', sa.String(), nullable=False), - sa.Column('payload', postgresql.JSON(astext_type=sa.Text()), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('response', postgresql.JSON(astext_type=sa.Text()), nullable=True), - sa.Column('error', postgresql.JSON(astext_type=sa.Text()), nullable=True), - sa.Column('lastSeen', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('taskUri'), - ) - if not has_table('tenant', engine): - op.create_table( - 'tenant', - sa.Column('tenantUri', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('description', sa.String(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('tenantUri'), - ) - op.create_index(op.f('ix_tenant_name'), 'tenant', ['name'], unique=True) - if not has_table('tenant_administrator', engine): - op.create_table( - 'tenant_administrator', - sa.Column('userName', sa.String(), nullable=False), - sa.Column('tenantUri', sa.String(), nullable=False), - sa.Column('userRoleInTenant', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userName'), - sa.ForeignKeyConstraint( - ['tenantUri'], - ), - ) - if not has_table('tenant_policy', engine): - op.create_table( - 'tenant_policy', - sa.Column('sid', sa.String(), nullable=False), - sa.Column('tenantUri', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=False), - sa.Column( - 'principalType', - sa.Enum('USER', 'GROUP', 'SERVICE', name='tenant_principal_type'), - nullable=True, - ), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint( - ['tenantUri'], - ['tenant.tenantUri'], - ), - sa.PrimaryKeyConstraint('sid'), - ) 
- op.create_index( - op.f('ix_tenant_policy_principalId'), - 'tenant_policy', - ['principalId'], - unique=False, - ) - if not has_table('tenant_policy_permission', engine): - op.create_table( - 'tenant_policy_permission', - sa.Column('sid', sa.String(), nullable=False), - sa.Column('permissionUri', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.ForeignKeyConstraint( - ['permissionUri'], - ['permission.permissionUri'], - ), - sa.ForeignKeyConstraint( - ['sid'], - ['tenant_policy.sid'], - ), - sa.PrimaryKeyConstraint('sid', 'permissionUri'), - ) - if not has_table('user', engine): - op.create_table( - 'user', - sa.Column('userId', sa.String(), nullable=False), - sa.Column('userName', sa.String(), nullable=False), - sa.PrimaryKeyConstraint('userId'), - ) - if not has_table('vote', engine): - op.create_table( - 'vote', - sa.Column('voteUri', sa.String(), nullable=False), - sa.Column('username', sa.String(), nullable=False), - sa.Column('targetUri', sa.String(), nullable=False), - sa.Column('targetType', sa.String(), nullable=False), - sa.Column('upvote', sa.Boolean(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('voteUri'), - ) - if not has_table('vpc', engine): - op.create_table( - 'vpc', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('environmentUri', sa.String(), nullable=False), - sa.Column('vpcUri', sa.String(), nullable=False), - sa.Column('region', sa.String(), 
nullable=True), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('SamlGroupName', sa.String(), nullable=True), - sa.Column('VpcId', sa.String(), nullable=False), - sa.Column('privateSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('publicSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('default', sa.Boolean(), nullable=True), - sa.PrimaryKeyConstraint('vpcUri'), - ) - if not has_table('worksheet', engine): - op.create_table( - 'worksheet', - sa.Column('label', sa.String(), nullable=False), - sa.Column('name', sa.String(), nullable=False), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.Column('deleted', sa.DateTime(), nullable=True), - sa.Column('description', sa.String(), nullable=True), - sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), - sa.Column('worksheetUri', sa.String(), nullable=False), - sa.Column('SamlAdminGroupName', sa.String(), nullable=False), - sa.Column('sqlBody', sa.String(), nullable=True), - sa.Column( - 'chartConfig', postgresql.JSON(astext_type=sa.Text()), nullable=True - ), - sa.Column('lastSavedAthenaQueryIdForQuery', sa.String(), nullable=True), - sa.Column('lastSavedAthenaQueryIdForChart', sa.String(), nullable=True), - sa.PrimaryKeyConstraint('worksheetUri'), - ) - if not has_table('worksheet_query_result', engine): - op.create_table( - 'worksheet_query_result', - sa.Column('worksheetUri', sa.String(), nullable=False), - sa.Column('AthenaQueryId', sa.String(), nullable=False), - sa.Column('status', sa.String(), nullable=False), - sa.Column( - 'queryType', - sa.Enum('chart', 'data', name='querytype'), - nullable=False, - ), - sa.Column('sqlBody', sa.String(), nullable=False), - sa.Column('AwsAccountId', sa.String(), nullable=False), - sa.Column('region', sa.String(), nullable=False), - sa.Column('OutputLocation', sa.String(), nullable=False), - 
sa.Column('error', sa.String(), nullable=True), - sa.Column('ElapsedTimeInMs', sa.Integer(), nullable=True), - sa.Column('DataScannedInBytes', sa.Integer(), nullable=True), - sa.Column('created', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('AthenaQueryId'), - ) - if not has_table('worksheet_share', engine): - op.create_table( - 'worksheet_share', - sa.Column('worksheetShareUri', sa.String(), nullable=False), - sa.Column('worksheetUri', sa.String(), nullable=False), - sa.Column('principalId', sa.String(), nullable=False), - sa.Column('principalType', sa.String(), nullable=False), - sa.Column('canEdit', sa.Boolean(), nullable=True), - sa.Column('owner', sa.String(), nullable=False), - sa.Column('created', sa.DateTime(), nullable=True), - sa.Column('updated', sa.DateTime(), nullable=True), - sa.PrimaryKeyConstraint('worksheetShareUri'), - ) - except Exception as e: - print('Failed to init database due to:', e) - pass -# ### end Alembic commands ### - - -def downgrade(): - # ### commands auto generated by Alembic - please adjust! 
### - op.drop_table('worksheet_share') - op.drop_table('worksheet_query_result') - op.drop_table('worksheet') - op.drop_table('vpc') - op.drop_table('user') - op.drop_table('tenant_policy_permission') - op.drop_table('tenant_policy') - op.drop_table('tenant_administrator') - op.drop_table('tenant') - op.drop_table('task') - op.drop_table('item_tags') - op.drop_table('tag') - op.drop_table('stack') - op.drop_table('sqlpipeline') - op.drop_table('share_object_item') - op.drop_table('share_object') - op.drop_table('sagemaker_studio_user_profile') - op.drop_table('sagemaker_studio_domain') - op.drop_table('sagemaker_notebook') - op.drop_table('resource_policy_permission') - op.drop_table('resource_policy') - op.drop_table('redshiftcluster_datasettable') - op.drop_table('redshiftcluster_dataset') - op.drop_table('redshiftcluster') - op.drop_table('permissions') - op.drop_table('organization_group') - op.drop_table('organization') - op.drop_table('notification') - op.drop_table('keyvaluetag') - op.drop_table('group_member') - op.drop_table('group') - op.drop_table('term_link') - op.drop_table('glossary_schema_map') - op.drop_table('glossary_schema') - op.drop_table('glossary_node') - op.drop_table('feed_message') - op.drop_table('environment_group_permission') - op.drop_table('environment') - op.drop_table('dataset_table_profiling_job') - op.drop_table('dataset_table_column') - op.drop_table('dataset_table') - op.drop_table('dataset_storage_location') - op.drop_table('dataset_quality_rule') - op.drop_table('dataset_profiling_run') - op.drop_table('dataset') - op.drop_table('dashboardshare') - op.drop_table('dashboard') - op.drop_table('activity') - # ### end Alembic commands ### diff --git a/backend/migrations/versions/652b83e1065c__release_v1_1_0.py b/backend/migrations/versions/652b83e1065c__release_v1_1_0.py index 596b04ebe..4ca2a48ed 100644 --- a/backend/migrations/versions/652b83e1065c__release_v1_1_0.py +++ 
b/backend/migrations/versions/652b83e1065c__release_v1_1_0.py @@ -11,7 +11,7 @@ # revision identifiers, used by Alembic. revision = '652b83e1065c' -down_revision = '4392a0c9747f' +down_revision = 'bd271a2780b2' branch_labels = None depends_on = None From c29f8a265c3640ed493a94d4b27c8363d5348e76 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Fri, 16 Sep 2022 08:03:03 +0200 Subject: [PATCH 06/19] Cleaned migration versions and initialization of the database --- .../versions/652b83e1065c__release_v1_1_0.py | 54 ------------------- .../versions/fb240cf070d0__release_v1_2_0.py | 54 ------------------- 2 files changed, 108 deletions(-) delete mode 100644 backend/migrations/versions/652b83e1065c__release_v1_1_0.py delete mode 100644 backend/migrations/versions/fb240cf070d0__release_v1_2_0.py diff --git a/backend/migrations/versions/652b83e1065c__release_v1_1_0.py b/backend/migrations/versions/652b83e1065c__release_v1_1_0.py deleted file mode 100644 index 4ca2a48ed..000000000 --- a/backend/migrations/versions/652b83e1065c__release_v1_1_0.py +++ /dev/null @@ -1,54 +0,0 @@ -"""_release_v1.1.0 - -Revision ID: 652b83e1065c -Revises: ada02a56cd32 -Create Date: 2022-09-15 15:10:53.506962 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql - -# revision identifiers, used by Alembic. 
-revision = '652b83e1065c' -down_revision = 'bd271a2780b2' -branch_labels = None -depends_on = None - - -def upgrade(): - op.rename_table('sqlpipeline', 'datapipeline') - op.add_column( - 'datapipeline', sa.Column('devStrategy', sa.String(), nullable=True) - ) - op.add_column( - 'datapipeline', sa.Column('devStages', postgresql.ARRAY(sa.String()), nullable=True) - ) - op.add_column( - 'datapipeline', sa.Column('template', sa.String(), nullable=True) - ) - op.alter_column( - 'datapipeline', 'sqlPipelineUri', new_column_name='DataPipelineUri' - ) - op.add_column( - 'datapipeline', sa.Column('inputDatasetUri', sa.String(), nullable=True) - ) - op.add_column( - 'datapipeline', sa.Column('outputDatasetUri', sa.String(), nullable=True) - ) - - pass - - -def downgrade(): - op.drop_column('datapipeline', 'inputDatasetUri') - op.drop_column('datapipeline', 'outputDatasetUri') - op.drop_column('datapipeline', 'devStrategy') - op.drop_column('datapipeline', 'devStages') - op.drop_column('datapipeline', 'template') - op.alter_column( - 'datapipeline', 'DataPipelineUri', new_column_name='sqlPipelineUri' - ) - op.rename_table('datapipeline', 'sqlpipeline') - - pass diff --git a/backend/migrations/versions/fb240cf070d0__release_v1_2_0.py b/backend/migrations/versions/fb240cf070d0__release_v1_2_0.py deleted file mode 100644 index 50458a37f..000000000 --- a/backend/migrations/versions/fb240cf070d0__release_v1_2_0.py +++ /dev/null @@ -1,54 +0,0 @@ -"""_release_v1.2.0 - -Revision ID: fb240cf070d0 -Revises: 652b83e1065c -Create Date: 2022-09-15 15:10:53.506962 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy.dialects import postgresql -from sqlalchemy import Column, TIMESTAMP, INTEGER, VARCHAR, NVARCHAR - - -# revision identifiers, used by Alembic. 
-revision = 'fb240cf070d0' -down_revision = '652b83e1065c' -branch_labels = None -depends_on = None - - -def upgrade(): - op.drop_column('datapipeline', 'devStages') - op.drop_column('datapipeline', 'inputDatasetUri') - op.drop_column('datapipeline', 'outputDatasetUri') - - op.create_table( - 'datapipelineenvironments', - Column('envPipelineUri', VARCHAR(50), primary_key=True), - Column('environmentUri', VARCHAR(50), nullable=False), - Column('environmentLabel', VARCHAR(50), nullable=False), - Column('pipelineUri', VARCHAR(50), nullable=False), - Column('pipelineLabel', VARCHAR(50), nullable=False), - Column('stage', VARCHAR(50), nullable=False), - Column('order', INTEGER, nullable=False), - Column('region', VARCHAR(50), nullable=False), - Column('AwsAccountId', VARCHAR(50), nullable=False), - Column('samlGroupName', VARCHAR(50), nullable=False), - sa.PrimaryKeyConstraint('envPipelineUri'), - ) - pass - - -def downgrade(): - op.add_column( - 'datapipeline', sa.Column('inputDatasetUri', sa.String(), nullable=True) - ) - op.add_column( - 'datapipeline', sa.Column('outputDatasetUri', sa.String(), nullable=True) - ) - op.add_column( - 'datapipeline', sa.Column('devStages', postgresql.ARRAY(sa.String()), nullable=True) - ) - op.drop_table('datapipelineenvironments') - pass From 71f3bde46cec0d89cad649caac183d7e51f8386c Mon Sep 17 00:00:00 2001 From: dlpzx Date: Fri, 16 Sep 2022 11:08:17 +0200 Subject: [PATCH 07/19] Added clean-up version and v1.2.0 --- .../versions/033c3d6c1849_init_permissions.py | 39 + .../versions/166af5c0355b_release_3_7_1.py | 113 ++ .../versions/2b40221043f1_release_3_7_0.py | 42 + ...ea02fe85af6_redshift_copy_data_location.py | 31 + .../versions/3ae3eeca475c_release_3_6_1.py | 75 + .../4392a0c9747f_pipeline_input_output.py | 31 + .../45a4a4702af1_opensource_v1_2_0.py | 56 + .../46e5a33450b1_vpc_default_env_flag.py | 28 + .../4ab27e3b3d54_stack_events_column.py | 31 + .../5d5102986ce5_add_subnet_ids_columns.py | 40 + 
.../5e5c84138af7_backfill_confidentiality.py | 89 ++ .../versions/5e722995fa0b_release_3_8_1.py | 820 ++++++++++ .../versions/74b89c64f330_vpc_group.py | 28 + ...94697ee46c0c_sagemaker_notebooks_update.py | 40 + .../versions/967fa9c0a147_add_ecs_task_arn.py | 28 + .../versions/97050ec09354_release_3_7_8.py | 92 ++ .../9b589bf91485_dashboard_sharing.py | 31 + .../b6e0ac8f6d3f_add_env_feature_flags.py | 103 ++ .../versions/bc6ff74a16bc_clean_up.py | 1039 +++++++++++++ .../bc77fef9d0b2_new_permissions_model.py | 198 +++ .../versions/bd271a2780b2_init_database.py | 1349 +++++++++++++++++ .../versions/bd4bea86de30_release_3_6_0.py | 38 + .../be22468d7342_dataset_column_type.py | 30 + .../versions/c5c6bbbc5de7_release_3_5_0.py | 104 ++ .../decc96c5670f_organization_groups.py | 38 + .../versions/e177eb044b31_init_tenant.py | 42 + .../e72009ab3b9a_updating_pipelines.py | 48 + 27 files changed, 4603 insertions(+) create mode 100644 backend/migrations/versions/033c3d6c1849_init_permissions.py create mode 100644 backend/migrations/versions/166af5c0355b_release_3_7_1.py create mode 100644 backend/migrations/versions/2b40221043f1_release_3_7_0.py create mode 100644 backend/migrations/versions/2ea02fe85af6_redshift_copy_data_location.py create mode 100644 backend/migrations/versions/3ae3eeca475c_release_3_6_1.py create mode 100644 backend/migrations/versions/4392a0c9747f_pipeline_input_output.py create mode 100644 backend/migrations/versions/45a4a4702af1_opensource_v1_2_0.py create mode 100644 backend/migrations/versions/46e5a33450b1_vpc_default_env_flag.py create mode 100644 backend/migrations/versions/4ab27e3b3d54_stack_events_column.py create mode 100644 backend/migrations/versions/5d5102986ce5_add_subnet_ids_columns.py create mode 100644 backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py create mode 100644 backend/migrations/versions/5e722995fa0b_release_3_8_1.py create mode 100644 backend/migrations/versions/74b89c64f330_vpc_group.py create mode 100644 
backend/migrations/versions/94697ee46c0c_sagemaker_notebooks_update.py create mode 100644 backend/migrations/versions/967fa9c0a147_add_ecs_task_arn.py create mode 100644 backend/migrations/versions/97050ec09354_release_3_7_8.py create mode 100644 backend/migrations/versions/9b589bf91485_dashboard_sharing.py create mode 100644 backend/migrations/versions/b6e0ac8f6d3f_add_env_feature_flags.py create mode 100644 backend/migrations/versions/bc6ff74a16bc_clean_up.py create mode 100644 backend/migrations/versions/bc77fef9d0b2_new_permissions_model.py create mode 100644 backend/migrations/versions/bd271a2780b2_init_database.py create mode 100644 backend/migrations/versions/bd4bea86de30_release_3_6_0.py create mode 100644 backend/migrations/versions/be22468d7342_dataset_column_type.py create mode 100644 backend/migrations/versions/c5c6bbbc5de7_release_3_5_0.py create mode 100644 backend/migrations/versions/decc96c5670f_organization_groups.py create mode 100644 backend/migrations/versions/e177eb044b31_init_tenant.py create mode 100644 backend/migrations/versions/e72009ab3b9a_updating_pipelines.py diff --git a/backend/migrations/versions/033c3d6c1849_init_permissions.py b/backend/migrations/versions/033c3d6c1849_init_permissions.py new file mode 100644 index 000000000..bc48176f9 --- /dev/null +++ b/backend/migrations/versions/033c3d6c1849_init_permissions.py @@ -0,0 +1,39 @@ +"""init permissions + +Revision ID: 033c3d6c1849 +Revises: bc77fef9d0b2 +Create Date: 2021-08-03 07:53:28.164238 + +""" +import os + +from alembic import op +import sqlalchemy as sa +from sqlalchemy import orm + +from dataall.db import api, get_engine, has_table + +# revision identifiers, used by Alembic. +revision = '033c3d6c1849' +down_revision = 'bc77fef9d0b2' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + try: + bind = op.get_bind() + session = orm.Session(bind=bind) + print('Initializing permissions...') + api.Permission.init_permissions(session) + print('Permissions initialized successfully') + except Exception as e: + print(f'Failed to init permissions due to: {e}') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/backend/migrations/versions/166af5c0355b_release_3_7_1.py b/backend/migrations/versions/166af5c0355b_release_3_7_1.py new file mode 100644 index 000000000..46277fc3c --- /dev/null +++ b/backend/migrations/versions/166af5c0355b_release_3_7_1.py @@ -0,0 +1,113 @@ +"""release 3.7.1 + +Revision ID: 166af5c0355b +Revises: 2b40221043f1 +Create Date: 2021-12-02 19:22:27.714326 + +""" +import datetime + +from alembic import op +from sqlalchemy import Boolean, Column, String, DateTime, orm +from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.declarative import declarative_base + +from dataall.db import utils, Resource + +# revision identifiers, used by Alembic. 
+from dataall.utils.naming_convention import ( + NamingConventionService, + NamingConventionPattern, +) + +revision = '166af5c0355b' +down_revision = '2b40221043f1' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class Environment(Resource, Base): + __tablename__ = 'environment' + organizationUri = Column(String, nullable=False) + environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) + AwsAccountId = Column(String, nullable=False) + region = Column(String, nullable=False, default='eu-west-1') + cognitoGroupName = Column(String, nullable=True) + resourcePrefix = Column(String, nullable=False, default='dh') + validated = Column(Boolean, default=False) + environmentType = Column(String, nullable=False, default='Data') + isOrganizationDefaultEnvironment = Column(Boolean, default=False) + EnvironmentDefaultIAMRoleName = Column(String, nullable=False) + EnvironmentDefaultIAMRoleImported = Column(Boolean, default=False) + EnvironmentDefaultIAMRoleArn = Column(String, nullable=False) + EnvironmentDefaultBucketName = Column(String) + EnvironmentDefaultAthenaWorkGroup = Column(String) + roleCreated = Column(Boolean, nullable=False, default=False) + dashboardsEnabled = Column(Boolean, default=False) + notebooksEnabled = Column(Boolean, default=True) + mlStudiosEnabled = Column(Boolean, default=True) + pipelinesEnabled = Column(Boolean, default=True) + warehousesEnabled = Column(Boolean, default=True) + SamlGroupName = Column(String, nullable=True) + CDKRoleArn = Column(String, nullable=False) + subscriptionsEnabled = Column(Boolean, default=False) + subscriptionsProducersTopicName = Column(String) + subscriptionsProducersTopicImported = Column(Boolean, default=False) + subscriptionsConsumersTopicName = Column(String) + subscriptionsConsumersTopicImported = Column(Boolean, default=False) + + +class EnvironmentGroup(Base): + __tablename__ = 'environment_group_permission' + groupUri = Column(String, primary_key=True) + 
environmentUri = Column(String, primary_key=True) + invitedBy = Column(String, nullable=True) + environmentIAMRoleArn = Column(String, nullable=True) + environmentIAMRoleName = Column(String, nullable=True) + environmentIAMRoleImported = Column(Boolean, default=False) + environmentAthenaWorkGroup = Column(String, nullable=True) + description = Column(String, default='No description provided') + created = Column(DateTime, default=datetime.datetime.now) + updated = Column(DateTime, onupdate=datetime.datetime.now) + deleted = Column(DateTime) + groupRoleInEnvironment = Column(String, nullable=False, default='Invited') + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + bind = op.get_bind() + session = orm.Session(bind=bind) + print('Back filling environments athena workgroup...') + envs: [Environment] = session.query(Environment).all() + env: Environment + for env in envs: + print(f'Back fill environment athena workgroup {env.label}...') + env.EnvironmentDefaultAthenaWorkGroup = NamingConventionService( + target_uri=env.environmentUri, + target_label=env.label, + pattern=NamingConventionPattern.DEFAULT, + resource_prefix=env.resourcePrefix, + ).build_compliant_name() + session.commit() + env_groups = session.query(EnvironmentGroup).all() + env_group: EnvironmentGroup + for env_group in env_groups: + print(f'Back fill group athena workgroup {env_group.groupUri}...') + env_group.environmentAthenaWorkGroup = NamingConventionService( + target_uri=env.environmentUri, + target_label=env_group.groupUri, + pattern=NamingConventionPattern.DEFAULT, + resource_prefix=env.resourcePrefix, + ).build_compliant_name() + + session.commit() + print('Successfully back filled athena workgroup names ') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + pass + # ### end Alembic commands ### diff --git a/backend/migrations/versions/2b40221043f1_release_3_7_0.py b/backend/migrations/versions/2b40221043f1_release_3_7_0.py new file mode 100644 index 000000000..95db2590c --- /dev/null +++ b/backend/migrations/versions/2b40221043f1_release_3_7_0.py @@ -0,0 +1,42 @@ +"""release 3.7.0 + +Revision ID: 2b40221043f1 +Revises: 3ae3eeca475c +Create Date: 2021-12-02 11:12:57.959968 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '2b40221043f1' +down_revision = '3ae3eeca475c' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('dataset', sa.Column('GlueCrawlerName', sa.String(), nullable=True)) + op.add_column( + 'dataset', sa.Column('GlueCrawlerSchedule', sa.String(), nullable=True) + ) + op.add_column( + 'environment', + sa.Column('EnvironmentDefaultAthenaWorkGroup', sa.String(), nullable=True), + ) + op.add_column( + 'environment_group_permission', + sa.Column('environmentAthenaWorkGroup', sa.String(), nullable=True), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('environment_group_permission', 'environmentAthenaWorkGroup') + op.drop_column('environment', 'EnvironmentDefaultAthenaWorkGroup') + op.drop_column('dataset', 'GlueCrawlerSchedule') + op.drop_column('dataset', 'GlueCrawlerName') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/2ea02fe85af6_redshift_copy_data_location.py b/backend/migrations/versions/2ea02fe85af6_redshift_copy_data_location.py new file mode 100644 index 000000000..7185795f4 --- /dev/null +++ b/backend/migrations/versions/2ea02fe85af6_redshift_copy_data_location.py @@ -0,0 +1,31 @@ +"""redshift copy data location + +Revision ID: 2ea02fe85af6 +Revises: 4ab27e3b3d54 +Create Date: 2021-07-15 07:17:38.392707 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '2ea02fe85af6' +down_revision = '4ab27e3b3d54' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + 'redshiftcluster_datasettable', + sa.Column('dataLocation', sa.String(), nullable=True), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('redshiftcluster_datasettable', 'dataLocation') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/3ae3eeca475c_release_3_6_1.py b/backend/migrations/versions/3ae3eeca475c_release_3_6_1.py new file mode 100644 index 000000000..657434e00 --- /dev/null +++ b/backend/migrations/versions/3ae3eeca475c_release_3_6_1.py @@ -0,0 +1,75 @@ +"""release 3.6.1 + +Revision ID: 3ae3eeca475c +Revises: bd4bea86de30 +Create Date: 2021-11-29 07:30:10.790084 + +""" +import datetime + +import sqlalchemy as sa +from alembic import op +from sqlalchemy import Boolean, Column, String, orm, DateTime +from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import query_expression + +from dataall.db import utils + + +# revision identifiers, used by Alembic. +revision = '3ae3eeca475c' +down_revision = 'bd4bea86de30' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class Stack(Base): + __tablename__ = 'stack' + stackUri = Column( + String, nullable=False, default=utils.uuid('stack'), primary_key=True + ) + name = Column(String, nullable=True) + targetUri = Column(String, nullable=False) + accountid = Column(String, nullable=False) + region = Column(String, nullable=False) + cronexpr = Column(String, nullable=True) + status = Column(String, nullable=False, default='pending') + stack = Column(String, nullable=False) + payload = Column(postgresql.JSON, nullable=True) + created = Column(DateTime, default=datetime.datetime.now()) + updated = Column(DateTime, onupdate=datetime.datetime.now()) + stackid = Column(String) + outputs = Column(postgresql.JSON) + resources = Column(postgresql.JSON) + error = Column(postgresql.JSON) + events = Column(postgresql.JSON) + lastSeen = Column( + DateTime, default=lambda: datetime.datetime(year=1900, month=1, day=1) + ) + EcsTaskArn = Column(String, nullable=True) + + +def upgrade(): + # ### commands auto generated by Alembic - 
please adjust! ### + bind = op.get_bind() + session = orm.Session(bind=bind) + op.add_column('stack', sa.Column('name', sa.String(), nullable=True)) + print('Back filling stack names...') + stacks: [Stack] = session.query(Stack).all() + stack: Stack + for stack in stacks: + print(f'Back fill stack {stack.stackUri}...') + stack.name = f'stack-{stack.stackUri}' + session.commit() + + print('Successfully back filled stack names ') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('stack', 'name') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/4392a0c9747f_pipeline_input_output.py b/backend/migrations/versions/4392a0c9747f_pipeline_input_output.py new file mode 100644 index 000000000..cb17a7789 --- /dev/null +++ b/backend/migrations/versions/4392a0c9747f_pipeline_input_output.py @@ -0,0 +1,31 @@ +"""pipeline input output + +Revision ID: 4392a0c9747f +Revises: e72009ab3b9a +Create Date: 2022-06-10 15:27:40.777295 + +""" +from alembic import op +import sqlalchemy as sa + +# revision identifiers, used by Alembic. 
+revision = '4392a0c9747f' +down_revision = 'e72009ab3b9a' +branch_labels = None +depends_on = None + + +def upgrade(): +    op.add_column( +        'datapipeline', sa.Column('inputDatasetUri', sa.String(), nullable=True) +    ) +    op.add_column( +        'datapipeline', sa.Column('outputDatasetUri', sa.String(), nullable=True) +    ) +    pass + + +def downgrade(): +    op.drop_column('datapipeline', 'inputDatasetUri') +    op.drop_column('datapipeline', 'outputDatasetUri') +    pass diff --git a/backend/migrations/versions/45a4a4702af1_opensource_v1_2_0.py b/backend/migrations/versions/45a4a4702af1_opensource_v1_2_0.py new file mode 100644 index 000000000..16471893c --- /dev/null +++ b/backend/migrations/versions/45a4a4702af1_opensource_v1_2_0.py @@ -0,0 +1,56 @@ +"""opensource_v1.2.0 + +Revision ID: 45a4a4702af1 +Revises: bc6ff74a16bc +Create Date: 2022-09-15 17:53:13.455441 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql +from sqlalchemy import Column, TIMESTAMP, INTEGER, VARCHAR, NVARCHAR +from dataall.db import get_engine, has_table, create_schema_if_not_exists + + +# revision identifiers, used by Alembic. 
+revision = '45a4a4702af1' +down_revision = 'bc6ff74a16bc' +branch_labels = None +depends_on = None + + +def upgrade(): + print('Open-source v_1.2.0') + op.drop_column('datapipeline', 'devStages') + op.drop_column('datapipeline', 'inputDatasetUri') + op.drop_column('datapipeline', 'outputDatasetUri') + + op.create_table( + 'datapipelineenvironments', + Column('envPipelineUri', VARCHAR(50), primary_key=True), + Column('environmentUri', VARCHAR(50), nullable=False), + Column('environmentLabel', VARCHAR(50), nullable=False), + Column('pipelineUri', VARCHAR(50), nullable=False), + Column('pipelineLabel', VARCHAR(50), nullable=False), + Column('stage', VARCHAR(50), nullable=False), + Column('order', INTEGER, nullable=False), + Column('region', VARCHAR(50), nullable=False), + Column('AwsAccountId', VARCHAR(50), nullable=False), + Column('samlGroupName', VARCHAR(50), nullable=False), + sa.PrimaryKeyConstraint('envPipelineUri'), + ) + pass + + +def downgrade(): + op.add_column( + 'datapipeline', sa.Column('inputDatasetUri', sa.String(), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('outputDatasetUri', sa.String(), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('devStages', postgresql.ARRAY(sa.String()), nullable=True) + ) + op.drop_table('datapipelineenvironments') + pass diff --git a/backend/migrations/versions/46e5a33450b1_vpc_default_env_flag.py b/backend/migrations/versions/46e5a33450b1_vpc_default_env_flag.py new file mode 100644 index 000000000..95aed8e9e --- /dev/null +++ b/backend/migrations/versions/46e5a33450b1_vpc_default_env_flag.py @@ -0,0 +1,28 @@ +"""vpc default env flag + +Revision ID: 46e5a33450b1 +Revises: be22468d7342 +Create Date: 2021-07-12 19:36:20.588492 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. 
+revision = '46e5a33450b1' +down_revision = 'be22468d7342' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('vpc', sa.Column('default', sa.Boolean(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('vpc', 'default') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/4ab27e3b3d54_stack_events_column.py b/backend/migrations/versions/4ab27e3b3d54_stack_events_column.py new file mode 100644 index 000000000..c5405d9b0 --- /dev/null +++ b/backend/migrations/versions/4ab27e3b3d54_stack_events_column.py @@ -0,0 +1,31 @@ +"""stack events column + +Revision ID: 4ab27e3b3d54 +Revises: 46e5a33450b1 +Create Date: 2021-07-13 06:56:48.350712 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '4ab27e3b3d54' +down_revision = '46e5a33450b1' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + 'stack', + sa.Column('events', postgresql.JSON(astext_type=sa.Text()), nullable=True), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('stack', 'events') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/5d5102986ce5_add_subnet_ids_columns.py b/backend/migrations/versions/5d5102986ce5_add_subnet_ids_columns.py new file mode 100644 index 000000000..76fdc2bf4 --- /dev/null +++ b/backend/migrations/versions/5d5102986ce5_add_subnet_ids_columns.py @@ -0,0 +1,40 @@ +"""Add subnet ids columns + +Revision ID: 5d5102986ce5 +Revises: bd271a2780b2 +Create Date: 2021-05-19 16:07:48.221086 + +""" +import os + +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '5d5102986ce5' +down_revision = 'bd271a2780b2' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + envname = os.getenv('envname', 'local') + print('ENVNAME', envname) + op.add_column( + 'vpc', + sa.Column('privateSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), + ) + op.add_column( + 'vpc', + sa.Column('publicSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('vpc', 'publicSubnetIds') + op.drop_column('vpc', 'privateSubnetIds') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py b/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py new file mode 100644 index 000000000..123151d99 --- /dev/null +++ b/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py @@ -0,0 +1,89 @@ +"""backfill confidentiality + +Revision ID: 5e5c84138af7 +Revises: 94697ee46c0c +Create Date: 2021-09-15 13:41:44.102866 + +""" +from alembic import op + +# revision identifiers, used by Alembic. 
+from sqlalchemy import orm, Column, String, Boolean +from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.declarative import declarative_base + +from dataall.db import utils, Resource + +revision = '5e5c84138af7' +down_revision = '94697ee46c0c' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class Dataset(Resource, Base): + __tablename__ = 'dataset' + environmentUri = Column(String, nullable=False) + organizationUri = Column(String, nullable=False) + datasetUri = Column(String, primary_key=True, default=utils.uuid('dataset')) + region = Column(String, default='eu-west-1') + AwsAccountId = Column(String, nullable=False) + S3BucketName = Column(String, nullable=False) + GlueDatabaseName = Column(String, nullable=False) + GlueProfilingJobName = Column(String) + GlueProfilingTriggerSchedule = Column(String) + GlueProfilingTriggerName = Column(String) + GlueDataQualityJobName = Column(String) + GlueDataQualitySchedule = Column(String) + GlueDataQualityTriggerName = Column(String) + IAMDatasetAdminRoleArn = Column(String, nullable=False) + IAMDatasetAdminUserArn = Column(String, nullable=False) + KmsAlias = Column(String, nullable=False) + language = Column(String, nullable=False, default='English') + topics = Column(postgresql.ARRAY(String), nullable=True) + confidentiality = Column(String, nullable=False, default='Unclassified') + tags = Column(postgresql.ARRAY(String)) + + bucketCreated = Column(Boolean, default=False) + glueDatabaseCreated = Column(Boolean, default=False) + iamAdminRoleCreated = Column(Boolean, default=False) + iamAdminUserCreated = Column(Boolean, default=False) + kmsAliasCreated = Column(Boolean, default=False) + lakeformationLocationCreated = Column(Boolean, default=False) + bucketPolicyCreated = Column(Boolean, default=False) + + businessOwnerEmail = Column(String, nullable=True) + businessOwnerDelegationEmails = Column(postgresql.ARRAY(String), nullable=True) + stewards = Column(String, nullable=True) + + 
SamlAdminGroupName = Column(String, nullable=True) + + importedS3Bucket = Column(Boolean, default=False) + importedGlueDatabase = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + importedAdminRole = Column(Boolean, default=False) + imported = Column(Boolean, default=False) + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + try: + bind = op.get_bind() + session = orm.Session(bind=bind) + print('Updating datasets...') + datasets: [Dataset] = session.query(Dataset).all() + for dataset in datasets: + if dataset.confidentiality not in ['Unclassified', 'Official', 'Secret']: + dataset.confidentiality = 'Unclassified' + session.commit() + print('Datasets updated successfully') + except Exception as e: + print(f'Failed to init permissions due to: {e}') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/backend/migrations/versions/5e722995fa0b_release_3_8_1.py b/backend/migrations/versions/5e722995fa0b_release_3_8_1.py new file mode 100644 index 000000000..a080ad22e --- /dev/null +++ b/backend/migrations/versions/5e722995fa0b_release_3_8_1.py @@ -0,0 +1,820 @@ +"""release 3.8.1 + +Revision ID: 5e722995fa0b +Revises: 97050ec09354 +Create Date: 2021-12-22 12:56:28.698754 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '5e722995fa0b' +down_revision = '97050ec09354' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('scheduled_query') + op.drop_table('share_object_v2') + op.drop_table('share_object_item_v2') + op.drop_table('dataset_query') + op.drop_table('key_value_pair') + op.drop_table('environment_permission') + op.drop_table('metadata_facet') + op.drop_table('EnvironmentRedshiftCluster') + op.drop_table('organization_topic') + op.drop_table('dataset_loader') + op.drop_table('dataset_storage_location_permission') + op.drop_table('athena_query_execution') + op.drop_table('airflow_project') + op.drop_table('environment_user_permission') + op.drop_table('data_access_request') + op.drop_table('organization_user') + op.drop_table('apikey') + op.drop_table('airflow_cluster_user_permission') + op.drop_table('dataset_user_permission') + op.drop_table('redshift_cluster_user_permission') + op.drop_table('airflowcluster') + op.drop_table('document') + op.drop_table('lineage_store') + op.drop_table('share_object_history') + op.drop_table('saved_query') + op.drop_table('dataset_table_permission') + op.drop_table('metadata_tag') + op.drop_table('dataset_access_point') + op.drop_table('search_index') + op.drop_table('userprofile') + op.drop_table('metric') + op.drop_table('all_permissions') + op.drop_table('dataset_topic') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table( + 'dataset_topic', + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('topicUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.PrimaryKeyConstraint('datasetUri', 'topicUri', name='dataset_topic_pkey'), + ) + op.create_table( + 'all_permissions', + sa.Column('objectUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('permission', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'scope', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.PrimaryKeyConstraint('objectUri', 'userName', name='all_permissions_pkey'), + ) + op.create_table( + 'metric', + sa.Column('metricUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('metricName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'metricValue', + postgresql.DOUBLE_PRECISION(precision=53), + autoincrement=False, + nullable=False, + ), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False + ), + sa.Column('emitter', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('target', sa.VARCHAR(), autoincrement=False, nullable=True), + 
sa.PrimaryKeyConstraint('metricUri', name='metric_pkey'), + ) + op.create_table( + 'userprofile', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('username', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('bio', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('b64EncodedAvatar', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('username', name='userprofile_pkey'), + ) + op.create_table( + 'search_index', + sa.Column('objectUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('objectType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('searcAttribute1', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('searcAttribute2', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('searcAttribute3', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('searcAttribute4', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 
'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.PrimaryKeyConstraint('objectUri', name='search_index_pkey'), + ) + op.create_table( + 'dataset_access_point', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('projectUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('locationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('accessPointUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AWSAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('S3BucketName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('S3Prefix', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'S3AccessPointName', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.Column( + 'accessPointCreated', sa.BOOLEAN(), autoincrement=False, nullable=False + ), + sa.PrimaryKeyConstraint('accessPointUri', name='dataset_access_point_pkey'), + ) + op.create_table( + 'metadata_tag', + sa.Column('tagId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('nodeId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('nodeKind', sa.VARCHAR(), autoincrement=False, 
nullable=False), + sa.Column('Key', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('Value', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('tagId', name='metadata_tag_pkey'), + ) + op.create_table( + 'dataset_table_permission', + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('tableUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'userRoleForTable', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.PrimaryKeyConstraint( + 'userName', 'tableUri', name='dataset_table_permission_pkey' + ), + ) + op.create_table( + 'saved_query', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column( + 'scheduledQueryUri', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.Column('savedQueryUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('queryOrder', sa.INTEGER(), autoincrement=False, nullable=False), + sa.Column('sqlBody', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('savedQueryUri', name='saved_query_pkey'), + ) + op.create_table( 
+ 'share_object_history', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('historyUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('actionName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'actionPayload', + postgresql.JSON(astext_type=sa.Text()), + autoincrement=False, + nullable=True, + ), + sa.PrimaryKeyConstraint('historyUri', name='share_object_history_pkey'), + ) + op.create_table( + 'lineage_store', + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('version', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('guid', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'kind', + postgresql.ENUM( + 'dataset', + 'table', + 'folder', + 'job', + 'run', + 'datasource', + name='datanodetype', + ), + autoincrement=False, + nullable=False, + ), + sa.Column('parent', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('ref', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('location', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('created', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'inputs', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column( + 'outputs', + 
postgresql.ARRAY(sa.VARCHAR()), + autoincrement=False, + nullable=True, + ), + sa.PrimaryKeyConstraint('name', 'version', 'ref', name='lineage_store_pkey'), + ) + op.create_table( + 'document', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('parentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('md', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('parentUri', name='document_pkey'), + ) + op.create_table( + 'airflowcluster', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterArn', sa.VARCHAR(), autoincrement=False, 
nullable=True), + sa.Column('clusterName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('kmsAlias', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('vpc', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'subnetIds', + postgresql.ARRAY(sa.VARCHAR()), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'securityGroupIds', + postgresql.ARRAY(sa.VARCHAR()), + autoincrement=False, + nullable=True, + ), + sa.Column('CFNStackName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('CFNStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('IAMRoleArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('presignedUrl', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('imported', sa.BOOLEAN(), autoincrement=False, nullable=True), + sa.Column( + 'configurationOptions', + postgresql.ARRAY(sa.VARCHAR()), + autoincrement=False, + nullable=True, + ), + sa.Column('airflowVersion', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('dagS3Path', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('pluginsS3Path', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'requirementsS3Path', sa.VARCHAR(), autoincrement=False, nullable=True + ), + sa.Column('environmentClass', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'loggingConfiguration', + postgresql.ARRAY(sa.VARCHAR()), + autoincrement=False, + nullable=True, + ), + sa.Column('sourceBucketArn', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'webServerAccessMode', 
sa.VARCHAR(), autoincrement=False, nullable=True + ), + sa.Column('maxWorkers', sa.INTEGER(), autoincrement=False, nullable=True), + sa.Column('SamlGroupName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('webServerUrl', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('clusterUri', name='airflowcluster_pkey'), + ) + op.create_table( + 'redshift_cluster_user_permission', + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'redshiftClusterUri', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'userRoleForRedshiftCluster', + sa.VARCHAR(), + autoincrement=False, + nullable=False, + ), + sa.PrimaryKeyConstraint( + 'userName', + 'redshiftClusterUri', + name='redshift_cluster_user_permission_pkey', + ), + ) + op.create_table( + 'dataset_user_permission', + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'userRoleForDataset', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.PrimaryKeyConstraint( + 'userName', 'datasetUri', name='dataset_user_permission_pkey' + ), + ) + op.create_table( + 'airflow_cluster_user_permission', + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', 
postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'userRoleForAirflowCluster', + sa.VARCHAR(), + autoincrement=False, + nullable=False, + ), + sa.PrimaryKeyConstraint( + 'userName', 'clusterUri', name='airflow_cluster_user_permission_pkey' + ), + ) + op.create_table( + 'apikey', + sa.Column('ApiKeyId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'ApiKeySecretHash', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'SamlGroups', + postgresql.ARRAY(sa.VARCHAR()), + autoincrement=False, + nullable=True, + ), + sa.Column( + 'expires', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.PrimaryKeyConstraint('ApiKeyId', name='apikey_pkey'), + ) + op.create_table( + 'organization_user', + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'userRoleInOrganization', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.PrimaryKeyConstraint( + 'userName', 'organizationUri', name='organization_user_pkey' + ), + ) + op.create_table( + 'data_access_request', + sa.Column('requestUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('principalId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('principalType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('principalName', sa.VARCHAR(), autoincrement=False, 
nullable=False), + sa.Column('requester', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('message', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('requestUri', name='data_access_request_pkey'), + ) + op.create_table( + 'environment_user_permission', + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'userRoleInEnvironment', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.PrimaryKeyConstraint( + 'userName', 'environmentUri', name='environment_user_permission_pkey' + ), + ) + op.create_table( + 'airflow_project', + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('projectUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('region', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('cfnStackName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('cfnStackArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('cfnStackStatus', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'codeRepositoryName', sa.VARCHAR(), autoincrement=False, nullable=True + ), + sa.Column( + 'codeRepositoryLink', sa.VARCHAR(), autoincrement=False, nullable=True + ), + sa.Column( + 'codeRepositoryStatus', sa.VARCHAR(), autoincrement=False, nullable=True + ), + sa.Column( + 
'codePipelineStatus', sa.VARCHAR(), autoincrement=False, nullable=True + ), + sa.Column('codePipelineName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('codePipelineLink', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('codePipelineArn', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('packageName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('projectUri', name='airflow_project_pkey'), + ) + op.create_table( + 'athena_query_execution', + sa.Column('parentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'QueryExecutionId', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.Column('AwsAccountId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('queryid', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'completed', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.PrimaryKeyConstraint('QueryExecutionId', name='athena_query_execution_pkey'), + ) + op.create_table( + 'dataset_storage_location_permission', + sa.Column('userName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('locationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'userRoleForDatasetStorageLocation', + sa.VARCHAR(), + autoincrement=False, + nullable=False, + ), + sa.PrimaryKeyConstraint( + 'userName', 'locationUri', name='dataset_storage_location_permission_pkey' + ), + ) + op.create_table( + 
'dataset_loader', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('loaderUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('IAMPrincipalArn', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('IAMRoleId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('loaderUri', name='dataset_loader_pkey'), + ) + op.create_table( + 'organization_topic', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('topicUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('topicUri', name='organization_topic_pkey'), 
+ ) + op.create_table( + 'EnvironmentRedshiftCluster', + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('clusterUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.PrimaryKeyConstraint( + 'environmentUri', 'clusterUri', name='EnvironmentRedshiftCluster_pkey' + ), + ) + op.create_table( + 'metadata_facet', + sa.Column('facetId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('guid', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + '_schema', + postgresql.JSON(astext_type=sa.Text()), + autoincrement=False, + nullable=False, + ), + sa.Column( + 'doc', + postgresql.JSON(astext_type=sa.Text()), + autoincrement=False, + nullable=False, + ), + sa.PrimaryKeyConstraint('facetId', name='metadata_facet_pkey'), + ) + op.create_table( + 'environment_permission', + sa.Column('entityUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('entityType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'entityRoleInEnvironment', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.PrimaryKeyConstraint( + 'entityUri', 'environmentUri', name='environment_permission_pkey' + ), + ) + op.create_table( + 'key_value_pair', + sa.Column('kvId', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('objectUri', sa.VARCHAR(), autoincrement=False, 
nullable=False), + sa.Column('key', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('value', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('kvId', name='key_value_pair_pkey'), + ) + op.create_table( + 'dataset_query', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('queryUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('body', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('queryUri', name='dataset_query_pkey'), + ) + op.create_table( + 'share_object_item_v2', + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('version', sa.INTEGER(), autoincrement=False, nullable=False), + sa.Column('shareItemUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemType', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('itemName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=False + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + 
sa.Column('GlueDatabaseName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('GlueTableName', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column( + 'S3AccessPointName', sa.VARCHAR(), autoincrement=False, nullable=True + ), + sa.PrimaryKeyConstraint('shareItemUri', name='share_object_item_v2_pkey'), + ) + op.create_table( + 'share_object_v2', + sa.Column('shareUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('version', sa.INTEGER(), autoincrement=False, nullable=False), + sa.Column('latest', sa.BOOLEAN(), autoincrement=False, nullable=False), + sa.Column('datasetUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('datasetName', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('principalId', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('principalType', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.Column('status', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.PrimaryKeyConstraint('shareUri', 'version', name='share_object_v2_pkey'), + ) + op.create_table( + 'scheduled_query', + sa.Column('label', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('name', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column('owner', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'created', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'updated', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column( + 'deleted', postgresql.TIMESTAMP(), autoincrement=False, nullable=True + ), + sa.Column('description', sa.VARCHAR(), 
autoincrement=False, nullable=True), + sa.Column( + 'tags', postgresql.ARRAY(sa.VARCHAR()), autoincrement=False, nullable=True + ), + sa.Column('environmentUri', sa.VARCHAR(), autoincrement=False, nullable=False), + sa.Column( + 'scheduledQueryUri', sa.VARCHAR(), autoincrement=False, nullable=False + ), + sa.Column( + 'SamlAdminGroupName', sa.VARCHAR(), autoincrement=False, nullable=True + ), + sa.Column('cronexpr', sa.VARCHAR(), autoincrement=False, nullable=True), + sa.PrimaryKeyConstraint('scheduledQueryUri', name='scheduled_query_pkey'), + ) + # ### end Alembic commands ### diff --git a/backend/migrations/versions/74b89c64f330_vpc_group.py b/backend/migrations/versions/74b89c64f330_vpc_group.py new file mode 100644 index 000000000..d19fb6230 --- /dev/null +++ b/backend/migrations/versions/74b89c64f330_vpc_group.py @@ -0,0 +1,28 @@ +"""vpc group + +Revision ID: 74b89c64f330 +Revises: e177eb044b31 +Create Date: 2021-08-08 10:39:15.991280 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '74b89c64f330' +down_revision = 'e177eb044b31' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('vpc', sa.Column('SamlGroupName', sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('vpc', 'SamlGroupName') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/94697ee46c0c_sagemaker_notebooks_update.py b/backend/migrations/versions/94697ee46c0c_sagemaker_notebooks_update.py new file mode 100644 index 000000000..8015e677f --- /dev/null +++ b/backend/migrations/versions/94697ee46c0c_sagemaker_notebooks_update.py @@ -0,0 +1,40 @@ +"""sagemaker notebooks update + +Revision ID: 94697ee46c0c +Revises: 9b589bf91485 +Create Date: 2021-09-12 18:55:03.301399 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '94697ee46c0c' +down_revision = '9b589bf91485' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('sagemaker_notebook', sa.Column('VpcId', sa.String(), nullable=True)) + op.add_column( + 'sagemaker_notebook', sa.Column('SubnetId', sa.String(), nullable=True) + ) + op.add_column( + 'sagemaker_notebook', sa.Column('VolumeSizeInGB', sa.Integer(), nullable=True) + ) + op.add_column( + 'sagemaker_notebook', sa.Column('InstanceType', sa.String(), nullable=True) + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('sagemaker_notebook', 'InstanceType') + op.drop_column('sagemaker_notebook', 'VolumeSizeInGB') + op.drop_column('sagemaker_notebook', 'SubnetId') + op.drop_column('sagemaker_notebook', 'VpcId') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/967fa9c0a147_add_ecs_task_arn.py b/backend/migrations/versions/967fa9c0a147_add_ecs_task_arn.py new file mode 100644 index 000000000..73e860d5a --- /dev/null +++ b/backend/migrations/versions/967fa9c0a147_add_ecs_task_arn.py @@ -0,0 +1,28 @@ +"""add ecs task arn + +Revision ID: 967fa9c0a147 +Revises: 5e5c84138af7 +Create Date: 2021-10-06 07:48:30.726242 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '967fa9c0a147' +down_revision = '5e5c84138af7' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('stack', sa.Column('EcsTaskArn', sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('stack', 'EcsTaskArn') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/97050ec09354_release_3_7_8.py b/backend/migrations/versions/97050ec09354_release_3_7_8.py new file mode 100644 index 000000000..fd0fdce87 --- /dev/null +++ b/backend/migrations/versions/97050ec09354_release_3_7_8.py @@ -0,0 +1,92 @@ +"""release 3.7.8 + +Revision ID: 97050ec09354 +Revises: 166af5c0355b +Create Date: 2021-12-08 12:54:33.828838 + +""" +import datetime + +from alembic import op +from sqlalchemy import Boolean, Column, String, DateTime, orm +from sqlalchemy.dialects import postgresql +from sqlalchemy.ext.declarative import declarative_base + +from dataall.db import utils, Resource + +# revision identifiers, used by Alembic. 
+from dataall.utils.naming_convention import ( + NamingConventionService, + NamingConventionPattern, +) + +# revision identifiers, used by Alembic. +revision = '97050ec09354' +down_revision = '166af5c0355b' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class Dataset(Resource, Base): + __tablename__ = 'dataset' + environmentUri = Column(String, nullable=False) + organizationUri = Column(String, nullable=False) + datasetUri = Column(String, primary_key=True, default=utils.uuid('dataset')) + region = Column(String, default='eu-west-1') + AwsAccountId = Column(String, nullable=False) + S3BucketName = Column(String, nullable=False) + GlueDatabaseName = Column(String, nullable=False) + GlueCrawlerName = Column(String) + GlueCrawlerSchedule = Column(String) + GlueProfilingJobName = Column(String) + GlueProfilingTriggerSchedule = Column(String) + GlueProfilingTriggerName = Column(String) + GlueDataQualityJobName = Column(String) + GlueDataQualitySchedule = Column(String) + GlueDataQualityTriggerName = Column(String) + IAMDatasetAdminRoleArn = Column(String, nullable=False) + IAMDatasetAdminUserArn = Column(String, nullable=False) + KmsAlias = Column(String, nullable=False) + language = Column(String, nullable=False, default='English') + topics = Column(postgresql.ARRAY(String), nullable=True) + confidentiality = Column(String, nullable=False, default='Unclassified') + tags = Column(postgresql.ARRAY(String)) + bucketCreated = Column(Boolean, default=False) + glueDatabaseCreated = Column(Boolean, default=False) + iamAdminRoleCreated = Column(Boolean, default=False) + iamAdminUserCreated = Column(Boolean, default=False) + kmsAliasCreated = Column(Boolean, default=False) + lakeformationLocationCreated = Column(Boolean, default=False) + bucketPolicyCreated = Column(Boolean, default=False) + businessOwnerEmail = Column(String, nullable=True) + businessOwnerDelegationEmails = Column(postgresql.ARRAY(String), nullable=True) + stewards = Column(String, 
nullable=True) + SamlAdminGroupName = Column(String, nullable=True) + importedS3Bucket = Column(Boolean, default=False) + importedGlueDatabase = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + importedAdminRole = Column(Boolean, default=False) + imported = Column(Boolean, default=False) + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + bind = op.get_bind() + session = orm.Session(bind=bind) + print('Back filling datasets crawler names...') + datasets: [Dataset] = session.query(Dataset).all() + dataset: Dataset + for dataset in datasets: + print(f'Back fill dataset crawler name {dataset.label}...') + dataset.GlueCrawlerName = f'{dataset.S3BucketName}-crawler' + session.commit() + print('Successfully back filled glue crawler names ') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + pass + # ### end Alembic commands ### diff --git a/backend/migrations/versions/9b589bf91485_dashboard_sharing.py b/backend/migrations/versions/9b589bf91485_dashboard_sharing.py new file mode 100644 index 000000000..f023b8393 --- /dev/null +++ b/backend/migrations/versions/9b589bf91485_dashboard_sharing.py @@ -0,0 +1,31 @@ +"""dashboard sharing + +Revision ID: 9b589bf91485 +Revises: decc96c5670f +Create Date: 2021-09-10 10:24:37.018830 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = '9b589bf91485' +down_revision = 'decc96c5670f' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column('dashboardshare', sa.Column('status', sa.String(), nullable=False)) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.alter_column( + 'share_object', 'environmentUri', existing_type=sa.VARCHAR(), nullable=True + ) + op.drop_column('dashboardshare', 'status') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/b6e0ac8f6d3f_add_env_feature_flags.py b/backend/migrations/versions/b6e0ac8f6d3f_add_env_feature_flags.py new file mode 100644 index 000000000..df90288ca --- /dev/null +++ b/backend/migrations/versions/b6e0ac8f6d3f_add_env_feature_flags.py @@ -0,0 +1,103 @@ +"""add env feature flags + +Revision ID: b6e0ac8f6d3f +Revises: 967fa9c0a147 +Create Date: 2021-10-25 09:00:40.925964 + +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy import Boolean, Column, String +from sqlalchemy import orm +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import query_expression + +from dataall.db import Resource, utils + +revision = 'b6e0ac8f6d3f' +down_revision = '967fa9c0a147' +branch_labels = None +depends_on = None +Base = declarative_base() + + +class Environment(Resource, Base): + __tablename__ = 'environment' + organizationUri = Column(String, nullable=False) + environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) + AwsAccountId = Column(String, nullable=False) + region = Column(String, nullable=False, default='eu-west-1') + cognitoGroupName = Column(String, nullable=True) + + validated = Column(Boolean, default=False) + environmentType = Column(String, nullable=False, default='Data') + isOrganizationDefaultEnvironment = Column(Boolean, default=False) + EnvironmentDefaultIAMRoleName = Column(String, nullable=False) + EnvironmentDefaultIAMRoleArn = Column(String, nullable=False) + EnvironmentDefaultBucketName = Column(String) + roleCreated = Column(Boolean, nullable=False, default=False) + + dashboardsEnabled = Column(Boolean, default=False) + notebooksEnabled = Column(Boolean, default=True) + mlStudiosEnabled = Column(Boolean, default=True) + pipelinesEnabled = Column(Boolean, 
default=True) + warehousesEnabled = Column(Boolean, default=True) + + userRoleInEnvironment = query_expression() + + SamlGroupName = Column(String, nullable=True) + CDKRoleArn = Column(String, nullable=False) + + subscriptionsEnabled = Column(Boolean, default=False) + subscriptionsProducersTopicName = Column(String) + subscriptionsProducersTopicImported = Column(Boolean, default=False) + subscriptionsConsumersTopicName = Column(String) + subscriptionsConsumersTopicImported = Column(Boolean, default=False) + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + try: + bind = op.get_bind() + session = orm.Session(bind=bind) + print('Adding environment flags...') + op.add_column('environment', sa.Column('dashboardsEnabled', sa.Boolean())) + op.add_column('environment', sa.Column('notebooksEnabled', sa.Boolean())) + op.add_column('environment', sa.Column('mlStudiosEnabled', sa.Boolean())) + op.add_column('environment', sa.Column('pipelinesEnabled', sa.Boolean())) + op.add_column('environment', sa.Column('warehousesEnabled', sa.Boolean())) + environments: [Environment] = session.query(Environment).all() + for environment in environments: + print('Back filling quicksight flag') + environment.dashboardsEnabled = environment.quicksight_enabled + environment.notebooksEnabled = True + environment.mlStudiosEnabled = True + environment.pipelinesEnabled = True + environment.warehousesEnabled = True + session.commit() + + print('Dropping quicksight_enabled column...') + op.drop_column('environment', 'quicksight_enabled') + + print('Environment feature flags successfully set up') + + except Exception as e: + print(f'Failed to init permissions due to: {e}') + + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.add_column( + 'environment', + sa.Column( + 'quicksight_enabled', sa.BOOLEAN(), autoincrement=False, nullable=False + ), + ) + op.drop_column('environment', 'warehousesEnabled') + op.drop_column('environment', 'pipelinesEnabled') + op.drop_column('environment', 'mlStudiosEnabled') + op.drop_column('environment', 'notebooksEnabled') + op.drop_column('environment', 'dashboardsEnabled') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/bc6ff74a16bc_clean_up.py b/backend/migrations/versions/bc6ff74a16bc_clean_up.py new file mode 100644 index 000000000..bdedc77b5 --- /dev/null +++ b/backend/migrations/versions/bc6ff74a16bc_clean_up.py @@ -0,0 +1,1039 @@ +"""clean-up + +Revision ID: bc6ff74a16bc +Revises: 4392a0c9747f +Create Date: 2022-09-16 10:45:21.612824 + +""" +from alembic import op +import os +import sqlalchemy as sa +from dataall.db import get_engine, has_table, create_schema_if_not_exists + + +# revision identifiers, used by Alembic. +revision = 'bc6ff74a16bc' +down_revision = '4392a0c9747f' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + envname = os.getenv('envname', 'local') + print('ENVNAME', envname) + print('Clean-up version') + engine = get_engine(envname=envname).engine + try: + # ### These are the tables that should be defined + """ + if not has_table('activity', engine): + print('Create table activity') + op.create_table( + 'activity', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('activityUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=False), + sa.Column('summary', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('activityUri'), + ) + if not has_table('dashboard', engine): + op.create_table( + 'dashboard', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('dashboardUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('namespace', sa.String(), nullable=False), + sa.Column('DashboardId', sa.String(), nullable=False), + 
sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('dashboardUri'), + ) + if not has_table('dashboardshare', engine): + op.create_table( + 'dashboardshare', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('dashboardUri', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('shareUri', 'dashboardUri'), + ) + if not has_table('dataset', engine): + op.create_table( + 'dataset', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueCrawlerName', sa.String(), nullable=True), + sa.Column('GlueCrawlerSchedule', sa.String(), nullable=True), + sa.Column('GlueProfilingJobName', sa.String(), nullable=True), + sa.Column('GlueProfilingTriggerSchedule', sa.String(), nullable=True), + sa.Column('GlueProfilingTriggerName', sa.String(), nullable=True), + sa.Column('GlueDataQualityJobName', sa.String(), nullable=True), + sa.Column('GlueDataQualitySchedule', sa.String(), nullable=True), + sa.Column('GlueDataQualityTriggerName', sa.String(), nullable=True), + sa.Column('IAMDatasetAdminRoleArn', sa.String(), nullable=False), + 
sa.Column('IAMDatasetAdminUserArn', sa.String(), nullable=False), + sa.Column('KmsAlias', sa.String(), nullable=False), + sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('confidentiality', sa.String(), nullable=False), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('bucketCreated', sa.Boolean(), nullable=True), + sa.Column('glueDatabaseCreated', sa.Boolean(), nullable=True), + sa.Column('iamAdminRoleCreated', sa.Boolean(), nullable=True), + sa.Column('iamAdminUserCreated', sa.Boolean(), nullable=True), + sa.Column('kmsAliasCreated', sa.Boolean(), nullable=True), + sa.Column('lakeformationLocationCreated', sa.Boolean(), nullable=True), + sa.Column('bucketPolicyCreated', sa.Boolean(), nullable=True), + sa.Column('businessOwnerEmail', sa.String(), nullable=True), + sa.Column( + 'businessOwnerDelegationEmails', + postgresql.ARRAY(sa.String()), + nullable=True, + ), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + sa.Column('importedS3Bucket', sa.Boolean(), nullable=True), + sa.Column('importedGlueDatabase', sa.Boolean(), nullable=True), + sa.Column('importedKmsKey', sa.Boolean(), nullable=True), + sa.Column('importedAdminRole', sa.Boolean(), nullable=True), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('datasetUri'), + ) + if not has_table('dataset_profiling_run', engine): + op.create_table( + 'dataset_profiling_run', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('profilingRunUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), 
nullable=False), + sa.Column('GlueJobName', sa.String(), nullable=True), + sa.Column('GlueJobRunId', sa.String(), nullable=True), + sa.Column('GlueTriggerSchedule', sa.String(), nullable=True), + sa.Column('GlueTriggerName', sa.String(), nullable=True), + sa.Column('GlueTableName', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=True), + sa.Column( + 'results', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('status', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('profilingRunUri'), + ) + if not has_table('dataset_quality_rule', engine): + op.create_table( + 'dataset_quality_rule', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('ruleUri', sa.String(), nullable=False), + sa.Column('query', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column( + 'logs', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.PrimaryKeyConstraint('ruleUri'), + ) + if not has_table('dataset_storage_location', engine): + op.create_table( + 'dataset_storage_location', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', 
sa.String(), nullable=False), + sa.Column('locationUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('S3Prefix', sa.String(), nullable=False), + sa.Column('S3AccessPoint', sa.String(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('locationCreated', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('locationUri'), + ) + if not has_table('dataset_table', engine): + op.create_table( + 'dataset_table', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('S3Prefix', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueTableName', sa.String(), nullable=False), + sa.Column('GlueTableConfig', sa.Text(), nullable=True), + sa.Column( + 'GlueTableProperties', + postgresql.JSON(astext_type=sa.Text()), + nullable=True, + ), + sa.Column('LastGlueTableStatus', sa.String(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('stage', sa.String(), nullable=True), + sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('confidentiality', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tableUri'), + ) + if not has_table('dataset_table_column', engine): + op.create_table( + 'dataset_table_column', + 
sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('columnUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueTableName', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('typeName', sa.String(), nullable=False), + sa.Column('columnType', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('columnUri'), + ) + if not has_table('dataset_table_profiling_job', engine): + op.create_table( + 'dataset_table_profiling_job', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('jobUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RunCommandId', sa.String(), nullable=True), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueTableName', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + 
sa.PrimaryKeyConstraint('jobUri'), + ) + if not has_table('environment', engine): + op.create_table( + 'environment', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('cognitoGroupName', sa.String(), nullable=True), + sa.Column('resourcePrefix', sa.String(), nullable=False), + sa.Column('validated', sa.Boolean(), nullable=True), + sa.Column('environmentType', sa.String(), nullable=False), + sa.Column( + 'isOrganizationDefaultEnvironment', sa.Boolean(), nullable=True + ), + sa.Column('EnvironmentDefaultIAMRoleName', sa.String(), nullable=False), + sa.Column('EnvironmentDefaultIAMRoleImported', sa.String(), nullable=True), + sa.Column('EnvironmentDefaultIAMRoleArn', sa.String(), nullable=False), + sa.Column('EnvironmentDefaultBucketName', sa.String(), nullable=True), + sa.Column('EnvironmentDefaultAthenaWorkGroup', sa.String(), nullable=True), + sa.Column('roleCreated', sa.Boolean(), nullable=False), + sa.Column('dashboardsEnabled', sa.Boolean(), nullable=True), + sa.Column('notebooksEnabled', sa.Boolean(), nullable=True), + sa.Column('mlStudiosEnabled', sa.Boolean(), nullable=True), + sa.Column('pipelinesEnabled', sa.Boolean(), nullable=True), + sa.Column('warehousesEnabled', sa.Boolean(), nullable=True), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('CDKRoleArn', sa.String(), nullable=False), + 
sa.Column('subscriptionsEnabled', sa.Boolean(), nullable=True), + sa.Column( + 'subscriptionsProducersTopicName', sa.String(), nullable=True + ), + sa.Column( + 'subscriptionsProducersTopicImported', sa.Boolean(), nullable=True + ), + sa.Column( + 'subscriptionsConsumersTopicName', sa.String(), nullable=True + ), + sa.Column( + 'subscriptionsConsumersTopicImported', sa.Boolean(), nullable=True + ), + sa.PrimaryKeyConstraint('environmentUri'), + ) + if not has_table('environment_group_permission', engine): + op.create_table( + 'environment_group_permission', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('invitedBy', sa.String(), nullable=True), + sa.Column('environmentIAMRoleArn', sa.String(), nullable=True), + sa.Column('environmentIAMRoleName', sa.String(), nullable=True), + sa.Column('environmentIAMRoleImported', sa.Boolean(), nullable=True), + sa.Column('environmentAthenaWorkGroup', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('groupRoleInEnvironment', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'environmentUri'), + ) + if not has_table('feed_message', engine): + op.create_table( + 'feed_message', + sa.Column('feedMessageUri', sa.String(), nullable=False), + sa.Column('creator', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=False), + sa.Column('content', sa.String(), nullable=True), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('feedMessageUri'), + ) + if not has_table('glossary_node', engine): + op.create_table( + 'glossary_node', + sa.Column('nodeUri', sa.String(), nullable=False), + sa.Column('parentUri', sa.String(), 
nullable=True), + sa.Column('nodeType', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.Column('path', sa.String(), nullable=False), + sa.Column('label', sa.String(), nullable=False), + sa.Column('readme', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('admin', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('nodeUri'), + ) + if not has_table('glossary_schema', engine): + op.create_table( + 'glossary_schema', + sa.Column('schemaUri', sa.String(), nullable=False), + sa.Column('json_schema', postgresql.JSON(astext_type=sa.Text()), nullable=False), + sa.PrimaryKeyConstraint('schemaUri'), + ) + if not has_table('glossary_schema_map', engine): + op.create_table( + 'glossary_schema_map', + sa.Column('schemaUri', sa.String(), nullable=False), + sa.Column('nodeUri', sa.String(), nullable=False), + sa.Column('schema', postgresql.JSON(astext_type=sa.Text()), nullable=False), + sa.PrimaryKeyConstraint('schemaUri', 'nodeUri'), + ) + if not has_table('term_link', engine): + op.create_table( + 'term_link', + sa.Column('linkUri', sa.String(), nullable=False), + sa.Column('nodeUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('approvedBySteward', sa.Boolean(), nullable=True), + sa.Column('approvedByOwner', sa.Boolean(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('linkUri'), + ) + if not has_table('group', engine): + op.create_table( + 'group', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', 
sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('groupUri', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri'), + ) + if not has_table('group_member', engine): + op.create_table( + 'group_member', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('userName', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleInGroup', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName'), + ) + if not has_table('keyvaluetag', engine): + op.create_table( + 'keyvaluetag', + sa.Column('tagUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('key', sa.String(), nullable=False), + sa.Column('value', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tagUri'), + ) + if not has_table('notification', engine): + op.create_table( + 'notification', + sa.Column('notificationUri', sa.String(), nullable=False), + sa.Column( + 'type', + sa.Enum( + 'SHARE_OBJECT_SUBMITTED', + 'SHARE_ITEM_REQUEST', + 'SHARE_OBJECT_APPROVED', + 'SHARE_OBJECT_REJECTED', + 'SHARE_OBJECT_PENDING_APPROVAL', + 'DATASET_VERSION', + name='notificationtype', + ), + nullable=True, + ), + sa.Column('message', sa.String(), nullable=False), + sa.Column('username', sa.String(), nullable=False), + sa.Column('is_read', sa.Boolean(), nullable=False), + sa.Column('target_uri', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', 
sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('notificationUri'), + ) + if not has_table('organization', engine): + op.create_table( + 'organization', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('organizationUri'), + ) + if not has_table('organization_group', engine): + op.create_table( + 'organization_group', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('invitedBy', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('groupUri', 'organizationUri'), + ) + if not has_table('permissions', engine): + op.create_table( + 'permission', + sa.Column('permissionUri', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column( + 'type', sa.Enum('TENANT', 'RESOURCE', name='permissiontype'), nullable=False + ), + sa.Column('description', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('permissionUri'), + ) + op.create_index(op.f('ix_permission_name'), 'permission', ['name'], unique=False) + if not has_table('redshiftcluster', engine): + 
op.create_table( + 'redshiftcluster', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('clusterArn', sa.String(), nullable=True), + sa.Column('clusterName', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('databaseName', sa.String(), nullable=True), + sa.Column('databaseUser', sa.String(), nullable=True), + sa.Column('masterUsername', sa.String(), nullable=True), + sa.Column('masterDatabaseName', sa.String(), nullable=True), + sa.Column('nodeType', sa.String(), nullable=True), + sa.Column('numberOfNodes', sa.Integer(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=True), + sa.Column('kmsAlias', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.Column('vpc', sa.String(), nullable=True), + sa.Column('subnetGroupName', sa.String(), nullable=True), + sa.Column('subnetIds', sa.ARRAY(sa.String()), nullable=True), + sa.Column('securityGroupIds', sa.ARRAY(sa.String()), nullable=True), + sa.Column('CFNStackName', sa.String(), nullable=True), + sa.Column('CFNStackStatus', sa.String(), nullable=True), + sa.Column('CFNStackArn', sa.String(), nullable=True), + sa.Column('IAMRoles', sa.ARRAY(sa.String()), nullable=True), + sa.Column('endpoint', sa.String(), nullable=True), + sa.Column('port', sa.Integer(), nullable=True), + sa.Column('datahubSecret', sa.String(), nullable=True), + sa.Column('masterSecret', 
sa.String(), nullable=True), + sa.Column('external_schema_created', sa.Boolean(), nullable=True), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri'), + ) + if not has_table('redshiftcluster_dataset', engine): + op.create_table( + 'redshiftcluster_dataset', + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('datasetCopyEnabled', sa.Boolean(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri'), + ) + if not has_table('redshiftcluster_datasettable', engine): + op.create_table( + 'redshiftcluster_datasettable', + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('shareUri', sa.String(), nullable=True), + sa.Column('enabled', sa.Boolean(), nullable=True), + sa.Column('schema', sa.String(), nullable=False), + sa.Column('databaseName', sa.String(), nullable=False), + sa.Column('dataLocation', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri'), + ) + if not has_table('resource_policy', engine): + op.create_table( + 'resource_policy', + sa.Column('sid', sa.String(), nullable=False), + sa.Column('resourceUri', sa.String(), nullable=False), + sa.Column('resourceType', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=False), + sa.Column( + 'principalType', + sa.Enum('USER', 'GROUP', 'SERVICE', name='rp_principal_type'), + nullable=True, + ), + sa.Column('created', 
sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('sid'), + ) + op.create_index( + op.f('ix_resource_policy_principalId'), + 'resource_policy', + ['principalId'], + unique=False, + ) + op.create_index( + op.f('ix_resource_policy_resourceType'), + 'resource_policy', + ['resourceType'], + unique=False, + ) + op.create_index( + op.f('ix_resource_policy_resourceUri'), + 'resource_policy', + ['resourceUri'], + unique=False, + ) + if not has_table('resource_policy_permission', engine): + op.create_table( + 'resource_policy_permission', + sa.Column('sid', sa.String(), nullable=False), + sa.Column('permissionUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint( + ['permissionUri'], + ['permission.permissionUri'], + ), + sa.ForeignKeyConstraint( + ['sid'], + ['resource_policy.sid'], + ), + sa.PrimaryKeyConstraint('sid', 'permissionUri'), + ) + if not has_table('sagemaker_notebook', engine): + op.create_table( + 'sagemaker_notebook', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('notebookUri', sa.String(), nullable=False), + sa.Column('NotebookInstanceName', sa.String(), nullable=False), + sa.Column('NotebookInstanceStatus', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('SamlAdminGroupName', 
sa.String(), nullable=True), + sa.Column('VpcId', sa.String(), nullable=True), + sa.Column('SubnetId', sa.String(), nullable=True), + sa.Column('VolumeSizeInGB', sa.Integer(), nullable=True), + sa.Column('InstanceType', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('notebookUri'), + ) + if not has_table('sagemaker_studio_domain', engine): + op.create_table( + 'sagemaker_studio_domain', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), + sa.Column('SagemakerStudioStatus', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('sagemakerStudioUri'), + ) + if not has_table('sagemaker_studio_user_profile', engine): + op.create_table( + 'sagemaker_studio_user_profile', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioUserProfileUri', sa.String(), nullable=False), + 
sa.Column( + 'sagemakerStudioUserProfileStatus', sa.String(), nullable=False + ), + sa.Column( + 'sagemakerStudioUserProfileName', sa.String(), nullable=False + ), + sa.Column( + 'sagemakerStudioUserProfileNameSlugify', sa.String(), nullable=False + ), + sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('sagemakerStudioUserProfileUri'), + ) + if not has_table('share_object', engine): + op.create_table( + 'share_object', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=True), + sa.Column('principalType', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('confirmed', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('shareUri'), + ) + if not has_table('share_object_item', engine): + op.create_table( + 'share_object_item', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('shareItemUri', sa.String(), nullable=False), + sa.Column('itemType', sa.String(), nullable=False), + sa.Column('itemUri', sa.String(), nullable=False), + sa.Column('itemName', sa.String(), nullable=False), + sa.Column('permission', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=False), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=True), + 
sa.Column('GlueTableName', sa.String(), nullable=True), + sa.Column('S3AccessPointName', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('shareItemUri'), + ) + if not has_table('sqlpipeline', engine): + op.create_table( + 'sqlpipeline', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sqlPipelineUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.Column('repo', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('sqlPipelineUri'), + ) + if not has_table('stack', engine): + op.create_table( + 'stack', + sa.Column('stackUri', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=True), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('accountid', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('cronexpr', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('stack', sa.String(), nullable=False), + sa.Column( + 'payload', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('stackid', sa.String(), nullable=True), + sa.Column( + 'outputs', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + 
sa.Column( + 'resources', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column( + 'error', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('events', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('lastSeen', sa.DateTime(), nullable=True), + sa.Column('EcsTaskArn', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('stackUri'), + ) + if not has_table('tag', engine): + op.create_table( + 'tag', + sa.Column('id', sa.String(), nullable=False), + sa.Column('tag', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id'), + ) + if not has_table('item_tags', engine): + op.create_table( + 'item_tags', + sa.Column('tagid', sa.String(), nullable=False), + sa.Column('itemid', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid'), + ) + if not has_table('task', engine): + op.create_table( + 'task', + sa.Column('taskUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('cronexpr', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=False), + sa.Column('payload', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('response', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('error', postgresql.JSON(astext_type=sa.Text()), nullable=True), + sa.Column('lastSeen', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('taskUri'), + ) + if not has_table('tenant', engine): + op.create_table( + 'tenant', + sa.Column('tenantUri', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('description', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + 
sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('tenantUri'), + ) + op.create_index(op.f('ix_tenant_name'), 'tenant', ['name'], unique=True) + if not has_table('tenant_administrator', engine): + op.create_table( + 'tenant_administrator', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('tenantUri', sa.String(), nullable=False), + sa.Column('userRoleInTenant', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName'), + sa.ForeignKeyConstraint( + ['tenantUri'], + ), + ) + if not has_table('tenant_policy', engine): + op.create_table( + 'tenant_policy', + sa.Column('sid', sa.String(), nullable=False), + sa.Column('tenantUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=False), + sa.Column( + 'principalType', + sa.Enum('USER', 'GROUP', 'SERVICE', name='tenant_principal_type'), + nullable=True, + ), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint( + ['tenantUri'], + ['tenant.tenantUri'], + ), + sa.PrimaryKeyConstraint('sid'), + ) + op.create_index( + op.f('ix_tenant_policy_principalId'), + 'tenant_policy', + ['principalId'], + unique=False, + ) + if not has_table('tenant_policy_permission', engine): + op.create_table( + 'tenant_policy_permission', + sa.Column('sid', sa.String(), nullable=False), + sa.Column('permissionUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.ForeignKeyConstraint( + ['permissionUri'], + ['permission.permissionUri'], + ), + sa.ForeignKeyConstraint( + ['sid'], + ['tenant_policy.sid'], + ), + sa.PrimaryKeyConstraint('sid', 'permissionUri'), + ) + if not has_table('user', engine): + op.create_table( + 'user', + sa.Column('userId', sa.String(), nullable=False), + sa.Column('userName', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userId'), + ) + if not 
has_table('vote', engine): + op.create_table( + 'vote', + sa.Column('voteUri', sa.String(), nullable=False), + sa.Column('username', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('upvote', sa.Boolean(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('voteUri'), + ) + if not has_table('vpc', engine): + op.create_table( + 'vpc', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('vpcUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('VpcId', sa.String(), nullable=False), + sa.Column('privateSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('publicSubnetIds', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('default', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('vpcUri'), + ) + if not has_table('worksheet', engine): + op.create_table( + 'worksheet', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), 
nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('SamlAdminGroupName', sa.String(), nullable=False), + sa.Column('sqlBody', sa.String(), nullable=True), + sa.Column( + 'chartConfig', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('lastSavedAthenaQueryIdForQuery', sa.String(), nullable=True), + sa.Column('lastSavedAthenaQueryIdForChart', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('worksheetUri'), + ) + if not has_table('worksheet_query_result', engine): + op.create_table( + 'worksheet_query_result', + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('AthenaQueryId', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column( + 'queryType', + sa.Enum('chart', 'data', name='querytype'), + nullable=False, + ), + sa.Column('sqlBody', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('OutputLocation', sa.String(), nullable=False), + sa.Column('error', sa.String(), nullable=True), + sa.Column('ElapsedTimeInMs', sa.Integer(), nullable=True), + sa.Column('DataScannedInBytes', sa.Integer(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('AthenaQueryId'), + ) + if not has_table('worksheet_share', engine): + op.create_table( + 'worksheet_share', + sa.Column('worksheetShareUri', sa.String(), nullable=False), + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=False), + sa.Column('principalType', sa.String(), nullable=False), + sa.Column('canEdit', sa.Boolean(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('worksheetShareUri'), + ) + 
""" + + # ### Clean-up old tables ### + old_tables = ['all_permissions', 'apikey', 'athena_query_execution', 'data_access_request', 'dataset_access_point', 'dataset_loader', 'dataset_query', 'dataset_storage_location_permission', 'dataset_table_permission', 'dataset_topic', 'dataset_user_permission', 'document', 'EnvironmentRedshiftCluster', 'environment_permission', 'environment_user_permission', 'key_value_pair', 'lineage_store', 'metadata_facet', 'metadata_tag', 'metric', 'organization_topic', 'organization_user', 'redshift_cluster_user_permission', 'saved_query', 'scheduled_query', 'search_index', 'share_object_history', 'share_object_item_v2', 'share_object_v2', 'userprofile'] + for table in old_tables: + if has_table(table, engine): + print(f"Dropping table: {table}") + op.drop_table(table) + + except Exception as e: + print('Failed to init database due to:', e) + pass +# ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + # ### These are the tables that should be defined + """ + op.drop_table('worksheet_share') + op.drop_table('worksheet_query_result') + op.drop_table('worksheet') + op.drop_table('vpc') + op.drop_table('user') + op.drop_table('tenant_policy_permission') + op.drop_table('tenant_policy') + op.drop_table('tenant_administrator') + op.drop_table('tenant') + op.drop_table('task') + op.drop_table('item_tags') + op.drop_table('tag') + op.drop_table('stack') + op.drop_table('sqlpipeline') + op.drop_table('share_object_item') + op.drop_table('share_object') + op.drop_table('sagemaker_studio_user_profile') + op.drop_table('sagemaker_studio_domain') + op.drop_table('sagemaker_notebook') + op.drop_table('resource_policy_permission') + op.drop_table('resource_policy') + op.drop_table('redshiftcluster_datasettable') + op.drop_table('redshiftcluster_dataset') + op.drop_table('redshiftcluster') + op.drop_table('permissions') + op.drop_table('organization_group') + op.drop_table('organization') + op.drop_table('notification') + op.drop_table('keyvaluetag') + op.drop_table('group_member') + op.drop_table('group') + op.drop_table('term_link') + op.drop_table('glossary_schema_map') + op.drop_table('glossary_schema') + op.drop_table('glossary_node') + op.drop_table('feed_message') + op.drop_table('environment_group_permission') + op.drop_table('environment') + op.drop_table('dataset_table_profiling_job') + op.drop_table('dataset_table_column') + op.drop_table('dataset_table') + op.drop_table('dataset_storage_location') + op.drop_table('dataset_quality_rule') + op.drop_table('dataset_profiling_run') + op.drop_table('dataset') + op.drop_table('dashboardshare') + op.drop_table('dashboard') + op.drop_table('activity') + """ + # ### end Alembic commands ### diff --git a/backend/migrations/versions/bc77fef9d0b2_new_permissions_model.py b/backend/migrations/versions/bc77fef9d0b2_new_permissions_model.py new file mode 100644 index 000000000..e820e4317 --- /dev/null +++ 
"""new permissions model

Introduces the permission/policy data model: ``permission``, ``tenant``,
``resource_policy`` and ``tenant_policy`` tables (plus their permission
association tables), and adjusts existing tables (dataset stewards,
environment group IAM role columns, tenant administrators).

Revision ID: bc77fef9d0b2
Revises: 2ea02fe85af6
Create Date: 2021-08-03 07:51:18.202980

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = 'bc77fef9d0b2'
down_revision = '2ea02fe85af6'
branch_labels = None
depends_on = None


def upgrade():
    """Create the permissions-model tables and migrate existing columns."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Catalog of named permissions (TENANT-level vs RESOURCE-level).
    op.create_table(
        'permission',
        sa.Column('permissionUri', sa.String(), nullable=False),
        sa.Column('name', sa.String(), nullable=False),
        sa.Column(
            'type', sa.Enum('TENANT', 'RESOURCE', name='permissiontype'), nullable=False
        ),
        sa.Column('description', sa.String(), nullable=False),
        sa.Column('created', sa.DateTime(), nullable=True),
        sa.Column('updated', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('permissionUri'),
    )
    op.create_index(op.f('ix_permission_name'), 'permission', ['name'], unique=False)
    # Policy attaching a principal (user/group/service) to a resource.
    op.create_table(
        'resource_policy',
        sa.Column('sid', sa.String(), nullable=False),
        sa.Column('resourceUri', sa.String(), nullable=False),
        sa.Column('resourceType', sa.String(), nullable=False),
        sa.Column('principalId', sa.String(), nullable=False),
        sa.Column(
            'principalType',
            sa.Enum('USER', 'GROUP', 'SERVICE', name='rp_principal_type'),
            nullable=True,
        ),
        sa.Column('created', sa.DateTime(), nullable=True),
        sa.Column('updated', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('sid'),
    )
    op.create_index(
        op.f('ix_resource_policy_principalId'),
        'resource_policy',
        ['principalId'],
        unique=False,
    )
    op.create_index(
        op.f('ix_resource_policy_resourceType'),
        'resource_policy',
        ['resourceType'],
        unique=False,
    )
    op.create_index(
        op.f('ix_resource_policy_resourceUri'),
        'resource_policy',
        ['resourceUri'],
        unique=False,
    )
    op.create_table(
        'tenant',
        sa.Column('tenantUri', sa.String(), nullable=False),
        sa.Column('name', sa.String(), nullable=False),
        sa.Column('description', sa.String(), nullable=True),
        sa.Column('created', sa.DateTime(), nullable=True),
        sa.Column('updated', sa.DateTime(), nullable=True),
        sa.PrimaryKeyConstraint('tenantUri'),
    )
    op.create_index(op.f('ix_tenant_name'), 'tenant', ['name'], unique=True)
    # Association table: which permissions a resource policy grants.
    op.create_table(
        'resource_policy_permission',
        sa.Column('sid', sa.String(), nullable=False),
        sa.Column('permissionUri', sa.String(), nullable=False),
        sa.Column('created', sa.DateTime(), nullable=True),
        sa.Column('updated', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(
            ['permissionUri'],
            ['permission.permissionUri'],
        ),
        sa.ForeignKeyConstraint(
            ['sid'],
            ['resource_policy.sid'],
        ),
        sa.PrimaryKeyConstraint('sid', 'permissionUri'),
    )
    # Policy attaching a principal to a tenant.
    op.create_table(
        'tenant_policy',
        sa.Column('sid', sa.String(), nullable=False),
        sa.Column('tenantUri', sa.String(), nullable=False),
        sa.Column('principalId', sa.String(), nullable=False),
        sa.Column(
            'principalType',
            sa.Enum('USER', 'GROUP', 'SERVICE', name='tenant_principal_type'),
            nullable=True,
        ),
        sa.Column('created', sa.DateTime(), nullable=True),
        sa.Column('updated', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(
            ['tenantUri'],
            ['tenant.tenantUri'],
        ),
        sa.PrimaryKeyConstraint('sid'),
    )
    op.create_index(
        op.f('ix_tenant_policy_principalId'),
        'tenant_policy',
        ['principalId'],
        unique=False,
    )
    # Association table: which permissions a tenant policy grants.
    op.create_table(
        'tenant_policy_permission',
        sa.Column('sid', sa.String(), nullable=False),
        sa.Column('permissionUri', sa.String(), nullable=False),
        sa.Column('created', sa.DateTime(), nullable=True),
        sa.Column('updated', sa.DateTime(), nullable=True),
        sa.ForeignKeyConstraint(
            ['permissionUri'],
            ['permission.permissionUri'],
        ),
        sa.ForeignKeyConstraint(
            ['sid'],
            ['tenant_policy.sid'],
        ),
        sa.PrimaryKeyConstraint('sid', 'permissionUri'),
    )
    # Adjust existing tables for the new model.
    op.add_column('dashboardshare', sa.Column('owner', sa.String(), nullable=True))
    op.add_column('dataset', sa.Column('stewards', sa.String(), nullable=True))
    op.add_column(
        'environment_group_permission',
        sa.Column('invitedBy', sa.String(), nullable=True),
    )
    op.add_column(
        'environment_group_permission',
        sa.Column('environmentIAMRoleArn', sa.String(), nullable=True),
    )
    op.add_column(
        'environment_group_permission',
        sa.Column('environmentIAMRoleName', sa.String(), nullable=True),
    )
    op.add_column(
        'environment_group_permission',
        sa.Column('description', sa.String(), nullable=True),
    )
    op.drop_column('group', 'organizationUri')
    op.drop_column('group', 'groupRoleInOrganization')
    op.add_column(
        'share_object', sa.Column('environmentUri', sa.String(), nullable=True)
    )
    op.add_column(
        'tenant_administrator', sa.Column('tenantUri', sa.String(), nullable=False)
    )
    # NOTE(review): name=None lets the DB backend auto-name this FK; the
    # downgrade's drop_constraint(None, ...) cannot resolve that generated
    # name — confirm the project's naming convention before relying on it.
    op.create_foreign_key(
        None, 'tenant_administrator', 'tenant', ['tenantUri'], ['tenantUri']
    )
    # ### end Alembic commands ###


def downgrade():
    """Drop the permissions-model tables and restore the previous columns."""
    # ### commands auto generated by Alembic - please adjust! ###
    # NOTE(review): Alembic requires an explicit constraint name here; passing
    # None will fail at runtime for the auto-named FK created in upgrade() —
    # verify the generated name (or add a naming convention) before use.
    op.drop_constraint(None, 'tenant_administrator', type_='foreignkey')
    op.drop_column('tenant_administrator', 'tenantUri')
    op.drop_column('share_object', 'environmentUri')
    op.add_column(
        'group',
        sa.Column(
            'groupRoleInOrganization', sa.VARCHAR(), autoincrement=False, nullable=False
        ),
    )
    op.add_column(
        'group',
        sa.Column('organizationUri', sa.VARCHAR(), autoincrement=False, nullable=False),
    )
    op.drop_column('environment_group_permission', 'description')
    op.drop_column('environment_group_permission', 'invitedBy')
    op.drop_column('environment_group_permission', 'environmentIAMRoleArn')
    op.drop_column('environment_group_permission', 'environmentIAMRoleName')
    op.drop_column('dataset', 'stewards')
    op.drop_column('dashboardshare', 'owner')
    # Drop in reverse dependency order: association tables before their parents.
    op.drop_table('tenant_policy_permission')
    op.drop_index(op.f('ix_tenant_policy_principalId'), table_name='tenant_policy')
    op.drop_table('tenant_policy')
    op.drop_table('resource_policy_permission')
    op.drop_index(op.f('ix_tenant_name'), table_name='tenant')
    op.drop_table('tenant')
    op.drop_index(op.f('ix_resource_policy_resourceUri'), table_name='resource_policy')
    op.drop_index(op.f('ix_resource_policy_resourceType'), table_name='resource_policy')
    op.drop_index(op.f('ix_resource_policy_principalId'), table_name='resource_policy')
    op.drop_table('resource_policy')
    op.drop_index(op.f('ix_permission_name'), table_name='permission')
    op.drop_table('permission')
    # ### end Alembic commands ###
dataall.db import get_engine, has_table, create_schema_if_not_exists + +revision = 'bd271a2780b2' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + envname = os.getenv('envname', 'local') + print('ENVNAME', envname) + engine = get_engine(envname=envname).engine + create_schema_if_not_exists(engine, envname) + try: + if not has_table('EnvironmentRedshiftCluster', engine): + op.create_table( + 'EnvironmentRedshiftCluster', + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('environmentUri', 'clusterUri'), + ) + if not has_table('activity', engine): + op.create_table( + 'activity', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('activityUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=False), + sa.Column('summary', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('activityUri'), + ) + if not has_table('airflow_cluster_user_permission', engine): + op.create_table( + 'airflow_cluster_user_permission', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', 
sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleForAirflowCluster', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName', 'clusterUri'), + ) + if not has_table('airflow_project', engine): + op.create_table( + 'airflow_project', + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('projectUri', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('cfnStackName', sa.String(), nullable=True), + sa.Column('cfnStackArn', sa.String(), nullable=True), + sa.Column('cfnStackStatus', sa.String(), nullable=True), + sa.Column('codeRepositoryName', sa.String(), nullable=True), + sa.Column('codeRepositoryLink', sa.String(), nullable=True), + sa.Column('codeRepositoryStatus', sa.String(), nullable=True), + sa.Column('codePipelineStatus', sa.String(), nullable=True), + sa.Column('codePipelineName', sa.String(), nullable=True), + sa.Column('codePipelineLink', sa.String(), nullable=True), + sa.Column('codePipelineArn', sa.String(), nullable=True), + sa.Column('packageName', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('projectUri'), + ) + if not has_table('airflowcluster', engine): + op.create_table( + 'airflowcluster', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('clusterArn', sa.String(), nullable=True), + 
sa.Column('clusterName', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=True), + sa.Column('kmsAlias', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.Column('vpc', sa.String(), nullable=True), + sa.Column('subnetIds', sa.ARRAY(sa.String()), nullable=True), + sa.Column('securityGroupIds', sa.ARRAY(sa.String()), nullable=True), + sa.Column('CFNStackName', sa.String(), nullable=True), + sa.Column('CFNStackStatus', sa.String(), nullable=True), + sa.Column('CFNStackArn', sa.String(), nullable=True), + sa.Column('IAMRoleArn', sa.String(), nullable=True), + sa.Column('presignedUrl', sa.String(), nullable=True), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.Column('configurationOptions', sa.ARRAY(sa.String()), nullable=True), + sa.Column('airflowVersion', sa.String(), nullable=True), + sa.Column('dagS3Path', sa.String(), nullable=True), + sa.Column('pluginsS3Path', sa.String(), nullable=True), + sa.Column('requirementsS3Path', sa.String(), nullable=True), + sa.Column('environmentClass', sa.String(), nullable=True), + sa.Column('loggingConfiguration', sa.ARRAY(sa.String()), nullable=True), + sa.Column('sourceBucketArn', sa.String(), nullable=False), + sa.Column('webServerAccessMode', sa.String(), nullable=True), + sa.Column('maxWorkers', sa.Integer(), nullable=True), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('webServerUrl', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri'), + ) + if not has_table('all_permissions', engine): + op.create_table( + 'all_permissions', + sa.Column('objectUri', sa.String(), nullable=False), + sa.Column('userName', sa.String(), nullable=False), + sa.Column('permission', sa.String(), nullable=False), + sa.Column('scope', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('created', sa.DateTime(), 
nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('objectUri', 'userName'), + ) + if not has_table('apikey', engine): + op.create_table( + 'apikey', + sa.Column('ApiKeyId', sa.String(), nullable=False), + sa.Column('ApiKeySecretHash', sa.String(), nullable=False), + sa.Column('userName', sa.String(), nullable=False), + sa.Column('SamlGroups', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('expires', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('ApiKeyId'), + ) + if not has_table('athena_query_execution', engine): + op.create_table( + 'athena_query_execution', + sa.Column('parentUri', sa.String(), nullable=False), + sa.Column('QueryExecutionId', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('queryid', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('completed', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('QueryExecutionId'), + ) + if not has_table('dashboard', engine): + op.create_table( + 'dashboard', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('dashboardUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('namespace', sa.String(), nullable=False), + sa.Column('DashboardId', sa.String(), nullable=False), + 
sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('dashboardUri'), + ) + if not has_table('dashboardshare', engine): + op.create_table( + 'dashboardshare', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('dashboardUri', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('shareUri', 'dashboardUri'), + ) + if not has_table('data_access_request', engine): + op.create_table( + 'data_access_request', + sa.Column('requestUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=False), + sa.Column('principalType', sa.String(), nullable=False), + sa.Column('principalName', sa.String(), nullable=False), + sa.Column('requester', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('message', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('requestUri'), + ) + if not has_table('dataset', engine): + op.create_table( + 'dataset', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueProfilingJobName', sa.String(), nullable=True), + 
sa.Column('GlueProfilingTriggerSchedule', sa.String(), nullable=True), + sa.Column('GlueProfilingTriggerName', sa.String(), nullable=True), + sa.Column('GlueDataQualityJobName', sa.String(), nullable=True), + sa.Column('GlueDataQualitySchedule', sa.String(), nullable=True), + sa.Column('GlueDataQualityTriggerName', sa.String(), nullable=True), + sa.Column('IAMDatasetAdminRoleArn', sa.String(), nullable=False), + sa.Column('IAMDatasetAdminUserArn', sa.String(), nullable=False), + sa.Column('KmsAlias', sa.String(), nullable=False), + sa.Column('language', sa.String(), nullable=False), + sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('confidentiality', sa.String(), nullable=False), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('bucketCreated', sa.Boolean(), nullable=True), + sa.Column('glueDatabaseCreated', sa.Boolean(), nullable=True), + sa.Column('iamAdminRoleCreated', sa.Boolean(), nullable=True), + sa.Column('iamAdminUserCreated', sa.Boolean(), nullable=True), + sa.Column('kmsAliasCreated', sa.Boolean(), nullable=True), + sa.Column('lakeformationLocationCreated', sa.Boolean(), nullable=True), + sa.Column('bucketPolicyCreated', sa.Boolean(), nullable=True), + sa.Column('businessOwnerEmail', sa.String(), nullable=True), + sa.Column( + 'businessOwnerDelegationEmails', + postgresql.ARRAY(sa.String()), + nullable=True, + ), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + sa.Column('importedS3Bucket', sa.Boolean(), nullable=True), + sa.Column('importedGlueDatabase', sa.Boolean(), nullable=True), + sa.Column('importedKmsKey', sa.Boolean(), nullable=True), + sa.Column('importedAdminRole', sa.Boolean(), nullable=True), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('datasetUri'), + ) + if not has_table('dataset_access_point', engine): + op.create_table( + 'dataset_access_point', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', 
sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('projectUri', sa.String(), nullable=False), + sa.Column('locationUri', sa.String(), nullable=False), + sa.Column('accessPointUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('S3Prefix', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('S3AccessPointName', sa.String(), nullable=False), + sa.Column('accessPointCreated', sa.Boolean(), nullable=False), + sa.PrimaryKeyConstraint('accessPointUri'), + ) + if not has_table('dataset_loader', engine): + op.create_table( + 'dataset_loader', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('loaderUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('IAMPrincipalArn', sa.String(), nullable=False), + sa.Column('IAMRoleId', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('loaderUri'), + ) + if not has_table('dataset_profiling_run', engine): + op.create_table( + 'dataset_profiling_run', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + 
sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('profilingRunUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('GlueJobName', sa.String(), nullable=True), + sa.Column('GlueJobRunId', sa.String(), nullable=True), + sa.Column('GlueTriggerSchedule', sa.String(), nullable=True), + sa.Column('GlueTriggerName', sa.String(), nullable=True), + sa.Column('GlueTableName', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=True), + sa.Column( + 'results', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('status', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('profilingRunUri'), + ) + if not has_table('dataset_quality_rule', engine): + op.create_table( + 'dataset_quality_rule', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('ruleUri', sa.String(), nullable=False), + sa.Column('query', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column( + 'logs', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.PrimaryKeyConstraint('ruleUri'), + ) + if not has_table('dataset_query', engine): + op.create_table( + 'dataset_query', + sa.Column('label', sa.String(), nullable=False), + 
sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('queryUri', sa.String(), nullable=False), + sa.Column('body', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('queryUri'), + ) + if not has_table('dataset_storage_location', engine): + op.create_table( + 'dataset_storage_location', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('locationUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('S3Prefix', sa.String(), nullable=False), + sa.Column('S3AccessPoint', sa.String(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('locationCreated', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('locationUri'), + ) + if not has_table('dataset_storage_location_permission', engine): + op.create_table( + 'dataset_storage_location_permission', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('locationUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', 
sa.DateTime(), nullable=True), + sa.Column( + 'userRoleForDatasetStorageLocation', sa.String(), nullable=False + ), + sa.PrimaryKeyConstraint('userName', 'locationUri'), + ) + if not has_table('dataset_table', engine): + op.create_table( + 'dataset_table', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('S3Prefix', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueTableName', sa.String(), nullable=False), + sa.Column('GlueTableConfig', sa.Text(), nullable=True), + sa.Column( + 'GlueTableProperties', + postgresql.JSON(astext_type=sa.Text()), + nullable=True, + ), + sa.Column('LastGlueTableStatus', sa.String(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('stage', sa.String(), nullable=True), + sa.Column('topics', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('confidentiality', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tableUri'), + ) + if not has_table('dataset_table_column', engine): + op.create_table( + 'dataset_table_column', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), 
nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('columnUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueTableName', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('typeName', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('columnUri'), + ) + if not has_table('dataset_table_permission', engine): + op.create_table( + 'dataset_table_permission', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleForTable', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName', 'tableUri'), + ) + if not has_table('dataset_table_profiling_job', engine): + op.create_table( + 'dataset_table_profiling_job', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('jobUri', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RunCommandId', sa.String(), nullable=True), + sa.Column('GlueDatabaseName', sa.String(), nullable=False), + sa.Column('GlueTableName', sa.String(), 
nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('jobUri'), + ) + if not has_table('dataset_topic', engine): + op.create_table( + 'dataset_topic', + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('topicUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('datasetUri', 'topicUri'), + ) + if not has_table('dataset_user_permission', engine): + op.create_table( + 'dataset_user_permission', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleForDataset', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName', 'datasetUri'), + ) + if not has_table('document', engine): + op.create_table( + 'document', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('parentUri', sa.String(), nullable=False), + sa.Column('md', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('parentUri'), + ) + if not has_table('environment', engine): + op.create_table( + 'environment', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), 
nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('cognitoGroupName', sa.String(), nullable=True), + sa.Column('validated', sa.Boolean(), nullable=True), + sa.Column('environmentType', sa.String(), nullable=False), + sa.Column( + 'isOrganizationDefaultEnvironment', sa.Boolean(), nullable=True + ), + sa.Column('EnvironmentDefaultIAMRoleName', sa.String(), nullable=False), + sa.Column('EnvironmentDefaultIAMRoleArn', sa.String(), nullable=False), + sa.Column('EnvironmentDefaultBucketName', sa.String(), nullable=True), + sa.Column('roleCreated', sa.Boolean(), nullable=False), + sa.Column('quicksight_enabled', sa.Boolean(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('CDKRoleArn', sa.String(), nullable=False), + sa.Column('subscriptionsEnabled', sa.Boolean(), nullable=True), + sa.Column( + 'subscriptionsProducersTopicName', sa.String(), nullable=True + ), + sa.Column( + 'subscriptionsProducersTopicImported', sa.Boolean(), nullable=True + ), + sa.Column( + 'subscriptionsConsumersTopicName', sa.String(), nullable=True + ), + sa.Column( + 'subscriptionsConsumersTopicImported', sa.Boolean(), nullable=True + ), + sa.PrimaryKeyConstraint('environmentUri'), + ) + op.create_table( + 'environment_group_permission', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + 
sa.Column('groupRoleInEnvironment', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'environmentUri'), + ) + op.create_table( + 'environment_permission', + sa.Column('entityUri', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('entityType', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('entityRoleInEnvironment', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('entityUri', 'environmentUri'), + ) + op.create_table( + 'environment_user_permission', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleInEnvironment', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName', 'environmentUri'), + ) + if not has_table('feed_message', engine): + op.create_table( + 'feed_message', + sa.Column('feedMessageUri', sa.String(), nullable=False), + sa.Column('creator', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=False), + sa.Column('content', sa.String(), nullable=True), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('feedMessageUri'), + ) + if not has_table('glossary_node', engine): + op.create_table( + 'glossary_node', + sa.Column('nodeUri', sa.String(), nullable=False), + sa.Column('parentUri', sa.String(), nullable=True), + sa.Column('nodeType', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.Column('path', sa.String(), nullable=False), + sa.Column('label', sa.String(), nullable=False), + sa.Column('readme', sa.String(), 
nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('admin', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('nodeUri'), + ) + if not has_table('group', engine): + op.create_table( + 'group', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('groupRoleInOrganization', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri'), + ) + if not has_table('group_member', engine): + op.create_table( + 'group_member', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('userName', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleInGroup', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('groupUri', 'userName'), + ) + if not has_table('item_tags', engine): + op.create_table( + 'item_tags', + sa.Column('tagid', sa.String(), nullable=False), + sa.Column('itemid', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tagid', 'itemid'), + ) + if not has_table('key_value_pair', engine): + op.create_table( + 'key_value_pair', + sa.Column('kvId', sa.String(), nullable=False), + sa.Column('objectUri', sa.String(), nullable=False), + sa.Column('key', sa.String(), nullable=False), + 
sa.Column('value', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('kvId'), + ) + if not has_table('lineage_store', engine): + op.create_table( + 'lineage_store', + sa.Column('name', sa.String(), nullable=False), + sa.Column('version', sa.String(), nullable=False), + sa.Column('guid', sa.String(), nullable=True), + sa.Column( + 'kind', + sa.Enum( + 'dataset', + 'table', + 'folder', + 'job', + 'run', + 'datasource', + name='datanodetype', + ), + nullable=False, + ), + sa.Column('parent', sa.String(), nullable=True), + sa.Column('ref', sa.String(), nullable=False), + sa.Column('location', sa.String(), nullable=True), + sa.Column('created', sa.String(), nullable=True), + sa.Column('inputs', sa.ARRAY(sa.String()), nullable=True), + sa.Column('outputs', sa.ARRAY(sa.String()), nullable=True), + sa.PrimaryKeyConstraint('name', 'version', 'ref'), + ) + if not has_table('metadata_facet', engine): + op.create_table( + 'metadata_facet', + sa.Column('facetId', sa.String(), nullable=False), + sa.Column('guid', sa.String(), nullable=False), + sa.Column( + '_schema', postgresql.JSON(astext_type=sa.Text()), nullable=False + ), + sa.Column( + 'doc', postgresql.JSON(astext_type=sa.Text()), nullable=False + ), + sa.PrimaryKeyConstraint('facetId'), + ) + if not has_table('metadata_tag', engine): + op.create_table( + 'metadata_tag', + sa.Column('tagId', sa.String(), nullable=False), + sa.Column('nodeId', sa.String(), nullable=False), + sa.Column('nodeKind', sa.String(), nullable=False), + sa.Column('Key', sa.String(), nullable=False), + sa.Column('Value', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('tagId'), + ) + if not has_table('metric', engine): + op.create_table( + 'metric', + sa.Column('metricUri', sa.String(), nullable=False), + sa.Column('metricName', sa.String(), nullable=False), + sa.Column('metricValue', sa.Float(), nullable=False), + sa.Column('tags', sa.ARRAY(sa.String()), nullable=True), + sa.Column('created', sa.DateTime(), nullable=False), + 
sa.Column('emitter', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('target', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('metricUri'), + ) + if not has_table('notification', engine): + op.create_table( + 'notification', + sa.Column('notificationUri', sa.String(), nullable=False), + sa.Column( + 'type', + sa.Enum( + 'SHARE_OBJECT_SUBMITTED', + 'SHARE_ITEM_REQUEST', + 'SHARE_OBJECT_APPROVED', + 'SHARE_OBJECT_REJECTED', + 'SHARE_OBJECT_PENDING_APPROVAL', + 'DATASET_VERSION', + name='notificationtype', + ), + nullable=True, + ), + sa.Column('message', sa.String(), nullable=False), + sa.Column('username', sa.String(), nullable=False), + sa.Column('is_read', sa.Boolean(), nullable=False), + sa.Column('target_uri', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('notificationUri'), + ) + if not has_table('organization', engine): + op.create_table( + 'organization', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('organizationUri'), + ) + op.create_table( + 'organization_topic', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), 
nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('topicUri', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('topicUri'), + ) + op.create_table( + 'organization_user', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleInOrganization', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName', 'organizationUri'), + ) + if not has_table('redshiftcluster', engine): + op.create_table( + 'redshift_cluster_user_permission', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('redshiftClusterUri', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('userRoleForRedshiftCluster', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName', 'redshiftClusterUri'), + ) + op.create_table( + 'redshiftcluster', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('clusterArn', sa.String(), nullable=True), + 
sa.Column('clusterName', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('databaseName', sa.String(), nullable=True), + sa.Column('databaseUser', sa.String(), nullable=True), + sa.Column('masterUsername', sa.String(), nullable=True), + sa.Column('masterDatabaseName', sa.String(), nullable=True), + sa.Column('nodeType', sa.String(), nullable=True), + sa.Column('numberOfNodes', sa.Integer(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=True), + sa.Column('kmsAlias', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=True), + sa.Column('vpc', sa.String(), nullable=True), + sa.Column('subnetGroupName', sa.String(), nullable=True), + sa.Column('subnetIds', sa.ARRAY(sa.String()), nullable=True), + sa.Column('securityGroupIds', sa.ARRAY(sa.String()), nullable=True), + sa.Column('CFNStackName', sa.String(), nullable=True), + sa.Column('CFNStackStatus', sa.String(), nullable=True), + sa.Column('CFNStackArn', sa.String(), nullable=True), + sa.Column('IAMRoles', sa.ARRAY(sa.String()), nullable=True), + sa.Column('endpoint', sa.String(), nullable=True), + sa.Column('port', sa.Integer(), nullable=True), + sa.Column('datahubSecret', sa.String(), nullable=True), + sa.Column('masterSecret', sa.String(), nullable=True), + sa.Column('external_schema_created', sa.Boolean(), nullable=True), + sa.Column('SamlGroupName', sa.String(), nullable=True), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri'), + ) + op.create_table( + 'redshiftcluster_dataset', + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('datasetCopyEnabled', sa.Boolean(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + 
sa.PrimaryKeyConstraint('clusterUri', 'datasetUri'), + ) + op.create_table( + 'redshiftcluster_datasettable', + sa.Column('clusterUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('tableUri', sa.String(), nullable=False), + sa.Column('shareUri', sa.String(), nullable=True), + sa.Column('enabled', sa.Boolean(), nullable=True), + sa.Column('schema', sa.String(), nullable=False), + sa.Column('databaseName', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('clusterUri', 'datasetUri', 'tableUri'), + ) + if not has_table('sagemaker_notebook', engine): + op.create_table( + 'sagemaker_notebook', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('notebookUri', sa.String(), nullable=False), + sa.Column('NotebookInstanceName', sa.String(), nullable=False), + sa.Column('NotebookInstanceStatus', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('notebookUri'), + ) + if not has_table('sagemaker_studio_domain', engine): + op.create_table( + 'sagemaker_studio_domain', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + 
sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), + sa.Column('SagemakerStudioStatus', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('sagemakerStudioUri'), + ) + op.create_table( + 'sagemaker_studio_user_profile', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sagemakerStudioUserProfileUri', sa.String(), nullable=False), + sa.Column( + 'sagemakerStudioUserProfileStatus', sa.String(), nullable=False + ), + sa.Column( + 'sagemakerStudioUserProfileName', sa.String(), nullable=False + ), + sa.Column( + 'sagemakerStudioUserProfileNameSlugify', sa.String(), nullable=False + ), + sa.Column('sagemakerStudioDomainID', sa.String(), nullable=False), + sa.Column('AWSAccountId', sa.String(), nullable=False), + sa.Column('RoleArn', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + 
sa.PrimaryKeyConstraint('sagemakerStudioUserProfileUri'), + ) + if not has_table('saved_query', engine): + op.create_table( + 'saved_query', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('scheduledQueryUri', sa.String(), nullable=False), + sa.Column('savedQueryUri', sa.String(), nullable=False), + sa.Column('queryOrder', sa.Integer(), nullable=False), + sa.Column('sqlBody', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('savedQueryUri'), + ) + if not has_table('scheduled_query', engine): + op.create_table( + 'scheduled_query', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('scheduledQueryUri', sa.String(), nullable=False), + sa.Column('SamlAdminGroupName', sa.String(), nullable=True), + sa.Column('cronexpr', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('scheduledQueryUri'), + ) + if not has_table('search_index', engine): + op.create_table( + 'search_index', + sa.Column('objectUri', sa.String(), nullable=False), + sa.Column('objectType', sa.String(), nullable=False), + sa.Column('label', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', 
postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('searcAttribute1', sa.String(), nullable=True), + sa.Column('searcAttribute2', sa.String(), nullable=True), + sa.Column('searcAttribute3', sa.String(), nullable=True), + sa.Column('searcAttribute4', sa.String(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('objectUri'), + ) + if not has_table('share_object', engine): + op.create_table( + 'share_object', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=True), + sa.Column('principalType', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('confirmed', sa.Boolean(), nullable=True), + sa.PrimaryKeyConstraint('shareUri'), + ) + if not has_table('share_object_history', engine): + op.create_table( + 'share_object_history', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('historyUri', sa.String(), nullable=False), + sa.Column('actionName', sa.String(), nullable=False), + sa.Column( + 'actionPayload', + postgresql.JSON(astext_type=sa.Text()), + nullable=True, + ), + sa.PrimaryKeyConstraint('historyUri'), + ) 
+ if not has_table('share_object_item', engine): + op.create_table( + 'share_object_item', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('shareItemUri', sa.String(), nullable=False), + sa.Column('itemType', sa.String(), nullable=False), + sa.Column('itemUri', sa.String(), nullable=False), + sa.Column('itemName', sa.String(), nullable=False), + sa.Column('permission', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=False), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=True), + sa.Column('GlueTableName', sa.String(), nullable=True), + sa.Column('S3AccessPointName', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('shareItemUri'), + ) + if not has_table('share_object_item_v2', engine): + op.create_table( + 'share_object_item_v2', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('version', sa.Integer(), nullable=False), + sa.Column('shareItemUri', sa.String(), nullable=False), + sa.Column('itemType', sa.String(), nullable=False), + sa.Column('itemUri', sa.String(), nullable=False), + sa.Column('itemName', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=False), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('GlueDatabaseName', sa.String(), nullable=True), + sa.Column('GlueTableName', sa.String(), nullable=True), + sa.Column('S3AccessPointName', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('shareItemUri'), + ) + if not has_table('share_object_v2', engine): + op.create_table( + 'share_object_v2', + sa.Column('shareUri', sa.String(), nullable=False), + sa.Column('version', sa.Integer(), nullable=False), + sa.Column('latest', 
sa.Boolean(), nullable=False), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('datasetName', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=True), + sa.Column('principalType', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('shareUri', 'version'), + ) + if not has_table('sqlpipeline', engine): + op.create_table( + 'sqlpipeline', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('sqlPipelineUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('SamlGroupName', sa.String(), nullable=False), + sa.Column('repo', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('sqlPipelineUri'), + ) + if not has_table('stack', engine): + op.create_table( + 'stack', + sa.Column('stackUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('accountid', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('cronexpr', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('stack', sa.String(), nullable=False), + sa.Column( + 'payload', postgresql.JSON(astext_type=sa.Text()), nullable=True 
+ ), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('stackid', sa.String(), nullable=True), + sa.Column( + 'outputs', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column( + 'resources', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column( + 'error', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('lastSeen', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('stackUri'), + ) + if not has_table('tag', engine): + op.create_table( + 'tag', + sa.Column('id', sa.String(), nullable=False), + sa.Column('tag', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id'), + ) + if not has_table('task', engine): + op.create_table( + 'task', + sa.Column('taskUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('cronexpr', sa.String(), nullable=True), + sa.Column('status', sa.String(), nullable=False), + sa.Column('action', sa.String(), nullable=False), + sa.Column( + 'payload', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column( + 'response', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column( + 'error', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('lastSeen', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('taskUri'), + ) + if not has_table('tenant_administrator', engine): + op.create_table( + 'tenant_administrator', + sa.Column('userName', sa.String(), nullable=False), + sa.Column('userRoleInTenant', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userName'), + ) + if not has_table('term_link', engine): + op.create_table( + 'term_link', + sa.Column('linkUri', sa.String(), nullable=False), + 
sa.Column('nodeUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('approvedBySteward', sa.Boolean(), nullable=True), + sa.Column('approvedByOwner', sa.Boolean(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('linkUri'), + ) + if not has_table('user', engine): + op.create_table( + 'user', + sa.Column('userId', sa.String(), nullable=False), + sa.Column('userName', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('userId'), + ) + if not has_table('userprofile', engine): + op.create_table( + 'userprofile', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('username', sa.String(), nullable=False), + sa.Column('bio', sa.String(), nullable=True), + sa.Column('b64EncodedAvatar', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('username'), + ) + if not has_table('vpc', engine): + op.create_table( + 'vpc', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + 
sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('vpcUri', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=True), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('VpcId', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('vpcUri'), + ) + if not has_table('worksheet', engine): + op.create_table( + 'worksheet', + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('SamlAdminGroupName', sa.String(), nullable=False), + sa.Column('sqlBody', sa.String(), nullable=True), + sa.Column( + 'chartConfig', postgresql.JSON(astext_type=sa.Text()), nullable=True + ), + sa.Column('lastSavedAthenaQueryIdForQuery', sa.String(), nullable=True), + sa.Column('lastSavedAthenaQueryIdForChart', sa.String(), nullable=True), + sa.PrimaryKeyConstraint('worksheetUri'), + ) + op.create_table( + 'worksheet_query_result', + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('AthenaQueryId', sa.String(), nullable=False), + sa.Column('status', sa.String(), nullable=False), + sa.Column( + 'queryType', + sa.Enum('chart', 'data', name='querytype'), + nullable=False, + ), + sa.Column('sqlBody', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('OutputLocation', sa.String(), nullable=False), + sa.Column('error', sa.String(), nullable=True), + sa.Column('ElapsedTimeInMs', sa.Integer(), nullable=True), + sa.Column('DataScannedInBytes', sa.Integer(), nullable=True), + 
sa.Column('created', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('AthenaQueryId'), + ) + op.create_table( + 'worksheet_share', + sa.Column('worksheetShareUri', sa.String(), nullable=False), + sa.Column('worksheetUri', sa.String(), nullable=False), + sa.Column('principalId', sa.String(), nullable=False), + sa.Column('principalType', sa.String(), nullable=False), + sa.Column('canEdit', sa.Boolean(), nullable=True), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('worksheetShareUri'), + ) + except Exception as e: + print('Failed to init database due to:', e) + pass + + +# ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('worksheet_share') + op.drop_table('worksheet_query_result') + op.drop_table('worksheet') + op.drop_table('vpc') + op.drop_table('userprofile') + op.drop_table('user') + op.drop_table('term_link') + op.drop_table('tenant_administrator') + op.drop_table('task') + op.drop_table('tag') + op.drop_table('stack') + op.drop_table('sqlpipeline') + op.drop_table('share_object_v2') + op.drop_table('share_object_item_v2') + op.drop_table('share_object_item') + op.drop_table('share_object_history') + op.drop_table('share_object') + op.drop_table('search_index') + op.drop_table('scheduled_query') + op.drop_table('saved_query') + op.drop_table('sagemaker_studio_user_profile') + op.drop_table('sagemaker_studio_domain') + op.drop_table('sagemaker_notebook') + op.drop_table('redshiftcluster_datasettable') + op.drop_table('redshiftcluster_dataset') + op.drop_table('redshiftcluster') + op.drop_table('redshift_cluster_user_permission') + op.drop_table('organization_user') + op.drop_table('organization_topic') + op.drop_table('organization') + op.drop_table('notification') + op.drop_table('metric') + op.drop_table('metadata_tag') + 
op.drop_table('metadata_facet') + op.drop_table('lineage_store') + op.drop_table('key_value_pair') + op.drop_table('item_tags') + op.drop_table('group_member') + op.drop_table('group') + op.drop_table('glossary_node') + op.drop_table('feed_message') + op.drop_table('environment_user_permission') + op.drop_table('environment_permission') + op.drop_table('environment_group_permission') + op.drop_table('environment') + op.drop_table('document') + op.drop_table('dataset_user_permission') + op.drop_table('dataset_topic') + op.drop_table('dataset_table_profiling_job') + op.drop_table('dataset_table_permission') + op.drop_table('dataset_table_column') + op.drop_table('dataset_table') + op.drop_table('dataset_storage_location_permission') + op.drop_table('dataset_storage_location') + op.drop_table('dataset_query') + op.drop_table('dataset_quality_rule') + op.drop_table('dataset_profiling_run') + op.drop_table('dataset_loader') + op.drop_table('dataset_access_point') + op.drop_table('dataset') + op.drop_table('data_access_request') + op.drop_table('dashboardshare') + op.drop_table('dashboard') + op.drop_table('athena_query_execution') + op.drop_table('apikey') + op.drop_table('all_permissions') + op.drop_table('airflowcluster') + op.drop_table('airflow_project') + op.drop_table('airflow_cluster_user_permission') + op.drop_table('activity') + op.drop_table('EnvironmentRedshiftCluster') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/bd4bea86de30_release_3_6_0.py b/backend/migrations/versions/bd4bea86de30_release_3_6_0.py new file mode 100644 index 000000000..5bef57808 --- /dev/null +++ b/backend/migrations/versions/bd4bea86de30_release_3_6_0.py @@ -0,0 +1,38 @@ +"""release 3.6.0 + +Revision ID: bd4bea86de30 +Revises: c5c6bbbc5de7 +Create Date: 2021-11-29 06:10:27.519546 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = 'bd4bea86de30' +down_revision = 'c5c6bbbc5de7' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + 'vote', + sa.Column('voteUri', sa.String(), nullable=False), + sa.Column('username', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('upvote', sa.Boolean(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('voteUri'), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('vote') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/be22468d7342_dataset_column_type.py b/backend/migrations/versions/be22468d7342_dataset_column_type.py new file mode 100644 index 000000000..d76b7b0a6 --- /dev/null +++ b/backend/migrations/versions/be22468d7342_dataset_column_type.py @@ -0,0 +1,30 @@ +"""dataset column type + +Revision ID: be22468d7342 +Revises: 5d5102986ce5 +Create Date: 2021-07-02 07:39:46.442637 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'be22468d7342' +down_revision = '5d5102986ce5' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.add_column( + 'dataset_table_column', sa.Column('columnType', sa.String(), nullable=True) + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('dataset_table_column', 'columnType') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/c5c6bbbc5de7_release_3_5_0.py b/backend/migrations/versions/c5c6bbbc5de7_release_3_5_0.py new file mode 100644 index 000000000..34aabdf2c --- /dev/null +++ b/backend/migrations/versions/c5c6bbbc5de7_release_3_5_0.py @@ -0,0 +1,104 @@ +"""release 3.5.0 + +Revision ID: c5c6bbbc5de7 +Revises: b6e0ac8f6d3f +Create Date: 2021-11-15 08:47:40.128047 + +""" +import sqlalchemy as sa +from alembic import op +from sqlalchemy import Boolean, Column, String, orm +from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy.orm import query_expression + +from dataall.db import Resource, utils + +# revision identifiers, used by Alembic. +revision = 'c5c6bbbc5de7' +down_revision = 'b6e0ac8f6d3f' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class Environment(Resource, Base): + __tablename__ = 'environment' + organizationUri = Column(String, nullable=False) + environmentUri = Column(String, primary_key=True, default=utils.uuid('environment')) + AwsAccountId = Column(String, nullable=False) + region = Column(String, nullable=False, default='eu-west-1') + cognitoGroupName = Column(String, nullable=True) + + validated = Column(Boolean, default=False) + environmentType = Column(String, nullable=False, default='Data') + isOrganizationDefaultEnvironment = Column(Boolean, default=False) + EnvironmentDefaultIAMRoleName = Column(String, nullable=False) + EnvironmentDefaultIAMRoleArn = Column(String, nullable=False) + EnvironmentDefaultBucketName = Column(String) + roleCreated = Column(Boolean, nullable=False, default=False) + + EnvironmentDefaultIAMRoleImported = Column(Boolean, default=False) + resourcePrefix = Column(String, nullable=False, default='dh') + + dashboardsEnabled = Column(Boolean, default=False) + notebooksEnabled = Column(Boolean, default=True) + mlStudiosEnabled = Column(Boolean, default=True) + 
pipelinesEnabled = Column(Boolean, default=True) + warehousesEnabled = Column(Boolean, default=True) + + userRoleInEnvironment = query_expression() + + SamlGroupName = Column(String, nullable=True) + CDKRoleArn = Column(String, nullable=False) + + subscriptionsEnabled = Column(Boolean, default=False) + subscriptionsProducersTopicName = Column(String) + subscriptionsProducersTopicImported = Column(Boolean, default=False) + subscriptionsConsumersTopicName = Column(String) + subscriptionsConsumersTopicImported = Column(Boolean, default=False) + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + 'keyvaluetag', + sa.Column('tagUri', sa.String(), nullable=False), + sa.Column('targetUri', sa.String(), nullable=False), + sa.Column('targetType', sa.String(), nullable=False), + sa.Column('key', sa.String(), nullable=False), + sa.Column('value', sa.String(), nullable=False), + sa.PrimaryKeyConstraint('tagUri'), + ) + bind = op.get_bind() + session = orm.Session(bind=bind) + print('Adding environment resourcePrefix...') + op.add_column( + 'environment', sa.Column('resourcePrefix', sa.String(), nullable=False) + ) + op.add_column( + 'environment', + sa.Column('EnvironmentDefaultIAMRoleImported', sa.Boolean(), nullable=True), + ) + op.add_column( + 'environment_group_permission', + sa.Column('environmentIAMRoleImported', sa.Boolean(), nullable=True), + ) + + environments: [Environment] = session.query(Environment).all() + for environment in environments: + print(f'Back filling resourcePrefix to environment {environment.label}') + environment.resourcePrefix = 'dh' + session.commit() + + print('Successfully back filled resourcePrefix ') + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column('environment_group_permission', 'environmentIAMRoleImported') + op.drop_column('environment', 'EnvironmentDefaultIAMRoleImported') + op.drop_column('environment', 'resourcePrefix') + op.drop_table('keyvaluetag') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/decc96c5670f_organization_groups.py b/backend/migrations/versions/decc96c5670f_organization_groups.py new file mode 100644 index 000000000..643522142 --- /dev/null +++ b/backend/migrations/versions/decc96c5670f_organization_groups.py @@ -0,0 +1,38 @@ +"""organization groups + +Revision ID: decc96c5670f +Revises: 74b89c64f330 +Create Date: 2021-08-13 08:17:02.257680 + +""" +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'decc96c5670f' +down_revision = '74b89c64f330' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.create_table( + 'organization_group', + sa.Column('groupUri', sa.String(), nullable=False), + sa.Column('organizationUri', sa.String(), nullable=False), + sa.Column('invitedBy', sa.String(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('groupUri', 'organizationUri'), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_table('organization_group') + # ### end Alembic commands ### diff --git a/backend/migrations/versions/e177eb044b31_init_tenant.py b/backend/migrations/versions/e177eb044b31_init_tenant.py new file mode 100644 index 000000000..8cff870d2 --- /dev/null +++ b/backend/migrations/versions/e177eb044b31_init_tenant.py @@ -0,0 +1,42 @@ +"""init tenant + +Revision ID: e177eb044b31 +Revises: 033c3d6c1849 +Create Date: 2021-08-07 16:47:19.443969 + +""" +from alembic import op + +# revision identifiers, used by Alembic. +from sqlalchemy import orm + +from dataall import db +from dataall.db import api + +revision = 'e177eb044b31' +down_revision = '033c3d6c1849' +branch_labels = None +depends_on = None + + +def upgrade(): + try: + bind = op.get_bind() + session = orm.Session(bind=bind) + print('Initializing permissions...') + db.api.Tenant.save_tenant(session, name='dataall', description='Tenant dataall') + print('Tenant initialized successfully') + print('Attaching superusers group DHAdmins...') + api.TenantPolicy.attach_group_tenant_policy( + session=session, + group='DHAdmins', + permissions=db.permissions.TENANT_ALL, + tenant_name='dataall', + ) + print('Attaching superusers groups DHAdmins') + except Exception as e: + print(f'Failed to init permissions due to: {e}') + + +def downgrade(): + pass diff --git a/backend/migrations/versions/e72009ab3b9a_updating_pipelines.py b/backend/migrations/versions/e72009ab3b9a_updating_pipelines.py new file mode 100644 index 000000000..d9144e1fb --- /dev/null +++ b/backend/migrations/versions/e72009ab3b9a_updating_pipelines.py @@ -0,0 +1,48 @@ +"""create account table + +Revision ID: e72009ab3b9a +Revises: 5e722995fa0b +Create Date: 2022-05-16 14:52:40.347079 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
+revision = 'e72009ab3b9a' +down_revision = '5e722995fa0b' +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.rename_table('sqlpipeline', 'datapipeline') + op.add_column( + 'datapipeline', sa.Column('devStrategy', sa.String(), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('devStages', postgresql.ARRAY(sa.String()), nullable=True) + ) + op.add_column( + 'datapipeline', sa.Column('template', sa.String(), nullable=True) + ) + op.alter_column( + 'datapipeline', 'sqlPipelineUri', new_column_name='DataPipelineUri' + ) + # ### end Alembic commands ### + pass + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column('datapipeline', 'devStrategy') + op.drop_column('datapipeline', 'devStages') + op.drop_column('datapipeline', 'template') + op.alter_column( + 'datapipeline', 'DataPipelineUri', new_column_name='sqlPipelineUri' + ) + op.rename_table('datapipeline', 'sqlpipeline') + # ### end Alembic commands ### + pass From e6b024aa53f7832a8cad49d08cb4b13c3cae98ed Mon Sep 17 00:00:00 2001 From: dlpzx Date: Fri, 16 Sep 2022 14:21:15 +0200 Subject: [PATCH 08/19] Added clean-up version and v1.2.0 - added Resource columns for pipeline environment --- .../45a4a4702af1_opensource_v1_2_0.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/backend/migrations/versions/45a4a4702af1_opensource_v1_2_0.py b/backend/migrations/versions/45a4a4702af1_opensource_v1_2_0.py index 16471893c..9422b49df 100644 --- a/backend/migrations/versions/45a4a4702af1_opensource_v1_2_0.py +++ b/backend/migrations/versions/45a4a4702af1_opensource_v1_2_0.py @@ -6,10 +6,8 @@ """ from alembic import op -import sqlalchemy as sa from sqlalchemy.dialects import postgresql -from sqlalchemy import Column, TIMESTAMP, INTEGER, VARCHAR, NVARCHAR -from dataall.db import get_engine, has_table, create_schema_if_not_exists +import sqlalchemy as sa # 
revision identifiers, used by Alembic. @@ -20,23 +18,30 @@ def upgrade(): - print('Open-source v_1.2.0') op.drop_column('datapipeline', 'devStages') op.drop_column('datapipeline', 'inputDatasetUri') op.drop_column('datapipeline', 'outputDatasetUri') op.create_table( 'datapipelineenvironments', - Column('envPipelineUri', VARCHAR(50), primary_key=True), - Column('environmentUri', VARCHAR(50), nullable=False), - Column('environmentLabel', VARCHAR(50), nullable=False), - Column('pipelineUri', VARCHAR(50), nullable=False), - Column('pipelineLabel', VARCHAR(50), nullable=False), - Column('stage', VARCHAR(50), nullable=False), - Column('order', INTEGER, nullable=False), - Column('region', VARCHAR(50), nullable=False), - Column('AwsAccountId', VARCHAR(50), nullable=False), - Column('samlGroupName', VARCHAR(50), nullable=False), + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('envPipelineUri', sa.String(), nullable=False), + sa.Column('environmentUri', sa.String(), nullable=False), + sa.Column('environmentLabel', sa.String(), nullable=False), + sa.Column('pipelineUri', sa.String(), nullable=False), + sa.Column('pipelineLabel', sa.String(), nullable=False), + sa.Column('stage', sa.String(), nullable=False), + sa.Column('order', sa.Integer, nullable=False), + sa.Column('region', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('samlGroupName', sa.String(), nullable=False), sa.PrimaryKeyConstraint('envPipelineUri'), ) pass From 6c9aa2634736592c644fa93f3f1d675c5a54a4a9 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 10 Oct 2022 
15:40:12 +0200 Subject: [PATCH 09/19] SSM cdk.json deploy application --- cdk.json | 33 +-------------------------------- deploy/app.py | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 33 insertions(+), 39 deletions(-) diff --git a/cdk.json b/cdk.json index a208e7ac4..eda207219 100644 --- a/cdk.json +++ b/cdk.json @@ -4,37 +4,6 @@ "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": false, "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": false, "@aws-cdk/aws-rds:lowercaseDbIdentifier": false, - "@aws-cdk/core:stackRelativeExports": false, - "tooling_region": "string_TOOLING_REGION|DEFAULT=eu-west-1", - "tooling_vpc_id": "string_IMPORT_AN_EXISTING_VPC_FROM_TOOLING|DEFAULT=None", - "git_branch": "string_GIT_BRANCH_NAME|DEFAULT=dataall", - "git_release": "boolean_MANAGE_GIT_RELEASE|DEFAULT=false", - "quality_gate": "boolean_MANAGE_QUALITY_GATE_STAGE|DEFAULT=true", - "resource_prefix": "string_PREFIX_FOR_ALL_RESOURCES_CREATED_BY_THIS_APP|DEFAULT=dataall", - "DeploymentEnvironments": [ - { - "envname": "string_ENVIRONMENT_NAME|REQUIRED", - "account": "string_DEPLOYMENT_ACCOUNT|REQUIRED", - "region": "string_DEPLOYMENT_REGION|REQUIRED", - "with_approval": "boolean_ADD_CODEPIPELINE_APPROVAL_STEP|DEFAULT=false", - "vpc_id": "string_DEPLOY_WITHIN_AN_EXISTING_VPC|DEFAULT=None", - "vpc_endpoints_sg": "string_DEPLOY_WITHIN_EXISTING_VPC_SG|DEFAULT=None", - "internet_facing": "boolean_CLOUDFRONT_IF_TRUE_ELSE_ECS_BEHIND_INTERNAL_ALB|DEFAULT=true", - "custom_domain": { - "hosted_zone_name": "string_ROUTE_53_EXISTING_DOMAIN_NAME|DEFAULT=None, REQUIRED if internet_facing=false", - "hosted_zone_id": "string_ROUTE_53_EXISTING_HOSTED_ZONE_ID|DEFAULT=None, REQUIRED if internet_facing=false" - }, - "custom_waf_rules": { - "allowed_geo_list": "list_of_strings_COUNTRIES_CODE_ALLOWED_THROUGH_WAF_RULE", - "allowed_ip_list": "list_of_strings_IP_ADDRESSES_ALLOWED_THROUGH_WAF_RULE" - }, - "ip_ranges": 
"list_of_strings_IP_RANGES_TO_ALLOW_IF_NOT_INTERNET_FACING|DEFAULT=None", - "apig_vpce": "string_USE_AN_EXISTING_VPCE_FOR_APIG_IF_NOT_INTERNET_FACING|DEFAULT=None", - "prod_sizing": "boolean_SET_INFRA_SIZING_TO_PROD_VALUES_IF_TRUE|DEFAULT=true", - "enable_cw_rum": "boolean_SET_CLOUDWATCH_RUM_APP_MONITOR|DEFAULT=false", - "enable_cw_canaries": "boolean_SET_CLOUDWATCH_CANARIES_FOR_FRONTEND_TESTING|DEFAULT=false", - "enable_quicksight_monitoring": "boolean_SET_CLOUDWATCH_CANARIES_FOR_FRONTEND_TESTING|DEFAULT=false" - } - ] + "@aws-cdk/core:stackRelativeExports": false } } diff --git a/deploy/app.py b/deploy/app.py index e3da0fb12..38f82a1cd 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -1,34 +1,59 @@ #!/usr/bin/env python3 - +import json import os +import re import boto3 +from aws_cdk import aws_ssm as ssm from aws_cdk import App, Environment, Aspects from cdk_nag import AwsSolutionsChecks, NagSuppressions, NagPackSuppression from stacks.cdk_nag_exclusions import PIPELINE_STACK_CDK_NAG_EXCLUSIONS from stacks.pipeline import PipelineStack + +def get_cdk_json_from_ssm(): + ssmc = boto3.client('ssm') + + try: + return ssmc.get_parameter(Name="/dataall/v1m1m0/cdkjson") + except ssmc.exceptions.ParameterNotFound as err: + raise Exception(err) + account_id = boto3.client('sts').get_caller_identity().get('Account') or os.getenv( 'CDK_DEFAULT_ACCOUNT' ) -app = App() + + +# Configuration of the cdk.json SSM or in Repository +ssmc = boto3.client('ssm') +try: + print("Trying to get cdkjson parameter from SSM") + response = ssmc.get_parameter(Name="/dataall/v1m1m0/cdkjson") + cdkjson = json.loads(response['Parameter']['Value']).get('context') + print(f"context = {str(cdkjson)}") + + app = App(context=cdkjson) + + +except ssmc.exceptions.ParameterNotFound: + print("SSM parameter not found - Proceeding with cdk.json and cdk.context.json in code") + app = App() git_branch = app.node.try_get_context('git_branch') or 'main' +print("git_branch") -cdk_pipeline_region = 
app.node.try_get_context('tooling_region') or os.getenv( - 'CDK_DEFAULT_REGION' -) +cdk_pipeline_region = app.node.try_get_context('tooling_region') or os.getenv('CDK_DEFAULT_REGION') target_envs = app.node.try_get_context('DeploymentEnvironments') or [ {'envname': 'dev', 'account': account_id, 'region': 'eu-west-1'} ] -env = Environment(account=account_id, region=cdk_pipeline_region) - resource_prefix = app.node.try_get_context('resource_prefix') or 'dataall' +env = Environment(account=account_id, region=cdk_pipeline_region) + pipeline = PipelineStack( app, "{resource_prefix}-{git_branch}-cicd-stack".format(resource_prefix=resource_prefix,git_branch=git_branch), From c11d5d27ed7b3941c5b6c460b72789026f4f011b Mon Sep 17 00:00:00 2001 From: Ramon Bautista Date: Tue, 11 Oct 2022 09:28:12 -0400 Subject: [PATCH 10/19] Parameterized branch; some cleanup --- deploy/app.py | 21 ++++++++++++--------- deploy/stacks/pipeline.py | 9 +++++++++ 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/deploy/app.py b/deploy/app.py index 38f82a1cd..52618d559 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -3,6 +3,7 @@ import json import os import re +import subprocess import boto3 from aws_cdk import aws_ssm as ssm @@ -12,12 +13,11 @@ from stacks.cdk_nag_exclusions import PIPELINE_STACK_CDK_NAG_EXCLUSIONS from stacks.pipeline import PipelineStack +ssmc = boto3.client('ssm') -def get_cdk_json_from_ssm(): - ssmc = boto3.client('ssm') - +def get_cdk_json_from_ssm(git_branch): try: - return ssmc.get_parameter(Name="/dataall/v1m1m0/cdkjson") + return ssmc.get_parameter(Name=f"/dataall/{git_branch}/cdkjson") except ssmc.exceptions.ParameterNotFound as err: raise Exception(err) @@ -25,12 +25,18 @@ def get_cdk_json_from_ssm(): 'CDK_DEFAULT_ACCOUNT' ) +if not os.environ.get("DATAALL_GITHUB_BRANCH", None): + git_branch = ( + subprocess.Popen(['git', 'branch', '--show-current'], stdout=subprocess.PIPE) + .stdout.read().decode('utf-8').removesuffix('\n') + ) +else: + git_branch = 
os.environ().get("DATAALL_GITHUB_BRANCH") # Configuration of the cdk.json SSM or in Repository -ssmc = boto3.client('ssm') try: print("Trying to get cdkjson parameter from SSM") - response = ssmc.get_parameter(Name="/dataall/v1m1m0/cdkjson") + response = ssmc.get_parameter(Name=f"/dataall/{git_branch}/cdkjson") cdkjson = json.loads(response['Parameter']['Value']).get('context') print(f"context = {str(cdkjson)}") @@ -41,9 +47,6 @@ def get_cdk_json_from_ssm(): print("SSM parameter not found - Proceeding with cdk.json and cdk.context.json in code") app = App() -git_branch = app.node.try_get_context('git_branch') or 'main' -print("git_branch") - cdk_pipeline_region = app.node.try_get_context('tooling_region') or os.getenv('CDK_DEFAULT_REGION') target_envs = app.node.try_get_context('DeploymentEnvironments') or [ diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index ceb393205..f1aee86c2 100644 --- a/deploy/stacks/pipeline.py +++ b/deploy/stacks/pipeline.py @@ -210,6 +210,15 @@ def __init__( vpc=self.vpc, ), cross_account_keys=True, + code_build_defaults=pipelines.CodeBuildOptions( + build_environment=codebuild.BuildEnvironment( + environment_variables={ + "DATAALL_GITHUB_BRANCH": codebuild.BuildEnvironmentVariable( + value=git_branch + ), + } + ) + ) ) self.pipeline.node.add_dependency(self.aurora_devdb) From 721629e6e78966704a3b7fb8564a24a7a5ad5715 Mon Sep 17 00:00:00 2001 From: Ramon Bautista Date: Tue, 11 Oct 2022 11:01:11 -0400 Subject: [PATCH 11/19] Removed function no longer needed --- deploy/app.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/deploy/app.py b/deploy/app.py index 52618d559..116620a12 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -15,12 +15,6 @@ ssmc = boto3.client('ssm') -def get_cdk_json_from_ssm(git_branch): - try: - return ssmc.get_parameter(Name=f"/dataall/{git_branch}/cdkjson") - except ssmc.exceptions.ParameterNotFound as err: - raise Exception(err) - account_id = 
boto3.client('sts').get_caller_identity().get('Account') or os.getenv( 'CDK_DEFAULT_ACCOUNT' ) From 89f55d962466ffea57eda67d8a46c7519c8ead82 Mon Sep 17 00:00:00 2001 From: Ramon Bautista Date: Tue, 11 Oct 2022 11:02:31 -0400 Subject: [PATCH 12/19] Updated branch var --- deploy/app.py | 4 ++-- deploy/stacks/pipeline.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/deploy/app.py b/deploy/app.py index 116620a12..530ffb797 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -19,13 +19,13 @@ 'CDK_DEFAULT_ACCOUNT' ) -if not os.environ.get("DATAALL_GITHUB_BRANCH", None): +if not os.environ.get("DATAALL_REPO_BRANCH", None): git_branch = ( subprocess.Popen(['git', 'branch', '--show-current'], stdout=subprocess.PIPE) .stdout.read().decode('utf-8').removesuffix('\n') ) else: - git_branch = os.environ().get("DATAALL_GITHUB_BRANCH") + git_branch = os.environ().get("DATAALL_REPO_BRANCH") # Configuration of the cdk.json SSM or in Repository try: diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index f1aee86c2..d3717d37e 100644 --- a/deploy/stacks/pipeline.py +++ b/deploy/stacks/pipeline.py @@ -213,7 +213,7 @@ def __init__( code_build_defaults=pipelines.CodeBuildOptions( build_environment=codebuild.BuildEnvironment( environment_variables={ - "DATAALL_GITHUB_BRANCH": codebuild.BuildEnvironmentVariable( + "DATAALL_REPO_BRANCH": codebuild.BuildEnvironmentVariable( value=git_branch ), } From 3f52c27f812789bf7bfcf9a8c3cf7b4fb4d25a87 Mon Sep 17 00:00:00 2001 From: Ramon Bautista Date: Tue, 11 Oct 2022 11:14:17 -0400 Subject: [PATCH 13/19] Cleaned up unused libs --- deploy/app.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/deploy/app.py b/deploy/app.py index 530ffb797..610cfdb34 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -2,11 +2,9 @@ import json import os -import re import subprocess import boto3 -from aws_cdk import aws_ssm as ssm from aws_cdk import App, Environment, Aspects from cdk_nag import AwsSolutionsChecks, NagSuppressions, 
NagPackSuppression @@ -36,7 +34,6 @@ app = App(context=cdkjson) - except ssmc.exceptions.ParameterNotFound: print("SSM parameter not found - Proceeding with cdk.json and cdk.context.json in code") app = App() From 66eca874bb34a1802653c602dc96ed329f581920 Mon Sep 17 00:00:00 2001 From: Ramon Bautista Date: Tue, 11 Oct 2022 12:14:42 -0400 Subject: [PATCH 14/19] Typo in get env var --- deploy/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/app.py b/deploy/app.py index 610cfdb34..c024b812b 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -23,7 +23,7 @@ .stdout.read().decode('utf-8').removesuffix('\n') ) else: - git_branch = os.environ().get("DATAALL_REPO_BRANCH") + git_branch = os.environ.get("DATAALL_REPO_BRANCH") # Configuration of the cdk.json SSM or in Repository try: From f7cbb48d2505ee1080543c3ec491e8ac3ccc8b14 Mon Sep 17 00:00:00 2001 From: Ramon Bautista Date: Tue, 11 Oct 2022 16:41:07 -0400 Subject: [PATCH 15/19] Added logging; added exception to SSM --- deploy/app.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/deploy/app.py b/deploy/app.py index c024b812b..326eba4cb 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -1,16 +1,22 @@ #!/usr/bin/env python3 import json +import logging import os import subprocess import boto3 +import botocore from aws_cdk import App, Environment, Aspects from cdk_nag import AwsSolutionsChecks, NagSuppressions, NagPackSuppression from stacks.cdk_nag_exclusions import PIPELINE_STACK_CDK_NAG_EXCLUSIONS from stacks.pipeline import PipelineStack +LOGGING_FORMAT = "[%(asctime)s][%(filename)-13s:%(lineno)3d] %(message)s" +logging.basicConfig(level=logging.INFO, format=LOGGING_FORMAT) +logger = logging.getLogger(__name__) + ssmc = boto3.client('ssm') account_id = boto3.client('sts').get_caller_identity().get('Account') or os.getenv( @@ -27,16 +33,21 @@ # Configuration of the cdk.json SSM or in Repository try: - print("Trying to get cdkjson parameter from SSM") + 
logger.info("Trying to get cdkjson parameter from SSM") response = ssmc.get_parameter(Name=f"/dataall/{git_branch}/cdkjson") cdkjson = json.loads(response['Parameter']['Value']).get('context') - print(f"context = {str(cdkjson)}") app = App(context=cdkjson) + logger.info("Loaded context from SSM") + +except (ssmc.exceptions.ParameterNotFound, botocore.exceptions.ClientError) as err: + if isinstance(err, ssmc.exceptions.ParameterNotFound): + logger.warning("SSM parameter not found - Proceeding with cdk.json and cdk.context.json in code") + else: + logger.error(err) -except ssmc.exceptions.ParameterNotFound: - print("SSM parameter not found - Proceeding with cdk.json and cdk.context.json in code") app = App() + logger.info("Loaded context from file") cdk_pipeline_region = app.node.try_get_context('tooling_region') or os.getenv('CDK_DEFAULT_REGION') From 4f922980c52fe4e462d45c00802ec0f82cee58df Mon Sep 17 00:00:00 2001 From: Ramon Bautista Date: Tue, 11 Oct 2022 20:17:47 -0400 Subject: [PATCH 16/19] tmp github as source --- deploy/stacks/pipeline.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index d3717d37e..14a9343e3 100644 --- a/deploy/stacks/pipeline.py +++ b/deploy/stacks/pipeline.py @@ -2,7 +2,7 @@ import uuid from typing import List -from aws_cdk import Stack, Tags, RemovalPolicy +from aws_cdk import SecretValue, Stack, Tags, RemovalPolicy from aws_cdk import aws_codebuild as codebuild from aws_cdk import aws_codecommit as codecommit from aws_cdk import aws_ec2 as ec2 @@ -188,11 +188,10 @@ def __init__( publish_assets_in_parallel=False, synth=pipelines.CodeBuildStep( 'Synth', - input=CodePipelineSource.code_commit( - repository=codecommit.Repository.from_repository_name( - self, 'sourcerepo', repository_name='dataall' - ), + input=CodePipelineSource.git_hub( + repo_string="awslabs/aws-dataall", branch=self.git_branch, + 
authentication=SecretValue.secrets_manager(secret_id="github-access-token-secret") ), build_environment=codebuild.BuildEnvironment( build_image=codebuild.LinuxBuildImage.AMAZON_LINUX_2_3, From 36f3f2df594bbe6d81f19634e252374858136d07 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 12 Oct 2022 08:16:25 +0200 Subject: [PATCH 17/19] make source control configurable + echo on codebuild env vars --- deploy/app.py | 3 +++ deploy/stacks/pipeline.py | 25 ++++++++++++++++++++----- 2 files changed, 23 insertions(+), 5 deletions(-) diff --git a/deploy/app.py b/deploy/app.py index 326eba4cb..c549b7684 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -57,6 +57,8 @@ resource_prefix = app.node.try_get_context('resource_prefix') or 'dataall' +source = app.node.try_get_context('repository_source') or 'codecommit' + env = Environment(account=account_id, region=cdk_pipeline_region) pipeline = PipelineStack( @@ -66,6 +68,7 @@ target_envs=target_envs, git_branch=git_branch, resource_prefix=resource_prefix, + source=source ) Aspects.of(app).add(AwsSolutionsChecks(reports=True, verbose=False)) diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index 14a9343e3..9ce9d56ee 100644 --- a/deploy/stacks/pipeline.py +++ b/deploy/stacks/pipeline.py @@ -28,12 +28,14 @@ def __init__( target_envs: List = None, git_branch='main', resource_prefix='dataall', + source='codecommit', **kwargs, ): super().__init__(id, scope, **kwargs) self.validate_deployment_params(git_branch, resource_prefix, target_envs) self.git_branch = git_branch + self.source = source self.resource_prefix = resource_prefix self.target_envs = target_envs @@ -180,6 +182,21 @@ def __init__( ) for policy in self.codebuild_policy: self.pipeline_iam_role.add_to_policy(policy) + + if self.source == "github": + source = CodePipelineSource.git_hub( + repo_string="awslabs/aws-dataall", + branch=self.git_branch, + authentication=SecretValue.secrets_manager(secret_id="github-access-token-secret") + ) + + else: + source = 
CodePipelineSource.code_commit( + repository=codecommit.Repository.from_repository_name( + self, 'sourcerepo', repository_name='dataall' + ), + branch=self.git_branch, + ) self.pipeline = pipelines.CodePipeline( self, @@ -188,11 +205,7 @@ def __init__( publish_assets_in_parallel=False, synth=pipelines.CodeBuildStep( 'Synth', - input=CodePipelineSource.git_hub( - repo_string="awslabs/aws-dataall", - branch=self.git_branch, - authentication=SecretValue.secrets_manager(secret_id="github-access-token-secret") - ), + input=source, build_environment=codebuild.BuildEnvironment( build_image=codebuild.LinuxBuildImage.AMAZON_LINUX_2_3, ), @@ -204,6 +217,8 @@ def __init__( f'aws codeartifact login --tool pip --repository {self.codeartifact.pip_repo.attr_name} --domain {self.codeartifact.domain.attr_name} --domain-owner {self.codeartifact.domain.attr_owner}', 'pip install -r deploy/requirements.txt', 'cdk synth', + 'echo ${CODEBUILD_SOURCE_VERSION}', + 'echo ${CODEBUILD_SOURCE_REPO_URL}' ], role_policy_statements=self.codebuild_policy, vpc=self.vpc, From cac2dfee442294983579ddbe6f0a1572c65f047c Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 12 Oct 2022 09:51:15 +0200 Subject: [PATCH 18/19] git branch from codebuild source environment variable --- deploy/app.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/deploy/app.py b/deploy/app.py index c549b7684..4b7c77356 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -23,13 +23,16 @@ 'CDK_DEFAULT_ACCOUNT' ) -if not os.environ.get("DATAALL_REPO_BRANCH", None): +if not os.environ.get("CODEBUILD_SOURCE_VERSION", None): git_branch = ( subprocess.Popen(['git', 'branch', '--show-current'], stdout=subprocess.PIPE) .stdout.read().decode('utf-8').removesuffix('\n') ) else: - git_branch = os.environ.get("DATAALL_REPO_BRANCH") + codebuild_source = os.environ.get("CODEBUILD_SOURCE_VERSION") + git_branch = codebuild_source.replace("arn:aws:s3:::dataall-","").split("-cicd")[0] + +git_branch = git_branch if git_branch != 
"" else "main" # Configuration of the cdk.json SSM or in Repository try: From 4409ec8383dae36aff7b3ab403490bc14a2a3193 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Wed, 12 Oct 2022 09:56:14 +0200 Subject: [PATCH 19/19] modified echo in codebuild --- deploy/app.py | 2 +- deploy/stacks/pipeline.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/deploy/app.py b/deploy/app.py index 4b7c77356..28d926f6b 100644 --- a/deploy/app.py +++ b/deploy/app.py @@ -66,7 +66,7 @@ pipeline = PipelineStack( app, - "{resource_prefix}-{git_branch}-cicd-stack".format(resource_prefix=resource_prefix,git_branch=git_branch), + "{resource_prefix}-{git_branch}-cicd-stack".format(resource_prefix=resource_prefix, git_branch=git_branch), env=env, target_envs=target_envs, git_branch=git_branch, diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index 9ce9d56ee..aa33da3d1 100644 --- a/deploy/stacks/pipeline.py +++ b/deploy/stacks/pipeline.py @@ -217,8 +217,7 @@ def __init__( f'aws codeartifact login --tool pip --repository {self.codeartifact.pip_repo.attr_name} --domain {self.codeartifact.domain.attr_name} --domain-owner {self.codeartifact.domain.attr_owner}', 'pip install -r deploy/requirements.txt', 'cdk synth', - 'echo ${CODEBUILD_SOURCE_VERSION}', - 'echo ${CODEBUILD_SOURCE_REPO_URL}' + 'echo ${CODEBUILD_SOURCE_VERSION}' ], role_policy_statements=self.codebuild_policy, vpc=self.vpc,