From 1f6682824544415ef4438c33f68fbbeadca5ca0a Mon Sep 17 00:00:00 2001 From: Yuri Liang Date: Fri, 7 Oct 2022 18:39:58 +0800 Subject: [PATCH 01/28] add Cognito Group resolver --- backend/dataall/api/Objects/Group/queries.py | 8 +++++- .../dataall/api/Objects/Group/resolvers.py | 18 +++++++++++++ backend/dataall/api/Objects/Group/schema.py | 7 ++++++ backend/dataall/aws/handlers/cognito.py | 25 +++++++++++++++++++ frontend/src/api/Groups/listCognitoGroups.js | 14 +++++++++++ 5 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 backend/dataall/aws/handlers/cognito.py create mode 100644 frontend/src/api/Groups/listCognitoGroups.js diff --git a/backend/dataall/api/Objects/Group/queries.py b/backend/dataall/api/Objects/Group/queries.py index 4d85f0fb4..0af6a0a7f 100644 --- a/backend/dataall/api/Objects/Group/queries.py +++ b/backend/dataall/api/Objects/Group/queries.py @@ -1,5 +1,5 @@ from ... import gql -from .resolvers import get_group, list_datasets_owned_by_env_group, list_data_items_shared_with_env_group +from .resolvers import get_group, list_datasets_owned_by_env_group, list_data_items_shared_with_env_group, list_cognito_groups getGroup = gql.QueryField( name='getGroup', @@ -33,3 +33,9 @@ type=gql.Ref('EnvironmentPublishedItemSearchResults'), test_scope='Dataset', ) + +listCognitoGroups = gql.QueryField( + name='listCognitoGroups', + type=gql.Ref('CognitoGroup'), + resolver=list_cognito_groups +) diff --git a/backend/dataall/api/Objects/Group/resolvers.py b/backend/dataall/api/Objects/Group/resolvers.py index bc9c97815..f8ce604a1 100644 --- a/backend/dataall/api/Objects/Group/resolvers.py +++ b/backend/dataall/api/Objects/Group/resolvers.py @@ -1,7 +1,11 @@ +import os from .... import db from ....db import exceptions from ....db.models import Group from ...constants import * +from ....aws.handlers.parameter_store import ParameterStoreManager +from ....aws.handlers.sts import SessionHelper +from ....aws.handlers.cognito import Cognito def resolve_group_environment_permissions(context, source, environmentUri): @@ -70,3 +74,17 @@ def list_data_items_shared_with_env_group( data=filter, check_perm=True, ) + +def list_cognito_groups(context, source): + current_account = SessionHelper.get_account() + current_region = os.getenv('AWS_REGION', 'eu-west-1') + envname = os.getenv('envname', 'local') + if envname in ['local', 'dkrcompose']: + return ['DAAdministrators'] + parameter_path = f'/dataall/{envname}/cognito/userpool' + user_pool_id = ParameterStoreManager.get_parameter_value(current_account, current_region, parameter_path) + groups = Cognito.list_cognito_groups(current_account, current_region, user_pool_id) + res = [] + for group in groups: + res.append(group['GroupName']) + return res diff --git a/backend/dataall/api/Objects/Group/schema.py b/backend/dataall/api/Objects/Group/schema.py index 624f81db8..75f5350a5 100644 --- a/backend/dataall/api/Objects/Group/schema.py +++ b/backend/dataall/api/Objects/Group/schema.py @@ -46,3 +46,10 @@ gql.Field(name='nodes', type=gql.ArrayType(Group)), ], ) + +CognitoGroup = gql.ObjectType( + name='CognitoGroup', + fields=[ + gql.Field(name='groupName', type=gql.String), + ], +) diff --git a/backend/dataall/aws/handlers/cognito.py b/backend/dataall/aws/handlers/cognito.py new file mode 100644 index 000000000..860bf9f61 --- /dev/null +++ b/backend/dataall/aws/handlers/cognito.py @@ -0,0 +1,25 @@ +import logging + +from .sts import SessionHelper + + +log = logging.getLogger(__name__) + + +class Cognito: + @staticmethod + def 
client(account_id: str, region_name: str, client_type: str): + session = SessionHelper.remote_session(account_id) + return session.client(client_type, region_name=region_name) + + @staticmethod + def list_cognito_groups(account_id: str, region: str, user_pool_id: str): + try: + cognitoCli = Cognito.client(account_id, region, "cognito-idp") + response = cognitoCli.list_groups(UsePoolId=user_pool_id) + except Exception as e: + log.error( + f'Failed to list groups of user pool {user_pool_id} due to {e}' + ) + else: + return response['Groups'] diff --git a/frontend/src/api/Groups/listCognitoGroups.js b/frontend/src/api/Groups/listCognitoGroups.js new file mode 100644 index 000000000..6188fbb95 --- /dev/null +++ b/frontend/src/api/Groups/listCognitoGroups.js @@ -0,0 +1,14 @@ +import { gql } from 'apollo-boost'; + +const listCognitoGroups = () => ({ + variables: { + }, + query: gql` + query listCognitoGroups() { + listCognitoGroups() { + } + } + ` +}); + +export default listCognitoGroups; From e5fc455a6a35f790a51289e588c9e17c84d50952 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Sun, 9 Oct 2022 09:04:16 +0200 Subject: [PATCH 02/28] Modified output to array of groups and list of dictionaries - backend --- backend/dataall/api/Objects/Group/queries.py | 2 +- backend/dataall/api/Objects/Group/resolvers.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/dataall/api/Objects/Group/queries.py b/backend/dataall/api/Objects/Group/queries.py index 0af6a0a7f..6ad2c1ddf 100644 --- a/backend/dataall/api/Objects/Group/queries.py +++ b/backend/dataall/api/Objects/Group/queries.py @@ -36,6 +36,6 @@ listCognitoGroups = gql.QueryField( name='listCognitoGroups', - type=gql.Ref('CognitoGroup'), + type=gql.ArrayType(gql.Ref('CognitoGroup')), resolver=list_cognito_groups ) diff --git a/backend/dataall/api/Objects/Group/resolvers.py b/backend/dataall/api/Objects/Group/resolvers.py index f8ce604a1..7c0221751 100644 --- a/backend/dataall/api/Objects/Group/resolvers.py +++ b/backend/dataall/api/Objects/Group/resolvers.py @@ -86,5 +86,6 @@ def list_cognito_groups(context, source): groups = Cognito.list_cognito_groups(current_account, current_region, user_pool_id) res = [] for group in groups: - res.append(group['GroupName']) + res.append({"groupName": group['GroupName']}) + return res From 12198f6b59ef79038ba26f7e0f78377c517dd2db Mon Sep 17 00:00:00 2001 From: dlpzx Date: Sun, 9 Oct 2022 09:08:21 +0200 Subject: [PATCH 03/28] api call in frontend view --- .../src/views/Environments/EnvironmentTeamInviteForm.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/frontend/src/views/Environments/EnvironmentTeamInviteForm.js b/frontend/src/views/Environments/EnvironmentTeamInviteForm.js index a9082d583..35663ea05 100644 --- a/frontend/src/views/Environments/EnvironmentTeamInviteForm.js +++ b/frontend/src/views/Environments/EnvironmentTeamInviteForm.js @@ -28,6 +28,7 @@ import useClient from '../../hooks/useClient'; import listEnvironmentGroupInvitationPermissions from '../../api/Environment/listEnvironmentPermissions'; import inviteGroupOnEnvironment from '../../api/Environment/inviteGroup'; import listEnvironmentNotInvitedGroups from '../../api/Environment/listNotInvitedGroups'; +import listCognitoGroups from '../../api/Groups/listCognitoGroups'; const EnvironmentTeamInviteForm = (props) => { const { environment, onClose, open, reloadTeams, ...other } = props; @@ -45,16 +46,16 @@ const EnvironmentTeamInviteForm = (props) => { try { setLoadingGroups(true); const response = await 
client.query( - listEnvironmentNotInvitedGroups({ + listCognitoGroups({ environmentUri: environment.environmentUri }) ); if (!response.errors) { setGroupOptions( - response.data.listEnvironmentNotInvitedGroups.nodes.map((g) => ({ + response.data.listCognitoGroups.map((g) => ({ ...g, - value: g.groupUri, - label: g.groupUri + value: g.groupName, + label: g.groupName })) ); } else { From bf2a7c04185a3c31bccbef5e13be1e2f79545a7f Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 10 Oct 2022 08:50:17 +0200 Subject: [PATCH 04/28] api call in frontend view + fix input output in resolver local + fix api src definition --- backend/dataall/api/Objects/Group/resolvers.py | 2 +- frontend/src/api/Groups/listCognitoGroups.js | 7 +++---- .../src/views/Environments/EnvironmentTeamInviteForm.js | 8 +++----- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/backend/dataall/api/Objects/Group/resolvers.py b/backend/dataall/api/Objects/Group/resolvers.py index 7c0221751..035ae61cd 100644 --- a/backend/dataall/api/Objects/Group/resolvers.py +++ b/backend/dataall/api/Objects/Group/resolvers.py @@ -80,7 +80,7 @@ def list_cognito_groups(context, source): current_region = os.getenv('AWS_REGION', 'eu-west-1') envname = os.getenv('envname', 'local') if envname in ['local', 'dkrcompose']: - return ['DAAdministrators'] + return [{"groupName": 'DAAdministrators'}, {"groupName": 'Engineers'}, {"groupName": 'Scientists'}] parameter_path = f'/dataall/{envname}/cognito/userpool' user_pool_id = ParameterStoreManager.get_parameter_value(current_account, current_region, parameter_path) groups = Cognito.list_cognito_groups(current_account, current_region, user_pool_id) diff --git a/frontend/src/api/Groups/listCognitoGroups.js b/frontend/src/api/Groups/listCognitoGroups.js index 6188fbb95..ede8a6cdf 100644 --- a/frontend/src/api/Groups/listCognitoGroups.js +++ b/frontend/src/api/Groups/listCognitoGroups.js @@ -1,11 +1,10 @@ import { gql } from 'apollo-boost'; const listCognitoGroups = () => ({ - variables: { - }, query: gql` - query listCognitoGroups() { - listCognitoGroups() { + query listCognitoGroups { + listCognitoGroups{ + groupName } } ` diff --git a/frontend/src/views/Environments/EnvironmentTeamInviteForm.js b/frontend/src/views/Environments/EnvironmentTeamInviteForm.js index 35663ea05..a4767af15 100644 --- a/frontend/src/views/Environments/EnvironmentTeamInviteForm.js +++ b/frontend/src/views/Environments/EnvironmentTeamInviteForm.js @@ -45,11 +45,9 @@ const EnvironmentTeamInviteForm = (props) => { const fetchGroups = useCallback(async () => { try { setLoadingGroups(true); - const response = await client.query( - listCognitoGroups({ - environmentUri: environment.environmentUri - }) - ); + console.log("fetchgroups") + const response = await client.query(listCognitoGroups()); + console.log(response) if (!response.errors) { setGroupOptions( response.data.listCognitoGroups.map((g) => ({ From 056b545eedfa4e9cc1e9a226e03a0119bcf02d45 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 10 Oct 2022 08:53:19 +0200 Subject: [PATCH 05/28] flake --- backend/dataall/api/Objects/Group/resolvers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/dataall/api/Objects/Group/resolvers.py b/backend/dataall/api/Objects/Group/resolvers.py index 035ae61cd..d0f7294be 100644 --- a/backend/dataall/api/Objects/Group/resolvers.py +++ b/backend/dataall/api/Objects/Group/resolvers.py @@ -75,6 +75,7 @@ def list_data_items_shared_with_env_group( check_perm=True, ) + def list_cognito_groups(context, source): current_account = 
SessionHelper.get_account() current_region = os.getenv('AWS_REGION', 'eu-west-1') From 3cdcd12777b8bf8d8599ebdcc18310a61b3ab1b8 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Thu, 13 Oct 2022 14:31:10 +0200 Subject: [PATCH 06/28] Added orga views and filter for orga or environment --- backend/dataall/api/Objects/Group/input_types.py | 8 ++++++++ backend/dataall/api/Objects/Group/queries.py | 3 +++ backend/dataall/api/Objects/Group/resolvers.py | 5 ++++- frontend/src/api/Groups/listCognitoGroups.js | 13 ++++++++++--- .../views/Environments/EnvironmentTeamInviteForm.js | 5 +---- .../Organizations/OrganizationTeamInviteForm.js | 10 +++------- 6 files changed, 29 insertions(+), 15 deletions(-) diff --git a/backend/dataall/api/Objects/Group/input_types.py b/backend/dataall/api/Objects/Group/input_types.py index 6ba08c2f0..9cccb014c 100644 --- a/backend/dataall/api/Objects/Group/input_types.py +++ b/backend/dataall/api/Objects/Group/input_types.py @@ -8,3 +8,11 @@ gql.Argument(name='pageSize', type=gql.Integer), ], ) + +CognitoGroupFilter = gql.InputType( + name='CognitoGroupFilter', + arguments=[ + gql.Argument(name='type', type=gql.String), + gql.Argument(name='uri', type=gql.String), + ], +) diff --git a/backend/dataall/api/Objects/Group/queries.py b/backend/dataall/api/Objects/Group/queries.py index 6ad2c1ddf..5cbf484ff 100644 --- a/backend/dataall/api/Objects/Group/queries.py +++ b/backend/dataall/api/Objects/Group/queries.py @@ -36,6 +36,9 @@ listCognitoGroups = gql.QueryField( name='listCognitoGroups', + args=[ + gql.Argument(name='filter', type=gql.Ref('CognitoGroupFilter')), + ], type=gql.ArrayType(gql.Ref('CognitoGroup')), resolver=list_cognito_groups ) diff --git a/backend/dataall/api/Objects/Group/resolvers.py b/backend/dataall/api/Objects/Group/resolvers.py index d0f7294be..b47e0ea33 100644 --- a/backend/dataall/api/Objects/Group/resolvers.py +++ b/backend/dataall/api/Objects/Group/resolvers.py @@ -76,7 +76,10 @@ def list_data_items_shared_with_env_group( ) -def list_cognito_groups(context, source): +def list_cognito_groups(context, source, filter: dict = None): + # filter: + # filter.get("type") = 'organization' or 'environment' + # filter.get("uri") = 'organizationUri' or 'environmentUri' correspondingly current_account = SessionHelper.get_account() current_region = os.getenv('AWS_REGION', 'eu-west-1') envname = os.getenv('envname', 'local') diff --git a/frontend/src/api/Groups/listCognitoGroups.js b/frontend/src/api/Groups/listCognitoGroups.js index ede8a6cdf..1128849b1 100644 --- a/frontend/src/api/Groups/listCognitoGroups.js +++ b/frontend/src/api/Groups/listCognitoGroups.js @@ -1,9 +1,16 @@ import { gql } from 'apollo-boost'; -const listCognitoGroups = () => ({ +const listCognitoGroups ({ filter }) => ({ + variables: { + filter + }, query: gql` - query listCognitoGroups { - listCognitoGroups{ + query listCognitoGroups ( + $filter: CognitoGroupFilter + ) { + listCognitoGroups ( + filter: $filter + ){ groupName } } diff --git a/frontend/src/views/Environments/EnvironmentTeamInviteForm.js b/frontend/src/views/Environments/EnvironmentTeamInviteForm.js index a4767af15..0191c3c65 100644 --- a/frontend/src/views/Environments/EnvironmentTeamInviteForm.js +++ b/frontend/src/views/Environments/EnvironmentTeamInviteForm.js @@ -27,7 +27,6 @@ import { useDispatch } from '../../store'; import useClient from '../../hooks/useClient'; import listEnvironmentGroupInvitationPermissions from '../../api/Environment/listEnvironmentPermissions'; import inviteGroupOnEnvironment from 
'../../api/Environment/inviteGroup'; -import listEnvironmentNotInvitedGroups from '../../api/Environment/listNotInvitedGroups'; import listCognitoGroups from '../../api/Groups/listCognitoGroups'; const EnvironmentTeamInviteForm = (props) => { @@ -45,9 +44,7 @@ const EnvironmentTeamInviteForm = (props) => { const fetchGroups = useCallback(async () => { try { setLoadingGroups(true); - console.log("fetchgroups") - const response = await client.query(listCognitoGroups()); - console.log(response) + const response = await client.query(listCognitoGroups({type: "environment", uri: environment.environmentUri})); if (!response.errors) { setGroupOptions( response.data.listCognitoGroups.map((g) => ({ diff --git a/frontend/src/views/Organizations/OrganizationTeamInviteForm.js b/frontend/src/views/Organizations/OrganizationTeamInviteForm.js index 3b93f822a..23ca855dd 100644 --- a/frontend/src/views/Organizations/OrganizationTeamInviteForm.js +++ b/frontend/src/views/Organizations/OrganizationTeamInviteForm.js @@ -25,7 +25,7 @@ import { SET_ERROR } from '../../store/errorReducer'; import { useDispatch } from '../../store'; import useClient from '../../hooks/useClient'; import inviteGroupToOrganization from '../../api/Organization/inviteGroup'; -import listOrganizationNotInvitedGroups from '../../api/Organization/listNotInvitedGroups'; +import listCognitoGroups from '../../api/Groups/listCognitoGroups'; const OrganizationTeamInviteForm = (props) => { const { organization, onClose, open, reloadTeams, ...other } = props; @@ -40,14 +40,10 @@ const OrganizationTeamInviteForm = (props) => { const fetchGroups = useCallback(async () => { try { setLoadingGroups(true); - const response = await client.query( - listOrganizationNotInvitedGroups({ - organizationUri: organization.organizationUri - }) - ); + const response = await client.query(listCognitoGroups({type: "organization", uri: organization.organizationUri})); if (!response.errors) { setGroupOptions( - response.data.listOrganizationNotInvitedGroups.nodes.map((g) => ({ + response.data.listCognitoGroups.map((g) => ({ ...g, value: g.groupUri, label: g.groupUri From 2754739bd90c8b311cbda81c626c739c2b8b5002 Mon Sep 17 00:00:00 2001 From: Yuri Liang Date: Mon, 17 Oct 2022 16:00:55 +0800 Subject: [PATCH 07/28] Show only not invited groups and remove two unused apis --- .../api/Objects/Environment/queries.py | 10 ----- .../api/Objects/Environment/resolvers.py | 16 ------- .../dataall/api/Objects/Group/resolvers.py | 43 +++++++++++++------ .../api/Objects/Organization/queries.py | 10 ----- .../api/Objects/Organization/resolvers.py | 16 ------- backend/dataall/aws/handlers/cognito.py | 2 +- backend/dataall/db/api/environment.py | 19 -------- backend/dataall/db/api/organization.py | 20 --------- deploy/stacks/lambda_api.py | 1 + .../api/Environment/listNotInvitedGroups.js | 30 ------------- frontend/src/api/Groups/listCognitoGroups.js | 2 +- .../api/Organization/listNotInvitedGroups.js | 30 ------------- .../Environments/EnvironmentTeamInviteForm.js | 7 ++- .../OrganizationTeamInviteForm.js | 11 +++-- tests/api/test_environment.py | 40 ----------------- tests/api/test_organization.py | 40 ----------------- 16 files changed, 48 insertions(+), 249 deletions(-) delete mode 100644 frontend/src/api/Environment/listNotInvitedGroups.js delete mode 100644 frontend/src/api/Organization/listNotInvitedGroups.js diff --git a/backend/dataall/api/Objects/Environment/queries.py b/backend/dataall/api/Objects/Environment/queries.py index acc8fefd7..9143cae1b 100644 --- 
a/backend/dataall/api/Objects/Environment/queries.py +++ b/backend/dataall/api/Objects/Environment/queries.py @@ -116,16 +116,6 @@ resolver=list_environment_invited_groups, ) -listEnvironmentNotInvitedGroups = gql.QueryField( - name='listEnvironmentNotInvitedGroups', - type=gql.Ref('GroupSearchResult'), - args=[ - gql.Argument(name='environmentUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='filter', type=gql.Ref('GroupFilter')), - ], - resolver=list_environment_not_invited_groups, -) - listEnvironmentGroups = gql.QueryField( name='listEnvironmentGroups', type=gql.Ref('GroupSearchResult'), diff --git a/backend/dataall/api/Objects/Environment/resolvers.py b/backend/dataall/api/Objects/Environment/resolvers.py index cb16aa79a..5ef2d7a4d 100644 --- a/backend/dataall/api/Objects/Environment/resolvers.py +++ b/backend/dataall/api/Objects/Environment/resolvers.py @@ -180,22 +180,6 @@ def list_environment_invited_groups( ) -def list_environment_not_invited_groups( - context: Context, source, environmentUri=None, filter=None -): - if filter is None: - filter = {} - with context.engine.scoped_session() as session: - return db.api.Environment.not_environment_groups( - session=session, - username=context.username, - groups=context.groups, - uri=environmentUri, - data=filter, - check_perm=True, - ) - - def list_environment_groups(context: Context, source, environmentUri=None, filter=None): if filter is None: filter = {} diff --git a/backend/dataall/api/Objects/Group/resolvers.py b/backend/dataall/api/Objects/Group/resolvers.py index b47e0ea33..2eaea725d 100644 --- a/backend/dataall/api/Objects/Group/resolvers.py +++ b/backend/dataall/api/Objects/Group/resolvers.py @@ -1,13 +1,14 @@ import os +import logging +import boto3 from .... import db from ....db import exceptions from ....db.models import Group from ...constants import * -from ....aws.handlers.parameter_store import ParameterStoreManager -from ....aws.handlers.sts import SessionHelper -from ....aws.handlers.cognito import Cognito +log = logging.getLogger() + def resolve_group_environment_permissions(context, source, environmentUri): if not source: return None @@ -77,19 +78,37 @@ def list_data_items_shared_with_env_group( def list_cognito_groups(context, source, filter: dict = None): - # filter: - # filter.get("type") = 'organization' or 'environment' - # filter.get("uri") = 'organizationUri' or 'environmentUri' correspondingly - current_account = SessionHelper.get_account() - current_region = os.getenv('AWS_REGION', 'eu-west-1') envname = os.getenv('envname', 'local') if envname in ['local', 'dkrcompose']: return [{"groupName": 'DAAdministrators'}, {"groupName": 'Engineers'}, {"groupName": 'Scientists'}] + current_region = os.getenv('AWS_REGION', 'eu-west-1') parameter_path = f'/dataall/{envname}/cognito/userpool' - user_pool_id = ParameterStoreManager.get_parameter_value(current_account, current_region, parameter_path) - groups = Cognito.list_cognito_groups(current_account, current_region, user_pool_id) + ssm = boto3.client('ssm', region_name=current_region) + cognito = boto3.client('cognito-idp', region_name=current_region) + user_pool_id = ssm.get_parameter(Name=parameter_path)['Parameter']['Value'] + groups = cognito.list_groups(UserPoolId=user_pool_id)['Groups'] + category, category_uri = filter.get("type"), filter.get("uri") + if category and category_uri: + if category == 'environment': + with context.engine.scoped_session() as session: + invited_groups = db.api.Environment.query_all_environment_groups( + 
session=session, + username=context.username, + groups=context.groups, + uri=category_uri, + filter=None, + ).all() + if category == 'organization': + with context.engine.scoped_session() as session: + organization = db.api.Organization.get_organization_by_uri(session, category_uri) + invited_groups = db.api.Organization.query_organization_groups( + session=session, + uri=organization.organizationUri, + filter=None, + ).all() + invited_group_uris = [item.groupUri for item in invited_groups] res = [] for group in groups: - res.append({"groupName": group['GroupName']}) - + if group['GroupName'] not in invited_group_uris: + res.append({"groupName": group['GroupName']}) return res diff --git a/backend/dataall/api/Objects/Organization/queries.py b/backend/dataall/api/Objects/Organization/queries.py index a9a2453c9..3f47e88b0 100644 --- a/backend/dataall/api/Objects/Organization/queries.py +++ b/backend/dataall/api/Objects/Organization/queries.py @@ -33,16 +33,6 @@ resolver=list_organization_invited_groups, ) -listOrganizationNotInvitedGroups = gql.QueryField( - name='listOrganizationNotInvitedGroups', - type=gql.Ref('GroupSearchResult'), - args=[ - gql.Argument(name='organizationUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='filter', type=gql.Ref('GroupFilter')), - ], - resolver=list_organization_not_invited_groups, -) - listOrganizationGroups = gql.QueryField( name='listOrganizationGroups', type=gql.Ref('GroupSearchResult'), diff --git a/backend/dataall/api/Objects/Organization/resolvers.py b/backend/dataall/api/Objects/Organization/resolvers.py index c7b55e699..f97f2849c 100644 --- a/backend/dataall/api/Objects/Organization/resolvers.py +++ b/backend/dataall/api/Objects/Organization/resolvers.py @@ -161,22 +161,6 @@ def list_organization_invited_groups( ) -def list_organization_not_invited_groups( - context: Context, source, organizationUri=None, filter=None -): - if filter is None: - filter = {} - with context.engine.scoped_session() as session: - return db.api.Organization.not_organization_groups( - session=session, - username=context.username, - groups=context.groups, - uri=organizationUri, - data=filter, - check_perm=True, - ) - - def list_organization_groups( context: Context, source, organizationUri=None, filter=None ): diff --git a/backend/dataall/aws/handlers/cognito.py b/backend/dataall/aws/handlers/cognito.py index 860bf9f61..eb0c23f62 100644 --- a/backend/dataall/aws/handlers/cognito.py +++ b/backend/dataall/aws/handlers/cognito.py @@ -16,7 +16,7 @@ def client(account_id: str, region_name: str, client_type: str): def list_cognito_groups(account_id: str, region: str, user_pool_id: str): try: cognitoCli = Cognito.client(account_id, region, "cognito-idp") - response = cognitoCli.list_groups(UsePoolId=user_pool_id) + response = cognitoCli.list_groups(UserPoolId=user_pool_id) except Exception as e: log.error( f'Failed to list groups of user pool {user_pool_id} due to {e}' diff --git a/backend/dataall/db/api/environment.py b/backend/dataall/db/api/environment.py index 79954c862..1e7fff7b3 100644 --- a/backend/dataall/db/api/environment.py +++ b/backend/dataall/db/api/environment.py @@ -630,25 +630,6 @@ def list_environment_invited_groups( session, username, groups, uri, data ).all() - @staticmethod - @has_resource_perm(permissions.LIST_ENVIRONMENT_GROUPS) - def not_environment_groups( - session, username, groups, uri, data=None, check_perm=None - ) -> dict: - environment_groups: [] = ( - session.query(models.EnvironmentGroup).filter( - and_( - 
models.EnvironmentGroup.groupUri.in_(groups), - models.EnvironmentGroup.environmentUri == uri, - ), - ) - ).all() - environment_groups = [g.groupUri for g in environment_groups] - not_invited_groups = [ - {'groupUri': group} for group in groups if group not in environment_groups - ] - return Page(not_invited_groups, 1, 1000, len(not_invited_groups)).to_dict() - @staticmethod def query_environment_datasets(session, username, groups, uri, filter) -> Query: query = session.query(models.Dataset).filter( diff --git a/backend/dataall/db/api/organization.py b/backend/dataall/db/api/organization.py index e1ce8fdec..320519e2e 100644 --- a/backend/dataall/db/api/organization.py +++ b/backend/dataall/db/api/organization.py @@ -374,26 +374,6 @@ def paginated_organization_invited_groups( page_size=data.get('pageSize', 10), ).to_dict() - @staticmethod - @has_tenant_perm(permissions.MANAGE_ORGANIZATIONS) - @has_resource_perm(permissions.GET_ORGANIZATION) - def not_organization_groups( - session, username, groups, uri, data=None, check_perm=False - ) -> dict: - org_groups: [] = ( - session.query(models.OrganizationGroup).filter( - and_( - models.OrganizationGroup.groupUri.in_(groups), - models.OrganizationGroup.organizationUri == uri, - ), - ) - ).all() - org_groups = [g.groupUri for g in org_groups] - not_invited_groups = [ - {'groupUri': group} for group in groups if group not in org_groups - ] - return Page(not_invited_groups, 1, 1000, len(not_invited_groups)).to_dict() - @staticmethod def count_organization_invited_groups(session, uri, group) -> int: groups = ( diff --git a/deploy/stacks/lambda_api.py b/deploy/stacks/lambda_api.py index e562ee376..300397446 100644 --- a/deploy/stacks/lambda_api.py +++ b/deploy/stacks/lambda_api.py @@ -240,6 +240,7 @@ def create_function_role(self, envname, resource_prefix, fn_name): 'xray:GetSamplingRules', 'xray:GetSamplingTargets', 'xray:GetSamplingStatisticSummaries', + 'cognito-idp:ListGroups' ], resources=['*'], ), diff --git a/frontend/src/api/Environment/listNotInvitedGroups.js b/frontend/src/api/Environment/listNotInvitedGroups.js deleted file mode 100644 index 88a33c245..000000000 --- a/frontend/src/api/Environment/listNotInvitedGroups.js +++ /dev/null @@ -1,30 +0,0 @@ -import { gql } from 'apollo-boost'; - -const listEnvironmentNotInvitedGroups = ({ filter, environmentUri }) => ({ - variables: { - environmentUri, - filter - }, - query: gql` - query listEnvironmentNotInvitedGroups( - $filter: GroupFilter - $environmentUri: String - ) { - listEnvironmentNotInvitedGroups( - environmentUri: $environmentUri - filter: $filter - ) { - count - page - pages - hasNext - hasPrevious - nodes { - groupUri - } - } - } - ` -}); - -export default listEnvironmentNotInvitedGroups; diff --git a/frontend/src/api/Groups/listCognitoGroups.js b/frontend/src/api/Groups/listCognitoGroups.js index 1128849b1..49d473a50 100644 --- a/frontend/src/api/Groups/listCognitoGroups.js +++ b/frontend/src/api/Groups/listCognitoGroups.js @@ -1,6 +1,6 @@ import { gql } from 'apollo-boost'; -const listCognitoGroups ({ filter }) => ({ +const listCognitoGroups = ({ filter }) => ({ variables: { filter }, diff --git a/frontend/src/api/Organization/listNotInvitedGroups.js b/frontend/src/api/Organization/listNotInvitedGroups.js deleted file mode 100644 index 72138643e..000000000 --- a/frontend/src/api/Organization/listNotInvitedGroups.js +++ /dev/null @@ -1,30 +0,0 @@ -import { gql } from 'apollo-boost'; - -const listOrganizationNotInvitedGroups = ({ filter, organizationUri }) => ({ - variables: { - 
organizationUri, - filter - }, - query: gql` - query listOrganizationNotInvitedGroups( - $filter: GroupFilter - $organizationUri: String - ) { - listOrganizationNotInvitedGroups( - organizationUri: $organizationUri - filter: $filter - ) { - count - page - pages - hasNext - hasPrevious - nodes { - groupUri - } - } - } - ` -}); - -export default listOrganizationNotInvitedGroups; diff --git a/frontend/src/views/Environments/EnvironmentTeamInviteForm.js b/frontend/src/views/Environments/EnvironmentTeamInviteForm.js index 0191c3c65..843351082 100644 --- a/frontend/src/views/Environments/EnvironmentTeamInviteForm.js +++ b/frontend/src/views/Environments/EnvironmentTeamInviteForm.js @@ -41,10 +41,15 @@ const EnvironmentTeamInviteForm = (props) => { const [groupOptions, setGroupOptions] = useState([]); const [permissionsError, setPermissionsError] = useState(null); + const filter = { + type: "environment", + uri: environment.environmentUri + } + const fetchGroups = useCallback(async () => { try { setLoadingGroups(true); - const response = await client.query(listCognitoGroups({type: "environment", uri: environment.environmentUri})); + const response = await client.query(listCognitoGroups({ filter })); if (!response.errors) { setGroupOptions( response.data.listCognitoGroups.map((g) => ({ diff --git a/frontend/src/views/Organizations/OrganizationTeamInviteForm.js b/frontend/src/views/Organizations/OrganizationTeamInviteForm.js index 23ca855dd..d0ea4b11b 100644 --- a/frontend/src/views/Organizations/OrganizationTeamInviteForm.js +++ b/frontend/src/views/Organizations/OrganizationTeamInviteForm.js @@ -37,16 +37,21 @@ const OrganizationTeamInviteForm = (props) => { const [loadingGroups, setLoadingGroups] = useState(true); const [groupOptions, setGroupOptions] = useState([]); + const filter = { + type: "organization", + uri: organization.organizationUri + } + const fetchGroups = useCallback(async () => { try { setLoadingGroups(true); - const response = await client.query(listCognitoGroups({type: "organization", uri: organization.organizationUri})); + const response = await client.query(listCognitoGroups({ filter })); if (!response.errors) { setGroupOptions( response.data.listCognitoGroups.map((g) => ({ ...g, - value: g.groupUri, - label: g.groupUri + value: g.groupName, + label: g.groupName })) ); } else { diff --git a/tests/api/test_environment.py b/tests/api/test_environment.py index c4dc64433..e961a445c 100644 --- a/tests/api/test_environment.py +++ b/tests/api/test_environment.py @@ -472,26 +472,6 @@ def test_group_invitation(db, client, env1, org1, group2, user, group3, group, d assert response.data.listEnvironmentInvitedGroups.count == 1 - response = client.query( - """ - query listEnvironmentNotInvitedGroups($environmentUri: String!, $filter:GroupFilter){ - listEnvironmentNotInvitedGroups(environmentUri:$environmentUri, filter:$filter){ - count - nodes{ - groupUri - name - } - } - } - """, - username=user.userName, - groups=[group.name, group2.name, group3.name], - environmentUri=env1.environmentUri, - filter={}, - ) - - assert response.data.listEnvironmentNotInvitedGroups.count == 1 - response = client.query( """ query listEnvironmentGroups($environmentUri: String!, $filter:GroupFilter){ @@ -618,26 +598,6 @@ def test_group_invitation(db, client, env1, org1, group2, user, group3, group, d assert response.data.listEnvironmentInvitedGroups.count == 0 - response = client.query( - """ - query listEnvironmentNotInvitedGroups($environmentUri: String!, $filter:GroupFilter){ - 
listEnvironmentNotInvitedGroups(environmentUri:$environmentUri, filter:$filter){ - count - nodes{ - groupUri - name - } - } - } - """, - username=user.userName, - groups=[group.name, group2.name, group3.name], - environmentUri=env1.environmentUri, - filter={}, - ) - - assert response.data.listEnvironmentNotInvitedGroups.count == 2 - response = client.query( """ query listEnvironmentGroups($environmentUri: String!, $filter:GroupFilter){ diff --git a/tests/api/test_organization.py b/tests/api/test_organization.py index 87ee65127..74f656278 100644 --- a/tests/api/test_organization.py +++ b/tests/api/test_organization.py @@ -222,26 +222,6 @@ def test_group_invitation( assert response.data.listOrganizationInvitedGroups.count == 1 - response = client.query( - """ - query listOrganizationNotInvitedGroups($organizationUri: String!, $filter:GroupFilter){ - listOrganizationNotInvitedGroups(organizationUri:$organizationUri, filter:$filter){ - count - nodes{ - groupUri - name - } - } - } - """, - username=user.userName, - groups=[group.name, group2.name, group3.name], - organizationUri=org1.organizationUri, - filter={}, - ) - - assert response.data.listOrganizationNotInvitedGroups.count == 1 - response = client.query( """ query listOrganizationGroups($organizationUri: String!, $filter:GroupFilter){ @@ -326,26 +306,6 @@ def test_group_invitation( assert response.data.listOrganizationInvitedGroups.count == 0 - response = client.query( - """ - query listOrganizationNotInvitedGroups($organizationUri: String!, $filter:GroupFilter){ - listOrganizationNotInvitedGroups(organizationUri:$organizationUri, filter:$filter){ - count - nodes{ - groupUri - name - } - } - } - """, - username=user.userName, - groups=[group.name, group2.name, group3.name], - organizationUri=org1.organizationUri, - filter={}, - ) - - assert response.data.listOrganizationNotInvitedGroups.count == 2 - response = client.query( """ query listOrganizationGroups($organizationUri: String!, $filter:GroupFilter){ From 359e15d4ed72156de84a476b91fe3467c29e82e6 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 17 Oct 2022 13:05:21 +0200 Subject: [PATCH 08/28] Cognito list groups in handlers + added integration test for list groups --- backend/dataall/api/Objects/Group/resolvers.py | 10 +++------- backend/dataall/aws/handlers/cognito.py | 12 ++++++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/dataall/api/Objects/Group/resolvers.py b/backend/dataall/api/Objects/Group/resolvers.py index 2eaea725d..54219a5d1 100644 --- a/backend/dataall/api/Objects/Group/resolvers.py +++ b/backend/dataall/api/Objects/Group/resolvers.py @@ -1,14 +1,14 @@ import os import logging -import boto3 from .... 
import db from ....db import exceptions from ....db.models import Group -from ...constants import * +from ....aws.handlers.cognito import Cognito log = logging.getLogger() + def resolve_group_environment_permissions(context, source, environmentUri): if not source: return None @@ -82,11 +82,7 @@ def list_cognito_groups(context, source, filter: dict = None): if envname in ['local', 'dkrcompose']: return [{"groupName": 'DAAdministrators'}, {"groupName": 'Engineers'}, {"groupName": 'Scientists'}] current_region = os.getenv('AWS_REGION', 'eu-west-1') - parameter_path = f'/dataall/{envname}/cognito/userpool' - ssm = boto3.client('ssm', region_name=current_region) - cognito = boto3.client('cognito-idp', region_name=current_region) - user_pool_id = ssm.get_parameter(Name=parameter_path)['Parameter']['Value'] - groups = cognito.list_groups(UserPoolId=user_pool_id)['Groups'] + groups = Cognito.list_cognito_groups(envname=envname, region=current_region) category, category_uri = filter.get("type"), filter.get("uri") if category and category_uri: if category == 'environment': diff --git a/backend/dataall/aws/handlers/cognito.py b/backend/dataall/aws/handlers/cognito.py index eb0c23f62..e3c9ea7c2 100644 --- a/backend/dataall/aws/handlers/cognito.py +++ b/backend/dataall/aws/handlers/cognito.py @@ -1,4 +1,5 @@ import logging +import boto3 from .sts import SessionHelper @@ -13,13 +14,16 @@ def client(account_id: str, region_name: str, client_type: str): return session.client(client_type, region_name=region_name) @staticmethod - def list_cognito_groups(account_id: str, region: str, user_pool_id: str): + def list_cognito_groups(envname: str, region: str): try: - cognitoCli = Cognito.client(account_id, region, "cognito-idp") - response = cognitoCli.list_groups(UserPoolId=user_pool_id) + parameter_path = f'/dataall/{envname}/cognito/userpool' + ssm = boto3.client('ssm', region_name=region) + user_pool_id = ssm.get_parameter(Name=parameter_path)['Parameter']['Value'] + cognito = boto3.client('cognito-idp', region_name=region) + groups = cognito.list_groups(UserPoolId=user_pool_id)['Groups'] except Exception as e: log.error( f'Failed to list groups of user pool {user_pool_id} due to {e}' ) else: - return response['Groups'] + return groups From 2984336c79eb14e160531d4f939e9ed0b4258f7d Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 17 Oct 2022 13:21:00 +0200 Subject: [PATCH 09/28] added integration test for list groups --- tests/api/test_group.py | 45 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 tests/api/test_group.py diff --git a/tests/api/test_group.py b/tests/api/test_group.py new file mode 100644 index 000000000..27b7f1a70 --- /dev/null +++ b/tests/api/test_group.py @@ -0,0 +1,45 @@ +import pytest + +import dataall +from dataall.db import permissions + + +@pytest.fixture(scope='module', autouse=True) +def org1(org, user, group, tenant): + org1 = org('testorg', user.userName, group.name) + yield org1 + + +@pytest.fixture(scope='module', autouse=True) +def env1(env, org1, user, group, tenant, module_mocker): + module_mocker.patch('requests.post', return_value=True) + module_mocker.patch( + 'dataall.api.Objects.Environment.resolvers.check_environment', return_value=True + ) + env1 = env(org1, 'dev', user.userName, group.name, '111111111111', 'eu-west-1') + yield env1 + + +def test_list_cognito_groups_env(client, env1, group, module_mocker): + module_mocker.patch( + 'dataall.aws.handlers.cognito.Cognito.list_cognito_groups', + return_value=[{"groupName": 
'cognitos'}, {"groupName": 'testadmins'}], + ) + response = client.query( + """ + query listCognitoGroups ( + $filter: CognitoGroupFilter + ) { + listCognitoGroups ( + filter: $filter + ){ + groupName + } + } + """, + username='alice', + filter={'type': 'environment', 'uri': env1.environmentUri}, + ) + assert response.data.listCognitoGroups[0].groupName == 'cognitos' + + From e2c7f2847f127b0ea779c9c1f6d06f9ee876ddf7 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 17 Oct 2022 13:36:43 +0200 Subject: [PATCH 10/28] testing of env non-local --- backend/dataall/api/Objects/Group/resolvers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/dataall/api/Objects/Group/resolvers.py b/backend/dataall/api/Objects/Group/resolvers.py index 54219a5d1..c17b052f7 100644 --- a/backend/dataall/api/Objects/Group/resolvers.py +++ b/backend/dataall/api/Objects/Group/resolvers.py @@ -79,8 +79,8 @@ def list_data_items_shared_with_env_group( def list_cognito_groups(context, source, filter: dict = None): envname = os.getenv('envname', 'local') - if envname in ['local', 'dkrcompose']: - return [{"groupName": 'DAAdministrators'}, {"groupName": 'Engineers'}, {"groupName": 'Scientists'}] + if envname in ['dkrcompose']: + return [{"groupName": 'Docker'}] current_region = os.getenv('AWS_REGION', 'eu-west-1') groups = Cognito.list_cognito_groups(envname=envname, region=current_region) category, category_uri = filter.get("type"), filter.get("uri") From b9b839db790d2d976287f5e628e274fce8689077 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 17 Oct 2022 13:48:18 +0200 Subject: [PATCH 11/28] testing of env non-local --- tests/api/test_group.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/api/test_group.py b/tests/api/test_group.py index 27b7f1a70..7cab78314 100644 --- a/tests/api/test_group.py +++ b/tests/api/test_group.py @@ -23,7 +23,7 @@ def env1(env, org1, user, group, tenant, module_mocker): def test_list_cognito_groups_env(client, env1, group, module_mocker): module_mocker.patch( 'dataall.aws.handlers.cognito.Cognito.list_cognito_groups', - return_value=[{"groupName": 'cognitos'}, {"groupName": 'testadmins'}], + return_value=[{"GroupName": 'cognitos'}, {"GroupName": 'testadmins'}], ) response = client.query( """ From 039d936e3dee35ad3f49fc353c53e1711d83cd14 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 18 Oct 2022 17:05:28 +0200 Subject: [PATCH 12/28] Renamed cdk.json and cdk.context.json --- cdk.json | 9 ----- cdk.context.json => template_cdk.context.json | 0 template_cdk.json | 36 +++++++++++++++++++ 3 files changed, 36 insertions(+), 9 deletions(-) delete mode 100644 cdk.json rename cdk.context.json => template_cdk.context.json (100%) create mode 100644 template_cdk.json diff --git a/cdk.json b/cdk.json deleted file mode 100644 index eda207219..000000000 --- a/cdk.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "app": "python ./deploy/app.py", - "context": { - "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": false, - "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": false, - "@aws-cdk/aws-rds:lowercaseDbIdentifier": false, - "@aws-cdk/core:stackRelativeExports": false - } -} diff --git a/cdk.context.json b/template_cdk.context.json similarity index 100% rename from cdk.context.json rename to template_cdk.context.json diff --git a/template_cdk.json b/template_cdk.json new file mode 100644 index 000000000..8f152c0b9 --- /dev/null +++ b/template_cdk.json @@ -0,0 +1,36 @@ +{ + "app": "python ./deploy/app.py", + "context": { + 
"@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": false, + "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": false, + "@aws-cdk/aws-rds:lowercaseDbIdentifier": false, + "@aws-cdk/core:stackRelativeExports": false, + "tooling_region": "string_TOOLING_REGION|DEFAULT=eu-west-1", + "tooling_vpc_id": "string_IMPORT_AN_EXISTING_VPC_FROM_TOOLING|DEFAULT=None", + "git_branch": "string_GIT_BRANCH_NAME|DEFAULT=dataall", + "git_release": "boolean_MANAGE_GIT_RELEASE|DEFAULT=false", + "quality_gate": "boolean_MANAGE_QUALITY_GATE_STAGE|DEFAULT=true", + "resource_prefix": "string_PREFIX_FOR_ALL_RESOURCES_CREATED_BY_THIS_APP|DEFAULT=dataall", + "repository_source": "string_VERSION_CONTROL_SERVICE|DEFAULT=codecommit", + "DeploymentEnvironments": [ + { + "envname": "string_ENVIRONMENT_NAME|REQUIRED", + "account": "string_DEPLOYMENT_ACCOUNT|REQUIRED", + "region": "string_DEPLOYMENT_REGION|REQUIRED", + "with_approval": "boolean_ADD_CODEPIPELINE_APPROVAL_STEP|DEFAULT=false", + "vpc_id": "string_DEPLOY_WITHIN_AN_EXISTING_VPC|DEFAULT=None", + "vpc_endpoints_sg": "string_DEPLOY_WITHIN_EXISTING_VPC_SG|DEFAULT=None", + "internet_facing": "boolean_CLOUDFRONT_IF_TRUE_ELSE_ECS_BEHIND_INTERNAL_ALB|DEFAULT=true", + "custom_domain": { + "hosted_zone_name": "string_ROUTE_53_EXISTING_DOMAIN_NAME|DEFAULT=None, REQUIRED if internet_facing=false", + "hosted_zone_id": "string_ROUTE_53_EXISTING_HOSTED_ZONE_ID|DEFAULT=None, REQUIRED if internet_facing=false" + }, + "ip_ranges": "list_of_strings_IP_RANGES_TO_ALLOW_IF_NOT_INTERNET_FACING|DEFAULT=None", + "apig_vpce": "string_USE_AN_EXISTING_VPCE_FOR_APIG_IF_NOT_INTERNET_FACING|DEFAULT=None", + "prod_sizing": "boolean_SET_INFRA_SIZING_TO_PROD_VALUES_IF_TRUE|DEFAULT=true", + "enable_cw_rum": "boolean_SET_CLOUDWATCH_RUM_APP_MONITOR|DEFAULT=false", + "enable_cw_canaries": "boolean_SET_CLOUDWATCH_CANARIES_FOR_FRONTEND_TESTING|DEFAULT=false" + } + ] + } +} From 086cbbea4495a8ffdd9b735819433788dbc220ec Mon Sep 17 00:00:00 2001 From: dlpzx Date: Thu, 20 Oct 2022 13:42:31 +0200 Subject: [PATCH 13/28] required --app for non cdk.context (SSM cdk.json) deployments --- cdk.json | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 cdk.json diff --git a/cdk.json b/cdk.json new file mode 100644 index 000000000..eda207219 --- /dev/null +++ b/cdk.json @@ -0,0 +1,9 @@ +{ + "app": "python ./deploy/app.py", + "context": { + "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": false, + "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": false, + "@aws-cdk/aws-rds:lowercaseDbIdentifier": false, + "@aws-cdk/core:stackRelativeExports": false + } +} From 1c226906f33a451f0afe15a7a11d94f11597f3ef Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Tue, 25 Oct 2022 10:14:20 +0200 Subject: [PATCH 14/28] Better sharing management --- backend/dataall/aws/handlers/ecs.py | 6 +- backend/dataall/aws/handlers/glue.py | 129 ++- backend/dataall/aws/handlers/lakeformation.py | 223 +++++ backend/dataall/aws/handlers/ram.py | 225 +++++ backend/dataall/db/api/share_object.py | 153 ++- .../dataall/tasks/data_sharing/__init__.py | 0 .../tasks/data_sharing/common/__init__.py | 0 .../data_sharing/common/share_approval.py | 369 ++++++++ .../tasks/data_sharing/common/share_revoke.py | 165 ++++ .../data_sharing/cross_account/__init__.py | 0 .../cross_account/approve_share.py | 173 ++++ .../cross_account/revoke_share.py | 120 +++ .../data_sharing/data_sharing_service.py | 234 +++++ .../data_sharing/same_account/__init__.py | 0 .../same_account/approve_share.py | 102 ++ 
.../data_sharing/same_account/revoke_share.py | 46 + backend/dataall/tasks/share_manager.py | 886 +----------------- backend/dataall/tasks/shares_refresh.py | 28 + deploy/stacks/container.py | 35 + tests/tasks/test_share_manager.py | 310 ++++++ 20 files changed, 2307 insertions(+), 897 deletions(-) create mode 100644 backend/dataall/aws/handlers/lakeformation.py create mode 100644 backend/dataall/aws/handlers/ram.py create mode 100644 backend/dataall/tasks/data_sharing/__init__.py create mode 100644 backend/dataall/tasks/data_sharing/common/__init__.py create mode 100644 backend/dataall/tasks/data_sharing/common/share_approval.py create mode 100644 backend/dataall/tasks/data_sharing/common/share_revoke.py create mode 100644 backend/dataall/tasks/data_sharing/cross_account/__init__.py create mode 100644 backend/dataall/tasks/data_sharing/cross_account/approve_share.py create mode 100644 backend/dataall/tasks/data_sharing/cross_account/revoke_share.py create mode 100644 backend/dataall/tasks/data_sharing/data_sharing_service.py create mode 100644 backend/dataall/tasks/data_sharing/same_account/__init__.py create mode 100644 backend/dataall/tasks/data_sharing/same_account/approve_share.py create mode 100644 backend/dataall/tasks/data_sharing/same_account/revoke_share.py create mode 100644 backend/dataall/tasks/shares_refresh.py create mode 100644 tests/tasks/test_share_manager.py diff --git a/backend/dataall/aws/handlers/ecs.py b/backend/dataall/aws/handlers/ecs.py index 65d5ff188..9c8d880d5 100644 --- a/backend/dataall/aws/handlers/ecs.py +++ b/backend/dataall/aws/handlers/ecs.py @@ -9,7 +9,7 @@ from ... import db from ...db import models from ...utils import Parameter -from ...tasks.share_manager import ShareManager +from ...tasks.data_sharing.data_sharing_service import DataSharingService log = logging.getLogger('aws:ecs') @@ -23,7 +23,7 @@ def __init__(self): def approve_share(engine, task: models.Task): envname = os.environ.get('envname', 'local') if envname in ['local', 'dkrcompose']: - return ShareManager.approve_share(engine, task.targetUri) + return DataSharingService.approve_share(engine, task.targetUri) else: return Ecs.run_share_management_ecs_task( envname, task.targetUri, 'approve_share' @@ -34,7 +34,7 @@ def approve_share(engine, task: models.Task): def reject_share(engine, task: models.Task): envname = os.environ.get('envname', 'local') if envname in ['local', 'dkrcompose']: - return ShareManager.reject_share(engine, task.targetUri) + return DataSharingService.reject_share(engine, task.targetUri) else: return Ecs.run_share_management_ecs_task( envname, task.targetUri, 'reject_share' diff --git a/backend/dataall/aws/handlers/glue.py b/backend/dataall/aws/handlers/glue.py index ef4fce1f7..cac696c4b 100644 --- a/backend/dataall/aws/handlers/glue.py +++ b/backend/dataall/aws/handlers/glue.py @@ -15,7 +15,7 @@ def __init__(self): pass @staticmethod - def _create_database(accountid, database, region, location): + def create_database(accountid, database, region, location): try: existing_database = Glue.database_exists( accountid=accountid, database=database, region=region @@ -27,7 +27,9 @@ def _create_database(accountid, database, region, location): glue_database_created = True return glue_database_created except ClientError as e: - log.debug(f'Failed to create database {database}', e) + log.error( + f'Failed to create database {database} on account {accountid} due to {e}' + ) raise e @staticmethod @@ -64,15 +66,11 @@ def database_exists(**data): session = 
SessionHelper.remote_session(accountid) try: glue_client = session.client('glue', region_name=region) - response = glue_client.get_database( - CatalogId=data['accountid'], Name=database - ) - if response.get('Database'): - return response - else: - return None - except ClientError as e: - log.debug(f'Database already exists in Glue{database}', e) + glue_client.get_database(CatalogId=data['accountid'], Name=database) + return True + except ClientError: + log.info(f'Database {database} does not exist on account {accountid}...') + return False @staticmethod @Worker.handler(path='glue.dataset.database.tables') @@ -140,12 +138,13 @@ def table_exists(**data): log.info(f'Glue table not found: {data}') return None + @staticmethod def _create_table(**data): accountid = data['accountid'] - session = SessionHelper.remote_session(accountid=accountid) region = data.get('region', 'eu-west-1') database = data.get('database', 'UnknownDatabaseName') + session = SessionHelper.remote_session(accountid=accountid) glue = session.client('glue', region_name=region) log.info( 'Creating table {} in database {}'.format( @@ -155,7 +154,7 @@ def _create_table(**data): if not Glue.database_exists( database=database, region=region, accountid=accountid ): - Glue._create_database(accountid, database, region, None) + Glue.create_database(accountid, database, region, None) if 'table_input' not in data: table_input = { 'Name': data['tablename'], @@ -222,6 +221,47 @@ def _create_table(**data): ) return response + @staticmethod + def create_resource_link(**data): + accountid = data['accountid'] + region = data['region'] + database = data['database'] + resource_link_name = data['resource_link_name'] + resource_link_input = data['resource_link_input'] + log.info( + f'Creating ResourceLink {resource_link_name} in database {accountid}://{database}' + ) + try: + session = SessionHelper.remote_session(accountid=accountid) + glue = session.client('glue', region_name=region) + resource_link = Glue.table_exists( + accountid=accountid, + region=region, + database=database, + tablename=resource_link_name, + ) + if resource_link: + log.info( + f'ResourceLink {resource_link_name} already exists in database {accountid}://{database}' + ) + else: + resource_link = glue.create_table( + CatalogId=accountid, + DatabaseName=database, + TableInput=resource_link_input, + ) + log.info( + f'Successfully created ResourceLink {resource_link_name} in database {accountid}://{database}' + ) + return resource_link + except ClientError as e: + log.error( + f'Could not create ResourceLink {resource_link_name} ' + f'in database {accountid}://{database} ' + f'due to: {e}' + ) + raise e + @staticmethod def is_resource_link(table_input: dict): """ @@ -268,21 +308,64 @@ def delete_table_and_create_resourcelink(glue, database, accountid, table_input) ) raise e + @staticmethod + def delete_database(**data): + accountid = data['accountid'] + region = data['region'] + database = data['database'] + log.info(f'Deleting database {accountid}://{database} ...') + try: + session = SessionHelper.remote_session(accountid=accountid) + glue = session.client('glue', region_name=region) + if Glue.database_exists( + accountid=accountid, + region=region, + database=database, + ): + glue.delete_database(CatalogId=accountid, Name=database) + return True + except ClientError as e: + log.error( + f'Could not delete database {database} ' + f'in account {accountid} ' + f'due to: {e}' + ) + raise e + @staticmethod def batch_delete_tables(**data): accountid = data['accountid'] - session 
= SessionHelper.remote_session(accountid=accountid) - glue = session.client('glue', region_name=data.get('region', 'eu-west-1')) + region = data['region'] database = data['database'] tables = data['tables'] - log.debug(f'Batch deleting tables: {tables}') - response = glue.batch_delete_table( - CatalogId=accountid, DatabaseName=database, TablesToDelete=tables - ) - log.debug( - f'Batch deleted tables {len(tables)} from database {database} successfully' - ) - return response + + if not tables: + log.info('No tables to delete exiting method...') + return + + log.info(f'Batch deleting tables: {tables}') + try: + session = SessionHelper.remote_session(accountid=accountid) + glue = session.client('glue', region_name=region) + if Glue.database_exists( + accountid=accountid, + region=region, + database=database, + ): + glue.batch_delete_table( + CatalogId=accountid, DatabaseName=database, TablesToDelete=tables + ) + log.debug( + f'Batch deleted tables {len(tables)} from database {database} successfully' + ) + return True + except ClientError as e: + log.error( + f'Could not batch delete tables {tables} ' + f'in database {accountid}://{database} ' + f'due to: {e}' + ) + raise e @staticmethod @Worker.handler(path='glue.dataset.crawler.create') diff --git a/backend/dataall/aws/handlers/lakeformation.py b/backend/dataall/aws/handlers/lakeformation.py new file mode 100644 index 000000000..5b0de94a1 --- /dev/null +++ b/backend/dataall/aws/handlers/lakeformation.py @@ -0,0 +1,223 @@ +import logging +import uuid + +from botocore.exceptions import ClientError + +from .sts import SessionHelper + +log = logging.getLogger('aws:lakeformation') + + +class LakeFormation: + def __init__(self): + pass + + @staticmethod + def grant_pivot_role_all_database_permissions(accountid, region, database): + LakeFormation.grant_permissions_to_database( + client=SessionHelper.remote_session(accountid=accountid).client( + 'lakeformation', region_name=region + ), + principals=[SessionHelper.get_delegation_role_arn(accountid)], + database_name=database, + permissions=['ALL'], + ) + + @staticmethod + def grant_permissions_to_database( + client, + principals, + database_name, + permissions, + permissions_with_grant_options=None, + ): + for principal in principals: + log.info( + f'Granting database permissions {permissions} to {principal} on database {database_name}' + ) + try: + client.grant_permissions( + Principal={'DataLakePrincipalIdentifier': principal}, + Resource={ + 'Database': {'Name': database_name}, + }, + Permissions=permissions, + ) + log.info( + f'Successfully granted principal {principal} permissions {permissions} ' + f'to {database_name}' + ) + except ClientError as e: + log.error( + f'Could not grant permissions ' + f'principal {principal} ' + f'{permissions} to database {database_name} due to: {e}' + ) + + @staticmethod + def grant_permissions_to_table( + client, + principal, + database_name, + table_name, + permissions, + permissions_with_grant_options=None, + ): + try: + grant_dict = dict( + Principal={'DataLakePrincipalIdentifier': principal}, + Resource={'Table': {'DatabaseName': database_name, 'Name': table_name}}, + Permissions=permissions, + ) + if permissions_with_grant_options: + grant_dict[ + 'PermissionsWithGrantOption' + ] = permissions_with_grant_options + + response = client.grant_permissions(**grant_dict) + + log.info( + f'Successfully granted principal {principal} permissions {permissions} ' + f'to {database_name}.{table_name}: {response}' + ) + except ClientError as e: + log.warning( + f'Could 
not grant principal {principal} ' + f'permissions {permissions} to table ' + f'{database_name}.{table_name} due to: {e}' + ) + # raise e + + @staticmethod + def revoke_iamallowedgroups_super_permission_from_table( + client, accountid, database, table + ): + """ + When upgrading to LF tables can still have IAMAllowedGroups permissions + Unless this is revoked the table can not be shared using LakeFormation + :param client: + :param accountid: + :param database: + :param table: + :return: + """ + try: + log.info( + f'Revoking IAMAllowedGroups Super ' + f'permission for table {database}|{table}' + ) + LakeFormation.batch_revoke_permissions( + client, + accountid, + entries=[ + { + 'Id': str(uuid.uuid4()), + 'Principal': {'DataLakePrincipalIdentifier': 'EVERYONE'}, + 'Resource': { + 'Table': { + 'DatabaseName': database, + 'Name': table, + 'CatalogId': accountid, + } + }, + 'Permissions': ['ALL'], + 'PermissionsWithGrantOption': [], + } + ], + ) + except ClientError as e: + log.debug( + f'Could not revoke IAMAllowedGroups Super ' + f'permission on table {database}|{table} due to {e}' + ) + + @staticmethod + def batch_revoke_permissions(client, accountid, entries): + """ + Batch revoke permissions to entries + Retry is set for api throttling + :param client: + :param accountid: + :param entries: + :return: + """ + entries_chunks: list = [entries[i : i + 20] for i in range(0, len(entries), 20)] + failures = [] + try: + for entries_chunk in entries_chunks: + response = client.batch_revoke_permissions( + CatalogId=accountid, Entries=entries_chunk + ) + log.info(f'Batch Revoke {entries_chunk} response: {response}') + failures.extend(response.get('Failures')) + except ClientError as e: + for failure in failures: + if not ( + failure['Error']['ErrorCode'] == 'InvalidInputException' + and ( + 'Grantee has no permissions' in failure['Error']['ErrorMessage'] + or 'No permissions revoked' in failure['Error']['ErrorMessage'] + ) + ): + log.warning(f'Batch Revoke ended with failures: {failures}') + raise e + + @staticmethod + def grant_resource_link_permission_on_target(client, source, target): + for principal in target['principals']: + try: + table_grant = dict( + Principal={'DataLakePrincipalIdentifier': principal}, + Resource={ + 'TableWithColumns': { + 'DatabaseName': source['database'], + 'Name': source['tablename'], + 'ColumnWildcard': {}, + 'CatalogId': source['accountid'], + } + }, + Permissions=['DESCRIBE', 'SELECT'], + PermissionsWithGrantOption=[], + ) + client.grant_permissions(**table_grant) + log.info( + f'Successfully granted permissions DESCRIBE,SELECT to {principal} on target ' + f'{source["accountid"]}://{source["database"]}/{source["tablename"]}' + ) + except ClientError as e: + logging.error( + f'Failed granting principal {principal} ' + 'read access to resource link on target' + f' {source["accountid"]}://{source["database"]}/{source["tablename"]} ' + f'due to: {e}' + ) + raise e + + @staticmethod + def grant_resource_link_permission(client, source, target, target_database): + for principal in target['principals']: + resourcelink_grant = dict( + Principal={'DataLakePrincipalIdentifier': principal}, + Resource={ + 'Table': { + 'DatabaseName': target_database, + 'Name': source['tablename'], + 'CatalogId': target['accountid'], + } + }, + # Resource link only supports DESCRIBE and DROP permissions no SELECT + Permissions=['DESCRIBE'], + ) + try: + client.grant_permissions(**resourcelink_grant) + log.info( + f'Granted resource link DESCRIBE access ' + f'to principal {principal} on 
{target["accountid"]}://{target_database}/{source["tablename"]}' + ) + except ClientError as e: + logging.error( + f'Failed granting principal {principal} ' + f'read access to resource link on {target["accountid"]}://{target_database}/{source["tablename"]} ' + f'due to: {e}' + ) + raise e diff --git a/backend/dataall/aws/handlers/ram.py b/backend/dataall/aws/handlers/ram.py new file mode 100644 index 000000000..f089db15b --- /dev/null +++ b/backend/dataall/aws/handlers/ram.py @@ -0,0 +1,225 @@ +import logging +import time + +from botocore.exceptions import ClientError + +from .sts import SessionHelper + +log = logging.getLogger('aws:ram') + + +class Ram: + @staticmethod + def get_resource_share_invitations( + client, resource_share_arns, sender_account, receiver_account + ): + log.info(f'Listing invitations for resourceShareArns: {resource_share_arns}') + try: + resource_share_invitations = [] + + paginator = client.get_paginator('get_resource_share_invitations') + invitation_pages = paginator.paginate(resourceShareArns=resource_share_arns) + for page in invitation_pages: + resource_share_invitations.extend(page.get('resourceShareInvitations')) + + filtered_invitations = [ + i + for i in resource_share_invitations + if i['senderAccountId'] == sender_account + and i['receiverAccountId'] == receiver_account + ] + return filtered_invitations + except ClientError as e: + log.error( + f'Failed retrieving RAM resource ' + f'share invitations {resource_share_arns} due to {e}' + ) + raise e + + @staticmethod + def accept_resource_share_invitation(client, resource_share_invitation_arn): + try: + response = client.accept_resource_share_invitation( + resourceShareInvitationArn=resource_share_invitation_arn + ) + log.info(f'Accepted ram invitation {resource_share_invitation_arn}') + return response.get('resourceShareInvitation') + except ClientError as e: + if ( + e.response['Error']['Code'] + == 'ResourceShareInvitationAlreadyAcceptedException' + ): + log.info( + f'Failed to accept RAM invitation ' + f'{resource_share_invitation_arn} already accepted' + ) + else: + log.error( + f'Failed to accept RAM invitation ' + f'{resource_share_invitation_arn} due to {e}' + ) + raise e + + @staticmethod + def accept_ram_invitation(**data): + """ + Accepts RAM invitations on the target account + """ + retry_share_table = False + failed_invitations = [] + source = data['source'] + target = data['target'] + + if source['accountid'] == target['accountid']: + log.debug('Skipping RAM invitation management for same account sharing.') + return True + + source_session = SessionHelper.remote_session(accountid=source['accountid']) + source_ram = source_session.client('ram', region_name=source['region']) + + target_session = SessionHelper.remote_session(accountid=target['accountid']) + target_ram = target_session.client('ram', region_name=target['region']) + + resource_arn = ( + f'arn:aws:glue:{source["region"]}:{source["accountid"]}:' + f'table/{data["source"]["database"]}/{data["source"]["tablename"]}' + ) + associations = Ram.list_resource_share_associations(source_ram, resource_arn) + resource_share_arns = [a['resourceShareArn'] for a in associations] + + ram_invitations = Ram.get_resource_share_invitations( + target_ram, resource_share_arns, source['accountid'], target['accountid'] + ) + log.info( + f'Found {len(ram_invitations)} RAM invitations for resourceShareArn: {resource_share_arns}' + ) + for invitation in ram_invitations: + if 'LakeFormation' in invitation['resourceShareName']: + if invitation['status'] == 
'PENDING':
+                    log.info(
+                        f'Invitation {invitation} is in PENDING status, accepting it ...'
+                    )
+                    Ram.accept_resource_share_invitation(
+                        target_ram, invitation['resourceShareInvitationArn']
+                    )
+                    # Ram invitation acceptance is slow
+                    time.sleep(5)
+                elif (
+                    invitation['status'] == 'EXPIRED'
+                    or invitation['status'] == 'REJECTED'
+                ):
+                    log.warning(
+                        f'Invitation {invitation} has expired or was rejected. '
+                        'Table flagged for share retry. '
+                        'Deleting the resource share to reset the invitation ...'
+                    )
+                    failed_invitations.append(invitation)
+                    retry_share_table = True
+                    source_ram.delete_resource_share(
+                        resourceShareArn=invitation['resourceShareArn']
+                    )
+
+                elif invitation['status'] == 'ACCEPTED':
+                    log.info(
+                        f'Invitation {invitation} already accepted, nothing to do ...'
+                    )
+                else:
+                    log.warning(
+                        f'Invitation {invitation} is in an unhandled status '
+                        f'{invitation["status"]}, skipping it ...'
+                    )
+
+        return retry_share_table, failed_invitations
+
+    @staticmethod
+    def list_resource_share_associations(client, resource_arn):
+        associations = []
+        try:
+            log.debug(f'RAM list_resource_share_associations : {resource_arn}')
+
+            paginator = client.get_paginator(
+                'get_resource_share_associations'
+            ).paginate(
+                associationType='RESOURCE',
+                resourceArn=resource_arn,
+            )
+            for page in paginator:
+                associations.extend(page['resourceShareAssociations'])
+
+            log.info(f'Found resource_share_associations : {associations}')
+            return associations
+
+        except ClientError as e:
+            log.error(
+                f'Could not find resource share associations for resource {resource_arn} due to: {e}'
+            )
+            raise e
+
+    @staticmethod
+    def delete_resource_shares(client, resource_arn):
+        log.info(f'Cleaning RAM resource shares for resource: {resource_arn}')
+        try:
+            associations = Ram.list_resource_share_associations(client, resource_arn)
+            for a in associations:
+                log.info(f"Deleting resource share: {a['resourceShareArn']}")
+                client.delete_resource_share(resourceShareArn=a['resourceShareArn'])
+            return associations
+        except ClientError as e:
+            log.error(f'Failed cleaning RAM resource shares due to: {e}')
+
+    @staticmethod
+    def delete_lfv1_resource_shares_for_table(client, resource_arn):
+        log.info(f'Cleaning LF V1 RAM resource shares for resource: {resource_arn}')
+        try:
+            associations = Ram.list_resource_share_associations(client, resource_arn)
+            for a in associations:
+                if (
+                    'LakeFormation' in a['resourceShareName']
+                    and 'LakeFormation-V2' not in a['resourceShareName']
+                ):
+                    log.info(
+                        f"Found lakeformation V1 RAM association: {a['resourceShareName']}. "
+                        'Deleting it ...'
+                    )
+                    client.delete_resource_share(
+                        resourceShareArn=a['resourceShareArn']
+                    )
+            return associations
+        except ClientError as e:
+            log.error(f'Failed cleaning RAM resource shares due to: {e}')
+
+    @staticmethod
+    def delete_lakeformation_v1_resource_shares(client):
+        log.info('Cleaning LF V1 RAM resource shares...')
+
+        try:
+            resources = []
+            paginator = client.get_paginator('list_resources').paginate(
+                resourceOwner='SELF',
+                resourceRegionScope='REGIONAL',
+            )
+            for page in paginator:
+                resources.extend(page['resources'])
+
+            log.info(f'Found resources : {len(resources)}')
+            resource_shares = []
+            for r in resources:
+                paginator = client.get_paginator('get_resource_shares').paginate(
+                    resourceShareArns=[r['resourceShareArn']],
+                    resourceOwner='SELF',
+                )
+                for page in paginator:
+                    resource_shares.extend(page['resourceShares'])
+            for rs in resource_shares:
+                if (
+                    'LakeFormation' in rs['name']
+                    and 'LakeFormation-V2' not in rs['name']
+                ):
+                    log.info(
+                        f"Found lakeformation V1 RAM association: {rs['name']}. "
+                        'Deleting it ...'
+                    )
+                    client.delete_resource_share(
+                        resourceShareArn=rs['resourceShareArn']
+                    )
+
+        except ClientError as e:
+            log.error(f'Failed cleaning RAM resource shares due to: {e}')
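The Ram helpers above are driven once per shared Glue table by the sharing task. A minimal sketch of the expected call pattern, illustrative only: the account ids, region and table coordinates below are invented.

    from dataall.aws.handlers.ram import Ram

    data = {
        'source': {
            'accountid': '111111111111',
            'region': 'eu-west-1',
            'database': 'dh_sales',
            'tablename': 'orders',
        },
        'target': {'accountid': '222222222222', 'region': 'eu-west-1'},
    }
    # Accepts any pending LakeFormation RAM invitation sent from source to
    # target for the table above; expired or rejected invitations flag the
    # table for a retry of the share.
    retry_share_table, failed_invitations = Ram.accept_ram_invitation(**data)
    if retry_share_table:
        # the caller re-grants the table and accepts the fresh invitation
        pass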
diff --git a/backend/dataall/db/api/share_object.py b/backend/dataall/db/api/share_object.py
index e1df35e74..8c2a13fbd 100644
--- a/backend/dataall/db/api/share_object.py
+++ b/backend/dataall/db/api/share_object.py
@@ -1,15 +1,14 @@
 import logging
-from datetime import datetime
 
 from sqlalchemy import and_, or_, func, case
 
-from .. import models, exceptions, permissions, paginate
-from .. import api
 from . import (
     has_resource_perm,
     ResourcePolicy,
     Environment,
 )
+from .. import api
+from .. import models, exceptions, permissions, paginate
 from ..models.Enums import ShareObjectStatus, ShareableType, PrincipalType
 
 logger = logging.getLogger(__name__)
@@ -611,3 +610,151 @@ def list_user_sent_share_requests(
             )
         )
         return paginate(query, data.get('page', 1), data.get('pageSize', 10)).to_dict()
+
+    @staticmethod
+    def get_share_by_dataset_and_environment(session, dataset_uri, environment_uri):
+        environment_groups = session.query(models.EnvironmentGroup).filter(
+            models.EnvironmentGroup.environmentUri == environment_uri
+        )
+        groups = [g.groupUri for g in environment_groups]
+        share = (
+            session.query(models.ShareObject)
+            .filter(
+                and_(
+                    models.ShareObject.datasetUri == dataset_uri,
+                    models.ShareObject.environmentUri == environment_uri,
+                    models.ShareObject.principalId.in_(groups),
+                )
+            )
+            .first()
+        )
+        if not share:
+            raise exceptions.ObjectNotFound('Share', f'{dataset_uri}/{environment_uri}')
+        return share
+
+    @staticmethod
+    def update_share_item_status(
+        session,
+        share_item: models.ShareObjectItem,
+        status: str,
+    ) -> models.ShareObjectItem:
+
+        logger.info(f'Updating share item status to {status}')
+        share_item.status = status
+        session.commit()
+        return share_item
+
+    @staticmethod
+    def find_share_item_by_table(
+        session,
+        share: models.ShareObject,
+        table: models.DatasetTable,
+    ) -> models.ShareObjectItem:
+        share_item: models.ShareObjectItem = (
+            session.query(models.ShareObjectItem)
+            .filter(
+                and_(
+                    models.ShareObjectItem.itemUri == table.tableUri,
+                    models.ShareObjectItem.shareUri == share.shareUri,
+                )
+            )
+            .first()
+        )
+        return share_item
+
+    @staticmethod
+    def get_share_data(session, share_uri, status):
+        share: models.ShareObject = session.query(models.ShareObject).get(share_uri)
+        if not share:
+            raise exceptions.ObjectNotFound('Share', share_uri)
+
+        dataset: models.Dataset = session.query(models.Dataset).get(share.datasetUri)
+        if not dataset:
+            raise exceptions.ObjectNotFound('Dataset', share.datasetUri)
+
+        source_environment: models.Environment = session.query(models.Environment).get(
+            dataset.environmentUri
+        )
+        if not source_environment:
+            raise exceptions.ObjectNotFound('SourceEnvironment', dataset.environmentUri)
+
+        target_environment: models.Environment = session.query(models.Environment).get(
+            share.environmentUri
+        )
+        if not target_environment:
+            raise exceptions.ObjectNotFound('TargetEnvironment', share.environmentUri)
+
+        shared_tables = (
+            session.query(models.DatasetTable)
+            .join(
+                models.ShareObjectItem,
+                models.ShareObjectItem.itemUri == models.DatasetTable.tableUri,
+            )
+            .join(
+                models.ShareObject,
+                models.ShareObject.shareUri == models.ShareObjectItem.shareUri,
+            )
+            .filter(
+                and_(
+                    models.ShareObject.datasetUri == dataset.datasetUri,
+                    models.ShareObject.environmentUri
+                    == target_environment.environmentUri,
+                    models.ShareObject.status.in_(status),
+                )
+            )
+            .all()
+        )
+
+        env_group: models.EnvironmentGroup = (
+            session.query(models.EnvironmentGroup)
+            .filter(
+                and_(
+                    models.EnvironmentGroup.environmentUri == share.environmentUri,
+                    models.EnvironmentGroup.groupUri == share.principalId,
+                )
+            )
+            .first()
+        )
+        if not env_group:
+            raise Exception(
+                f'Share object team {share.principalId} is not a member of the '
+                f'environment {target_environment.name}/{target_environment.AwsAccountId}'
+            )
+        return (
+            env_group,
+            dataset,
+            share,
+            shared_tables,
+            source_environment,
+            target_environment,
+        )
+
+    @staticmethod
+    def other_approved_share_object_exists(session, environment_uri):
+        return (
+            session.query(models.ShareObject)
+            .filter(
+                and_(
+                    models.ShareObject.environmentUri == environment_uri,
+                    models.ShareObject.status
+                    == models.Enums.ShareObjectStatus.Approved.value,
+                )
+            )
+            .all()
+        )
+
+    @staticmethod
+    def is_shared_table(session, environment_uri, dataset_uri, table_name):
+        return (
+            session.query(models.ShareObjectItem)
+            .join(
+                models.ShareObject,
+                models.ShareObjectItem.shareUri == models.ShareObject.shareUri,
+            )
+            .filter(
+                and_(
+                    models.ShareObjectItem.GlueTableName == table_name,
+                    models.ShareObject.datasetUri == dataset_uri,
+                    models.ShareObject.status == models.Enums.ShareObjectStatus.Approved.value,
+                    models.ShareObject.environmentUri == environment_uri,
+                )
+            )
+            .first()
+        )
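The new ShareObject helpers are consumed by the sharing tasks roughly as follows. A sketch under stated assumptions: the engine wiring follows the existing data.all scoped-session pattern, and the envname and share uri are placeholders.

    from dataall.db import api, models, get_engine

    engine = get_engine(envname='local')  # placeholder environment name
    with engine.scoped_session() as session:
        (
            env_group,
            dataset,
            share,
            shared_tables,
            source_environment,
            target_environment,
        ) = api.ShareObject.get_share_data(session, 'share-uri', ['Approved'])
        for table in shared_tables:
            # locate the request item for this table and mark it in progress
            item = api.ShareObject.find_share_item_by_table(session, share, table)
            api.ShareObject.update_share_item_status(
                session, item, models.ShareObjectStatus.Share_In_Progress.value
            )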
diff --git a/backend/dataall/tasks/data_sharing/__init__.py b/backend/dataall/tasks/data_sharing/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/dataall/tasks/data_sharing/common/__init__.py b/backend/dataall/tasks/data_sharing/common/__init__.py
new file mode 100644
index 000000000..e69de29bb
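share_approval.py below defines the abstract base that the same-account and cross-account flows extend. A sketch of the extension contract, with an invented subclass name; the real subclasses appear later in this patch:

    from dataall.tasks.data_sharing.common.share_approval import ShareApproval

    class MyShareApproval(ShareApproval):
        def approve_share(self) -> bool:
            # 1) resolve principals (team IAM role, optional QuickSight group)
            principals = self.get_share_principals()
            # 2) create the per-team shared database and grant DESCRIBE
            self.create_shared_database(
                self.target_environment, self.dataset, self.shared_db_name, principals
            )
            # 3) per table: verify it still exists on Glue, share it, create
            #    the resource link, then update the share item status
            return True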
diff --git a/backend/dataall/tasks/data_sharing/common/share_approval.py b/backend/dataall/tasks/data_sharing/common/share_approval.py
new file mode 100644
index 000000000..ffa53ca73
--- /dev/null
+++ b/backend/dataall/tasks/data_sharing/common/share_approval.py
@@ -0,0 +1,369 @@
+import abc
+import logging
+import uuid
+
+from botocore.exceptions import ClientError
+
+from ....aws.handlers.glue import Glue
+from ....aws.handlers.lakeformation import LakeFormation
+from ....aws.handlers.quicksight import Quicksight
+from ....aws.handlers.sts import SessionHelper
+from ....db import api, exceptions, models
+from ....utils.alarm_service import AlarmService
+
+logger = logging.getLogger(__name__)
+
+
+class ShareApproval:
+    def __init__(
+        self,
+        session,
+        shared_db_name: str,
+        dataset: models.Dataset,
+        share: models.ShareObject,
+        shared_tables: [models.DatasetTable],
+        source_environment: models.Environment,
+        target_environment: models.Environment,
+        env_group: models.EnvironmentGroup,
+    ):
+        self.session = session
+        self.env_group = env_group
+        self.dataset = dataset
+        self.share = share
+        self.shared_tables = shared_tables
+        self.source_environment = source_environment
+        self.target_environment = target_environment
+        self.shared_db_name = shared_db_name
+
+    @abc.abstractmethod
+    def approve_share(self) -> [str]:
+        raise NotImplementedError
+
+    def get_share_principals(self) -> [str]:
+        """
+        Builds the list of principals of the share request
+        Returns
+        -------
+        List of principals
+        """
+        principals = [self.env_group.environmentIAMRoleArn]
+        if self.target_environment.dashboardsEnabled:
+            q_group = Quicksight.get_quicksight_group_arn(
+                self.target_environment.AwsAccountId
+            )
+            if q_group:
+                principals.append(q_group)
+        return principals
+
+    def check_share_item_exists_on_glue_catalog(
+        self, share_item: models.ShareObjectItem, table: models.DatasetTable
+    ) -> None:
+        """
+        Checks that a table in the share request
+        still exists on the Glue catalog before sharing
+
+        Parameters
+        ----------
+        share_item : request share item
+        table : dataset table
+
+        Returns
+        -------
+        None, raises exceptions.AWSResourceNotFound if the table is gone
+        """
+        if not Glue.table_exists(
+            accountid=self.source_environment.AwsAccountId,
+            region=self.source_environment.region,
+            database=table.GlueDatabaseName,
+            tablename=table.GlueTableName,
+        ):
+            raise exceptions.AWSResourceNotFound(
+                action='ApproveShare',
+                message=(
+                    f'Share Item {share_item.itemUri} found on share request'
+                    f' but its corresponding Glue table {table.GlueTableName} does not exist.'
+                ),
+            )
+
+    @classmethod
+    def create_shared_database(
+        cls,
+        target_environment: models.Environment,
+        dataset: models.Dataset,
+        shared_db_name: str,
+        principals: [str],
+    ) -> dict:
+        """
+        Creates the shared database if it does not exist.
+        1) Grants the pivot role ALL permissions on the shared db
+        2) Grants the team role DESCRIBE-only permission
+
+        Parameters
+        ----------
+        target_environment :
+        dataset :
+        shared_db_name :
+        principals :
+
+        Returns
+        -------
+        boto3 glue create_database
+        """
+
+        logger.info(
+            f'Creating shared db ...'
+            f'{target_environment.AwsAccountId}://{shared_db_name}'
+        )
+
+        database = Glue.create_database(
+            target_environment.AwsAccountId,
+            shared_db_name,
+            target_environment.region,
+            f's3://{dataset.S3BucketName}',
+        )
+
+        LakeFormation.grant_pivot_role_all_database_permissions(
+            target_environment.AwsAccountId, target_environment.region, shared_db_name
+        )
+
+        LakeFormation.grant_permissions_to_database(
+            client=SessionHelper.remote_session(
+                accountid=target_environment.AwsAccountId
+            ).client('lakeformation', region_name=target_environment.region),
+            principals=principals,
+            database_name=shared_db_name,
+            permissions=['DESCRIBE'],
+        )
+
+        return database
+
+    @classmethod
+    def create_resource_link(cls, **data) -> dict:
+        """
+        Creates a resource link to the source shared Glue table
+        Parameters
+        ----------
+        data : data of source and target accounts
+
+        Returns
+        -------
+        boto3 creation response
+        """
+        source = data['source']
+        target = data['target']
+        target_session = SessionHelper.remote_session(accountid=target['accountid'])
+        lakeformation_client = target_session.client(
+            'lakeformation', region_name=target['region']
+        )
+        target_database = target['database']
+        resource_link_input = {
+            'Name': source['tablename'],
+            'TargetTable': {
+                'CatalogId': data['source']['accountid'],
+                'DatabaseName': source['database'],
+                'Name': source['tablename'],
+            },
+        }
+
+        try:
+            resource_link = Glue.create_resource_link(
+                accountid=target['accountid'],
+                region=target['region'],
+                database=target_database,
+                resource_link_name=source['tablename'],
+                resource_link_input=resource_link_input,
+            )
+
+            LakeFormation.grant_resource_link_permission(
+                lakeformation_client, source, target, target_database
+            )
+
+            LakeFormation.grant_resource_link_permission_on_target(
+                lakeformation_client, source, target
+            )
+
+            return resource_link
+
+        except ClientError as e:
+            logger.warning(
+                f'Resource Link {resource_link_input} was not created due to: {e}'
+            )
+            raise e
+
+    @classmethod
+    def clean_shared_database(
+        cls,
+        session,
+        dataset: models.Dataset,
+        shared_tables: [models.DatasetTable],
+        target_environment: models.Environment,
+        shared_db_name: str,
+    ) -> [str]:
+        """
+        After share approval, verify that the shared database
+        does not contain items that were removed from the share request.
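+        Example (illustrative): if the share request now contains only
+        'table_a' but the shared database still holds resource links for
+        'table_a' and 'table_b', access to 'table_b' is revoked and its
+        resource link deleted, unless another approved share object for the
+        same environment still references it.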
+ + Parameters + ---------- + session : db + dataset : models.Dataset + shared_tables : [models.DatasetTable] + target_environment : models.Environment + shared_db_name : shared database name + + Returns + ------- + List of deleted tables from the shared database + """ + tables_to_delete = [] + + shared_glue_tables = Glue.list_glue_database_tables( + accountid=target_environment.AwsAccountId, + database=shared_db_name, + region=target_environment.region, + ) + logger.info( + f'Shared database {shared_db_name} glue tables: {shared_glue_tables}' + ) + + shared_tables = [t.GlueTableName for t in shared_tables] + logger.info(f'Share items of the share object {shared_tables}') + + aws_session = SessionHelper.remote_session(accountid=dataset.AwsAccountId) + client = aws_session.client('lakeformation', region_name=dataset.region) + + for table in shared_glue_tables: + if table['Name'] not in shared_tables: + logger.info( + f'Found a table not part of the share: {dataset.GlueDatabaseName}//{table["Name"]}' + ) + is_shared = api.ShareObject.is_shared_table( + session, + target_environment.environmentUri, + dataset.datasetUri, + table['Name'], + ) + if not is_shared: + logger.info( + f'Access to table {dataset.AwsAccountId}//{dataset.GlueDatabaseName}//{table["Name"]} ' + f'will be removed for account {target_environment.AwsAccountId}' + ) + if Glue.table_exists( + **{ + 'accountid': dataset.AwsAccountId, + 'region': dataset.region, + 'database': dataset.GlueDatabaseName, + 'tablename': table['Name'], + } + ): + LakeFormation.batch_revoke_permissions( + client, + target_environment.AwsAccountId, + [ + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 'DataLakePrincipalIdentifier': target_environment.AwsAccountId + }, + 'Resource': { + 'TableWithColumns': { + 'DatabaseName': dataset.GlueDatabaseName, + 'Name': table['Name'], + 'ColumnWildcard': {}, + 'CatalogId': dataset.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + 'PermissionsWithGrantOption': [ + 'DESCRIBE', + 'SELECT', + ], + } + ], + ) + + tables_to_delete.append(table['Name']) + + Glue.batch_delete_tables( + accountid=target_environment.AwsAccountId, + region=target_environment.region, + database=shared_db_name, + tables=tables_to_delete, + ) + + return tables_to_delete + + def handle_share_failure( + self, + table: models.DatasetTable, + share_item: models.ShareObjectItem, + error: Exception, + ) -> None: + """ + Handles share failure by raising an alarm to alarmsTopic + Parameters + ---------- + table : dataset table + share_item : failed item + error : share error + + Returns + ------- + None + """ + logging.error( + f'Failed to share table {table.GlueTableName} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region}' + f'due to: {error}' + ) + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_Failed.value, + ) + AlarmService().trigger_table_sharing_failure_alarm( + table, self.share, self.target_environment + ) + + def build_share_data(self, principals: [str], table: models.DatasetTable) -> dict: + """ + Build aws dict for boto3 operations on Glue and LF from share data + Parameters + ---------- + principals : team role + table : dataset table + + Returns + ------- + dict for boto3 operations + """ + data = { + 'source': { + 'accountid': self.source_environment.AwsAccountId, + 'region': self.source_environment.region, + 
'database': table.GlueDatabaseName, + 'tablename': table.GlueTableName, + }, + 'target': { + 'accountid': self.target_environment.AwsAccountId, + 'region': self.target_environment.region, + 'principals': principals, + 'database': self.shared_db_name, + }, + } + return data + + def delete_deprecated_shared_database(self) -> bool: + """ + Deletes deprecated shared db + Returns + ------- + True if delete is successful + """ + return Glue.delete_database( + accountid=self.dataset.AwsAccountId, + region=self.dataset.region, + database=f'{self.dataset.GlueDatabaseName}shared', + ) diff --git a/backend/dataall/tasks/data_sharing/common/share_revoke.py b/backend/dataall/tasks/data_sharing/common/share_revoke.py new file mode 100644 index 000000000..a92896366 --- /dev/null +++ b/backend/dataall/tasks/data_sharing/common/share_revoke.py @@ -0,0 +1,165 @@ +import abc +import logging +import uuid + +from ....aws.handlers.glue import Glue +from ....aws.handlers.lakeformation import LakeFormation +from ....aws.handlers.sts import SessionHelper +from ....db import models, api, exceptions +from ....utils.alarm_service import AlarmService + +log = logging.getLogger(__name__) + + +class ShareRevoke: + def __init__( + self, + session, + shared_db_name, + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ): + self.session = session + self.env_group = env_group + self.dataset = dataset + self.share = share + self.shared_tables = shared_tables + self.source_environment = source_environment + self.target_environment = target_environment + self.shared_db_name = shared_db_name + + @abc.abstractmethod + def revoke_share(self): + return NotImplementedError + + def revoke_resource_links_access(self) -> [dict]: + """ + Loops through share request items and revokes access on LF + Returns + ------- + List of revoke entries + """ + aws_session = SessionHelper.remote_session( + accountid=self.target_environment.AwsAccountId + ) + client = aws_session.client( + 'lakeformation', region_name=self.target_environment.region + ) + revoke_entries = [] + + for table in self.shared_tables: + share_item = api.ShareObject.find_share_item_by_table( + self.session, self.share, table + ) + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_In_Progress.value, + ) + + try: + data = { + 'accountid': self.target_environment.AwsAccountId, + 'region': self.target_environment.region, + 'database': self.shared_db_name, + 'tablename': table.GlueTableName, + } + + log.info(f'Starting revoke for: {data}') + + if Glue.table_exists(**data): + revoke_entries.append( + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 'DataLakePrincipalIdentifier': self.env_group.environmentIAMRoleArn + }, + 'Resource': { + 'Table': { + 'DatabaseName': self.shared_db_name, + 'Name': table.GlueTableName, + 'CatalogId': self.target_environment.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + } + ) + + log.info(f'Revoking permissions for entries : {revoke_entries}') + + LakeFormation.batch_revoke_permissions( + client, self.target_environment.AwsAccountId, revoke_entries + ) + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_Share_Succeeded.value, + ) + + except Exception as e: + logging.error( + f'Failed to revoke LF permissions to table share {table.GlueTableName} ' + f'on target account {self.target_environment.AwsAccountId}/{self.target_environment.region}' + f'due to: {e}' + ) + 
api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_Share_Failed.value, + ) + AlarmService().trigger_revoke_sharing_failure_alarm( + table, self.share, self.target_environment + ) + + return revoke_entries + + def delete_shared_database(self) -> bool: + """ + Deletes shared database when share request is rejected + + Returns + ------- + bool + """ + log.info(f'Deleting shared database {self.shared_db_name}') + return Glue.delete_database( + accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, + database=self.shared_db_name, + ) + + def check_share_item_exists_on_glue_catalog( + self, share_item: models.ShareObjectItem, table: models.DatasetTable + ) -> None: + """ + Checks if a table in the share request + still exists on the Glue catalog before revoking share + + Parameters + ---------- + share_item : request share item + table : dataset table + + Returns + ------- + exceptions.AWSResourceNotFound + """ + if not Glue.table_exists( + accountid=self.source_environment.AwsAccountId, + region=self.source_environment.region, + database=table.GlueDatabaseName, + tablename=table.GlueTableName, + ): + raise exceptions.AWSResourceNotFound( + action='RevokeShare', + message=( + f'Share Item {share_item.itemUri} found on share request' + f' but its correspondent Glue table {table.GlueTableName} does not exist.' + ), + ) diff --git a/backend/dataall/tasks/data_sharing/cross_account/__init__.py b/backend/dataall/tasks/data_sharing/cross_account/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py new file mode 100644 index 000000000..f2c9b47df --- /dev/null +++ b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py @@ -0,0 +1,173 @@ +import logging +import time + +from botocore.exceptions import ClientError + +from ..common.share_approval import ShareApproval +from ....aws.handlers.lakeformation import LakeFormation +from ....aws.handlers.ram import Ram +from ....aws.handlers.sts import SessionHelper +from ....db import models, api + +log = logging.getLogger(__name__) + + +class CrossAccountShareApproval(ShareApproval): + def __init__( + self, + session, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + target_environment: models.Environment, + env_group: models.EnvironmentGroup, + ): + super().__init__( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ) + + def approve_share( + self, + ) -> bool: + """ + 1) Gets share principals + 2) Creates the shared database if doesn't exist + 3) For each share request item: + a) update its status to share in progress + b) check if share item exists on glue catalog raise error if not and flag share item status to failed + c) grant external account to target account + d) accept Ram invitation if pending + e) create resource link on target account + f) grant permission to resource link for team role on target account + g) grant permission to resource link for team role on source account + h) update share item status to share successful + 4) Update shareddb by removing items not part of the share request + 5) Delete deprecated shareddb + + Returns + ------- + True if share is approved successfully + """ + principals = 
self.get_share_principals() + + self.create_shared_database( + self.target_environment, self.dataset, self.shared_db_name, principals + ) + + for table in self.shared_tables: + + share_item = api.ShareObject.find_share_item_by_table( + self.session, self.share, table + ) + if not share_item: + log.warning( + f'Share Item not found for {self.share.shareUri} ' + f'and Dataset Table {table.GlueTableName} continuing loop...' + ) + continue + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_In_Progress.value, + ) + + try: + + self.check_share_item_exists_on_glue_catalog(share_item, table) + + data = self.build_share_data(principals, table) + + self.share_table_with_target_account(**data) + + ( + retry_share_table, + failed_invitations, + ) = Ram.accept_ram_invitation(**data) + + if retry_share_table: + self.share_table_with_target_account(**data) + Ram.accept_ram_invitation(**data) + + self.create_resource_link(**data) + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_Succeeded.value, + ) + + except Exception as e: + self.handle_share_failure(table, share_item, e) + + self.clean_shared_database( + self.session, + self.dataset, + self.shared_tables, + self.target_environment, + self.shared_db_name, + ) + + self.delete_deprecated_shared_database() + + return True + + @classmethod + def share_table_with_target_account(cls, **data): + """ + Shares tables using Lake Formation + Sharing feature may take some extra seconds + :param data: + :return: + """ + source_accountid = data['source']['accountid'] + source_region = data['source']['region'] + + target_accountid = data['target']['accountid'] + target_region = data['target']['region'] + + source_session = SessionHelper.remote_session(accountid=source_accountid) + source_lf_client = source_session.client( + 'lakeformation', region_name=source_region + ) + try: + + LakeFormation.revoke_iamallowedgroups_super_permission_from_table( + source_lf_client, + source_accountid, + data['source']['database'], + data['source']['tablename'], + ) + + LakeFormation.grant_permissions_to_table( + source_lf_client, + target_accountid, + data['source']['database'], + data['source']['tablename'], + ['DESCRIBE', 'SELECT'], + ['DESCRIBE', 'SELECT'], + ) + + log.info( + f"Granted access to table {data['source']['tablename']} " + f'to external account {target_accountid} ' + ) + return True + + except ClientError as e: + logging.error( + f'Failed granting access to table {data["source"]["tablename"]} ' + f'from {source_accountid} / {source_region} ' + f'to external account{target_accountid}/{target_region}' + f'due to: {e}' + ) + raise e diff --git a/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py b/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py new file mode 100644 index 000000000..fcb1afa43 --- /dev/null +++ b/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py @@ -0,0 +1,120 @@ +import logging +import uuid + +from ..common.share_revoke import ShareRevoke +from ....aws.handlers.lakeformation import LakeFormation +from ....aws.handlers.ram import Ram +from ....aws.handlers.sts import SessionHelper +from ....db import api, models + +log = logging.getLogger(__name__) + + +class CrossAccountShareRevoke(ShareRevoke): + def __init__( + self, + session, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + 
target_environment: models.Environment, + env_group: models.EnvironmentGroup, + ): + super().__init__( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ) + + def revoke_share(self) -> bool: + """ + Revokes a share cross account + 1) revoke resource link access on target account + 2) delete shared database on target account + 3) revoke resource link access on source account + Returns + ------- + True if revoke is successful + """ + + self.revoke_resource_links_access() + + self.delete_shared_database() + + if not api.ShareObject.other_approved_share_object_exists( + self.session, self.target_environment.environmentUri + ): + self.revoke_external_account_access_on_source_account() + + return True + + def revoke_external_account_access_on_source_account(self) -> [dict]: + """ + 1) Revokes access to external account + if dataset is not shared with any other team from the same workspace + 2) Deletes resource_shares on RAM associated to revoked tables + + Returns + ------- + List of revoke entries + """ + log.info( + f'Revoking Access for AWS account: {self.target_environment.AwsAccountId}' + ) + aws_session = SessionHelper.remote_session( + accountid=self.source_environment.AwsAccountId + ) + client = aws_session.client( + 'lakeformation', region_name=self.source_environment.region + ) + revoke_entries = [] + for table in self.shared_tables: + + revoke_entries.append( + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 'DataLakePrincipalIdentifier': self.target_environment.AwsAccountId + }, + 'Resource': { + 'TableWithColumns': { + 'DatabaseName': table.GlueDatabaseName, + 'Name': table.GlueTableName, + 'ColumnWildcard': {}, + 'CatalogId': self.source_environment.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + 'PermissionsWithGrantOption': ['DESCRIBE', 'SELECT'], + } + ) + LakeFormation.batch_revoke_permissions( + client, self.source_environment.AwsAccountId, revoke_entries + ) + return revoke_entries + + def delete_ram_resource_shares(self, resource_arn: str) -> [dict]: + """ + Deletes resource share for the resource arn + Parameters + ---------- + resource_arn : glue table arn + + Returns + ------- + list of ram associations + """ + log.info(f'Cleaning RAM resource shares for resource: {resource_arn} ...') + return Ram.delete_resource_shares( + SessionHelper.remote_session( + accountid=self.source_environment.AwsAccountId + ).client('ram', region_name=self.source_environment.region), + resource_arn, + ) diff --git a/backend/dataall/tasks/data_sharing/data_sharing_service.py b/backend/dataall/tasks/data_sharing/data_sharing_service.py new file mode 100644 index 000000000..8fda17e2a --- /dev/null +++ b/backend/dataall/tasks/data_sharing/data_sharing_service.py @@ -0,0 +1,234 @@ +import logging +import os + +from .cross_account.approve_share import ( + CrossAccountShareApproval, +) +from .cross_account.revoke_share import ( + CrossAccountShareRevoke, +) +from .same_account.approve_share import ( + SameAccountShareApproval, +) +from .same_account.revoke_share import SameAccountShareRevoke +from ...aws.handlers.lakeformation import LakeFormation +from ...aws.handlers.ram import Ram +from ...aws.handlers.sts import SessionHelper +from ...db import api, models, Engine +from ...utils import Parameter + +log = logging.getLogger(__name__) + + +class DataSharingService: + def __init__(self): + pass + + @classmethod + def approve_share(cls, engine: Engine, share_uri: str) -> bool: + """ + 1) Retrieves share 
related model objects + 2) Build shared database name (unique db per team for a dataset) + 3) Grants pivot role ALL permissions on dataset db and its tables + 4) Calls sharing approval service + Parameters + ---------- + engine : db.engine + share_uri : share uri + + Returns + ------- + True if approve succeeds + """ + with engine.scoped_session() as session: + ( + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ) = api.ShareObject.get_share_data(session, share_uri, [models.Enums.ShareObjectStatus.Approved.value]) + + shared_db_name = cls.build_shared_db_name(dataset, share) + + LakeFormation.grant_pivot_role_all_database_permissions( + source_environment.AwsAccountId, + source_environment.region, + dataset.GlueDatabaseName, + ) + + if source_environment.AwsAccountId != target_environment.AwsAccountId: + return CrossAccountShareApproval( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ).approve_share() + + return SameAccountShareApproval( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ).approve_share() + + @classmethod + def reject_share(cls, engine: Engine, share_uri: str): + """ + 1) Retrieves share related model objects + 2) Build shared database name (unique db per team for a dataset) + 3) Grants pivot role ALL permissions on dataset db and its tables + 4) Calls sharing revoke service + + Parameters + ---------- + engine : db.engine + share_uri : share uri + + Returns + ------- + True if reject succeeds + """ + + with engine.scoped_session() as session: + ( + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ) = api.ShareObject.get_share_data(session, share_uri, [models.Enums.ShareObjectStatus.Rejected.value]) + + log.info(f'Revoking permissions for tables : {shared_tables}') + + shared_db_name = cls.build_shared_db_name(dataset, share) + + LakeFormation.grant_pivot_role_all_database_permissions( + source_environment.AwsAccountId, + source_environment.region, + dataset.GlueDatabaseName, + ) + + if source_environment.AwsAccountId != target_environment.AwsAccountId: + return CrossAccountShareRevoke( + session, + shared_db_name, + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ).revoke_share() + + return SameAccountShareRevoke( + session, + shared_db_name, + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ).revoke_share() + + @classmethod + def build_shared_db_name( + cls, dataset: models.Dataset, share: models.ShareObject + ) -> str: + """ + Build Glue shared database name. + Unique per share Uri. 
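+        Example (illustrative): for a dataset database 'dh_sales' and a
+        share with shareUri 'abc123', the result is 'dh_sales_shared_abc123',
+        truncated to 254 characters to stay within Glue's name length limit.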
+ Parameters + ---------- + dataset : models.Dataset + share : models.ShareObject + + Returns + ------- + Shared database name + """ + return (dataset.GlueDatabaseName + '_shared_' + share.shareUri)[:254] + + @classmethod + def clean_lfv1_ram_resources(cls, environment: models.Environment): + """ + Deletes LFV1 resource shares for an environment + Parameters + ---------- + environment : models.Environment + + Returns + ------- + None + """ + return Ram.delete_lakeformation_v1_resource_shares( + SessionHelper.remote_session(accountid=environment.AwsAccountId).client( + 'ram', region_name=environment.region + ) + ) + + @classmethod + def refresh_shares(cls, engine: Engine) -> bool: + """ + Refreshes the shares at scheduled frequency + Also cleans up LFV1 ram resource shares if enabled on SSM + Parameters + ---------- + engine : db.engine + + Returns + ------- + true if refresh succeeds + """ + with engine.scoped_session() as session: + environments = session.query(models.Environment).all() + shares = ( + session.query(models.ShareObject) + .filter(models.ShareObject.status.in_(['Approved', 'Rejected'])) + .all() + ) + + # Feature toggle: default value is False + if ( + Parameter().get_parameter( + os.getenv('envname', 'local'), 'shares/cleanlfv1ram' + ) + == 'True' + ): + log.info('LFV1 Cleanup toggle is enabled') + for e in environments: + log.info( + f'Cleaning LFV1 ram resource for environment: {e.AwsAccountId}/{e.region}...' + ) + cls.clean_lfv1_ram_resources(e) + + if not shares: + log.info('No Approved nor Rejected shares found. Nothing to do...') + return True + + for share in shares: + try: + log.info( + f'Refreshing share {share.shareUri} with {share.status} status...' + ) + if share.status == 'Approved': + cls.approve_share(engine, share.shareUri) + elif share.status == 'Rejected': + cls.reject_share(engine, share.shareUri) + except Exception as e: + log.error( + f'Failed refreshing share {share.shareUri} with {share.status}. 
' + f'due to: {e}' + ) + return True diff --git a/backend/dataall/tasks/data_sharing/same_account/__init__.py b/backend/dataall/tasks/data_sharing/same_account/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/tasks/data_sharing/same_account/approve_share.py b/backend/dataall/tasks/data_sharing/same_account/approve_share.py new file mode 100644 index 000000000..ec40d12d1 --- /dev/null +++ b/backend/dataall/tasks/data_sharing/same_account/approve_share.py @@ -0,0 +1,102 @@ +import logging + +from ..common.share_approval import ShareApproval +from ....db import models, api + +log = logging.getLogger(__name__) + + +class SameAccountShareApproval(ShareApproval): + def __init__( + self, + session, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + target_environment: models.Environment, + env_group: models.EnvironmentGroup, + ): + super().__init__( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ) + + def approve_share(self) -> bool: + """ + Approves a share request for same account sharing + 1) Gets share principals + 2) Creates the shared database if doesn't exist + 3) For each share request item: + a) update its status to share in progress + b) check if share item exists on glue catalog raise error if not and flag share item status to failed + e) create resource link on same account + g) grant permission to resource link for team role on source account + h) update share item status to share successful + 4) Update shareddb by removing items not part of the share request + 5) Delete deprecated shareddb + + Returns + ------- + True if share is successful + """ + + principals = self.get_share_principals() + + self.create_shared_database( + self.target_environment, self.dataset, self.shared_db_name, principals + ) + + for table in self.shared_tables: + + share_item = api.ShareObject.find_share_item_by_table( + self.session, self.share, table + ) + if not share_item: + log.info( + f'Share Item not found for {self.share.shareUri} ' + f'and Dataset Table {table.GlueTableName} continuing loop...' 
+ ) + continue + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_In_Progress.value, + ) + + try: + + self.check_share_item_exists_on_glue_catalog(share_item, table) + + data = self.build_share_data(principals, table) + + self.create_resource_link(**data) + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_Succeeded.value, + ) + + except Exception as e: + self.handle_share_failure(table, share_item, e) + + self.clean_shared_database( + self.session, + self.dataset, + self.shared_tables, + self.target_environment, + self.shared_db_name, + ) + + self.delete_deprecated_shared_database() + + return True diff --git a/backend/dataall/tasks/data_sharing/same_account/revoke_share.py b/backend/dataall/tasks/data_sharing/same_account/revoke_share.py new file mode 100644 index 000000000..b3cfe6a6d --- /dev/null +++ b/backend/dataall/tasks/data_sharing/same_account/revoke_share.py @@ -0,0 +1,46 @@ +import logging + +from ..common.share_revoke import ShareRevoke +from ....db import models + +log = logging.getLogger(__name__) + + +class SameAccountShareRevoke(ShareRevoke): + def __init__( + self, + session, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + target_environment: models.Environment, + env_group: models.EnvironmentGroup, + ): + super().__init__( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ) + + def revoke_share(self) -> bool: + """ + Revokes a share on same account + 1) revoke resource link access + 2) delete shared database on target account + Returns + ------- + True if revoke is successful + """ + + self.revoke_resource_links_access() + + self.delete_shared_database() + + return True diff --git a/backend/dataall/tasks/share_manager.py b/backend/dataall/tasks/share_manager.py index 93483c737..637f86d39 100644 --- a/backend/dataall/tasks/share_manager.py +++ b/backend/dataall/tasks/share_manager.py @@ -1,20 +1,9 @@ import logging import os import sys -import time -import uuid -from botocore.exceptions import ClientError -from sqlalchemy import and_ - -from .. 
import db -from ..aws.handlers.glue import Glue -from ..aws.handlers.quicksight import Quicksight -from ..aws.handlers.sts import SessionHelper +from .data_sharing.data_sharing_service import DataSharingService from ..db import get_engine -from ..db import models, exceptions -from ..searchproxy import connect -from ..utils.alarm_service import AlarmService root = logging.getLogger() root.setLevel(logging.INFO) @@ -23,865 +12,26 @@ log = logging.getLogger(__name__) -class ShareManager: - def __init__(self): - pass - - @staticmethod - def approve_share(engine, share_uri): - """ - Manages the approval of Glue tables sharing through LakeFormation - :param engine: - :param share_uri: - :return: - """ - with engine.scoped_session() as session: - ( - env_group, - dataset, - share, - shared_tables, - source_environment, - target_environment, - ) = ShareManager.get_share_data(session, share_uri, ['Approved']) - - principals = [env_group.environmentIAMRoleArn] - - if target_environment.dashboardsEnabled: - ShareManager.add_quicksight_group_to_shared_with_principals( - target_environment, principals - ) - - ShareManager.share_tables( - session, - share, - source_environment, - target_environment, - shared_tables, - principals, - ) - - ShareManager.clean_shared_database( - session, dataset, shared_tables, target_environment - ) - - return True - - @staticmethod - def share_tables( - session, - share: models.ShareObject, - source_environment: models.Environment, - target_environment: models.Environment, - shared_tables: [models.DatasetTable], - principals: [str], - ): - for table in shared_tables: - - share_item = ShareManager.get_share_item(session, share, table) - - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_In_Progress.value, - ) - - try: - data = { - 'source': { - 'accountid': source_environment.AwsAccountId, - 'region': source_environment.region, - 'database': table.GlueDatabaseName, - 'tablename': table.GlueTableName, - }, - 'target': { - 'accountid': target_environment.AwsAccountId, - 'region': target_environment.region, - 'principals': principals, - }, - } - - ShareManager.share_table_with_target_account(**data) - - ShareManager.accept_ram_invitation(**data) - - ShareManager.create_resource_link_on_target_account(**data) - - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_Succeeded.value, - ) - - except Exception as e: - logging.error( - f'Failed to share table {table.GlueTableName} ' - f'from source account {source_environment.AwsAccountId}//{source_environment.region} ' - f'with target account {target_environment.AwsAccountId}/{target_environment.region}' - f'due to: {e}' - ) - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_Failed.value, - ) - AlarmService().trigger_table_sharing_failure_alarm( - table, share, target_environment - ) - - @staticmethod - def add_quicksight_group_to_shared_with_principals(target_environment, principals): - try: - group = Quicksight.describe_group( - client=Quicksight.get_quicksight_client_in_identity_region( - target_environment.AwsAccountId - ), - AwsAccountId=target_environment.AwsAccountId, - ) - if group and group.get('Group', {}).get('Arn'): - principals.append(group['Group']['Arn']) - except ClientError as e: - log.warning(f'Failed to retrieve Quicksight . 
group due to: {e}') - - @staticmethod - def share_table_with_target_account(**data): - """ - Shares tables using Lake Formation and RAM only when cross account - Sharing feature may take some extra seconds that is why we are retrying here - :param data: - :return: - """ - source_accountid = data['source']['accountid'] - source_region = data['source']['region'] - source_session = SessionHelper.remote_session(accountid=source_accountid) - source_lf_client = source_session.client( - 'lakeformation', region_name=source_region - ) - target_accountid = data['target']['accountid'] - target_region = data['target']['region'] - - try: - - ShareManager.revoke_iamallowedgroups_super_permission_from_table( - source_lf_client, - source_accountid, - data['source']['database'], - data['source']['tablename'], - ) - - time.sleep(5) - - ShareManager.grant_permissions_to_table( - source_lf_client, - target_accountid, - data['source']['database'], - data['source']['tablename'], - ['DESCRIBE', 'SELECT'], - ['DESCRIBE', 'SELECT'], - ) - - # Issue with ram associations taking more than 10 seconds - time.sleep(15) - - log.info( - f"Granted access to table {data['source']['tablename']} " - f'to external account {target_accountid} ' - ) - return True - - except ClientError as e: - logging.error( - f'Failed granting access to table {data["source"]["tablename"]} ' - f'from {source_accountid} / {source_region} ' - f'to external account{target_accountid}/{target_region}' - f'due to: {e}' - ) - raise e - - @staticmethod - def grant_permissions_to_database( - client, - principals, - database_name, - permissions, - permissions_with_grant_options=None, - ): - for principal in principals: - log.info( - f'Grant full permissions to role {principals} on database {database_name}' - ) - try: - - response = client.grant_permissions( - Principal={'DataLakePrincipalIdentifier': principal}, - Resource={ - 'Database': {'Name': database_name}, - }, - Permissions=permissions, - ) - log.info( - f'Successfully granted principal {principal} permissions {permissions} ' - f'to {database_name}: {response}' - ) - except ClientError as e: - log.error( - f'Could not grant permissions ' - f'principal {principal} ' - f'{permissions} to database {database_name} due to: {e}' - ) - - @staticmethod - def grant_permissions_to_table( - client, - principal, - database_name, - table_name, - permissions, - permissions_with_grant_options=None, - ): - try: - grant_dict = dict( - Principal={'DataLakePrincipalIdentifier': principal}, - Resource={'Table': {'DatabaseName': database_name, 'Name': table_name}}, - Permissions=permissions, - ) - if permissions_with_grant_options: - grant_dict[ - 'PermissionsWithGrantOption' - ] = permissions_with_grant_options - - response = client.grant_permissions(**grant_dict) - - log.info( - f'Successfully granted principal {principal} permissions {permissions} ' - f'to {database_name}.{table_name}: {response}' - ) - except ClientError as e: - log.warning( - f'Could not grant principal {principal}' - f'permissions {permissions} to table ' - f'{database_name}.{table_name} due to: {e}' - ) - # raise e - - @staticmethod - def create_resource_link_on_target_account(**data): - """ - When table is shared via Lake Formation from source account - A Glue resource link is created on the target account and the target database - :param data: - :return: - """ - source = data['source'] - target = data['target'] - target_session = SessionHelper.remote_session(accountid=target['accountid']) - lakeformation_client = target_session.client( - 
'lakeformation', region_name=target['region'] - ) - target_database = f"{source['database']}shared" - resource_link_input = { - 'Name': source['tablename'], - 'TargetTable': { - 'CatalogId': data['source']['accountid'], - 'DatabaseName': source['database'], - 'Name': source['tablename'], - }, - } - - # Creates the database if it doesnt exist - try: - - Glue._create_table( - **{ - 'accountid': target['accountid'], - 'region': target['region'], - 'database': target_database, - 'tablename': source['tablename'], - 'table_input': resource_link_input, - } - ) - ShareManager.grant_permissions_to_database( - lakeformation_client, target['principals'], target_database, ['ALL'] - ) - - ShareManager.grant_resource_link_permission( - lakeformation_client, source, target, target_database - ) - - ShareManager.grant_resource_link_permission_on_target( - lakeformation_client, source, target - ) - - log.info( - f'Granted resource link SELECT read access on target ' - f"to principals {target['principals']}" - ) - - except ClientError as e: - log.warning( - f'Resource Link {resource_link_input} was not created because: {e}' - ) - raise e - - @staticmethod - def grant_resource_link_permission_on_target(client, source, target): - for principal in target['principals']: - table_grant = dict( - Principal={'DataLakePrincipalIdentifier': principal}, - Resource={ - 'TableWithColumns': { - 'DatabaseName': source['database'], - 'Name': source['tablename'], - 'ColumnWildcard': {}, - 'CatalogId': source['accountid'], - } - }, - Permissions=['DESCRIBE', 'SELECT'], - PermissionsWithGrantOption=[], - ) - response = client.grant_permissions(**table_grant) - log.info( - f'Successfully granted permission to {principal} on target {source["tablename"]}: {response}' - ) - - @staticmethod - def grant_resource_link_permission( - lakeformation_client, source, target, target_database - ): - for principal in target['principals']: - resourcelink_grant = dict( - Principal={'DataLakePrincipalIdentifier': principal}, - Resource={ - 'Table': { - 'DatabaseName': target_database, - 'Name': source['tablename'], - 'CatalogId': target['accountid'], - } - }, - Permissions=['DESCRIBE', 'DROP', 'ALL'], - PermissionsWithGrantOption=[], - ) - try: - response = lakeformation_client.grant_permissions(**resourcelink_grant) - log.info( - f'Granted resource link DESCRIBE access ' - f'to project {principal} with response: {response}' - ) - except ClientError as e: - logging.error( - f'Failed granting {resourcelink_grant} to project role {principal} ' - f'read access to resource link {source["tablename"]} ' - f'due to: {e}' - ) - - @staticmethod - def get_resource_share_invitations(client, resource_share_arn): - try: - # Accepting one ram invitation - # response = client.get_resource_share_invitations( - # resourceShareArns=[resource_share_arn] - # ) - # Accepting All RAM invitations - response = client.get_resource_share_invitations() - invitation_list = response.get('resourceShareInvitations', []) - return invitation_list - except ClientError as e: - log.error( - f'Failed retrieving RAM resource ' - f'share invitations {resource_share_arn} due to {e}' - ) - raise e - - @staticmethod - def accept_resource_share_invitation(client, resource_share_invitation_arn): - try: - response = client.accept_resource_share_invitation( - resourceShareInvitationArn=resource_share_invitation_arn - ) - log.info(f'Accepted ram invitation {resource_share_invitation_arn}') - return response.get('resourceShareInvitation') - except ClientError as e: - if ( - 
e.response['Error']['Code'] - == 'ResourceShareInvitationAlreadyAcceptedException' - ): - log.info( - f'Failed to accept RAM invitation ' - f'{resource_share_invitation_arn} already accepted' - ) - else: - log.error( - f'Failed to accept RAM invitation ' - f'{resource_share_invitation_arn} due to {e}' - ) - raise e - - @staticmethod - def accept_ram_invitation(**data): - """ - Accepts RAM invitations on the target account - """ - source = data['source'] - target = data['target'] - target_session = SessionHelper.remote_session(accountid=target['accountid']) - ram = target_session.client('ram', region_name=target['region']) - resource_share_arn = ( - f'arn:aws:glue:{source["region"]}:{source["accountid"]}:' - f'table/{data["source"]["database"]}/{data["source"]["tablename"]}' - ) - ram_invitations = ShareManager.get_resource_share_invitations( - ram, resource_share_arn - ) - for invitation in ram_invitations: - ShareManager.accept_resource_share_invitation( - ram, invitation['resourceShareInvitationArn'] - ) - # Ram invitation acceptance is slow - time.sleep(5) - return True - - @staticmethod - def revoke_iamallowedgroups_super_permission_from_table( - client, accountid, database, table - ): - """ - When upgrading to LF tables can still have IAMAllowedGroups permissions - Unless this is revoked the table can not be shared using LakeFormation - :param client: - :param accountid: - :param database: - :param table: - :return: - """ - try: - log.info( - f'Revoking IAMAllowedGroups Super ' - f'permission for table {database}|{table}' - ) - ShareManager.batch_revoke_permissions( - client, - accountid, - entries=[ - { - 'Id': str(uuid.uuid4()), - 'Principal': {'DataLakePrincipalIdentifier': 'EVERYONE'}, - 'Resource': { - 'Table': { - 'DatabaseName': database, - 'Name': table, - 'CatalogId': accountid, - } - }, - 'Permissions': ['ALL'], - 'PermissionsWithGrantOption': [], - } - ], - ) - except ClientError as e: - log.warning( - f'Cloud not revoke IAMAllowedGroups Super ' - f'permission on table {database}|{table} due to {e}' - ) - - @staticmethod - def clean_shared_database(session, dataset, shared_tables, target_environment): - shared_glue_tables = Glue.list_glue_database_tables( - accountid=target_environment.AwsAccountId, - database=dataset.GlueDatabaseName + 'shared', - region=target_environment.region, - ) - shared_tables = [t.GlueTableName for t in shared_tables] - log.info( - f'Shared database {dataset.GlueDatabaseName}shared glue tables: {shared_glue_tables}' - ) - log.info(f'Share items of the share object {shared_tables}') - tables_to_delete = [] - aws_session = SessionHelper.remote_session(accountid=dataset.AwsAccountId) - client = aws_session.client('lakeformation', region_name=dataset.region) - for table in shared_glue_tables: - if table['Name'] not in shared_tables: - log.info( - f'Found a table not part of the share: {dataset.GlueDatabaseName}//{table["Name"]}' - ) - is_shared = ( - session.query(models.ShareObjectItem) - .join( - models.ShareObject, - models.ShareObjectItem.shareUri == models.ShareObject.shareUri, - ) - .filter( - and_( - models.ShareObjectItem.GlueTableName == table['Name'], - models.ShareObject.datasetUri == dataset.datasetUri, - models.ShareObject.status == 'Approved', - models.ShareObject.environmentUri - == target_environment.environmentUri, - ) - ) - .first() - ) - - if not is_shared: - log.info( - f'Access to table {dataset.AwsAccountId}//{dataset.GlueDatabaseName}//{table["Name"]} ' - f'will be removed for account {target_environment.AwsAccountId}' - ) - if 
Glue.table_exists( - **{ - 'accountid': dataset.AwsAccountId, - 'region': dataset.region, - 'database': dataset.GlueDatabaseName, - 'tablename': table['Name'], - } - ): - ShareManager.batch_revoke_permissions( - client, - target_environment.AwsAccountId, - [ - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': dataset.GlueDatabaseName, - 'Name': table['Name'], - 'ColumnWildcard': {}, - 'CatalogId': dataset.AwsAccountId, - } - }, - 'Permissions': ['SELECT'], - 'PermissionsWithGrantOption': ['SELECT'], - } - ], - ) - - tables_to_delete.append(table['Name']) - - if tables_to_delete: - log.info( - f'Deleting: {tables_to_delete} from shared database {dataset.GlueDatabaseName}shared' - ) - Glue.batch_delete_tables( - **{ - 'accountid': target_environment.AwsAccountId, - 'region': target_environment.region, - 'database': dataset.GlueDatabaseName + 'shared', - 'tables': tables_to_delete, - } - ) - - @staticmethod - def batch_revoke_permissions(client, accountid, entries): - """ - Batch revoke permissions to entries - Retry is set for api throttling - :param client: - :param accountid: - :param entries: - :return: - """ - entries_chunks: list = [entries[i : i + 20] for i in range(0, len(entries), 20)] - failures = [] - try: - for entries_chunk in entries_chunks: - response = client.batch_revoke_permissions( - CatalogId=accountid, Entries=entries_chunk - ) - log.info(f'Batch Revoke {entries_chunk} response: {response}') - failures.extend(response.get('Failures')) - if failures: - raise ClientError( - error_response={ - 'Error': { - 'Code': 'LakeFormation.batch_revoke_permissions', - 'Message': f'Operation ended with failures: {failures}', - } - }, - operation_name='LakeFormation.batch_revoke_permissions', - ) - except ClientError as e: - for failure in failures: - if not ( - failure['Error']['ErrorCode'] == 'InvalidInputException' - and ( - 'Grantee has no permissions' in failure['Error']['ErrorMessage'] - or 'No permissions revoked' in failure['Error']['ErrorMessage'] - ) - ): - log.warning(f'Batch Revoke ended with failures: {failures}') - raise e - - @staticmethod - def reject_share(engine, share_uri): - """ - Revokes access to the environment group that tables were share with - If there is no other approved share object for the same environment - Then revoke access to the AWS account on LakeFormation and delete the resource links - :param engine: - :param share_uri: - :return: - """ - - with engine.scoped_session() as session: - ( - env_group, - dataset, - share, - shared_tables, - source_environment, - target_environment, - ) = ShareManager.get_share_data(session, share_uri, ['Rejected']) - - log.info(f'Revoking permissions for tables : {shared_tables}') - - ShareManager.revoke_resource_links_access_on_target_account( - session, env_group, share, shared_tables, target_environment - ) - - ShareManager.delete_resource_links_on_target_account( - dataset, shared_tables, target_environment - ) - - ShareManager.clean_shared_database( - session, dataset, shared_tables, target_environment - ) - - if not ShareManager.other_approved_share_object_exists( - session, target_environment.environmentUri - ): - ShareManager.revoke_external_account_access_on_source_account( - shared_tables, source_environment, target_environment - ) - - return True - - @staticmethod - def revoke_external_account_access_on_source_account( - shared_tables, source_environment, target_environment - ): - 
log.info(f'Revoking Access for AWS account: {target_environment.AwsAccountId}') - aws_session = SessionHelper.remote_session( - accountid=source_environment.AwsAccountId - ) - client = aws_session.client( - 'lakeformation', region_name=source_environment.region - ) - revoke_entries = [] - for table in shared_tables: - revoke_entries.append( - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': table.GlueDatabaseName, - 'Name': table.GlueTableName, - 'ColumnWildcard': {}, - 'CatalogId': source_environment.AwsAccountId, - } - }, - 'Permissions': ['SELECT'], - 'PermissionsWithGrantOption': ['SELECT'], - } - ) - ShareManager.batch_revoke_permissions( - client, target_environment.AwsAccountId, revoke_entries - ) - - @staticmethod - def delete_resource_links_on_target_account( - dataset, shared_tables, target_environment - ): - resource_links = [table.GlueTableName for table in shared_tables] - log.info(f'Deleting resource links {resource_links}') - return Glue.batch_delete_tables( - **{ - 'accountid': target_environment.AwsAccountId, - 'region': target_environment.region, - 'database': dataset.GlueDatabaseName + 'shared', - 'tables': resource_links, - } - ) - - @staticmethod - def revoke_resource_links_access_on_target_account( - session, env_group, share, shared_tables, target_environment - ): - aws_session = SessionHelper.remote_session( - accountid=target_environment.AwsAccountId - ) - client = aws_session.client( - 'lakeformation', region_name=target_environment.region - ) - revoke_entries = [] - for table in shared_tables: - share_item = ShareManager.get_share_item(session, share, table) - - ShareManager.update_share_item_status( - session, share_item, models.ShareObjectStatus.Revoke_In_Progress.value - ) - try: - data = { - 'accountid': target_environment.AwsAccountId, - 'region': target_environment.region, - 'database': table.GlueDatabaseName + 'shared', - 'tablename': table.GlueTableName, - } - log.info(f'Starting revoke for: {data}') - - if Glue.table_exists(**data): - revoke_entries.append( - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': env_group.environmentIAMRoleArn - }, - 'Resource': { - 'Table': { - 'DatabaseName': table.GlueDatabaseName + 'shared', - 'Name': table.GlueTableName, - 'CatalogId': target_environment.AwsAccountId, - } - }, - 'Permissions': ['ALL', 'DESCRIBE', 'DROP'], - } - ) - - log.info(f'Revoking permissions for entries : {revoke_entries}') - - ShareManager.batch_revoke_permissions( - client, target_environment.AwsAccountId, revoke_entries - ) - - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Revoke_Share_Succeeded.value, - ) - except Exception as e: - logging.error( - f'Failed to revoke LF permissions to table share {table.GlueTableName} ' - f'on target account {target_environment.AwsAccountId}/{target_environment.region}' - f'due to: {e}' - ) - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Revoke_Share_Failed.value, - ) - AlarmService().trigger_revoke_sharing_failure_alarm( - table, share, target_environment - ) - - @staticmethod - def get_share_data(session, share_uri, status): - share: models.ShareObject = session.query(models.ShareObject).get(share_uri) - dataset: models.Dataset = session.query(models.Dataset).get(share.datasetUri) - source_environment: models.Environment = ( - 
db.api.Environment.get_environment_by_uri(session, dataset.environmentUri) - ) - target_environment: models.Environment = ( - db.api.Environment.get_environment_by_uri(session, share.environmentUri) - ) - shared_tables = db.api.DatasetTable.get_dataset_tables_shared_with_env( - session, - dataset_uri=dataset.datasetUri, - environment_uri=target_environment.environmentUri, - status=status, - ) - env_group: models.EnvironmentGroup = ( - session.query(models.EnvironmentGroup) - .filter( - and_( - models.EnvironmentGroup.environmentUri == share.environmentUri, - models.EnvironmentGroup.groupUri == share.principalId, - ) - ) - .first() - ) - if not env_group: - raise Exception( - f'Share object Team {share.principalId} is not a member of the ' - f'environment {target_environment.name}/{target_environment.AwsAccountId}' - ) - return ( - env_group, - dataset, - share, - shared_tables, - source_environment, - target_environment, - ) - - @staticmethod - def other_approved_share_object_exists(session, environment_uri): - return ( - session.query(models.ShareObject) - .filter( - and_( - models.Environment.environmentUri == environment_uri, - models.ShareObject.status - == models.Enums.ShareObjectStatus.Approved.value, - ) - ) - .all() - ) - - @staticmethod - def get_share_item( - session, - share: models.ShareObject, - table: models.DatasetTable, - ) -> models.ShareObjectItem: - share_item: models.ShareObjectItem = ( - session.query(models.ShareObjectItem) - .filter( - and_( - models.ShareObjectItem.itemUri == table.tableUri, - models.ShareObjectItem.shareUri == share.shareUri, - ) - ) - .first() - ) - - if not share_item: - raise exceptions.ObjectNotFound('ShareObjectItem', table.tableUri) - - return share_item - - @staticmethod - def update_share_item_status( - session, - share_item: models.ShareObjectItem, - status: str, - ) -> models.ShareObjectItem: - - log.info(f'Updating share item status to {status}') - share_item.status = status - session.commit() - return share_item - - if __name__ == '__main__': - ENVNAME = os.environ.get('envname', 'local') - ENGINE = get_engine(envname=ENVNAME) - ES = connect(envname=ENVNAME) + try: + ENVNAME = os.environ.get('envname', 'local') + ENGINE = get_engine(envname=ENVNAME) + + share_uri = os.getenv('shareUri') + share_item_uri = os.getenv('shareItemUri') + handler = os.getenv('handler') - share_uri = os.getenv('shareUri') - share_item_uri = os.getenv('shareItemUri') - handler = os.getenv('handler') + if handler == 'approve_share': + log.info(f'Starting approval task for share : {share_uri}...') + DataSharingService.approve_share(engine=ENGINE, share_uri=share_uri) - if handler == 'approve_share': - log.info(f'Starting approval task for share : {share_uri}...') - ShareManager.approve_share(engine=ENGINE, share_uri=share_uri) + elif handler == 'reject_share': + log.info(f'Starting revoke task for share : {share_uri}...') + DataSharingService.reject_share(engine=ENGINE, share_uri=share_uri) - elif handler == 'reject_share': - log.info(f'Starting revoke task for share : {share_uri}...') - ShareManager.reject_share(engine=ENGINE, share_uri=share_uri) + log.info('Sharing task finished successfully') - log.info('Sharing task finished successfully') + except Exception as e: + log.error(f'Sharing task failed due to: {e}') + raise e diff --git a/backend/dataall/tasks/shares_refresh.py b/backend/dataall/tasks/shares_refresh.py new file mode 100644 index 000000000..d1957bc74 --- /dev/null +++ b/backend/dataall/tasks/shares_refresh.py @@ -0,0 +1,28 @@ +import logging 
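+# Entrypoint for the scheduled "shares-refresh" ECS task defined in
+# deploy/stacks/container.py; it re-applies existing shares through
+# DataSharingService.refresh_shares.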
+import os
+import sys
+
+from .data_sharing.data_sharing_service import DataSharingService
+from ..db import get_engine
+
+root = logging.getLogger()
+root.setLevel(logging.INFO)
+if not root.hasHandlers():
+    root.addHandler(logging.StreamHandler(sys.stdout))
+log = logging.getLogger(__name__)
+
+
+if __name__ == '__main__':
+
+    try:
+        ENVNAME = os.environ.get('envname', 'local')
+        ENGINE = get_engine(envname=ENVNAME)
+
+        log.info('Starting refresh shares task...')
+        DataSharingService.refresh_shares(engine=ENGINE)
+
+        log.info('Refresh shares task finished successfully')
+
+    except Exception as e:
+        log.error(f'Refresh shares task failed due to: {e}')
+        raise e
diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py
index 458077553..0cf6f1950 100644
--- a/deploy/stacks/container.py
+++ b/deploy/stacks/container.py
@@ -220,6 +220,41 @@ def __init__(
         )
         self.ecs_security_groups.extend(subscriptions_task.task.security_groups)
 
+        # Nightly scheduled ECS task that re-applies approved shares
+        shares_refresh_task = self.set_scheduled_task(
+            cluster=cluster,
+            command=[
+                'python3.8',
+                '-m',
+                'dataall.tasks.shares_refresh',
+            ],
+            container_id='container',
+            ecr_repository=ecr_repository,
+            environment={
+                'AWS_REGION': self.region,
+                'envname': envname,
+                'LOGLEVEL': 'INFO',
+            },
+            image_tag=cdkproxy_image_tag,
+            log_group=self.create_log_group(
+                envname, resource_prefix, log_group_name='shares-refresh'
+            ),
+            schedule_expression=Schedule.expression('cron(0 2 * * ? *)'),
+            scheduled_task_id=f'{resource_prefix}-{envname}-shares-refresh-schedule',
+            task_id=f'{resource_prefix}-{envname}-shares-refresh',
+            task_role=task_role,
+            vpc=vpc,
+            security_group=scheduled_tasks_sg,
+            prod_sizing=prod_sizing,
+        )
+        self.ecs_security_groups.extend(shares_refresh_task.task.security_groups)
+
+        ssm.StringParameter(
+            self,
+            f'RamCleanUpToggle{envname}',
+            parameter_name=f'/datahubsa/{envname}/shares/cleanlfv1ram',
+            string_value='False',
+        )
+
         share_management_task_definition = ecs.FargateTaskDefinition(
             self,
             f'{resource_prefix}-{envname}-share-manager',
diff --git a/tests/tasks/test_share_manager.py b/tests/tasks/test_share_manager.py
new file mode 100644
index 000000000..ab6bfbaee
--- /dev/null
+++ b/tests/tasks/test_share_manager.py
@@ -0,0 +1,310 @@
+import logging
+
+import pytest
+
+import dataall
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logging.getLogger('boto3').setLevel(logging.CRITICAL)
+logging.getLogger('botocore').setLevel(logging.CRITICAL)
+
+REGION = 'eu-central-1'
+
+ENV_ACCOUNT = ''
+ENV_ROLE_NAME = 'dataall-World-Happiness-Report-i6v1v1c2'
+ENV_ROLE_ARN = f'arn:aws:iam::{ENV_ACCOUNT}:role/{ENV_ROLE_NAME}'
+
+
+CROSS_ACCOUNT_ENV = ''
+CROSS_ACCOUNT_ENV_ROLE_NAME = 'dataall-ConsumersEnvironment-r71ucp4m'
+CROSS_ACCOUNT_ENV_ROLE_ARN = (
+    f'arn:aws:iam::{CROSS_ACCOUNT_ENV}:role/{CROSS_ACCOUNT_ENV_ROLE_NAME}'
+)
+
+DATASET_GLUE_DB = 'dataall_world_happiness_report_i6v1v1c2'
+DATASET_S3_BUCKET = 'dataall-world-happiness-report-i6v1v1c2'
+
+TABLE_NAME = 'dataall_world_happiness_report_i6v1v1c2'
+TABLE_S3_PREFIX = f's3://{DATASET_S3_BUCKET}/'
+
+
+@pytest.fixture(scope='module')
+def org(db):
+    with db.scoped_session() as session:
+        org = dataall.db.models.Organization(
+            label='org',
+            owner='alice',
+            tags=[],
+            description='desc',
+            SamlGroupName='admins',
+        )
+        session.add(org)
+    yield org
+
+
+@pytest.fixture(scope='module')
+def env(org, db):
+    with db.scoped_session() as session:
+        env = dataall.db.models.Environment(
+            organizationUri=org.organizationUri,
+            AwsAccountId=ENV_ACCOUNT,
+            region='eu-central-1',
+            label='org',
+            owner='alice',
+            tags=[],
+            description='desc',
+            SamlGroupName='admins',
+            EnvironmentDefaultIAMRoleName=ENV_ROLE_NAME,
+            EnvironmentDefaultIAMRoleArn=ENV_ROLE_ARN,
+            CDKRoleArn=f'arn:aws:iam::{ENV_ACCOUNT}:role/EnvRole',
+            environmentUri='mytest',
+        )
+        session.add(env)
+        session.commit()
+        env_group = dataall.db.models.EnvironmentGroup(
+            environmentUri=env.environmentUri,
+            groupUri='bobTeam',
+            environmentIAMRoleArn=env.EnvironmentDefaultIAMRoleArn,
+            environmentIAMRoleName=env.EnvironmentDefaultIAMRoleName,
+            environmentAthenaWorkGroup='workgroup',
+        )
+        session.add(env_group)
+    yield env
+
+
+@pytest.fixture(scope='module')
+def cross_account_env(org, db):
+    with db.scoped_session() as session:
+        env = dataall.db.models.Environment(
+            organizationUri=org.organizationUri,
+            AwsAccountId=CROSS_ACCOUNT_ENV,
+            region='eu-central-1',
+            label='org',
+            owner='bob',
+            tags=[],
+            description='desc',
+            SamlGroupName='bobTeam',
+            EnvironmentDefaultIAMRoleName=CROSS_ACCOUNT_ENV_ROLE_NAME,
+            EnvironmentDefaultIAMRoleArn=CROSS_ACCOUNT_ENV_ROLE_ARN,
+            CDKRoleArn=f'arn:aws:iam::{CROSS_ACCOUNT_ENV}:role/EnvRole',
+        )
+        session.add(env)
+        session.commit()
+        env_group = dataall.db.models.EnvironmentGroup(
+            environmentUri=env.environmentUri,
+            groupUri=env.SamlGroupName,
+            environmentIAMRoleArn=env.EnvironmentDefaultIAMRoleArn,
+            environmentIAMRoleName=env.EnvironmentDefaultIAMRoleName,
+            environmentAthenaWorkGroup='workgroup',
+        )
+        session.add(env_group)
+    yield env
+
+
+@pytest.fixture(scope='module')
+def dataset(org, env, db):
+    with db.scoped_session() as session:
+        dataset = dataall.db.models.Dataset(
+            organizationUri=org.organizationUri,
+            environmentUri=env.environmentUri,
+            label=DATASET_S3_BUCKET,
+            owner='alice',
+            SamlAdminGroupName='admins',
+            businessOwnerDelegationEmails=['foo@amazon.com'],
+            name=DATASET_S3_BUCKET,
+            S3BucketName=DATASET_S3_BUCKET,
+            GlueDatabaseName=DATASET_GLUE_DB,
+            KmsAlias='kmsalias',
+            AwsAccountId=env.AwsAccountId,
+            region=env.region,
+            IAMDatasetAdminUserArn=f'arn:aws:iam::{ENV_ACCOUNT}:user/dataset',
+            IAMDatasetAdminRoleArn=f'arn:aws:iam::{ENV_ACCOUNT}:role/dataset',
+        )
+        session.add(dataset)
+    yield dataset
+
+
+@pytest.fixture(scope='module')
+def table(org, env, db, dataset):
+    with db.scoped_session() as session:
+        table = dataall.db.models.DatasetTable(
+            label=TABLE_NAME,
+            name=TABLE_NAME,
+            owner='alice',
+            description='test table',
+            tags=['a', 'b'],
+            datasetUri=dataset.datasetUri,
+            S3Prefix=TABLE_S3_PREFIX,
+            GlueDatabaseName=dataset.GlueDatabaseName,
+            GlueTableName=TABLE_NAME,
+            S3BucketName=dataset.S3BucketName,
+            AWSAccountId=dataset.AwsAccountId,
+            region=dataset.region,
+        )
+        session.add(table)
+    yield table
+
+
+@pytest.fixture(scope='module')
+def table2(org, env, db, dataset):
+    with db.scoped_session() as session:
+        table = dataall.db.models.DatasetTable(
+            label='deleted_glue_table',
+            name='deleted_glue_table',
+            owner='alice',
+            description='test table',
+            tags=['a', 'b'],
+            datasetUri=dataset.datasetUri,
+            S3Prefix='s3://dataall-world-happiness-report-i6v1v1c2/',
+            GlueDatabaseName=dataset.GlueDatabaseName,
+            GlueTableName='deleted_glue_table',
+            S3BucketName=dataset.S3BucketName,
+            AWSAccountId=dataset.AwsAccountId,
+            region=dataset.region,
+        )
+        session.add(table)
+    yield table
+
+
+@pytest.fixture(scope='module')
+def cross_account_share(
+    dataset: dataall.db.models.Dataset,
+    db: dataall.db.Engine,
+    cross_account_env: dataall.db.models.Environment,
+    table: dataall.db.models.DatasetTable,
+    table2: dataall.db.models.DatasetTable,
+): + with db.scoped_session() as session: + share = dataall.db.models.ShareObject( + shareUri='cross', + datasetUri=dataset.datasetUri, + environmentUri=cross_account_env.environmentUri, + owner='bob', + principalId=cross_account_env.SamlGroupName, + principalType=dataall.api.constants.PrincipalType.Environment.value, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share) + session.commit() + share_item = dataall.db.models.ShareObjectItem( + shareUri=share.shareUri, + owner='alice', + itemUri=table.tableUri, + itemType=dataall.api.constants.ShareableType.Table.value, + itemName=table.GlueTableName, + GlueDatabaseName=table.GlueDatabaseName, + GlueTableName=table.GlueTableName, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share_item) + share_item = dataall.db.models.ShareObjectItem( + shareUri=share.shareUri, + owner='alice', + itemUri=table2.tableUri, + itemType=dataall.api.constants.ShareableType.Table.value, + itemName=table2.GlueTableName, + GlueDatabaseName=table2.GlueDatabaseName, + GlueTableName=table2.GlueTableName, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share_item) + session.commit() + yield share + + +@pytest.fixture(scope='module') +def same_account_share( + dataset: dataall.db.models.Dataset, + db: dataall.db.Engine, + env: dataall.db.models.Environment, + table: dataall.db.models.DatasetTable, +): + with db.scoped_session() as session: + share = dataall.db.models.ShareObject( + shareUri='same', + datasetUri=dataset.datasetUri, + environmentUri=env.environmentUri, + owner='bob', + principalId='bobTeam', + principalType=dataall.api.constants.PrincipalType.Group.value, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share) + session.commit() + share_item = dataall.db.models.ShareObjectItem( + shareUri=share.shareUri, + owner='alice', + itemUri=table.tableUri, + itemType=dataall.api.constants.ShareableType.Table.value, + itemName=table.GlueTableName, + GlueDatabaseName=table.GlueDatabaseName, + GlueTableName=table.GlueTableName, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share_item) + yield share + + +def __update_to_rejected_status(db, share): + with db.scoped_session() as session: + share.status = dataall.api.constants.ShareObjectStatus.Rejected.value + session.merge(share) + + +def test_cross_account_sharing(db, cross_account_share, dataset, mocker): + """mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share', + return_value=True, + ) + mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share', + return_value=True, + )""" + dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share( + db, cross_account_share.shareUri + ) + + __update_to_rejected_status(db, cross_account_share) + + dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share( + db, cross_account_share.shareUri + ) + + +def test_same_account_sharing(db, same_account_share, dataset, mocker): + mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share', + return_value=True, + ) + mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share', + return_value=True, + ) + dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share( + db, same_account_share.shareUri + ) + + __update_to_rejected_status(db, 
same_account_share) + + dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share( + db, same_account_share.shareUri + ) + + +def test_refresh_shares(db, same_account_share, cross_account_share, dataset, mocker): + mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.refresh_shares', + return_value=True, + ) + mocker.patch('dataall.utils.Parameter.get_parameter', return_value='True') + assert dataall.tasks.data_sharing.data_sharing_service.DataSharingService.refresh_shares( + db + ) + + __update_to_rejected_status(db, same_account_share) + __update_to_rejected_status(db, cross_account_share) + + assert dataall.tasks.data_sharing.data_sharing_service.DataSharingService.refresh_shares( + db + ) From e4b6edac0a2c83aafa4e17fccd90b3f6fb8a283f Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Wed, 26 Oct 2022 05:47:28 +0200 Subject: [PATCH 15/28] Testing feedbacks --- backend/dataall/db/api/share_object.py | 27 +-- .../data_sharing/common/share_approval.py | 108 +++------- .../tasks/data_sharing/common/share_revoke.py | 198 ++++++++++++------ .../cross_account/approve_share.py | 14 +- .../cross_account/revoke_share.py | 11 +- .../data_sharing/data_sharing_service.py | 8 +- .../same_account/approve_share.py | 8 +- .../data_sharing/same_account/revoke_share.py | 5 +- 8 files changed, 199 insertions(+), 180 deletions(-) diff --git a/backend/dataall/db/api/share_object.py b/backend/dataall/db/api/share_object.py index 8c2a13fbd..aeb4d0205 100644 --- a/backend/dataall/db/api/share_object.py +++ b/backend/dataall/db/api/share_object.py @@ -682,34 +682,35 @@ def get_share_data(session, share_uri, status): shared_tables = ( session.query(models.DatasetTable) - .join( + .join( models.ShareObjectItem, models.ShareObjectItem.itemUri == models.DatasetTable.tableUri, ) - .join( + .join( models.ShareObject, models.ShareObject.shareUri == models.ShareObjectItem.shareUri, ) - .filter( + .filter( and_( models.ShareObject.datasetUri == dataset.datasetUri, models.ShareObject.environmentUri == target_environment.environmentUri, models.ShareObject.status.in_(status), + models.ShareObject.shareUri == share_uri, ) ) - .all() + .all() ) env_group: models.EnvironmentGroup = ( session.query(models.EnvironmentGroup) - .filter( + .filter( and_( models.EnvironmentGroup.environmentUri == share.environmentUri, models.EnvironmentGroup.groupUri == share.principalId, ) ) - .first() + .first() ) if not env_group: raise Exception( @@ -726,28 +727,29 @@ def get_share_data(session, share_uri, status): ) @staticmethod - def other_approved_share_object_exists(session, environment_uri): + def other_approved_share_object_exists(session, environment_uri, dataset_uri): return ( session.query(models.ShareObject) - .filter( + .filter( and_( models.Environment.environmentUri == environment_uri, models.ShareObject.status == models.Enums.ShareObjectStatus.Approved.value, + models.ShareObject.datasetUri == dataset_uri, ) ) - .all() + .all() ) @staticmethod def is_shared_table(session, environment_uri, dataset_uri, table_name): return ( session.query(models.ShareObjectItem) - .join( + .join( models.ShareObject, models.ShareObjectItem.shareUri == models.ShareObject.shareUri, ) - .filter( + .filter( and_( models.ShareObjectItem.GlueTableName == table_name, models.ShareObject.datasetUri == dataset_uri, @@ -755,6 +757,5 @@ def is_shared_table(session, environment_uri, dataset_uri, table_name): models.ShareObject.environmentUri == environment_uri, ) ) - .first() + .first() ) - diff --git 
a/backend/dataall/tasks/data_sharing/common/share_approval.py b/backend/dataall/tasks/data_sharing/common/share_approval.py index ffa53ca73..2815d929e 100644 --- a/backend/dataall/tasks/data_sharing/common/share_approval.py +++ b/backend/dataall/tasks/data_sharing/common/share_approval.py @@ -1,6 +1,5 @@ import abc import logging -import uuid from botocore.exceptions import ClientError @@ -191,27 +190,11 @@ def create_resource_link(cls, **data) -> dict: ) raise e - @classmethod - def clean_shared_database( - cls, - session, - dataset: models.Dataset, - shared_tables: [models.DatasetTable], - target_environment: models.Environment, - shared_db_name: str, - ) -> [str]: + def clean_shared_database(self) -> [str]: """ After share approval verify that the shared database do not have any removed items from the share request. - Parameters - ---------- - session : db - dataset : models.Dataset - shared_tables : [models.DatasetTable] - target_environment : models.Environment - shared_db_name : shared database name - Returns ------- List of deleted tables from the shared database @@ -219,76 +202,44 @@ def clean_shared_database( tables_to_delete = [] shared_glue_tables = Glue.list_glue_database_tables( - accountid=target_environment.AwsAccountId, - database=shared_db_name, - region=target_environment.region, + accountid=self.target_environment.AwsAccountId, + database=self.shared_db_name, + region=self.target_environment.region, ) logger.info( - f'Shared database {shared_db_name} glue tables: {shared_glue_tables}' + f'Shared database {self.shared_db_name} glue tables: {shared_glue_tables}' ) - shared_tables = [t.GlueTableName for t in shared_tables] - logger.info(f'Share items of the share object {shared_tables}') - - aws_session = SessionHelper.remote_session(accountid=dataset.AwsAccountId) - client = aws_session.client('lakeformation', region_name=dataset.region) + shared_tables = [t.GlueTableName for t in self.shared_tables] + logger.info(f'Share items of the share object {self.shared_tables}') for table in shared_glue_tables: if table['Name'] not in shared_tables: logger.info( - f'Found a table not part of the share: {dataset.GlueDatabaseName}//{table["Name"]}' + f'Found a table not part of the share: {self.dataset.GlueDatabaseName}//{table["Name"]}' ) - is_shared = api.ShareObject.is_shared_table( - session, - target_environment.environmentUri, - dataset.datasetUri, - table['Name'], - ) - if not is_shared: - logger.info( - f'Access to table {dataset.AwsAccountId}//{dataset.GlueDatabaseName}//{table["Name"]} ' - f'will be removed for account {target_environment.AwsAccountId}' + try: + LakeFormation.revoke_source_table_access( + target_accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, + source_database=self.dataset.GlueDatabaseName, + source_table=table['Name'], + target_principal=self.env_group.environmentIAMRoleArn, + source_accountid=self.source_environment.AwsAccountId, + ) + except ClientError as e: + # error not raised due to multiple failure reasons + # cleanup failure does not impact share request items access + logger.error( + f'Revoking permission on source table failed due to: {e}' ) - if Glue.table_exists( - **{ - 'accountid': dataset.AwsAccountId, - 'region': dataset.region, - 'database': dataset.GlueDatabaseName, - 'tablename': table['Name'], - } - ): - LakeFormation.batch_revoke_permissions( - client, - target_environment.AwsAccountId, - [ - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': 
target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': dataset.GlueDatabaseName, - 'Name': table['Name'], - 'ColumnWildcard': {}, - 'CatalogId': dataset.AwsAccountId, - } - }, - 'Permissions': ['DESCRIBE', 'SELECT'], - 'PermissionsWithGrantOption': [ - 'DESCRIBE', - 'SELECT', - ], - } - ], - ) tables_to_delete.append(table['Name']) Glue.batch_delete_tables( - accountid=target_environment.AwsAccountId, - region=target_environment.region, - database=shared_db_name, + accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, + database=self.shared_db_name, tables=tables_to_delete, ) @@ -299,7 +250,7 @@ def handle_share_failure( table: models.DatasetTable, share_item: models.ShareObjectItem, error: Exception, - ) -> None: + ) -> bool: """ Handles share failure by raising an alarm to alarmsTopic Parameters @@ -310,7 +261,7 @@ def handle_share_failure( Returns ------- - None + True if alarm published successfully """ logging.error( f'Failed to share table {table.GlueTableName} ' @@ -326,6 +277,7 @@ def handle_share_failure( AlarmService().trigger_table_sharing_failure_alarm( table, self.share, self.target_environment ) + return True def build_share_data(self, principals: [str], table: models.DatasetTable) -> dict: """ @@ -363,7 +315,7 @@ def delete_deprecated_shared_database(self) -> bool: True if delete is successful """ return Glue.delete_database( - accountid=self.dataset.AwsAccountId, - region=self.dataset.region, + accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, database=f'{self.dataset.GlueDatabaseName}shared', ) diff --git a/backend/dataall/tasks/data_sharing/common/share_revoke.py b/backend/dataall/tasks/data_sharing/common/share_revoke.py index a92896366..180b9f4a4 100644 --- a/backend/dataall/tasks/data_sharing/common/share_revoke.py +++ b/backend/dataall/tasks/data_sharing/common/share_revoke.py @@ -15,13 +15,13 @@ class ShareRevoke: def __init__( self, session, - shared_db_name, - env_group, - dataset, - share, - shared_tables, - source_environment, - target_environment, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + target_environment: models.Environment, + env_group: models.EnvironmentGroup, ): self.session = session self.env_group = env_group @@ -36,20 +36,13 @@ def __init__( def revoke_share(self): return NotImplementedError - def revoke_resource_links_access(self) -> [dict]: + def revoke_shared_tables_access(self) -> bool: """ Loops through share request items and revokes access on LF Returns ------- - List of revoke entries + True if revoke is successful """ - aws_session = SessionHelper.remote_session( - accountid=self.target_environment.AwsAccountId - ) - client = aws_session.client( - 'lakeformation', region_name=self.target_environment.region - ) - revoke_entries = [] for table in self.shared_tables: share_item = api.ShareObject.find_share_item_by_table( @@ -63,38 +56,12 @@ def revoke_resource_links_access(self) -> [dict]: ) try: - data = { - 'accountid': self.target_environment.AwsAccountId, - 'region': self.target_environment.region, - 'database': self.shared_db_name, - 'tablename': table.GlueTableName, - } - log.info(f'Starting revoke for: {data}') - - if Glue.table_exists(**data): - revoke_entries.append( - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': self.env_group.environmentIAMRoleArn - }, - 'Resource': { - 
'Table': {
-                            'DatabaseName': self.shared_db_name,
-                            'Name': table.GlueTableName,
-                            'CatalogId': self.target_environment.AwsAccountId,
-                        }
-                    },
-                    'Permissions': ['DESCRIBE', 'SELECT'],
-                }
-            )
+            log.info(f'Starting revoke access for table: {table.GlueTableName}')
 
-            log.info(f'Revoking permissions for entries : {revoke_entries}')
+            self.revoke_table_resource_link_access(table)
 
-            LakeFormation.batch_revoke_permissions(
-                client, self.target_environment.AwsAccountId, revoke_entries
-            )
+            self.revoke_source_table_access(table)
 
             api.ShareObject.update_share_item_status(
                 self.session,
@@ -103,21 +70,101 @@ def revoke_resource_links_access(self) -> [dict]:
                 )
 
             except Exception as e:
-                logging.error(
-                    f'Failed to revoke LF permissions to table share {table.GlueTableName} '
-                    f'on target account {self.target_environment.AwsAccountId}/{self.target_environment.region}'
-                    f'due to: {e}'
-                )
-                api.ShareObject.update_share_item_status(
-                    self.session,
-                    share_item,
-                    models.ShareObjectStatus.Revoke_Share_Failed.value,
-                )
-                AlarmService().trigger_revoke_sharing_failure_alarm(
-                    table, self.share, self.target_environment
-                )
+                self.handle_revoke_failure(table, share_item, e)
+
+        return True
+
+    def revoke_table_resource_link_access(self, table: models.DatasetTable):
+        """
+        Revokes access to glue table resource link
+        Parameters
+        ----------
+        table : models.DatasetTable
+
+        Returns
+        -------
+        True if revoke is successful
+        """
+        if not Glue.table_exists(
+            accountid=self.target_environment.AwsAccountId,
+            region=self.target_environment.region,
+            database=self.shared_db_name,
+            tablename=table.GlueTableName,
+        ):
+            log.info(
+                f'Resource link could not be found '
+                f'on {self.target_environment.AwsAccountId}/{self.shared_db_name}/{table.GlueTableName} '
+                f'skipping revoke actions...'
+            )
+            return True
+
+        log.info(
+            f'Revoking resource link access '
+            f'on {self.target_environment.AwsAccountId}/{self.shared_db_name}/{table.GlueTableName} '
+            f'for principal {self.env_group.environmentIAMRoleArn}'
+        )
+        LakeFormation.batch_revoke_permissions(
+            SessionHelper.remote_session(self.target_environment.AwsAccountId).client(
+                'lakeformation', region_name=self.target_environment.region
+            ),
+            self.target_environment.AwsAccountId,
+            [
+                {
+                    'Id': str(uuid.uuid4()),
+                    'Principal': {
+                        'DataLakePrincipalIdentifier': self.env_group.environmentIAMRoleArn
+                    },
+                    'Resource': {
+                        'Table': {
+                            'DatabaseName': self.shared_db_name,
+                            'Name': table.GlueTableName,
+                            'CatalogId': self.target_environment.AwsAccountId,
+                        }
+                    },
+                    'Permissions': ['DESCRIBE'],
+                }
+            ],
+        )
+        return True
 
-        return revoke_entries
 
+    def revoke_source_table_access(self, table):
+        """
+        Revokes access to the source glue table
+        Parameters
+        ----------
+        table : models.DatasetTable
+
+        Returns
+        -------
+        True if revoke is successful
+        """
+        # guard on the source table itself; the resource link is already
+        # checked by revoke_table_resource_link_access above
+        if not Glue.table_exists(
+            accountid=self.source_environment.AwsAccountId,
+            region=self.source_environment.region,
+            database=self.dataset.GlueDatabaseName,
+            tablename=table.GlueTableName,
+        ):
+            log.info(
+                f'Source table could not be found '
+                f'on {self.source_environment.AwsAccountId}/{self.dataset.GlueDatabaseName}/{table.GlueTableName} '
+                f'skipping revoke actions...'
+ ) + return True + + log.info( + f'Revoking source table access ' + f'on {self.source_environment.AwsAccountId}/{self.dataset.GlueDatabaseName}/{table.GlueTableName} ' + f'for principal {self.env_group.environmentIAMRoleArn}' + ) + LakeFormation.revoke_source_table_access( + target_accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, + source_database=self.dataset.GlueDatabaseName, + source_table=table.GlueTableName, + target_principal=self.env_group.environmentIAMRoleArn, + source_accountid=self.source_environment.AwsAccountId, + ) + return True def delete_shared_database(self) -> bool: """ @@ -163,3 +210,36 @@ def check_share_item_exists_on_glue_catalog( f' but its correspondent Glue table {table.GlueTableName} does not exist.' ), ) + + def handle_revoke_failure( + self, + table: models.DatasetTable, + share_item: models.ShareObjectItem, + error: Exception, + ) -> bool: + """ + Handles revoke failure by raising an alarm to alarmsTopic + Parameters + ---------- + table : dataset table + share_item : failed item + error : share error + + Returns + ------- + True if alarm published successfully + """ + logging.error( + f'Failed to revoke LF permissions to table share {table.GlueTableName} ' + f'on target account {self.target_environment.AwsAccountId}/{self.target_environment.region}' + f'due to: {error}' + ) + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_Share_Failed.value, + ) + AlarmService().trigger_revoke_sharing_failure_alarm( + table, self.share, self.target_environment + ) + return True diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py index f2c9b47df..d40993f21 100644 --- a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py +++ b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py @@ -68,12 +68,6 @@ def approve_share( share_item = api.ShareObject.find_share_item_by_table( self.session, self.share, table ) - if not share_item: - log.warning( - f'Share Item not found for {self.share.shareUri} ' - f'and Dataset Table {table.GlueTableName} continuing loop...' 
- ) - continue api.ShareObject.update_share_item_status( self.session, @@ -109,13 +103,7 @@ def approve_share( except Exception as e: self.handle_share_failure(table, share_item, e) - self.clean_shared_database( - self.session, - self.dataset, - self.shared_tables, - self.target_environment, - self.shared_db_name, - ) + self.clean_shared_database() self.delete_deprecated_shared_database() diff --git a/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py b/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py index fcb1afa43..54624266e 100644 --- a/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py +++ b/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py @@ -37,19 +37,22 @@ def revoke_share(self) -> bool: """ Revokes a share cross account 1) revoke resource link access on target account - 2) delete shared database on target account - 3) revoke resource link access on source account + 2) revoke table access on source account + 3) delete shared database on target account + 4) revoke external account sharing on source account Returns ------- True if revoke is successful """ - self.revoke_resource_links_access() + self.revoke_shared_tables_access() self.delete_shared_database() if not api.ShareObject.other_approved_share_object_exists( - self.session, self.target_environment.environmentUri + self.session, + self.target_environment.environmentUri, + self.dataset.datasetUri, ): self.revoke_external_account_access_on_source_account() diff --git a/backend/dataall/tasks/data_sharing/data_sharing_service.py b/backend/dataall/tasks/data_sharing/data_sharing_service.py index 8fda17e2a..99399d428 100644 --- a/backend/dataall/tasks/data_sharing/data_sharing_service.py +++ b/backend/dataall/tasks/data_sharing/data_sharing_service.py @@ -48,7 +48,7 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: shared_tables, source_environment, target_environment, - ) = api.ShareObject.get_share_data(session, share_uri, [models.Enums.ShareObjectStatus.Approved.value]) + ) = api.ShareObject.get_share_data(session, share_uri, ['Approved']) shared_db_name = cls.build_shared_db_name(dataset, share) @@ -107,7 +107,7 @@ def reject_share(cls, engine: Engine, share_uri: str): shared_tables, source_environment, target_environment, - ) = api.ShareObject.get_share_data(session, share_uri, [models.Enums.ShareObjectStatus.Rejected.value]) + ) = api.ShareObject.get_share_data(session, share_uri, ['Rejected']) log.info(f'Revoking permissions for tables : {shared_tables}') @@ -123,23 +123,23 @@ def reject_share(cls, engine: Engine, share_uri: str): return CrossAccountShareRevoke( session, shared_db_name, - env_group, dataset, share, shared_tables, source_environment, target_environment, + env_group, ).revoke_share() return SameAccountShareRevoke( session, shared_db_name, - env_group, dataset, share, shared_tables, source_environment, target_environment, + env_group, ).revoke_share() @classmethod diff --git a/backend/dataall/tasks/data_sharing/same_account/approve_share.py b/backend/dataall/tasks/data_sharing/same_account/approve_share.py index ec40d12d1..04e6179b4 100644 --- a/backend/dataall/tasks/data_sharing/same_account/approve_share.py +++ b/backend/dataall/tasks/data_sharing/same_account/approve_share.py @@ -89,13 +89,7 @@ def approve_share(self) -> bool: except Exception as e: self.handle_share_failure(table, share_item, e) - self.clean_shared_database( - self.session, - self.dataset, - self.shared_tables, - self.target_environment, - self.shared_db_name, - ) + 
self.clean_shared_database() self.delete_deprecated_shared_database() diff --git a/backend/dataall/tasks/data_sharing/same_account/revoke_share.py b/backend/dataall/tasks/data_sharing/same_account/revoke_share.py index b3cfe6a6d..c35eef32c 100644 --- a/backend/dataall/tasks/data_sharing/same_account/revoke_share.py +++ b/backend/dataall/tasks/data_sharing/same_account/revoke_share.py @@ -33,13 +33,14 @@ def revoke_share(self) -> bool: """ Revokes a share on same account 1) revoke resource link access - 2) delete shared database on target account + 2) revoke source table access + 3) delete shared database Returns ------- True if revoke is successful """ - self.revoke_resource_links_access() + self.revoke_shared_tables_access() self.delete_shared_database() From 47ad72dd4c1503731bb92e5eaba153466a0adabd Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Wed, 26 Oct 2022 06:04:32 +0200 Subject: [PATCH 16/28] fix lint issues --- backend/dataall/db/api/share_object.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/dataall/db/api/share_object.py b/backend/dataall/db/api/share_object.py index aeb4d0205..0b12ebe0c 100644 --- a/backend/dataall/db/api/share_object.py +++ b/backend/dataall/db/api/share_object.py @@ -682,15 +682,15 @@ def get_share_data(session, share_uri, status): shared_tables = ( session.query(models.DatasetTable) - .join( + .join( models.ShareObjectItem, models.ShareObjectItem.itemUri == models.DatasetTable.tableUri, ) - .join( + .join( models.ShareObject, models.ShareObject.shareUri == models.ShareObjectItem.shareUri, ) - .filter( + .filter( and_( models.ShareObject.datasetUri == dataset.datasetUri, models.ShareObject.environmentUri @@ -699,18 +699,18 @@ def get_share_data(session, share_uri, status): models.ShareObject.shareUri == share_uri, ) ) - .all() + .all() ) env_group: models.EnvironmentGroup = ( session.query(models.EnvironmentGroup) - .filter( + .filter( and_( models.EnvironmentGroup.environmentUri == share.environmentUri, models.EnvironmentGroup.groupUri == share.principalId, ) ) - .first() + .first() ) if not env_group: raise Exception( @@ -730,7 +730,7 @@ def get_share_data(session, share_uri, status): def other_approved_share_object_exists(session, environment_uri, dataset_uri): return ( session.query(models.ShareObject) - .filter( + .filter( and_( models.Environment.environmentUri == environment_uri, models.ShareObject.status @@ -738,18 +738,18 @@ def other_approved_share_object_exists(session, environment_uri, dataset_uri): models.ShareObject.datasetUri == dataset_uri, ) ) - .all() + .all() ) @staticmethod def is_shared_table(session, environment_uri, dataset_uri, table_name): return ( session.query(models.ShareObjectItem) - .join( + .join( models.ShareObject, models.ShareObjectItem.shareUri == models.ShareObject.shareUri, ) - .filter( + .filter( and_( models.ShareObjectItem.GlueTableName == table_name, models.ShareObject.datasetUri == dataset_uri, @@ -757,5 +757,5 @@ def is_shared_table(session, environment_uri, dataset_uri, table_name): models.ShareObject.environmentUri == environment_uri, ) ) - .first() + .first() ) From f9285c52ff7dcdbc19efd1534fd9e05a5f90c61c Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Wed, 26 Oct 2022 06:11:24 +0200 Subject: [PATCH 17/28] fix it test --- tests/tasks/test_share_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tasks/test_share_manager.py b/tests/tasks/test_share_manager.py index ab6bfbaee..2eec795d6 100644 --- 
a/tests/tasks/test_share_manager.py +++ b/tests/tasks/test_share_manager.py @@ -253,14 +253,14 @@ def __update_to_rejected_status(db, share): def test_cross_account_sharing(db, cross_account_share, dataset, mocker): - """mocker.patch( + mocker.patch( 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share', return_value=True, ) mocker.patch( 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share', return_value=True, - )""" + ) dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share( db, cross_account_share.shareUri ) From d930e1a2c044550c92fe807cceaa9732e7ec3ada Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Wed, 26 Oct 2022 07:06:59 +0200 Subject: [PATCH 18/28] remove empty f strings + put back waiter for cross account sharing --- backend/dataall/aws/handlers/lakeformation.py | 104 +++++++++++++++++- .../cross_account/approve_share.py | 2 + 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/backend/dataall/aws/handlers/lakeformation.py b/backend/dataall/aws/handlers/lakeformation.py index 5b0de94a1..051c76641 100644 --- a/backend/dataall/aws/handlers/lakeformation.py +++ b/backend/dataall/aws/handlers/lakeformation.py @@ -12,6 +12,26 @@ class LakeFormation: def __init__(self): pass + @staticmethod + def describe_resource(resource_arn, accountid, region): + """ + Describes a LF data location + """ + try: + session = SessionHelper.remote_session(accountid) + lf_client = session.client('lakeformation', region_name=region) + + response = lf_client.describe_resource(ResourceArn=resource_arn) + + log.debug(f'LF data location already registered: {response}') + + return response['ResourceInfo'] + + except ClientError as e: + log.error( + f'LF data location for resource {resource_arn} not found due to {e}' + ) + @staticmethod def grant_pivot_role_all_database_permissions(accountid, region, database): LakeFormation.grant_permissions_to_database( @@ -141,6 +161,7 @@ def batch_revoke_permissions(client, accountid, entries): :param entries: :return: """ + log.info(f'Batch Revoking {entries}') entries_chunks: list = [entries[i : i + 20] for i in range(0, len(entries), 20)] failures = [] try: @@ -148,19 +169,31 @@ def batch_revoke_permissions(client, accountid, entries): response = client.batch_revoke_permissions( CatalogId=accountid, Entries=entries_chunk ) - log.info(f'Batch Revoke {entries_chunk} response: {response}') + log.info(f'Batch Revoke response: {response}') failures.extend(response.get('Failures')) - except ClientError as e: + for failure in failures: if not ( failure['Error']['ErrorCode'] == 'InvalidInputException' and ( 'Grantee has no permissions' in failure['Error']['ErrorMessage'] or 'No permissions revoked' in failure['Error']['ErrorMessage'] + or 'not found' in failure['Error']['ErrorMessage'] ) ): - log.warning(f'Batch Revoke ended with failures: {failures}') - raise e + raise ClientError( + error_response={ + 'Error': { + 'Code': 'LakeFormation.batch_revoke_permissions', + 'Message': f'Operation ended with failures: {failures}', + } + }, + operation_name='LakeFormation.batch_revoke_permissions', + ) + + except ClientError as e: + log.warning(f'Batch Revoke ended with failures: {failures}') + raise e @staticmethod def grant_resource_link_permission_on_target(client, source, target): @@ -221,3 +254,66 @@ def grant_resource_link_permission(client, source, target, target_database): f'due to: {e}' ) raise e + + @staticmethod + def revoke_source_table_access(**data): + """ + Revokes 
permissions for a principal in a cross account sharing setup
+        Parameters
+        ----------
+        data : dict with target_accountid, region, target_principal,
+            source_database, source_table and source_accountid keys
+
+        Returns
+        -------
+        None
+
+        """
+        logging.info(f'Revoking source table access: {data} ...')
+        target_accountid = data['target_accountid']
+        region = data['region']
+        target_principal = data['target_principal']
+        source_database = data['source_database']
+        source_table = data['source_table']
+        source_accountid = data['source_accountid']
+
+        try:
+            aws_session = SessionHelper.remote_session(target_accountid)
+            lakeformation = aws_session.client('lakeformation', region_name=region)
+
+            logging.info('Revoking DESCRIBE permission...')
+            lakeformation.revoke_permissions(
+                Principal=dict(DataLakePrincipalIdentifier=target_principal),
+                Resource=dict(
+                    Table=dict(
+                        CatalogId=source_accountid,
+                        DatabaseName=source_database,
+                        Name=source_table,
+                    )
+                ),
+                Permissions=['DESCRIBE'],
+                PermissionsWithGrantOption=[],
+            )
+            logging.info('Successfully revoked DESCRIBE permissions')
+
+            logging.info('Revoking SELECT permission...')
+            lakeformation.revoke_permissions(
+                Principal=dict(DataLakePrincipalIdentifier=target_principal),
+                Resource=dict(
+                    TableWithColumns=dict(
+                        CatalogId=source_accountid,
+                        DatabaseName=source_database,
+                        Name=source_table,
+                        ColumnWildcard={},
+                    )
+                ),
+                Permissions=['SELECT'],
+                PermissionsWithGrantOption=[],
+            )
+            logging.info('Successfully revoked SELECT permissions')
+
+        except ClientError as e:
+            logging.error(
+                f'Failed to revoke permissions for {target_principal} '
+                f'on source table {source_accountid}/{source_database}/{source_table} '
+                f'due to: {e}'
+            )
+            raise e
diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
index d40993f21..9276e9e6e 100644
--- a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
+++ b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
@@ -135,6 +135,7 @@ def share_table_with_target_account(cls, **data):
             data['source']['database'],
             data['source']['tablename'],
         )
+        time(1)
 
         LakeFormation.grant_permissions_to_table(
             source_lf_client,
@@ -144,6 +145,7 @@ def share_table_with_target_account(cls, **data):
             ['DESCRIBE', 'SELECT'],
             ['DESCRIBE', 'SELECT'],
         )
+        time(2)
 
         log.info(
             f"Granted access to table {data['source']['tablename']} "

From 94efd00913d5decd76b442c4c9f50bd32009cc28 Mon Sep 17 00:00:00 2001
From: "mamallem@amazon.com"
Date: Wed, 26 Oct 2022 07:08:08 +0200
Subject: [PATCH 19/28] time sleep

---
 .../dataall/tasks/data_sharing/cross_account/approve_share.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
index 9276e9e6e..6dd715efc 100644
--- a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
+++ b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
@@ -135,7 +135,7 @@ def share_table_with_target_account(cls, **data):
             data['source']['database'],
             data['source']['tablename'],
         )
-        time(1)
+        time.sleep(1)
 
         LakeFormation.grant_permissions_to_table(
             source_lf_client,
@@ -145,7 +145,7 @@ def share_table_with_target_account(cls, **data):
             ['DESCRIBE', 'SELECT'],
             ['DESCRIBE', 'SELECT'],
         )
-        time(2)
+        time.sleep(2)
 
         log.info(
             f"Granted access to table {data['source']['tablename']} "

From 46a91fd4e8beed4b257ac220dd69ea327011d2ab Mon Sep 17 00:00:00 2001
From: "mamallem@amazon.com"
Date: Wed, 26 Oct 2022 13:53:39 +0200
Subject: [PATCH 20/28] Rename
ssm param --- deploy/stacks/container.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py index 0cf6f1950..95fedc313 100644 --- a/deploy/stacks/container.py +++ b/deploy/stacks/container.py @@ -251,7 +251,7 @@ def __init__( ssm.StringParameter( self, f'RamCleanUpToggle{envname}', - parameter_name=f'/datahubsa/{envname}/shares/cleanlfv1ram', + parameter_name=f'/dataall/{envname}/shares/cleanlfv1ram', string_value='False', ) From ff85691ecf5c6dc722300896ee1e49060ab83a34 Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Tue, 25 Oct 2022 10:14:20 +0200 Subject: [PATCH 21/28] Better sharing management --- backend/dataall/aws/handlers/ecs.py | 6 +- backend/dataall/aws/handlers/glue.py | 129 +- backend/dataall/aws/handlers/lakeformation.py | 223 +++ backend/dataall/aws/handlers/ram.py | 225 +++ backend/dataall/db/api/share_object.py | 153 +- .../dataall/tasks/data_sharing/__init__.py | 0 .../tasks/data_sharing/common/__init__.py | 0 .../data_sharing/common/share_approval.py | 369 +++++ .../tasks/data_sharing/common/share_revoke.py | 165 ++ .../data_sharing/cross_account/__init__.py | 0 .../cross_account/approve_share.py | 173 ++ .../cross_account/revoke_share.py | 120 ++ .../data_sharing/data_sharing_service.py | 234 +++ .../data_sharing/same_account/__init__.py | 0 .../same_account/approve_share.py | 102 ++ .../data_sharing/same_account/revoke_share.py | 46 + backend/dataall/tasks/share_manager.py | 1460 +---------------- backend/dataall/tasks/shares_refresh.py | 28 + deploy/stacks/container.py | 35 + tests/tasks/test_share_manager.py | 310 ++++ 20 files changed, 2307 insertions(+), 1471 deletions(-) create mode 100644 backend/dataall/aws/handlers/lakeformation.py create mode 100644 backend/dataall/aws/handlers/ram.py create mode 100644 backend/dataall/tasks/data_sharing/__init__.py create mode 100644 backend/dataall/tasks/data_sharing/common/__init__.py create mode 100644 backend/dataall/tasks/data_sharing/common/share_approval.py create mode 100644 backend/dataall/tasks/data_sharing/common/share_revoke.py create mode 100644 backend/dataall/tasks/data_sharing/cross_account/__init__.py create mode 100644 backend/dataall/tasks/data_sharing/cross_account/approve_share.py create mode 100644 backend/dataall/tasks/data_sharing/cross_account/revoke_share.py create mode 100644 backend/dataall/tasks/data_sharing/data_sharing_service.py create mode 100644 backend/dataall/tasks/data_sharing/same_account/__init__.py create mode 100644 backend/dataall/tasks/data_sharing/same_account/approve_share.py create mode 100644 backend/dataall/tasks/data_sharing/same_account/revoke_share.py create mode 100644 backend/dataall/tasks/shares_refresh.py create mode 100644 tests/tasks/test_share_manager.py diff --git a/backend/dataall/aws/handlers/ecs.py b/backend/dataall/aws/handlers/ecs.py index 65d5ff188..9c8d880d5 100644 --- a/backend/dataall/aws/handlers/ecs.py +++ b/backend/dataall/aws/handlers/ecs.py @@ -9,7 +9,7 @@ from ... 
import db from ...db import models from ...utils import Parameter -from ...tasks.share_manager import ShareManager +from ...tasks.data_sharing.data_sharing_service import DataSharingService log = logging.getLogger('aws:ecs') @@ -23,7 +23,7 @@ def __init__(self): def approve_share(engine, task: models.Task): envname = os.environ.get('envname', 'local') if envname in ['local', 'dkrcompose']: - return ShareManager.approve_share(engine, task.targetUri) + return DataSharingService.approve_share(engine, task.targetUri) else: return Ecs.run_share_management_ecs_task( envname, task.targetUri, 'approve_share' @@ -34,7 +34,7 @@ def approve_share(engine, task: models.Task): def reject_share(engine, task: models.Task): envname = os.environ.get('envname', 'local') if envname in ['local', 'dkrcompose']: - return ShareManager.reject_share(engine, task.targetUri) + return DataSharingService.reject_share(engine, task.targetUri) else: return Ecs.run_share_management_ecs_task( envname, task.targetUri, 'reject_share' diff --git a/backend/dataall/aws/handlers/glue.py b/backend/dataall/aws/handlers/glue.py index ef4fce1f7..cac696c4b 100644 --- a/backend/dataall/aws/handlers/glue.py +++ b/backend/dataall/aws/handlers/glue.py @@ -15,7 +15,7 @@ def __init__(self): pass @staticmethod - def _create_database(accountid, database, region, location): + def create_database(accountid, database, region, location): try: existing_database = Glue.database_exists( accountid=accountid, database=database, region=region @@ -27,7 +27,9 @@ def _create_database(accountid, database, region, location): glue_database_created = True return glue_database_created except ClientError as e: - log.debug(f'Failed to create database {database}', e) + log.error( + f'Failed to create database {database} on account {accountid} due to {e}' + ) raise e @staticmethod @@ -64,15 +66,11 @@ def database_exists(**data): session = SessionHelper.remote_session(accountid) try: glue_client = session.client('glue', region_name=region) - response = glue_client.get_database( - CatalogId=data['accountid'], Name=database - ) - if response.get('Database'): - return response - else: - return None - except ClientError as e: - log.debug(f'Database already exists in Glue{database}', e) + glue_client.get_database(CatalogId=data['accountid'], Name=database) + return True + except ClientError: + log.info(f'Database {database} does not exist on account {accountid}...') + return False @staticmethod @Worker.handler(path='glue.dataset.database.tables') @@ -140,12 +138,13 @@ def table_exists(**data): log.info(f'Glue table not found: {data}') return None + @staticmethod def _create_table(**data): accountid = data['accountid'] - session = SessionHelper.remote_session(accountid=accountid) region = data.get('region', 'eu-west-1') database = data.get('database', 'UnknownDatabaseName') + session = SessionHelper.remote_session(accountid=accountid) glue = session.client('glue', region_name=region) log.info( 'Creating table {} in database {}'.format( @@ -155,7 +154,7 @@ def _create_table(**data): if not Glue.database_exists( database=database, region=region, accountid=accountid ): - Glue._create_database(accountid, database, region, None) + Glue.create_database(accountid, database, region, None) if 'table_input' not in data: table_input = { 'Name': data['tablename'], @@ -222,6 +221,47 @@ def _create_table(**data): ) return response + @staticmethod + def create_resource_link(**data): + accountid = data['accountid'] + region = data['region'] + database = data['database'] + 
resource_link_name = data['resource_link_name'] + resource_link_input = data['resource_link_input'] + log.info( + f'Creating ResourceLink {resource_link_name} in database {accountid}://{database}' + ) + try: + session = SessionHelper.remote_session(accountid=accountid) + glue = session.client('glue', region_name=region) + resource_link = Glue.table_exists( + accountid=accountid, + region=region, + database=database, + tablename=resource_link_name, + ) + if resource_link: + log.info( + f'ResourceLink {resource_link_name} already exists in database {accountid}://{database}' + ) + else: + resource_link = glue.create_table( + CatalogId=accountid, + DatabaseName=database, + TableInput=resource_link_input, + ) + log.info( + f'Successfully created ResourceLink {resource_link_name} in database {accountid}://{database}' + ) + return resource_link + except ClientError as e: + log.error( + f'Could not create ResourceLink {resource_link_name} ' + f'in database {accountid}://{database} ' + f'due to: {e}' + ) + raise e + @staticmethod def is_resource_link(table_input: dict): """ @@ -268,21 +308,64 @@ def delete_table_and_create_resourcelink(glue, database, accountid, table_input) ) raise e + @staticmethod + def delete_database(**data): + accountid = data['accountid'] + region = data['region'] + database = data['database'] + log.info(f'Deleting database {accountid}://{database} ...') + try: + session = SessionHelper.remote_session(accountid=accountid) + glue = session.client('glue', region_name=region) + if Glue.database_exists( + accountid=accountid, + region=region, + database=database, + ): + glue.delete_database(CatalogId=accountid, Name=database) + return True + except ClientError as e: + log.error( + f'Could not delete database {database} ' + f'in account {accountid} ' + f'due to: {e}' + ) + raise e + @staticmethod def batch_delete_tables(**data): accountid = data['accountid'] - session = SessionHelper.remote_session(accountid=accountid) - glue = session.client('glue', region_name=data.get('region', 'eu-west-1')) + region = data['region'] database = data['database'] tables = data['tables'] - log.debug(f'Batch deleting tables: {tables}') - response = glue.batch_delete_table( - CatalogId=accountid, DatabaseName=database, TablesToDelete=tables - ) - log.debug( - f'Batch deleted tables {len(tables)} from database {database} successfully' - ) - return response + + if not tables: + log.info('No tables to delete exiting method...') + return + + log.info(f'Batch deleting tables: {tables}') + try: + session = SessionHelper.remote_session(accountid=accountid) + glue = session.client('glue', region_name=region) + if Glue.database_exists( + accountid=accountid, + region=region, + database=database, + ): + glue.batch_delete_table( + CatalogId=accountid, DatabaseName=database, TablesToDelete=tables + ) + log.debug( + f'Batch deleted tables {len(tables)} from database {database} successfully' + ) + return True + except ClientError as e: + log.error( + f'Could not batch delete tables {tables} ' + f'in database {accountid}://{database} ' + f'due to: {e}' + ) + raise e @staticmethod @Worker.handler(path='glue.dataset.crawler.create') diff --git a/backend/dataall/aws/handlers/lakeformation.py b/backend/dataall/aws/handlers/lakeformation.py new file mode 100644 index 000000000..5b0de94a1 --- /dev/null +++ b/backend/dataall/aws/handlers/lakeformation.py @@ -0,0 +1,223 @@ +import logging +import uuid + +from botocore.exceptions import ClientError + +from .sts import SessionHelper + +log = 
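A Glue resource link, as created by Glue.create_resource_link above, is an ordinary Glue table whose TableInput carries a TargetTable pointer into the source catalog. A minimal sketch of the underlying boto3 call; account ids, database and table names are placeholders:

    import boto3

    glue = boto3.client('glue', region_name='eu-west-1')
    glue.create_table(
        CatalogId='222222222222',             # target (consumer) account
        DatabaseName='mydataset_shared_abc',  # shared database in the target account
        TableInput={
            'Name': 'orders',
            'TargetTable': {
                'CatalogId': '111111111111',  # source (producer) account
                'DatabaseName': 'mydataset',
                'Name': 'orders',
            },
        },
    )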
logging.getLogger('aws:lakeformation') + + +class LakeFormation: + def __init__(self): + pass + + @staticmethod + def grant_pivot_role_all_database_permissions(accountid, region, database): + LakeFormation.grant_permissions_to_database( + client=SessionHelper.remote_session(accountid=accountid).client( + 'lakeformation', region_name=region + ), + principals=[SessionHelper.get_delegation_role_arn(accountid)], + database_name=database, + permissions=['ALL'], + ) + + @staticmethod + def grant_permissions_to_database( + client, + principals, + database_name, + permissions, + permissions_with_grant_options=None, + ): + for principal in principals: + log.info( + f'Granting database permissions {permissions} to {principal} on database {database_name}' + ) + try: + client.grant_permissions( + Principal={'DataLakePrincipalIdentifier': principal}, + Resource={ + 'Database': {'Name': database_name}, + }, + Permissions=permissions, + ) + log.info( + f'Successfully granted principal {principal} permissions {permissions} ' + f'to {database_name}' + ) + except ClientError as e: + log.error( + f'Could not grant permissions ' + f'principal {principal} ' + f'{permissions} to database {database_name} due to: {e}' + ) + + @staticmethod + def grant_permissions_to_table( + client, + principal, + database_name, + table_name, + permissions, + permissions_with_grant_options=None, + ): + try: + grant_dict = dict( + Principal={'DataLakePrincipalIdentifier': principal}, + Resource={'Table': {'DatabaseName': database_name, 'Name': table_name}}, + Permissions=permissions, + ) + if permissions_with_grant_options: + grant_dict[ + 'PermissionsWithGrantOption' + ] = permissions_with_grant_options + + response = client.grant_permissions(**grant_dict) + + log.info( + f'Successfully granted principal {principal} permissions {permissions} ' + f'to {database_name}.{table_name}: {response}' + ) + except ClientError as e: + log.warning( + f'Could not grant principal {principal} ' + f'permissions {permissions} to table ' + f'{database_name}.{table_name} due to: {e}' + ) + # raise e + + @staticmethod + def revoke_iamallowedgroups_super_permission_from_table( + client, accountid, database, table + ): + """ + When upgrading to LF tables can still have IAMAllowedGroups permissions + Unless this is revoked the table can not be shared using LakeFormation + :param client: + :param accountid: + :param database: + :param table: + :return: + """ + try: + log.info( + f'Revoking IAMAllowedGroups Super ' + f'permission for table {database}|{table}' + ) + LakeFormation.batch_revoke_permissions( + client, + accountid, + entries=[ + { + 'Id': str(uuid.uuid4()), + 'Principal': {'DataLakePrincipalIdentifier': 'EVERYONE'}, + 'Resource': { + 'Table': { + 'DatabaseName': database, + 'Name': table, + 'CatalogId': accountid, + } + }, + 'Permissions': ['ALL'], + 'PermissionsWithGrantOption': [], + } + ], + ) + except ClientError as e: + log.debug( + f'Could not revoke IAMAllowedGroups Super ' + f'permission on table {database}|{table} due to {e}' + ) + + @staticmethod + def batch_revoke_permissions(client, accountid, entries): + """ + Batch revoke permissions to entries + Retry is set for api throttling + :param client: + :param accountid: + :param entries: + :return: + """ + entries_chunks: list = [entries[i : i + 20] for i in range(0, len(entries), 20)] + failures = [] + try: + for entries_chunk in entries_chunks: + response = client.batch_revoke_permissions( + CatalogId=accountid, Entries=entries_chunk + ) + log.info(f'Batch Revoke {entries_chunk} 
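LakeFormation.batch_revoke_permissions above slices its entries into windows of 20, the batch size it assumes the API accepts per call. The idiom in isolation, with toy entries shaped like the revoke entries the handler receives (principals and names are placeholders):

    import uuid

    entries = [
        {
            'Id': str(uuid.uuid4()),
            'Principal': {'DataLakePrincipalIdentifier': '222222222222'},
            'Resource': {
                'Table': {'DatabaseName': 'db', 'Name': f'table_{n}', 'CatalogId': '111111111111'}
            },
            'Permissions': ['DESCRIBE', 'SELECT'],
        }
        for n in range(45)
    ]
    entries_chunks = [entries[i:i + 20] for i in range(0, len(entries), 20)]
    assert [len(chunk) for chunk in entries_chunks] == [20, 20, 5]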
response: {response}') + failures.extend(response.get('Failures')) + except ClientError as e: + for failure in failures: + if not ( + failure['Error']['ErrorCode'] == 'InvalidInputException' + and ( + 'Grantee has no permissions' in failure['Error']['ErrorMessage'] + or 'No permissions revoked' in failure['Error']['ErrorMessage'] + ) + ): + log.warning(f'Batch Revoke ended with failures: {failures}') + raise e + + @staticmethod + def grant_resource_link_permission_on_target(client, source, target): + for principal in target['principals']: + try: + table_grant = dict( + Principal={'DataLakePrincipalIdentifier': principal}, + Resource={ + 'TableWithColumns': { + 'DatabaseName': source['database'], + 'Name': source['tablename'], + 'ColumnWildcard': {}, + 'CatalogId': source['accountid'], + } + }, + Permissions=['DESCRIBE', 'SELECT'], + PermissionsWithGrantOption=[], + ) + client.grant_permissions(**table_grant) + log.info( + f'Successfully granted permissions DESCRIBE,SELECT to {principal} on target ' + f'{source["accountid"]}://{source["database"]}/{source["tablename"]}' + ) + except ClientError as e: + logging.error( + f'Failed granting principal {principal} ' + 'read access to resource link on target' + f' {source["accountid"]}://{source["database"]}/{source["tablename"]} ' + f'due to: {e}' + ) + raise e + + @staticmethod + def grant_resource_link_permission(client, source, target, target_database): + for principal in target['principals']: + resourcelink_grant = dict( + Principal={'DataLakePrincipalIdentifier': principal}, + Resource={ + 'Table': { + 'DatabaseName': target_database, + 'Name': source['tablename'], + 'CatalogId': target['accountid'], + } + }, + # Resource link only supports DESCRIBE and DROP permissions no SELECT + Permissions=['DESCRIBE'], + ) + try: + client.grant_permissions(**resourcelink_grant) + log.info( + f'Granted resource link DESCRIBE access ' + f'to principal {principal} on {target["accountid"]}://{target_database}/{source["tablename"]}' + ) + except ClientError as e: + logging.error( + f'Failed granting principal {principal} ' + f'read access to resource link on {target["accountid"]}://{target_database}/{source["tablename"]} ' + f'due to: {e}' + ) + raise e diff --git a/backend/dataall/aws/handlers/ram.py b/backend/dataall/aws/handlers/ram.py new file mode 100644 index 000000000..f089db15b --- /dev/null +++ b/backend/dataall/aws/handlers/ram.py @@ -0,0 +1,225 @@ +import logging +import time + +from botocore.exceptions import ClientError + +from .sts import SessionHelper + +log = logging.getLogger('aws:ram') + + +class Ram: + @staticmethod + def get_resource_share_invitations( + client, resource_share_arns, sender_account, receiver_account + ): + log.info(f'Listing invitations for resourceShareArns: {resource_share_arns}') + try: + resource_share_invitations = [] + + paginator = client.get_paginator('get_resource_share_invitations') + invitation_pages = paginator.paginate(resourceShareArns=resource_share_arns) + for page in invitation_pages: + resource_share_invitations.extend(page.get('resourceShareInvitations')) + + filtered_invitations = [ + i + for i in resource_share_invitations + if i['senderAccountId'] == sender_account + and i['receiverAccountId'] == receiver_account + ] + return filtered_invitations + except ClientError as e: + log.error( + f'Failed retrieving RAM resource ' + f'share invitations {resource_share_arns} due to {e}' + ) + raise e + + @staticmethod + def accept_resource_share_invitation(client, resource_share_invitation_arn): + try: + 
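Cross-account LakeFormation grants materialize as RAM resource shares on the source account, which surface as invitations on the target account; the Ram helpers here locate those invitations and accept the pending ones. Stripped of data.all's session handling and status bookkeeping, the handshake reduces to roughly this sketch (region and credentials are placeholders, data.all resolves both via SessionHelper):

    import boto3

    target_ram = boto3.client('ram', region_name='eu-west-1')  # client in the target account

    paginator = target_ram.get_paginator('get_resource_share_invitations')
    for page in paginator.paginate():
        for invitation in page['resourceShareInvitations']:
            if invitation['status'] == 'PENDING':
                target_ram.accept_resource_share_invitation(
                    resourceShareInvitationArn=invitation['resourceShareInvitationArn']
                )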
response = client.accept_resource_share_invitation( + resourceShareInvitationArn=resource_share_invitation_arn + ) + log.info(f'Accepted ram invitation {resource_share_invitation_arn}') + return response.get('resourceShareInvitation') + except ClientError as e: + if ( + e.response['Error']['Code'] + == 'ResourceShareInvitationAlreadyAcceptedException' + ): + log.info( + f'Failed to accept RAM invitation ' + f'{resource_share_invitation_arn} already accepted' + ) + else: + log.error( + f'Failed to accept RAM invitation ' + f'{resource_share_invitation_arn} due to {e}' + ) + raise e + + @staticmethod + def accept_ram_invitation(**data): + """ + Accepts RAM invitations on the target account + """ + retry_share_table = False + failed_invitations = [] + source = data['source'] + target = data['target'] + + if source['accountid'] == target['accountid']: + log.debug('Skipping RAM invitation management for same account sharing.') + return True + + source_session = SessionHelper.remote_session(accountid=source['accountid']) + source_ram = source_session.client('ram', region_name=source['region']) + + target_session = SessionHelper.remote_session(accountid=target['accountid']) + target_ram = target_session.client('ram', region_name=target['region']) + + resource_arn = ( + f'arn:aws:glue:{source["region"]}:{source["accountid"]}:' + f'table/{data["source"]["database"]}/{data["source"]["tablename"]}' + ) + associations = Ram.list_resource_share_associations(source_ram, resource_arn) + resource_share_arns = [a['resourceShareArn'] for a in associations] + + ram_invitations = Ram.get_resource_share_invitations( + target_ram, resource_share_arns, source['accountid'], target['accountid'] + ) + log.info( + f'Found {len(ram_invitations)} RAM invitations for resourceShareArn: {resource_share_arns}' + ) + for invitation in ram_invitations: + if 'LakeFormation' in invitation['resourceShareName']: + if invitation['status'] == 'PENDING': + log.info( + f'Invitation {invitation} is in PENDING status accepting it ...' + ) + Ram.accept_resource_share_invitation( + target_ram, invitation['resourceShareInvitationArn'] + ) + # Ram invitation acceptance is slow + time.sleep(5) + elif ( + invitation['status'] == 'EXPIRED' + or invitation['status'] == 'REJECTED' + ): + log.warning( + f'Invitation {invitation} has expired or was rejected. ' + 'Table flagged for revoke re-share.' + 'Deleting the resource share to reset the invitation... ' + ) + failed_invitations.append(invitation) + retry_share_table = True + source_ram.delete_resource_share( + resourceShareArn=invitation['resourceShareArn'] + ) + + elif invitation['status'] == 'ACCEPTED': + log.info( + f'Invitation {invitation} already accepted nothing to do ...' + ) + else: + log.warning( + f'Invitation is in an unknown status adding {invitation["status"]}. ' + 'Adding it to retry share list ...' 
+                    )
+
+        return retry_share_table, failed_invitations
+
+    @staticmethod
+    def list_resource_share_associations(client, resource_arn):
+        associations = []
+        try:
+            log.debug(f'RAM list_resource_share_associations : {resource_arn}')
+
+            paginator = client.get_paginator(
+                'get_resource_share_associations'
+            ).paginate(
+                associationType='RESOURCE',
+                resourceArn=resource_arn,
+            )
+            for page in paginator:
+                associations.extend(page['resourceShareAssociations'])
+
+            log.info(f'Found resource_share_associations : {associations}')
+            return associations
+
+        except ClientError as e:
+            log.error(
+                f'Could not find resource share associations for resource {resource_arn} due to: {e}'
+            )
+            raise e
+
+    @staticmethod
+    def delete_resource_shares(client, resource_arn):
+        log.info(f'Cleaning RAM resource shares for resource: {resource_arn}')
+        try:
+            associations = Ram.list_resource_share_associations(client, resource_arn)
+            for a in associations:
+                log.info(f"Deleting resource share: {a['resourceShareArn']}")
+                client.delete_resource_share(resourceShareArn=a['resourceShareArn'])
+            return associations
+        except ClientError as e:
+            log.error(f'Failed cleaning RAM resource shares due to: {e}')
+
+    @staticmethod
+    def delete_lfv1_resource_shares_for_table(client, resource_arn):
+        log.info(f'Cleaning LF V1 RAM resource shares for resource: {resource_arn}')
+        try:
+            associations = Ram.list_resource_share_associations(client, resource_arn)
+            for a in associations:
+                # Match LakeFormation V1 shares only: V2 shares carry
+                # 'LakeFormation-V2' in their name and must be kept.
+                if (
+                    'LakeFormation' in a['resourceShareName']
+                    and 'LakeFormation-V2' not in a['resourceShareName']
+                ):
+                    log.info(
+                        f"Found LakeFormation V1 RAM association: {a['resourceShareName']}. "
+                        'Deleting it ...'
+                    )
+                    client.delete_resource_share(resourceShareArn=a['resourceShareArn'])
+            return associations
+        except ClientError as e:
+            log.error(f'Failed cleaning RAM resource shares due to: {e}')
+
+    @staticmethod
+    def delete_lakeformation_v1_resource_shares(client):
+        log.info('Cleaning LF V1 RAM resource shares...')
+
+        try:
+            resources = []
+            paginator = client.get_paginator('list_resources').paginate(
+                resourceOwner='SELF',
+                resourceRegionScope='REGIONAL',
+            )
+            for page in paginator:
+                resources.extend(page['resources'])
+
+            log.info(f'Found resources : {len(resources)}')
+            resource_shares = []
+            for r in resources:
+                paginator = client.get_paginator('get_resource_shares').paginate(
+                    resourceShareArns=[r['resourceShareArn']],
+                    resourceOwner='SELF',
+                )
+                for page in paginator:
+                    resource_shares.extend(page['resourceShares'])
+            for rs in resource_shares:
+                if (
+                    'LakeFormation' in rs['name']
+                    and 'LakeFormation-V2' not in rs['name']
+                ):
+                    log.info(
+                        f"Found LakeFormation V1 RAM association: {rs['name']}. "
+                        'Deleting it ...'
+                    )
+                    client.delete_resource_share(
+                        resourceShareArn=rs['resourceShareArn']
+                    )
+
+        except ClientError as e:
+            log.error(f'Failed cleaning RAM resource shares due to: {e}')
diff --git a/backend/dataall/db/api/share_object.py b/backend/dataall/db/api/share_object.py
index 7c17031c5..7b100cf9c 100644
--- a/backend/dataall/db/api/share_object.py
+++ b/backend/dataall/db/api/share_object.py
@@ -1,15 +1,14 @@
 import logging
-from datetime import datetime
 
 from sqlalchemy import and_, or_, func, case
 
-from .. import models, exceptions, permissions, paginate
-from .. import api
 from . import (
     has_resource_perm,
     ResourcePolicy,
     Environment,
 )
+from .. import api
+from .. import models, exceptions, permissions, paginate
 from ..models.Enums import ShareObjectStatus, ShareableType, PrincipalType
 
 logger = logging.getLogger(__name__)
@@ -611,3 +610,151 @@
             )
         )
         return paginate(query, data.get('page', 1), data.get('pageSize', 10)).to_dict()
+
+    @staticmethod
+    def get_share_by_dataset_and_environment(session, dataset_uri, environment_uri):
+        environment_groups = session.query(models.EnvironmentGroup).filter(
+            models.EnvironmentGroup.environmentUri == environment_uri
+        )
+        groups = [g.groupUri for g in environment_groups]
+        share = (
+            session.query(models.ShareObject)
+            .filter(
+                and_(
+                    models.ShareObject.datasetUri == dataset_uri,
+                    models.ShareObject.environmentUri == environment_uri,
+                    models.ShareObject.principalId.in_(groups),
+                )
+            )
+            .first()
+        )
+        if not share:
+            raise exceptions.ObjectNotFound('Share', f'{dataset_uri}/{environment_uri}')
+        return share
+
+    @staticmethod
+    def update_share_item_status(
+        session,
+        share_item: models.ShareObjectItem,
+        status: str,
+    ) -> models.ShareObjectItem:
+        logger.info(f'Updating share item status to {status}')
+        share_item.status = status
+        session.commit()
+        return share_item
+
+    @staticmethod
+    def find_share_item_by_table(
+        session,
+        share: models.ShareObject,
+        table: models.DatasetTable,
+    ) -> models.ShareObjectItem:
+        share_item: models.ShareObjectItem = (
+            session.query(models.ShareObjectItem)
+            .filter(
+                and_(
+                    models.ShareObjectItem.itemUri == table.tableUri,
+                    models.ShareObjectItem.shareUri == share.shareUri,
+                )
+            )
+            .first()
+        )
+        return share_item
+
+    @staticmethod
+    def get_share_data(session, share_uri, status):
+        share: models.ShareObject = session.query(models.ShareObject).get(share_uri)
+        if not share:
+            raise exceptions.ObjectNotFound('Share', share_uri)
+
+        dataset: models.Dataset = session.query(models.Dataset).get(share.datasetUri)
+        if not dataset:
+            raise exceptions.ObjectNotFound('Dataset', share.datasetUri)
+
+        source_environment: models.Environment = session.query(models.Environment).get(
+            dataset.environmentUri
+        )
+        if not source_environment:
+            raise exceptions.ObjectNotFound('SourceEnvironment', dataset.environmentUri)
+
+        target_environment: models.Environment = session.query(models.Environment).get(
+            share.environmentUri
+        )
+        if not target_environment:
+            raise exceptions.ObjectNotFound('TargetEnvironment', share.environmentUri)
+
+        shared_tables = (
+            session.query(models.DatasetTable)
+            .join(
+                models.ShareObjectItem,
+                models.ShareObjectItem.itemUri == models.DatasetTable.tableUri,
+            )
+            .join(
+                models.ShareObject,
+                models.ShareObject.shareUri == models.ShareObjectItem.shareUri,
+            )
+            .filter(
+                and_(
+                    models.ShareObject.datasetUri == dataset.datasetUri,
+                    models.ShareObject.environmentUri
+                    == target_environment.environmentUri,
+                    models.ShareObject.status.in_(status),
+                )
+            )
+            .all()
+        )
+
+        env_group: models.EnvironmentGroup = (
+            session.query(models.EnvironmentGroup)
+            .filter(
+                and_(
+                    models.EnvironmentGroup.environmentUri == share.environmentUri,
+                    models.EnvironmentGroup.groupUri == share.principalId,
+                )
+            )
+            .first()
+        )
+        if not env_group:
+            raise Exception(
+                f'Share object Team {share.principalId} is not a member of the '
+                f'environment {target_environment.name}/{target_environment.AwsAccountId}'
+            )
+        return (
+            env_group,
+            dataset,
+            share,
+            shared_tables,
+            source_environment,
+            target_environment,
+        )
+
+    @staticmethod
+    def other_approved_share_object_exists(session, environment_uri):
+        return (
+            session.query(models.ShareObject)
+            .filter(
+                and_(
+                    models.ShareObject.environmentUri == environment_uri,
+                    models.ShareObject.status
+                    == models.Enums.ShareObjectStatus.Approved.value,
+                )
+            )
+            .all()
+        )
+
+    @staticmethod
+    def is_shared_table(session, environment_uri, dataset_uri, table_name):
+        return (
+            session.query(models.ShareObjectItem)
+            .join(
+                models.ShareObject,
+                models.ShareObjectItem.shareUri == models.ShareObject.shareUri,
+            )
+            .filter(
+                and_(
+                    models.ShareObjectItem.GlueTableName == table_name,
+                    models.ShareObject.datasetUri == dataset_uri,
+                    models.ShareObject.status == models.Enums.ShareObjectStatus.Approved.value,
+                    models.ShareObject.environmentUri == environment_uri,
+                )
+            )
+            .first()
+        )
+
diff --git a/backend/dataall/tasks/data_sharing/__init__.py b/backend/dataall/tasks/data_sharing/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/dataall/tasks/data_sharing/common/__init__.py b/backend/dataall/tasks/data_sharing/common/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/dataall/tasks/data_sharing/common/share_approval.py b/backend/dataall/tasks/data_sharing/common/share_approval.py
new file mode 100644
index 000000000..ffa53ca73
--- /dev/null
+++ b/backend/dataall/tasks/data_sharing/common/share_approval.py
@@ -0,0 +1,369 @@
+import abc
+import logging
+import uuid
+
+from botocore.exceptions import ClientError
+
+from ....aws.handlers.glue import Glue
+from ....aws.handlers.lakeformation import LakeFormation
+from ....aws.handlers.quicksight import Quicksight
+from ....aws.handlers.sts import SessionHelper
+from ....db import api, exceptions, models
+from ....utils.alarm_service import AlarmService
+
+logger = logging.getLogger(__name__)
+
+
+class ShareApproval:
+    def __init__(
+        self,
+        session,
+        shared_db_name: str,
+        dataset: models.Dataset,
+        share: models.ShareObject,
+        shared_tables: [models.DatasetTable],
+        source_environment: models.Environment,
+        target_environment: models.Environment,
+        env_group: models.EnvironmentGroup,
+    ):
+        self.session = session
+        self.env_group = env_group
+        self.dataset = dataset
+        self.share = share
+        self.shared_tables = shared_tables
+        self.source_environment = source_environment
+        self.target_environment = target_environment
+        self.shared_db_name = shared_db_name
+
+    @abc.abstractmethod
+    def approve_share(self) -> [str]:
+        raise NotImplementedError
+
+    def get_share_principals(self) -> [str]:
+        """
+        Builds list of principals of the share request
+        Returns
+        -------
+        List of principals
+        """
+        principals = [self.env_group.environmentIAMRoleArn]
+        if self.target_environment.dashboardsEnabled:
+            q_group = Quicksight.get_quicksight_group_arn(
+                self.target_environment.AwsAccountId
+            )
+            if q_group:
+                principals.append(q_group)
+        return principals
+
+    def check_share_item_exists_on_glue_catalog(
+        self, share_item: models.ShareObjectItem, table: models.DatasetTable
+    ) -> None:
+        """
+        Checks if a table in the share request
+        still exists on the Glue catalog before sharing
+
+        Parameters
+        ----------
+        share_item : request share item
+        table : dataset table
+
+        Raises
+        ------
+        exceptions.AWSResourceNotFound
+        """
+        if not Glue.table_exists(
+            accountid=self.source_environment.AwsAccountId,
+            region=self.source_environment.region,
+            database=table.GlueDatabaseName,
+            tablename=table.GlueTableName,
+        ):
+            raise exceptions.AWSResourceNotFound(
+                action='ApproveShare',
+                message=(
+                    f'Share Item {share_item.itemUri} found on share request'
+                    f' but its
correspondent Glue table {table.GlueTableName} does not exist.' + ), + ) + + @classmethod + def create_shared_database( + cls, + target_environment: models.Environment, + dataset: models.Dataset, + shared_db_name: str, + principals: [str], + ) -> dict: + + """ + Creates the shared database if does not exists. + 1) Grants pivot role ALL permission on shareddb + 2) Grant Team role DESCRIBE Only permission + + Parameters + ---------- + target_environment : + dataset : + shared_db_name : + principals : + + Returns + ------- + boto3 glue create_database + """ + + logger.info( + f'Creating shared db ...' + f'{target_environment.AwsAccountId}://{shared_db_name}' + ) + + database = Glue.create_database( + target_environment.AwsAccountId, + shared_db_name, + target_environment.region, + f's3://{dataset.S3BucketName}', + ) + + LakeFormation.grant_pivot_role_all_database_permissions( + target_environment.AwsAccountId, target_environment.region, shared_db_name + ) + + LakeFormation.grant_permissions_to_database( + client=SessionHelper.remote_session( + accountid=target_environment.AwsAccountId + ).client('lakeformation', region_name=target_environment.region), + principals=principals, + database_name=shared_db_name, + permissions=['DESCRIBE'], + ) + + return database + + @classmethod + def create_resource_link(cls, **data) -> dict: + """ + Creates a resource link to the source shared Glue table + Parameters + ---------- + data : data of source and target accounts + + Returns + ------- + boto3 creation response + """ + source = data['source'] + target = data['target'] + target_session = SessionHelper.remote_session(accountid=target['accountid']) + lakeformation_client = target_session.client( + 'lakeformation', region_name=target['region'] + ) + target_database = target['database'] + resource_link_input = { + 'Name': source['tablename'], + 'TargetTable': { + 'CatalogId': data['source']['accountid'], + 'DatabaseName': source['database'], + 'Name': source['tablename'], + }, + } + + try: + resource_link = Glue.create_resource_link( + accountid=target['accountid'], + region=target['region'], + database=target_database, + resource_link_name=source['tablename'], + resource_link_input=resource_link_input, + ) + + LakeFormation.grant_resource_link_permission( + lakeformation_client, source, target, target_database + ) + + LakeFormation.grant_resource_link_permission_on_target( + lakeformation_client, source, target + ) + + return resource_link + + except ClientError as e: + logger.warning( + f'Resource Link {resource_link_input} was not created due to: {e}' + ) + raise e + + @classmethod + def clean_shared_database( + cls, + session, + dataset: models.Dataset, + shared_tables: [models.DatasetTable], + target_environment: models.Environment, + shared_db_name: str, + ) -> [str]: + """ + After share approval verify that the shared database + do not have any removed items from the share request. 
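Reduced to its core, clean_shared_database computes a set difference between the tables present in the shared Glue database and the tables still on the share request, then revokes access to and deletes the leftovers (unless another approved share still covers them). A toy illustration of that rule; table names are made up:

    glue_tables = {'orders', 'customers', 'legacy_table'}  # tables found in the shared db
    share_items = {'orders', 'customers'}                  # tables still on the share request

    stale_tables = glue_tables - share_items               # revoke + batch_delete_tables targets
    assert stale_tables == {'legacy_table'}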
+ + Parameters + ---------- + session : db + dataset : models.Dataset + shared_tables : [models.DatasetTable] + target_environment : models.Environment + shared_db_name : shared database name + + Returns + ------- + List of deleted tables from the shared database + """ + tables_to_delete = [] + + shared_glue_tables = Glue.list_glue_database_tables( + accountid=target_environment.AwsAccountId, + database=shared_db_name, + region=target_environment.region, + ) + logger.info( + f'Shared database {shared_db_name} glue tables: {shared_glue_tables}' + ) + + shared_tables = [t.GlueTableName for t in shared_tables] + logger.info(f'Share items of the share object {shared_tables}') + + aws_session = SessionHelper.remote_session(accountid=dataset.AwsAccountId) + client = aws_session.client('lakeformation', region_name=dataset.region) + + for table in shared_glue_tables: + if table['Name'] not in shared_tables: + logger.info( + f'Found a table not part of the share: {dataset.GlueDatabaseName}//{table["Name"]}' + ) + is_shared = api.ShareObject.is_shared_table( + session, + target_environment.environmentUri, + dataset.datasetUri, + table['Name'], + ) + if not is_shared: + logger.info( + f'Access to table {dataset.AwsAccountId}//{dataset.GlueDatabaseName}//{table["Name"]} ' + f'will be removed for account {target_environment.AwsAccountId}' + ) + if Glue.table_exists( + **{ + 'accountid': dataset.AwsAccountId, + 'region': dataset.region, + 'database': dataset.GlueDatabaseName, + 'tablename': table['Name'], + } + ): + LakeFormation.batch_revoke_permissions( + client, + target_environment.AwsAccountId, + [ + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 'DataLakePrincipalIdentifier': target_environment.AwsAccountId + }, + 'Resource': { + 'TableWithColumns': { + 'DatabaseName': dataset.GlueDatabaseName, + 'Name': table['Name'], + 'ColumnWildcard': {}, + 'CatalogId': dataset.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + 'PermissionsWithGrantOption': [ + 'DESCRIBE', + 'SELECT', + ], + } + ], + ) + + tables_to_delete.append(table['Name']) + + Glue.batch_delete_tables( + accountid=target_environment.AwsAccountId, + region=target_environment.region, + database=shared_db_name, + tables=tables_to_delete, + ) + + return tables_to_delete + + def handle_share_failure( + self, + table: models.DatasetTable, + share_item: models.ShareObjectItem, + error: Exception, + ) -> None: + """ + Handles share failure by raising an alarm to alarmsTopic + Parameters + ---------- + table : dataset table + share_item : failed item + error : share error + + Returns + ------- + None + """ + logging.error( + f'Failed to share table {table.GlueTableName} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region}' + f'due to: {error}' + ) + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_Failed.value, + ) + AlarmService().trigger_table_sharing_failure_alarm( + table, self.share, self.target_environment + ) + + def build_share_data(self, principals: [str], table: models.DatasetTable) -> dict: + """ + Build aws dict for boto3 operations on Glue and LF from share data + Parameters + ---------- + principals : team role + table : dataset table + + Returns + ------- + dict for boto3 operations + """ + data = { + 'source': { + 'accountid': self.source_environment.AwsAccountId, + 'region': self.source_environment.region, + 
'database': table.GlueDatabaseName, + 'tablename': table.GlueTableName, + }, + 'target': { + 'accountid': self.target_environment.AwsAccountId, + 'region': self.target_environment.region, + 'principals': principals, + 'database': self.shared_db_name, + }, + } + return data + + def delete_deprecated_shared_database(self) -> bool: + """ + Deletes deprecated shared db + Returns + ------- + True if delete is successful + """ + return Glue.delete_database( + accountid=self.dataset.AwsAccountId, + region=self.dataset.region, + database=f'{self.dataset.GlueDatabaseName}shared', + ) diff --git a/backend/dataall/tasks/data_sharing/common/share_revoke.py b/backend/dataall/tasks/data_sharing/common/share_revoke.py new file mode 100644 index 000000000..a92896366 --- /dev/null +++ b/backend/dataall/tasks/data_sharing/common/share_revoke.py @@ -0,0 +1,165 @@ +import abc +import logging +import uuid + +from ....aws.handlers.glue import Glue +from ....aws.handlers.lakeformation import LakeFormation +from ....aws.handlers.sts import SessionHelper +from ....db import models, api, exceptions +from ....utils.alarm_service import AlarmService + +log = logging.getLogger(__name__) + + +class ShareRevoke: + def __init__( + self, + session, + shared_db_name, + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ): + self.session = session + self.env_group = env_group + self.dataset = dataset + self.share = share + self.shared_tables = shared_tables + self.source_environment = source_environment + self.target_environment = target_environment + self.shared_db_name = shared_db_name + + @abc.abstractmethod + def revoke_share(self): + return NotImplementedError + + def revoke_resource_links_access(self) -> [dict]: + """ + Loops through share request items and revokes access on LF + Returns + ------- + List of revoke entries + """ + aws_session = SessionHelper.remote_session( + accountid=self.target_environment.AwsAccountId + ) + client = aws_session.client( + 'lakeformation', region_name=self.target_environment.region + ) + revoke_entries = [] + + for table in self.shared_tables: + share_item = api.ShareObject.find_share_item_by_table( + self.session, self.share, table + ) + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_In_Progress.value, + ) + + try: + data = { + 'accountid': self.target_environment.AwsAccountId, + 'region': self.target_environment.region, + 'database': self.shared_db_name, + 'tablename': table.GlueTableName, + } + + log.info(f'Starting revoke for: {data}') + + if Glue.table_exists(**data): + revoke_entries.append( + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 'DataLakePrincipalIdentifier': self.env_group.environmentIAMRoleArn + }, + 'Resource': { + 'Table': { + 'DatabaseName': self.shared_db_name, + 'Name': table.GlueTableName, + 'CatalogId': self.target_environment.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + } + ) + + log.info(f'Revoking permissions for entries : {revoke_entries}') + + LakeFormation.batch_revoke_permissions( + client, self.target_environment.AwsAccountId, revoke_entries + ) + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_Share_Succeeded.value, + ) + + except Exception as e: + logging.error( + f'Failed to revoke LF permissions to table share {table.GlueTableName} ' + f'on target account {self.target_environment.AwsAccountId}/{self.target_environment.region}' + f'due to: {e}' + ) + 
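For orientation, this is the shape of the dict that build_share_data above assembles for each table; share_table_with_target_account, Ram.accept_ram_invitation and create_resource_link all consume it. All values below are placeholders:

    data = {
        'source': {
            'accountid': '111111111111',
            'region': 'eu-west-1',
            'database': 'mydataset',
            'tablename': 'orders',
        },
        'target': {
            'accountid': '222222222222',
            'region': 'eu-west-1',
            'principals': ['arn:aws:iam::222222222222:role/team-role'],
            'database': 'mydataset_shared_abc123def456',
        },
    }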
api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_Share_Failed.value, + ) + AlarmService().trigger_revoke_sharing_failure_alarm( + table, self.share, self.target_environment + ) + + return revoke_entries + + def delete_shared_database(self) -> bool: + """ + Deletes shared database when share request is rejected + + Returns + ------- + bool + """ + log.info(f'Deleting shared database {self.shared_db_name}') + return Glue.delete_database( + accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, + database=self.shared_db_name, + ) + + def check_share_item_exists_on_glue_catalog( + self, share_item: models.ShareObjectItem, table: models.DatasetTable + ) -> None: + """ + Checks if a table in the share request + still exists on the Glue catalog before revoking share + + Parameters + ---------- + share_item : request share item + table : dataset table + + Returns + ------- + exceptions.AWSResourceNotFound + """ + if not Glue.table_exists( + accountid=self.source_environment.AwsAccountId, + region=self.source_environment.region, + database=table.GlueDatabaseName, + tablename=table.GlueTableName, + ): + raise exceptions.AWSResourceNotFound( + action='RevokeShare', + message=( + f'Share Item {share_item.itemUri} found on share request' + f' but its correspondent Glue table {table.GlueTableName} does not exist.' + ), + ) diff --git a/backend/dataall/tasks/data_sharing/cross_account/__init__.py b/backend/dataall/tasks/data_sharing/cross_account/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py new file mode 100644 index 000000000..f2c9b47df --- /dev/null +++ b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py @@ -0,0 +1,173 @@ +import logging +import time + +from botocore.exceptions import ClientError + +from ..common.share_approval import ShareApproval +from ....aws.handlers.lakeformation import LakeFormation +from ....aws.handlers.ram import Ram +from ....aws.handlers.sts import SessionHelper +from ....db import models, api + +log = logging.getLogger(__name__) + + +class CrossAccountShareApproval(ShareApproval): + def __init__( + self, + session, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + target_environment: models.Environment, + env_group: models.EnvironmentGroup, + ): + super().__init__( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ) + + def approve_share( + self, + ) -> bool: + """ + 1) Gets share principals + 2) Creates the shared database if doesn't exist + 3) For each share request item: + a) update its status to share in progress + b) check if share item exists on glue catalog raise error if not and flag share item status to failed + c) grant external account to target account + d) accept Ram invitation if pending + e) create resource link on target account + f) grant permission to resource link for team role on target account + g) grant permission to resource link for team role on source account + h) update share item status to share successful + 4) Update shareddb by removing items not part of the share request + 5) Delete deprecated shareddb + + Returns + ------- + True if share is approved successfully + """ + principals = 
self.get_share_principals() + + self.create_shared_database( + self.target_environment, self.dataset, self.shared_db_name, principals + ) + + for table in self.shared_tables: + + share_item = api.ShareObject.find_share_item_by_table( + self.session, self.share, table + ) + if not share_item: + log.warning( + f'Share Item not found for {self.share.shareUri} ' + f'and Dataset Table {table.GlueTableName} continuing loop...' + ) + continue + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_In_Progress.value, + ) + + try: + + self.check_share_item_exists_on_glue_catalog(share_item, table) + + data = self.build_share_data(principals, table) + + self.share_table_with_target_account(**data) + + ( + retry_share_table, + failed_invitations, + ) = Ram.accept_ram_invitation(**data) + + if retry_share_table: + self.share_table_with_target_account(**data) + Ram.accept_ram_invitation(**data) + + self.create_resource_link(**data) + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_Succeeded.value, + ) + + except Exception as e: + self.handle_share_failure(table, share_item, e) + + self.clean_shared_database( + self.session, + self.dataset, + self.shared_tables, + self.target_environment, + self.shared_db_name, + ) + + self.delete_deprecated_shared_database() + + return True + + @classmethod + def share_table_with_target_account(cls, **data): + """ + Shares tables using Lake Formation + Sharing feature may take some extra seconds + :param data: + :return: + """ + source_accountid = data['source']['accountid'] + source_region = data['source']['region'] + + target_accountid = data['target']['accountid'] + target_region = data['target']['region'] + + source_session = SessionHelper.remote_session(accountid=source_accountid) + source_lf_client = source_session.client( + 'lakeformation', region_name=source_region + ) + try: + + LakeFormation.revoke_iamallowedgroups_super_permission_from_table( + source_lf_client, + source_accountid, + data['source']['database'], + data['source']['tablename'], + ) + + LakeFormation.grant_permissions_to_table( + source_lf_client, + target_accountid, + data['source']['database'], + data['source']['tablename'], + ['DESCRIBE', 'SELECT'], + ['DESCRIBE', 'SELECT'], + ) + + log.info( + f"Granted access to table {data['source']['tablename']} " + f'to external account {target_accountid} ' + ) + return True + + except ClientError as e: + logging.error( + f'Failed granting access to table {data["source"]["tablename"]} ' + f'from {source_accountid} / {source_region} ' + f'to external account{target_accountid}/{target_region}' + f'due to: {e}' + ) + raise e diff --git a/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py b/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py new file mode 100644 index 000000000..fcb1afa43 --- /dev/null +++ b/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py @@ -0,0 +1,120 @@ +import logging +import uuid + +from ..common.share_revoke import ShareRevoke +from ....aws.handlers.lakeformation import LakeFormation +from ....aws.handlers.ram import Ram +from ....aws.handlers.sts import SessionHelper +from ....db import api, models + +log = logging.getLogger(__name__) + + +class CrossAccountShareRevoke(ShareRevoke): + def __init__( + self, + session, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + 
target_environment: models.Environment, + env_group: models.EnvironmentGroup, + ): + super().__init__( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ) + + def revoke_share(self) -> bool: + """ + Revokes a share cross account + 1) revoke resource link access on target account + 2) delete shared database on target account + 3) revoke resource link access on source account + Returns + ------- + True if revoke is successful + """ + + self.revoke_resource_links_access() + + self.delete_shared_database() + + if not api.ShareObject.other_approved_share_object_exists( + self.session, self.target_environment.environmentUri + ): + self.revoke_external_account_access_on_source_account() + + return True + + def revoke_external_account_access_on_source_account(self) -> [dict]: + """ + 1) Revokes access to external account + if dataset is not shared with any other team from the same workspace + 2) Deletes resource_shares on RAM associated to revoked tables + + Returns + ------- + List of revoke entries + """ + log.info( + f'Revoking Access for AWS account: {self.target_environment.AwsAccountId}' + ) + aws_session = SessionHelper.remote_session( + accountid=self.source_environment.AwsAccountId + ) + client = aws_session.client( + 'lakeformation', region_name=self.source_environment.region + ) + revoke_entries = [] + for table in self.shared_tables: + + revoke_entries.append( + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 'DataLakePrincipalIdentifier': self.target_environment.AwsAccountId + }, + 'Resource': { + 'TableWithColumns': { + 'DatabaseName': table.GlueDatabaseName, + 'Name': table.GlueTableName, + 'ColumnWildcard': {}, + 'CatalogId': self.source_environment.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + 'PermissionsWithGrantOption': ['DESCRIBE', 'SELECT'], + } + ) + LakeFormation.batch_revoke_permissions( + client, self.source_environment.AwsAccountId, revoke_entries + ) + return revoke_entries + + def delete_ram_resource_shares(self, resource_arn: str) -> [dict]: + """ + Deletes resource share for the resource arn + Parameters + ---------- + resource_arn : glue table arn + + Returns + ------- + list of ram associations + """ + log.info(f'Cleaning RAM resource shares for resource: {resource_arn} ...') + return Ram.delete_resource_shares( + SessionHelper.remote_session( + accountid=self.source_environment.AwsAccountId + ).client('ram', region_name=self.source_environment.region), + resource_arn, + ) diff --git a/backend/dataall/tasks/data_sharing/data_sharing_service.py b/backend/dataall/tasks/data_sharing/data_sharing_service.py new file mode 100644 index 000000000..8fda17e2a --- /dev/null +++ b/backend/dataall/tasks/data_sharing/data_sharing_service.py @@ -0,0 +1,234 @@ +import logging +import os + +from .cross_account.approve_share import ( + CrossAccountShareApproval, +) +from .cross_account.revoke_share import ( + CrossAccountShareRevoke, +) +from .same_account.approve_share import ( + SameAccountShareApproval, +) +from .same_account.revoke_share import SameAccountShareRevoke +from ...aws.handlers.lakeformation import LakeFormation +from ...aws.handlers.ram import Ram +from ...aws.handlers.sts import SessionHelper +from ...db import api, models, Engine +from ...utils import Parameter + +log = logging.getLogger(__name__) + + +class DataSharingService: + def __init__(self): + pass + + @classmethod + def approve_share(cls, engine: Engine, share_uri: str) -> bool: + """ + 1) Retrieves share 
related model objects + 2) Build shared database name (unique db per team for a dataset) + 3) Grants pivot role ALL permissions on dataset db and its tables + 4) Calls sharing approval service + Parameters + ---------- + engine : db.engine + share_uri : share uri + + Returns + ------- + True if approve succeeds + """ + with engine.scoped_session() as session: + ( + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ) = api.ShareObject.get_share_data(session, share_uri, [models.Enums.ShareObjectStatus.Approved.value]) + + shared_db_name = cls.build_shared_db_name(dataset, share) + + LakeFormation.grant_pivot_role_all_database_permissions( + source_environment.AwsAccountId, + source_environment.region, + dataset.GlueDatabaseName, + ) + + if source_environment.AwsAccountId != target_environment.AwsAccountId: + return CrossAccountShareApproval( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ).approve_share() + + return SameAccountShareApproval( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ).approve_share() + + @classmethod + def reject_share(cls, engine: Engine, share_uri: str): + """ + 1) Retrieves share related model objects + 2) Build shared database name (unique db per team for a dataset) + 3) Grants pivot role ALL permissions on dataset db and its tables + 4) Calls sharing revoke service + + Parameters + ---------- + engine : db.engine + share_uri : share uri + + Returns + ------- + True if reject succeeds + """ + + with engine.scoped_session() as session: + ( + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ) = api.ShareObject.get_share_data(session, share_uri, [models.Enums.ShareObjectStatus.Rejected.value]) + + log.info(f'Revoking permissions for tables : {shared_tables}') + + shared_db_name = cls.build_shared_db_name(dataset, share) + + LakeFormation.grant_pivot_role_all_database_permissions( + source_environment.AwsAccountId, + source_environment.region, + dataset.GlueDatabaseName, + ) + + if source_environment.AwsAccountId != target_environment.AwsAccountId: + return CrossAccountShareRevoke( + session, + shared_db_name, + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ).revoke_share() + + return SameAccountShareRevoke( + session, + shared_db_name, + env_group, + dataset, + share, + shared_tables, + source_environment, + target_environment, + ).revoke_share() + + @classmethod + def build_shared_db_name( + cls, dataset: models.Dataset, share: models.ShareObject + ) -> str: + """ + Build Glue shared database name. + Unique per share Uri. 
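A worked example of the naming rule defined just below: appending the shareUri makes the shared database unique per share, and the slice keeps the name within Glue's length limit. Values are illustrative:

    glue_database_name = 'mydataset'  # dataset.GlueDatabaseName (placeholder)
    share_uri = 'abc123def456'        # share.shareUri (placeholder)

    shared_db_name = (glue_database_name + '_shared_' + share_uri)[:254]
    assert shared_db_name == 'mydataset_shared_abc123def456'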
+ Parameters + ---------- + dataset : models.Dataset + share : models.ShareObject + + Returns + ------- + Shared database name + """ + return (dataset.GlueDatabaseName + '_shared_' + share.shareUri)[:254] + + @classmethod + def clean_lfv1_ram_resources(cls, environment: models.Environment): + """ + Deletes LFV1 resource shares for an environment + Parameters + ---------- + environment : models.Environment + + Returns + ------- + None + """ + return Ram.delete_lakeformation_v1_resource_shares( + SessionHelper.remote_session(accountid=environment.AwsAccountId).client( + 'ram', region_name=environment.region + ) + ) + + @classmethod + def refresh_shares(cls, engine: Engine) -> bool: + """ + Refreshes the shares at scheduled frequency + Also cleans up LFV1 ram resource shares if enabled on SSM + Parameters + ---------- + engine : db.engine + + Returns + ------- + true if refresh succeeds + """ + with engine.scoped_session() as session: + environments = session.query(models.Environment).all() + shares = ( + session.query(models.ShareObject) + .filter(models.ShareObject.status.in_(['Approved', 'Rejected'])) + .all() + ) + + # Feature toggle: default value is False + if ( + Parameter().get_parameter( + os.getenv('envname', 'local'), 'shares/cleanlfv1ram' + ) + == 'True' + ): + log.info('LFV1 Cleanup toggle is enabled') + for e in environments: + log.info( + f'Cleaning LFV1 ram resource for environment: {e.AwsAccountId}/{e.region}...' + ) + cls.clean_lfv1_ram_resources(e) + + if not shares: + log.info('No Approved nor Rejected shares found. Nothing to do...') + return True + + for share in shares: + try: + log.info( + f'Refreshing share {share.shareUri} with {share.status} status...' + ) + if share.status == 'Approved': + cls.approve_share(engine, share.shareUri) + elif share.status == 'Rejected': + cls.reject_share(engine, share.shareUri) + except Exception as e: + log.error( + f'Failed refreshing share {share.shareUri} with {share.status}. 
' + f'due to: {e}' + ) + return True diff --git a/backend/dataall/tasks/data_sharing/same_account/__init__.py b/backend/dataall/tasks/data_sharing/same_account/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/backend/dataall/tasks/data_sharing/same_account/approve_share.py b/backend/dataall/tasks/data_sharing/same_account/approve_share.py new file mode 100644 index 000000000..ec40d12d1 --- /dev/null +++ b/backend/dataall/tasks/data_sharing/same_account/approve_share.py @@ -0,0 +1,102 @@ +import logging + +from ..common.share_approval import ShareApproval +from ....db import models, api + +log = logging.getLogger(__name__) + + +class SameAccountShareApproval(ShareApproval): + def __init__( + self, + session, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + target_environment: models.Environment, + env_group: models.EnvironmentGroup, + ): + super().__init__( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ) + + def approve_share(self) -> bool: + """ + Approves a share request for same account sharing + 1) Gets share principals + 2) Creates the shared database if doesn't exist + 3) For each share request item: + a) update its status to share in progress + b) check if share item exists on glue catalog raise error if not and flag share item status to failed + e) create resource link on same account + g) grant permission to resource link for team role on source account + h) update share item status to share successful + 4) Update shareddb by removing items not part of the share request + 5) Delete deprecated shareddb + + Returns + ------- + True if share is successful + """ + + principals = self.get_share_principals() + + self.create_shared_database( + self.target_environment, self.dataset, self.shared_db_name, principals + ) + + for table in self.shared_tables: + + share_item = api.ShareObject.find_share_item_by_table( + self.session, self.share, table + ) + if not share_item: + log.info( + f'Share Item not found for {self.share.shareUri} ' + f'and Dataset Table {table.GlueTableName} continuing loop...' 
+ ) + continue + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_In_Progress.value, + ) + + try: + + self.check_share_item_exists_on_glue_catalog(share_item, table) + + data = self.build_share_data(principals, table) + + self.create_resource_link(**data) + + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_Succeeded.value, + ) + + except Exception as e: + self.handle_share_failure(table, share_item, e) + + self.clean_shared_database( + self.session, + self.dataset, + self.shared_tables, + self.target_environment, + self.shared_db_name, + ) + + self.delete_deprecated_shared_database() + + return True diff --git a/backend/dataall/tasks/data_sharing/same_account/revoke_share.py b/backend/dataall/tasks/data_sharing/same_account/revoke_share.py new file mode 100644 index 000000000..b3cfe6a6d --- /dev/null +++ b/backend/dataall/tasks/data_sharing/same_account/revoke_share.py @@ -0,0 +1,46 @@ +import logging + +from ..common.share_revoke import ShareRevoke +from ....db import models + +log = logging.getLogger(__name__) + + +class SameAccountShareRevoke(ShareRevoke): + def __init__( + self, + session, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + target_environment: models.Environment, + env_group: models.EnvironmentGroup, + ): + super().__init__( + session, + shared_db_name, + dataset, + share, + shared_tables, + source_environment, + target_environment, + env_group, + ) + + def revoke_share(self) -> bool: + """ + Revokes a share on same account + 1) revoke resource link access + 2) delete shared database on target account + Returns + ------- + True if revoke is successful + """ + + self.revoke_resource_links_access() + + self.delete_shared_database() + + return True diff --git a/backend/dataall/tasks/share_manager.py b/backend/dataall/tasks/share_manager.py index be8d68217..637f86d39 100644 --- a/backend/dataall/tasks/share_manager.py +++ b/backend/dataall/tasks/share_manager.py @@ -1,25 +1,9 @@ import logging import os import sys -import time -import uuid -import json -from typing import Any -from botocore.exceptions import ClientError -from sqlalchemy import and_ - -from .. 
import db -from ..aws.handlers.glue import Glue -from ..aws.handlers.quicksight import Quicksight -from ..aws.handlers.sts import SessionHelper -from ..aws.handlers.s3 import S3 -from ..aws.handlers.kms import KMS -from ..aws.handlers.iam import IAM +from .data_sharing.data_sharing_service import DataSharingService from ..db import get_engine -from ..db import models, exceptions -from ..searchproxy import connect -from ..utils.alarm_service import AlarmService root = logging.getLogger() root.setLevel(logging.INFO) @@ -28,1434 +12,26 @@ log = logging.getLogger(__name__) -class ShareManager: - def __init__(self): - pass - - @staticmethod - def approve_share(engine, share_uri): - """ - Manages the approval of Glue tables sharing through LakeFormation - :param engine: - :param share_uri: - :return: - """ - with engine.scoped_session() as session: - ( - source_env_group, - target_env_group, - dataset, - share, - shared_tables, - shared_folders, - source_environment, - target_environment, - ) = ShareManager.get_share_data(session, share_uri, ['Approved']) - - principals = [target_env_group.environmentIAMRoleArn] - - if target_environment.dashboardsEnabled: - ShareManager.add_quicksight_group_to_shared_with_principals( - target_environment, principals - ) - - ShareManager.share_tables( - session, - share, - source_environment, - target_environment, - shared_tables, - principals, - ) - - ShareManager.clean_shared_database( - session, dataset, shared_tables, target_environment - ) - - ShareManager.share_folders( - session, - share, - source_env_group, - target_env_group, - target_environment, - shared_folders, - dataset, - ) - - ShareManager.clean_shared_folders( - session, - share, - source_env_group, - target_env_group, - target_environment, - dataset, - shared_folders, - ) - - return True - - @staticmethod - def share_tables( - session, - share: models.ShareObject, - source_environment: models.Environment, - target_environment: models.Environment, - shared_tables: [models.DatasetTable], - principals: [str], - ): - for table in shared_tables: - - share_item = ShareManager.get_share_item(session, share, table) - - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_In_Progress.value, - ) - - try: - data = { - 'source': { - 'accountid': source_environment.AwsAccountId, - 'region': source_environment.region, - 'database': table.GlueDatabaseName, - 'tablename': table.GlueTableName, - }, - 'target': { - 'accountid': target_environment.AwsAccountId, - 'region': target_environment.region, - 'principals': principals, - }, - } - - ShareManager.share_table_with_target_account(**data) - - ShareManager.accept_ram_invitation(**data) - - ShareManager.create_resource_link_on_target_account(**data) - - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_Succeeded.value, - ) - - except Exception as e: - logging.error( - f'Failed to share table {table.GlueTableName} ' - f'from source account {source_environment.AwsAccountId}//{source_environment.region} ' - f'with target account {target_environment.AwsAccountId}/{target_environment.region}' - f'due to: {e}' - ) - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_Failed.value, - ) - AlarmService().trigger_table_sharing_failure_alarm( - table, share, target_environment - ) - - @staticmethod - def share_folders( - session, - share: models.ShareObject, - source_env_group: models.EnvironmentGroup, - target_env_group: 
models.EnvironmentGroup, - target_environment: models.Environment, - shared_folders: [models.DatasetStorageLocation], - dataset: models.Dataset, - ): - for folder in shared_folders: - share_item = ShareManager.get_share_item(session, share, folder) - - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_In_Progress.value - ) - - source_account_id = folder.AWSAccountId - access_point_name = share_item.S3AccessPointName - bucket_name = folder.S3BucketName - target_account_id = target_environment.AwsAccountId - source_env_admin = source_env_group.environmentIAMRoleArn - dataset_admin = dataset.IAMDatasetAdminRoleArn - target_env_admin = target_env_group.environmentIAMRoleName - s3_prefix = folder.S3Prefix - - try: - ShareManager.manage_bucket_policy( - dataset_admin, - source_account_id, - bucket_name, - source_env_admin, - ) - - ShareManager.grant_target_role_access_policy( - bucket_name, - access_point_name, - target_account_id, - target_env_admin, - dataset, - ) - ShareManager.manage_access_point_and_policy( - dataset_admin, - source_account_id, - target_account_id, - source_env_admin, - target_env_admin, - bucket_name, - s3_prefix, - access_point_name, - ) - - ShareManager.update_dataset_bucket_key_policy( - source_account_id, - target_account_id, - target_env_admin, - dataset - ) - - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_Succeeded.value, - ) - except Exception as e: - logging.error( - f'Failed to share folder {folder.S3Prefix} ' - f'from source account {folder.AWSAccountId}//{folder.region} ' - f'with target account {target_environment.AwsAccountId}//{target_environment.region} ' - f'due to: {e}' - ) - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Share_Failed.value, - ) - AlarmService().trigger_folder_sharing_failure_alarm( - folder, share, target_environment - ) - - @staticmethod - def add_quicksight_group_to_shared_with_principals(target_environment, principals): - try: - group = Quicksight.describe_group( - client=Quicksight.get_quicksight_client_in_identity_region( - target_environment.AwsAccountId - ), - AwsAccountId=target_environment.AwsAccountId, - ) - if group and group.get('Group', {}).get('Arn'): - principals.append(group['Group']['Arn']) - except ClientError as e: - log.warning(f'Failed to retrieve Quicksight . 
group due to: {e}') - - @staticmethod - def share_table_with_target_account(**data): - """ - Shares tables using Lake Formation and RAM only when cross account - Sharing feature may take some extra seconds that is why we are retrying here - :param data: - :return: - """ - source_accountid = data['source']['accountid'] - source_region = data['source']['region'] - source_session = SessionHelper.remote_session(accountid=source_accountid) - source_lf_client = source_session.client( - 'lakeformation', region_name=source_region - ) - target_accountid = data['target']['accountid'] - target_region = data['target']['region'] - - try: - - ShareManager.revoke_iamallowedgroups_super_permission_from_table( - source_lf_client, - source_accountid, - data['source']['database'], - data['source']['tablename'], - ) - - time.sleep(5) - - ShareManager.grant_permissions_to_table( - source_lf_client, - target_accountid, - data['source']['database'], - data['source']['tablename'], - ['DESCRIBE', 'SELECT'], - ['DESCRIBE', 'SELECT'], - ) - - # Issue with ram associations taking more than 10 seconds - time.sleep(15) - - log.info( - f"Granted access to table {data['source']['tablename']} " - f'to external account {target_accountid} ' - ) - return True - - except ClientError as e: - logging.error( - f'Failed granting access to table {data["source"]["tablename"]} ' - f'from {source_accountid} / {source_region} ' - f'to external account{target_accountid}/{target_region}' - f'due to: {e}' - ) - raise e - - @staticmethod - def grant_permissions_to_database( - client, - principals, - database_name, - permissions, - permissions_with_grant_options=None, - ): - for principal in principals: - log.info( - f'Grant full permissions to role {principals} on database {database_name}' - ) - try: - - response = client.grant_permissions( - Principal={'DataLakePrincipalIdentifier': principal}, - Resource={ - 'Database': {'Name': database_name}, - }, - Permissions=permissions, - ) - log.info( - f'Successfully granted principal {principal} permissions {permissions} ' - f'to {database_name}: {response}' - ) - except ClientError as e: - log.error( - f'Could not grant permissions ' - f'principal {principal} ' - f'{permissions} to database {database_name} due to: {e}' - ) - - @staticmethod - def grant_permissions_to_table( - client, - principal, - database_name, - table_name, - permissions, - permissions_with_grant_options=None, - ): - try: - grant_dict = dict( - Principal={'DataLakePrincipalIdentifier': principal}, - Resource={'Table': {'DatabaseName': database_name, 'Name': table_name}}, - Permissions=permissions, - ) - if permissions_with_grant_options: - grant_dict[ - 'PermissionsWithGrantOption' - ] = permissions_with_grant_options - - response = client.grant_permissions(**grant_dict) - - log.info( - f'Successfully granted principal {principal} permissions {permissions} ' - f'to {database_name}.{table_name}: {response}' - ) - except ClientError as e: - log.warning( - f'Could not grant principal {principal}' - f'permissions {permissions} to table ' - f'{database_name}.{table_name} due to: {e}' - ) - # raise e - - @staticmethod - def create_resource_link_on_target_account(**data): - """ - When table is shared via Lake Formation from source account - A Glue resource link is created on the target account and the target database - :param data: - :return: - """ - source = data['source'] - target = data['target'] - target_session = SessionHelper.remote_session(accountid=target['accountid']) - lakeformation_client = target_session.client( - 
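
# The cross-account grant above reduces to one Lake Formation API call issued in
# the source account; a standalone sketch with placeholder account ids and names
# (the boto3 operation and its parameters are real, the values are not):
import boto3

lf = boto3.client('lakeformation', region_name='eu-west-1')
lf.grant_permissions(
    Principal={'DataLakePrincipalIdentifier': '222222222222'},  # target AWS account
    Resource={
        'Table': {
            'CatalogId': '111111111111',  # source account that owns the Glue table
            'DatabaseName': 'source_database',
            'Name': 'source_table',
        }
    },
    Permissions=['DESCRIBE', 'SELECT'],
    PermissionsWithGrantOption=['DESCRIBE', 'SELECT'],  # target may re-grant internally
)
# Granting to an external account materializes as an AWS RAM resource share that
# the target account still has to accept, which is why the code sleeps and retries.
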
'lakeformation', region_name=target['region'] - ) - target_database = f"{source['database']}shared" - resource_link_input = { - 'Name': source['tablename'], - 'TargetTable': { - 'CatalogId': data['source']['accountid'], - 'DatabaseName': source['database'], - 'Name': source['tablename'], - }, - } - - # Creates the database if it doesnt exist - try: - - Glue._create_table( - **{ - 'accountid': target['accountid'], - 'region': target['region'], - 'database': target_database, - 'tablename': source['tablename'], - 'table_input': resource_link_input, - } - ) - ShareManager.grant_permissions_to_database( - lakeformation_client, target['principals'], target_database, ['ALL'] - ) - - ShareManager.grant_resource_link_permission( - lakeformation_client, source, target, target_database - ) - - ShareManager.grant_resource_link_permission_on_target( - lakeformation_client, source, target - ) - - log.info( - f'Granted resource link SELECT read access on target ' - f"to principals {target['principals']}" - ) - - except ClientError as e: - log.warning( - f'Resource Link {resource_link_input} was not created because: {e}' - ) - raise e - - @staticmethod - def grant_resource_link_permission_on_target(client, source, target): - for principal in target['principals']: - table_grant = dict( - Principal={'DataLakePrincipalIdentifier': principal}, - Resource={ - 'TableWithColumns': { - 'DatabaseName': source['database'], - 'Name': source['tablename'], - 'ColumnWildcard': {}, - 'CatalogId': source['accountid'], - } - }, - Permissions=['DESCRIBE', 'SELECT'], - PermissionsWithGrantOption=[], - ) - response = client.grant_permissions(**table_grant) - log.info( - f'Successfully granted permission to {principal} on target {source["tablename"]}: {response}' - ) - - @staticmethod - def grant_resource_link_permission( - lakeformation_client, source, target, target_database - ): - for principal in target['principals']: - resourcelink_grant = dict( - Principal={'DataLakePrincipalIdentifier': principal}, - Resource={ - 'Table': { - 'DatabaseName': target_database, - 'Name': source['tablename'], - 'CatalogId': target['accountid'], - } - }, - Permissions=['DESCRIBE', 'DROP', 'ALL'], - PermissionsWithGrantOption=[], - ) - try: - response = lakeformation_client.grant_permissions(**resourcelink_grant) - log.info( - f'Granted resource link DESCRIBE access ' - f'to project {principal} with response: {response}' - ) - except ClientError as e: - logging.error( - f'Failed granting {resourcelink_grant} to project role {principal} ' - f'read access to resource link {source["tablename"]} ' - f'due to: {e}' - ) - - @staticmethod - def get_resource_share_invitations(client, resource_share_arn): - try: - # Accepting one ram invitation - # response = client.get_resource_share_invitations( - # resourceShareArns=[resource_share_arn] - # ) - # Accepting All RAM invitations - response = client.get_resource_share_invitations() - invitation_list = response.get('resourceShareInvitations', []) - return invitation_list - except ClientError as e: - log.error( - f'Failed retrieving RAM resource ' - f'share invitations {resource_share_arn} due to {e}' - ) - raise e - - @staticmethod - def accept_resource_share_invitation(client, resource_share_invitation_arn): - try: - response = client.accept_resource_share_invitation( - resourceShareInvitationArn=resource_share_invitation_arn - ) - log.info(f'Accepted ram invitation {resource_share_invitation_arn}') - return response.get('resourceShareInvitation') - except ClientError as e: - if ( - 
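
# The resource link built by create_resource_link_on_target_account is just a Glue
# table whose TargetTable attribute points back at the source catalog; a standalone
# sketch (real boto3 call, placeholder ids and names):
import boto3

glue = boto3.client('glue', region_name='eu-west-1')
glue.create_table(
    CatalogId='222222222222',  # target account
    DatabaseName='source_databaseshared',
    TableInput={
        'Name': 'source_table',
        'TargetTable': {  # the presence of TargetTable makes this a resource link
            'CatalogId': '111111111111',
            'DatabaseName': 'source_database',
            'Name': 'source_table',
        },
    },
)
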
e.response['Error']['Code'] - == 'ResourceShareInvitationAlreadyAcceptedException' - ): - log.info( - f'Failed to accept RAM invitation ' - f'{resource_share_invitation_arn} already accepted' - ) - else: - log.error( - f'Failed to accept RAM invitation ' - f'{resource_share_invitation_arn} due to {e}' - ) - raise e - - @staticmethod - def accept_ram_invitation(**data): - """ - Accepts RAM invitations on the target account - """ - source = data['source'] - target = data['target'] - target_session = SessionHelper.remote_session(accountid=target['accountid']) - ram = target_session.client('ram', region_name=target['region']) - resource_share_arn = ( - f'arn:aws:glue:{source["region"]}:{source["accountid"]}:' - f'table/{data["source"]["database"]}/{data["source"]["tablename"]}' - ) - ram_invitations = ShareManager.get_resource_share_invitations( - ram, resource_share_arn - ) - for invitation in ram_invitations: - ShareManager.accept_resource_share_invitation( - ram, invitation['resourceShareInvitationArn'] - ) - # Ram invitation acceptance is slow - time.sleep(5) - return True - - @staticmethod - def revoke_shared_folders( - session, - share: models.ShareObject, - source_env_group: models.EnvironmentGroup, - target_env_group: models.EnvironmentGroup, - target_environment: models.Environment, - rejected_folders: [models.DatasetStorageLocation], - dataset: models.Dataset, - ): - for folder in rejected_folders: - rejected_item = ShareManager.get_share_item(session, share, folder) - - ShareManager.update_share_item_status( - session, - rejected_item, - models.ShareObjectStatus.Revoke_In_Progress.value - ) - - source_account_id = folder.AWSAccountId - access_point_name = rejected_item.S3AccessPointName - bucket_name = folder.S3BucketName - target_account_id = target_environment.AwsAccountId - # source_env_admin = source_env_group.environmentIAMRoleArn - # dataset_admin = dataset.IAMDatasetAdminRoleArn - target_env_admin = target_env_group.environmentIAMRoleName - s3_prefix = folder.S3Prefix - - try: - ShareManager.delete_access_point_policy( - source_account_id, - target_account_id, - access_point_name, - target_env_admin, - s3_prefix, - ) - cleanup = ShareManager.delete_access_point(source_account_id, access_point_name) - if cleanup: - ShareManager.delete_target_role_access_policy( - target_account_id, - target_env_admin, - bucket_name, - access_point_name, - dataset, - ) - ShareManager.delete_dataset_bucket_key_policy( - source_account_id, - target_account_id, - target_env_admin, - dataset, - ) - ShareManager.update_share_item_status( - session, - rejected_item, - models.ShareObjectStatus.Revoke_Share_Succeeded.value, - ) - except Exception as e: - log.error( - f'Failed to revoke folder {folder.S3Prefix} ' - f'from source account {folder.AWSAccountId}//{folder.region} ' - f'with target account {target_environment.AwsAccountId}//{target_environment.region} ' - f'due to: {e}' - ) - ShareManager.update_share_item_status( - session, - rejected_item, - models.ShareObjectStatus.Revoke_Share_Failed.value, - ) - AlarmService().trigger_revoke_folder_sharing_failure_alarm( - folder, share, target_environment - ) - - @staticmethod - def revoke_iamallowedgroups_super_permission_from_table( - client, accountid, database, table - ): - """ - When upgrading to LF tables can still have IAMAllowedGroups permissions - Unless this is revoked the table can not be shared using LakeFormation - :param client: - :param accountid: - :param database: - :param table: - :return: - """ - try: - log.info( - f'Revoking 
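
# The RAM handshake in accept_ram_invitation boils down to two calls on the target
# account. A sketch that accepts only invitations still pending, slightly narrower
# than accepting every invitation as the code above does (real boto3 operations,
# placeholder region):
import boto3

ram = boto3.client('ram', region_name='eu-west-1')
for invitation in ram.get_resource_share_invitations()['resourceShareInvitations']:
    if invitation['status'] == 'PENDING':
        ram.accept_resource_share_invitation(
            resourceShareInvitationArn=invitation['resourceShareInvitationArn']
        )
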
IAMAllowedGroups Super ' - f'permission for table {database}|{table}' - ) - ShareManager.batch_revoke_permissions( - client, - accountid, - entries=[ - { - 'Id': str(uuid.uuid4()), - 'Principal': {'DataLakePrincipalIdentifier': 'EVERYONE'}, - 'Resource': { - 'Table': { - 'DatabaseName': database, - 'Name': table, - 'CatalogId': accountid, - } - }, - 'Permissions': ['ALL'], - 'PermissionsWithGrantOption': [], - } - ], - ) - except ClientError as e: - log.warning( - f'Cloud not revoke IAMAllowedGroups Super ' - f'permission on table {database}|{table} due to {e}' - ) - - @staticmethod - def clean_shared_database(session, dataset, shared_tables, target_environment): - shared_glue_tables = Glue.list_glue_database_tables( - accountid=target_environment.AwsAccountId, - database=dataset.GlueDatabaseName + 'shared', - region=target_environment.region, - ) - shared_tables = [t.GlueTableName for t in shared_tables] - log.info( - f'Shared database {dataset.GlueDatabaseName}shared glue tables: {shared_glue_tables}' - ) - log.info(f'Share items of the share object {shared_tables}') - tables_to_delete = [] - aws_session = SessionHelper.remote_session(accountid=dataset.AwsAccountId) - client = aws_session.client('lakeformation', region_name=dataset.region) - for table in shared_glue_tables: - if table['Name'] not in shared_tables: - log.info( - f'Found a table not part of the share: {dataset.GlueDatabaseName}//{table["Name"]}' - ) - is_shared = ( - session.query(models.ShareObjectItem) - .join( - models.ShareObject, - models.ShareObjectItem.shareUri == models.ShareObject.shareUri, - ) - .filter( - and_( - models.ShareObjectItem.GlueTableName == table['Name'], - models.ShareObject.datasetUri == dataset.datasetUri, - models.ShareObject.status == 'Approved', - models.ShareObject.environmentUri - == target_environment.environmentUri, - ) - ) - .first() - ) - - if not is_shared: - log.info( - f'Access to table {dataset.AwsAccountId}//{dataset.GlueDatabaseName}//{table["Name"]} ' - f'will be removed for account {target_environment.AwsAccountId}' - ) - if Glue.table_exists( - **{ - 'accountid': dataset.AwsAccountId, - 'region': dataset.region, - 'database': dataset.GlueDatabaseName, - 'tablename': table['Name'], - } - ): - ShareManager.batch_revoke_permissions( - client, - target_environment.AwsAccountId, - [ - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': dataset.GlueDatabaseName, - 'Name': table['Name'], - 'ColumnWildcard': {}, - 'CatalogId': dataset.AwsAccountId, - } - }, - 'Permissions': ['SELECT'], - 'PermissionsWithGrantOption': ['SELECT'], - } - ], - ) - - tables_to_delete.append(table['Name']) - - if tables_to_delete: - log.info( - f'Deleting: {tables_to_delete} from shared database {dataset.GlueDatabaseName}shared' - ) - Glue.batch_delete_tables( - **{ - 'accountid': target_environment.AwsAccountId, - 'region': target_environment.region, - 'database': dataset.GlueDatabaseName + 'shared', - 'tables': tables_to_delete, - } - ) - - @staticmethod - def batch_revoke_permissions(client, accountid, entries): - """ - Batch revoke permissions to entries - Retry is set for api throttling - :param client: - :param accountid: - :param entries: - :return: - """ - entries_chunks: list = [entries[i : i + 20] for i in range(0, len(entries), 20)] - failures = [] - try: - for entries_chunk in entries_chunks: - response = client.batch_revoke_permissions( - CatalogId=accountid, 
Entries=entries_chunk - ) - log.info(f'Batch Revoke {entries_chunk} response: {response}') - failures.extend(response.get('Failures')) - if failures: - raise ClientError( - error_response={ - 'Error': { - 'Code': 'LakeFormation.batch_revoke_permissions', - 'Message': f'Operation ended with failures: {failures}', - } - }, - operation_name='LakeFormation.batch_revoke_permissions', - ) - except ClientError as e: - for failure in failures: - if not ( - failure['Error']['ErrorCode'] == 'InvalidInputException' - and ( - 'Grantee has no permissions' in failure['Error']['ErrorMessage'] - or 'No permissions revoked' in failure['Error']['ErrorMessage'] - ) - ): - log.warning(f'Batch Revoke ended with failures: {failures}') - raise e - - @staticmethod - def reject_share(engine, share_uri): - """ - Revokes access to the environment group that tables were share with - If there is no other approved share object for the same environment - Then revoke access to the AWS account on LakeFormation and delete the resource links - :param engine: - :param share_uri: - :return: - """ - - with engine.scoped_session() as session: - ( - source_env_group, - target_env_group, - dataset, - share, - shared_tables, - shared_folders, - source_environment, - target_environment, - ) = ShareManager.get_share_data(session, share_uri, ['Rejected']) - - log.info(f'Revoking permissions for tables : {shared_tables}') - - ShareManager.revoke_resource_links_access_on_target_account( - session, source_env_group, share, shared_tables, target_environment - ) - - ShareManager.delete_resource_links_on_target_account( - dataset, shared_tables, target_environment - ) - - ShareManager.clean_shared_database( - session, dataset, shared_tables, target_environment - ) - - if not ShareManager.other_approved_share_object_exists( - session, target_environment.environmentUri - ): - ShareManager.revoke_external_account_access_on_source_account( - shared_tables, source_environment, target_environment - ) - - ShareManager.revoke_shared_folders( - session, - share, - source_env_group, - target_env_group, - target_environment, - shared_folders, - dataset, - ) - - return True - - @staticmethod - def revoke_external_account_access_on_source_account( - shared_tables, source_environment, target_environment - ): - log.info(f'Revoking Access for AWS account: {target_environment.AwsAccountId}') - aws_session = SessionHelper.remote_session( - accountid=source_environment.AwsAccountId - ) - client = aws_session.client( - 'lakeformation', region_name=source_environment.region - ) - revoke_entries = [] - for table in shared_tables: - revoke_entries.append( - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': table.GlueDatabaseName, - 'Name': table.GlueTableName, - 'ColumnWildcard': {}, - 'CatalogId': source_environment.AwsAccountId, - } - }, - 'Permissions': ['SELECT'], - 'PermissionsWithGrantOption': ['SELECT'], - } - ) - ShareManager.batch_revoke_permissions( - client, target_environment.AwsAccountId, revoke_entries - ) - - @staticmethod - def delete_resource_links_on_target_account( - dataset, shared_tables, target_environment - ): - resource_links = [table.GlueTableName for table in shared_tables] - log.info(f'Deleting resource links {resource_links}') - return Glue.batch_delete_tables( - **{ - 'accountid': target_environment.AwsAccountId, - 'region': target_environment.region, - 'database': dataset.GlueDatabaseName + 'shared', - 
'tables': resource_links, - } - ) - - @staticmethod - def revoke_resource_links_access_on_target_account( - session, env_group, share, shared_tables, target_environment - ): - aws_session = SessionHelper.remote_session( - accountid=target_environment.AwsAccountId - ) - client = aws_session.client( - 'lakeformation', region_name=target_environment.region - ) - revoke_entries = [] - for table in shared_tables: - share_item = ShareManager.get_share_item(session, share, table) - - ShareManager.update_share_item_status( - session, share_item, models.ShareObjectStatus.Revoke_In_Progress.value - ) - try: - data = { - 'accountid': target_environment.AwsAccountId, - 'region': target_environment.region, - 'database': table.GlueDatabaseName + 'shared', - 'tablename': table.GlueTableName, - } - log.info(f'Starting revoke for: {data}') - - if Glue.table_exists(**data): - revoke_entries.append( - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': env_group.environmentIAMRoleArn - }, - 'Resource': { - 'Table': { - 'DatabaseName': table.GlueDatabaseName + 'shared', - 'Name': table.GlueTableName, - 'CatalogId': target_environment.AwsAccountId, - } - }, - 'Permissions': ['ALL', 'DESCRIBE', 'DROP'], - } - ) - - log.info(f'Revoking permissions for entries : {revoke_entries}') - - ShareManager.batch_revoke_permissions( - client, target_environment.AwsAccountId, revoke_entries - ) - - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Revoke_Share_Succeeded.value, - ) - except Exception as e: - logging.error( - f'Failed to revoke LF permissions to table share {table.GlueTableName} ' - f'on target account {target_environment.AwsAccountId}/{target_environment.region}' - f'due to: {e}' - ) - ShareManager.update_share_item_status( - session, - share_item, - models.ShareObjectStatus.Revoke_Share_Failed.value, - ) - AlarmService().trigger_revoke_sharing_failure_alarm( - table, share, target_environment - ) - - @staticmethod - def get_share_data(session, share_uri, status): - share: models.ShareObject = session.query(models.ShareObject).get(share_uri) - dataset: models.Dataset = session.query(models.Dataset).get(share.datasetUri) - source_environment: models.Environment = ( - db.api.Environment.get_environment_by_uri(session, dataset.environmentUri) - ) - target_environment: models.Environment = ( - db.api.Environment.get_environment_by_uri(session, share.environmentUri) - ) - shared_tables = db.api.DatasetTable.get_dataset_tables_shared_with_env( - session, - dataset_uri=dataset.datasetUri, - environment_uri=target_environment.environmentUri, - status=status, - ) - shared_folders = db.api.DatasetStorageLocation.get_dataset_locations_shared_with_env( - session, - dataset_uri=dataset.datasetUri, - share_uri=share_uri, - status=status, - ) - source_env_group = db.api.Environment.get_environment_group( - session, - dataset.SamlAdminGroupName, - dataset.environmentUri - ) - target_env_group = db.api.Environment.get_environment_group( - session, - share.principalId, - share.environmentUri - ) - if not target_env_group: - raise Exception( - f'Share object Team {share.principalId} is not a member of the ' - f'environment {target_environment.name}/{target_environment.AwsAccountId}' - ) - - return ( - source_env_group, - target_env_group, - dataset, - share, - shared_tables, - shared_folders, - source_environment, - target_environment, - ) - - @staticmethod - def other_approved_share_object_exists(session, environment_uri): - return ( - 
session.query(models.ShareObject) - .filter( - and_( - models.Environment.environmentUri == environment_uri, - models.ShareObject.status - == models.Enums.ShareObjectStatus.Approved.value, - ) - ) - .all() - ) - - @staticmethod - def get_share_item( - session, - share: models.ShareObject, - share_category: Any, - ) -> models.ShareObjectItem: - if isinstance(share_category, models.DatasetTable): - category_uri = share_category.tableUri - elif isinstance(share_category, models.DatasetStorageLocation): - category_uri = share_category.locationUri - else: - raise exceptions.InvalidInput( - 'share_category', - share_category, - 'DatasetTable or DatasetStorageLocation' - ) - share_item: models.ShareObjectItem = ( - session.query(models.ShareObjectItem) - .filter( - and_( - models.ShareObjectItem.itemUri == category_uri, - models.ShareObjectItem.shareUri == share.shareUri, - ) - ) - .first() - ) - - if not share_item: - raise exceptions.ObjectNotFound('ShareObjectItem', category_uri) - - return share_item - - @staticmethod - def update_share_item_status( - session, - share_item: models.ShareObjectItem, - status: str, - ) -> models.ShareObjectItem: - - log.info(f'Updating share item status to {status}') - share_item.status = status - session.commit() - return share_item - - @staticmethod - def manage_bucket_policy( - dataset_admin: str, - source_account_id: str, - bucket_name: str, - source_env_admin: str, - ): - ''' - This function will manage bucket policy by grant admin access to dataset admin, pivot role - and environment admin. All of the policies will only be added once. - ''' - bucket_policy = json.loads(S3.get_bucket_policy(source_account_id, bucket_name)) - for statement in bucket_policy["Statement"]: - if statement.get("Sid") in ["AllowAllToAdmin", "DelegateAccessToAccessPoint"]: - return - exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( - source_account_id, - [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] - )] - allow_owner_access = { - "Sid": "AllowAllToAdmin", - "Effect": "Allow", - "Principal": "*", - "Action": "s3:*", - "Resource": [ - f"arn:aws:s3:::{bucket_name}", - f"arn:aws:s3:::{bucket_name}/*" - ], - "Condition": { - "StringLike": { - "aws:userId": exceptions_roleId - } - } - } - delegated_to_accesspoint = { - "Sid": "DelegateAccessToAccessPoint", - "Effect": "Allow", - "Principal": "*", - "Action": "s3:*", - "Resource": [ - f"arn:aws:s3:::{bucket_name}", - f"arn:aws:s3:::{bucket_name}/*" - ], - "Condition": { - "StringEquals": { - "s3:DataAccessPointAccount": f"{source_account_id}" - } - } - } - bucket_policy["Statement"].append(allow_owner_access) - bucket_policy["Statement"].append(delegated_to_accesspoint) - S3.create_bucket_policy(source_account_id, bucket_name, json.dumps(bucket_policy)) - - @staticmethod - def grant_target_role_access_policy( - bucket_name: str, - access_point_name: str, - target_account_id: str, - target_env_admin: str, - dataset: models.Dataset, - ): - existing_policy = IAM.get_role_policy( - target_account_id, - target_env_admin, - "targetDatasetAccessControlPolicy", - ) - if existing_policy: # type dict - if bucket_name not in ",".join(existing_policy["Statement"][0]["Resource"]): - target_resources = [ - f"arn:aws:s3:::{bucket_name}", - f"arn:aws:s3:::{bucket_name}/*", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" - ] - policy = 
existing_policy["Statement"][0]["Resource"].extend(target_resources) - else: - return - else: - policy = { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "s3:*" - ], - "Resource": [ - f"arn:aws:s3:::{bucket_name}", - f"arn:aws:s3:::{bucket_name}/*", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" - ] - } - ] - } - IAM.update_role_policy( - target_account_id, - target_env_admin, - "targetDatasetAccessControlPolicy", - json.dumps(policy), - ) - - @staticmethod - def manage_access_point_and_policy( - dataset_admin: str, - source_account_id: str, - target_account_id: str, - source_env_admin: str, - target_env_admin: str, - bucket_name: str, - s3_prefix: str, - access_point_name: str, - ): - access_point_arn = S3.get_bucket_access_point_arn(source_account_id, access_point_name) - if not access_point_arn: - access_point_arn = S3.create_bucket_access_point(source_account_id, bucket_name, access_point_name) - existing_policy = S3.get_access_point_policy(source_account_id, access_point_name) - # requester will use this role to access resources - target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) - if existing_policy: - # Update existing access point policy - existing_policy = json.loads(existing_policy) - statements = {item["Sid"]: item for item in existing_policy["Statement"]} - if f"{target_env_admin_id}0" in statements.keys(): - prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] - if isinstance(prefix_list, str): - prefix_list = [prefix_list] - if f"{s3_prefix}/*" not in prefix_list: - prefix_list.append(f"{s3_prefix}/*") - statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] = prefix_list - resource_list = statements[f"{target_env_admin_id}1"]["Resource"] - if isinstance(resource_list, str): - resource_list = [resource_list] - if f"{access_point_arn}/object/{s3_prefix}/*" not in resource_list: - resource_list.append(f"{access_point_arn}/object/{s3_prefix}/*") - statements[f"{target_env_admin_id}1"]["Resource"] = resource_list - existing_policy["Statement"] = list(statements.values()) - else: - additional_policy = S3.generate_access_point_policy_template( - target_env_admin_id, - access_point_arn, - s3_prefix, - ) - existing_policy["Statement"].extend(additional_policy["Statement"]) - access_point_policy = existing_policy - else: - # First time to create access point policy - access_point_policy = S3.generate_access_point_policy_template( - target_env_admin_id, - access_point_arn, - s3_prefix, - ) - exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( - source_account_id, - [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] - )] - admin_statement = { - "Sid": "AllowAllToAdmin", - "Effect": "Allow", - "Principal": "*", - "Action": "s3:*", - "Resource": f"{access_point_arn}", - "Condition": { - "StringLike": { - "aws:userId": exceptions_roleId - } - } - } - access_point_policy["Statement"].append(admin_statement) - S3.attach_access_point_policy(source_account_id, access_point_name, json.dumps(access_point_policy)) - - @staticmethod - def update_dataset_bucket_key_policy( - source_account_id: str, - target_account_id: str, - target_env_admin: str, - dataset: models.Dataset, - ): - key_alias = f"alias/{dataset.KmsAlias}" - kms_keyId = KMS.get_key_id(source_account_id, 
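
# The Sid scheme manipulated above pairs two statements per requesting role id:
# "<roleId>0" carries the s3:prefix conditions and "<roleId>1" the object resources.
# An illustrative fragment of what the generated policy could look like; the actual
# template lives in S3.generate_access_point_policy_template, which is not shown in
# this patch, so the actions below are an assumption:
example_statements = [
    {
        "Sid": "AROAEXAMPLEROLEID0",  # listing, constrained to the shared prefixes
        "Effect": "Allow",
        "Principal": "*",
        "Action": "s3:ListBucket",
        "Resource": "arn:aws:s3:eu-west-1:111111111111:accesspoint/example-access-point",
        "Condition": {
            "StringLike": {
                "s3:prefix": ["myfolder/*"],
                "aws:userId": ["AROAEXAMPLEROLEID:*"],
            }
        },
    },
    {
        "Sid": "AROAEXAMPLEROLEID1",  # object reads through the access point
        "Effect": "Allow",
        "Principal": "*",
        "Action": "s3:GetObject",
        "Resource": [
            "arn:aws:s3:eu-west-1:111111111111:accesspoint/example-access-point/object/myfolder/*"
        ],
        "Condition": {"StringLike": {"aws:userId": ["AROAEXAMPLEROLEID:*"]}},
    },
]
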
key_alias) - existing_policy = KMS.get_key_policy(source_account_id, kms_keyId, "default") - target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) - if existing_policy and f'{target_env_admin_id}:*' not in existing_policy: - policy = json.loads(existing_policy) - policy["Statement"].append( - { - "Sid": f"{target_env_admin_id}", - "Effect": "Allow", - "Principal": { - "AWS": "*" - }, - "Action": "kms:Decrypt", - "Resource": "*", - "Condition": { - "StringLike": { - "aws:userId": f"{target_env_admin_id}:*" - } - } - } - ) - KMS.put_key_policy( - source_account_id, - kms_keyId, - "default", - json.dumps(policy) - ) - - @staticmethod - def delete_access_point_policy( - source_account_id: str, - target_account_id: str, - access_point_name: str, - target_env_admin: str, - s3_prefix: str, - ): - access_point_policy = json.loads(S3.get_access_point_policy(source_account_id, access_point_name)) - access_point_arn = S3.get_bucket_access_point_arn(source_account_id, access_point_name) - target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) - statements = {item["Sid"]: item for item in access_point_policy["Statement"]} - if f"{target_env_admin_id}0" in statements.keys(): - prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] - if isinstance(prefix_list, list) and f"{s3_prefix}/*" in prefix_list: - prefix_list.remove(f"{s3_prefix}/*") - statements[f"{target_env_admin_id}1"]["Resource"].remove(f"{access_point_arn}/object/{s3_prefix}/*") - access_point_policy["Statement"] = list(statements.values()) - else: - access_point_policy["Statement"].remove(statements[f"{target_env_admin_id}0"]) - access_point_policy["Statement"].remove(statements[f"{target_env_admin_id}1"]) - S3.attach_access_point_policy(source_account_id, access_point_name, json.dumps(access_point_policy)) - - @staticmethod - def delete_access_point(source_account_id: str, access_point_name: str): - access_point_policy = json.loads(S3.get_access_point_policy(source_account_id, access_point_name)) - if len(access_point_policy["Statement"]) <= 1: - # At least we have the 'AllowAllToAdmin' statement - S3.delete_bucket_access_point(source_account_id, access_point_name) - return True - else: - return False - - @staticmethod - def delete_target_role_access_policy( - target_account_id: str, - target_env_admin: str, - bucket_name: str, - access_point_name: str, - dataset: models.Dataset, - ): - existing_policy = IAM.get_role_policy( - target_account_id, - target_env_admin, - "targetDatasetAccessControlPolicy", - ) - if existing_policy: - if bucket_name in ",".join(existing_policy["Statement"][0]["Resource"]): - target_resources = [ - f"arn:aws:s3:::{bucket_name}", - f"arn:aws:s3:::{bucket_name}/*", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" - ] - for item in target_resources: - existing_policy["Statement"][0]["Resource"].remove(item) - if not existing_policy["Statement"][0]["Resource"]: - IAM.delete_role_policy(target_account_id, target_env_admin, "targetDatasetAccessControlPolicy") - else: - IAM.update_role_policy( - target_account_id, - target_env_admin, - "targetDatasetAccessControlPolicy", - json.dumps(existing_policy), - ) - - @staticmethod - def delete_dataset_bucket_key_policy( - source_account_id: str, - target_account_id: str, - target_env_admin: str, - dataset: models.Dataset, - ): - key_alias = 
f"alias/{dataset.KmsAlias}" - kms_keyId = KMS.get_key_id(source_account_id, key_alias) - existing_policy = KMS.get_key_policy(source_account_id, kms_keyId, "default") - target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) - if existing_policy and f'{target_env_admin_id}:*' in existing_policy: - policy = json.loads(existing_policy) - policy["Statement"] = [item for item in policy["Statement"] if item["Sid"] != f"{target_env_admin_id}"] - KMS.put_key_policy( - source_account_id, - kms_keyId, - "default", - json.dumps(policy) - ) - - @staticmethod - def clean_shared_folders( - session, - share: models.ShareObject, - source_env_group: models.EnvironmentGroup, - target_env_group: models.EnvironmentGroup, - target_environment: models.Environment, - dataset: models.Dataset, - shared_folders: [models.DatasetStorageLocation], - ): - source_account_id = dataset.AwsAccountId - access_point_name = f"{dataset.datasetUri}-{share.principalId}".lower() - target_account_id = target_environment.AwsAccountId - target_env_admin = target_env_group.environmentIAMRoleName - access_point_policy = S3.get_access_point_policy(source_account_id, access_point_name) - if access_point_policy: - policy = json.loads(access_point_policy) - target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) - statements = {item["Sid"]: item for item in policy["Statement"]} - if f"{target_env_admin_id}0" in statements.keys(): - prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] - if isinstance(prefix_list, str): - prefix_list = [prefix_list] - prefix_list = [prefix[:-2] for prefix in prefix_list] - shared_prefix = [folder.S3Prefix for folder in shared_folders] - removed_prefixes = [prefix for prefix in prefix_list if prefix not in shared_prefix] - for prefix in removed_prefixes: - bucket_name = dataset.S3BucketName - try: - ShareManager.delete_access_point_policy( - source_account_id, - target_account_id, - access_point_name, - target_env_admin, - prefix, - ) - cleanup = ShareManager.delete_access_point(source_account_id, access_point_name) - if cleanup: - ShareManager.delete_target_role_access_policy( - target_account_id, - target_env_admin, - bucket_name, - access_point_name, - dataset, - ) - ShareManager.delete_dataset_bucket_key_policy( - source_account_id, - target_account_id, - target_env_admin, - dataset, - ) - except Exception as e: - log.error( - f'Failed to revoke folder {prefix} ' - f'from source account {dataset.AwsAccountId}//{dataset.region} ' - f'with target account {target_account_id}//{target_environment.region} ' - f'due to: {e}' - ) - location = db.api.DatasetStorageLocation.get_location_by_s3_prefix( - session, - prefix, - dataset.AwsAccountId, - dataset.region, - ) - AlarmService().trigger_revoke_folder_sharing_failure_alarm( - location, share, target_environment - ) - - if __name__ == '__main__': - ENVNAME = os.environ.get('envname', 'local') - ENGINE = get_engine(envname=ENVNAME) - ES = connect(envname=ENVNAME) + try: + ENVNAME = os.environ.get('envname', 'local') + ENGINE = get_engine(envname=ENVNAME) + + share_uri = os.getenv('shareUri') + share_item_uri = os.getenv('shareItemUri') + handler = os.getenv('handler') - share_uri = os.getenv('shareUri') - share_item_uri = os.getenv('shareItemUri') - handler = os.getenv('handler') + if handler == 'approve_share': + log.info(f'Starting approval task for share : {share_uri}...') + DataSharingService.approve_share(engine=ENGINE, share_uri=share_uri) - if handler == 
'approve_share':
-        log.info(f'Starting approval task for share : {share_uri}...')
-        ShareManager.approve_share(engine=ENGINE, share_uri=share_uri)
+        if handler == 'approve_share':
+            log.info(f'Starting approval task for share : {share_uri}...')
+            DataSharingService.approve_share(engine=ENGINE, share_uri=share_uri)
 
-    elif handler == 'reject_share':
-        log.info(f'Starting revoke task for share : {share_uri}...')
-        ShareManager.reject_share(engine=ENGINE, share_uri=share_uri)
+        elif handler == 'reject_share':
+            log.info(f'Starting revoke task for share : {share_uri}...')
+            DataSharingService.reject_share(engine=ENGINE, share_uri=share_uri)
 
-    log.info('Sharing task finished successfully')
+        log.info('Sharing task finished successfully')
+
+    except Exception as e:
+        log.error(f'Sharing task failed due to: {e}')
+        raise e
diff --git a/backend/dataall/tasks/shares_refresh.py b/backend/dataall/tasks/shares_refresh.py
new file mode 100644
index 000000000..d1957bc74
--- /dev/null
+++ b/backend/dataall/tasks/shares_refresh.py
@@ -0,0 +1,28 @@
+import logging
+import os
+import sys
+
+from .data_sharing.data_sharing_service import DataSharingService
+from ..db import get_engine
+
+root = logging.getLogger()
+root.setLevel(logging.INFO)
+if not root.hasHandlers():
+    root.addHandler(logging.StreamHandler(sys.stdout))
+log = logging.getLogger(__name__)
+
+
+if __name__ == '__main__':
+
+    try:
+        ENVNAME = os.environ.get('envname', 'local')
+        ENGINE = get_engine(envname=ENVNAME)
+
+        log.info('Starting refresh shares task...')
+        DataSharingService.refresh_shares(engine=ENGINE)
+
+        log.info('Refresh shares task finished successfully')
+
+    except Exception as e:
+        log.error(f'Refresh shares task failed due to: {e}')
+        raise e
diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py
index 458077553..0cf6f1950 100644
--- a/deploy/stacks/container.py
+++ b/deploy/stacks/container.py
@@ -220,6 +220,41 @@ def __init__(
         )
         self.ecs_security_groups.extend(subscriptions_task.task.security_groups)
 
+        shares_refresh_task = self.set_scheduled_task(
+            cluster=cluster,
+            command=[
+                'python3.8',
+                '-m',
+                'dataall.tasks.shares_refresh',
+            ],
+            container_id='container',
+            ecr_repository=ecr_repository,
+            environment={
+                'AWS_REGION': self.region,
+                'envname': envname,
+                'LOGLEVEL': 'INFO',
+            },
+            image_tag=cdkproxy_image_tag,
+            log_group=self.create_log_group(
+                envname, resource_prefix, log_group_name='shares-refresh'
+            ),
+            schedule_expression=Schedule.expression('cron(0 2 * * ? *)'),
+            scheduled_task_id=f'{resource_prefix}-{envname}-shares-refresh-schedule',
+            task_id=f'{resource_prefix}-{envname}-shares-refresh',
+            task_role=task_role,
+            vpc=vpc,
+            security_group=scheduled_tasks_sg,
+            prod_sizing=prod_sizing,
+        )
+        self.ecs_security_groups.extend(shares_refresh_task.task.security_groups)
+
+        ssm.StringParameter(
+            self,
+            f'RamCleanUpToggle{envname}',
+            parameter_name=f'/dataall/{envname}/shares/cleanlfv1ram',
+            string_value='False',
+        )
+
         share_management_task_definition = ecs.FargateTaskDefinition(
             self,
             f'{resource_prefix}-{envname}-share-manager',
diff --git a/tests/tasks/test_share_manager.py b/tests/tasks/test_share_manager.py
new file mode 100644
index 000000000..ab6bfbaee
--- /dev/null
+++ b/tests/tasks/test_share_manager.py
@@ -0,0 +1,310 @@
+import logging
+
+import pytest
+
+import dataall
+
+logger = logging.getLogger()
+logger.setLevel(logging.INFO)
+logging.getLogger('boto3').setLevel(logging.CRITICAL)
+logging.getLogger('botocore').setLevel(logging.CRITICAL)
+
+REGION = 'eu-central-1'
+
+ENV_ACCOUNT = ''
+ENV_ROLE_NAME = 'dataall-World-Happiness-Report-i6v1v1c2'
+ENV_ROLE_ARN = f'arn:aws:iam::{ENV_ACCOUNT}:role/{ENV_ROLE_NAME}'
+
+
+CROSS_ACCOUNT_ENV = ''
+CROSS_ACCOUNT_ENV_ROLE_NAME = 'dataall-ConsumersEnvironment-r71ucp4m'
+CROSS_ACCOUNT_ENV_ROLE_ARN = (
+    f'arn:aws:iam::{CROSS_ACCOUNT_ENV}:role/{CROSS_ACCOUNT_ENV_ROLE_NAME}'
+)
+
+DATASET_GLUE_DB = 'dataall_world_happiness_report_i6v1v1c2'
+DATASET_S3_BUCKET = 'dataall-world-happiness-report-i6v1v1c2'
+
+TABLE_NAME = 'dataall_world_happiness_report_i6v1v1c2'
+TABLE_S3_PREFIX = f's3://{DATASET_S3_BUCKET}/'
+
+
+@pytest.fixture(scope='module')
+def org(db):
+    with db.scoped_session() as session:
+        org = dataall.db.models.Organization(
+            label='org',
+            owner='alice',
+            tags=[],
+            description='desc',
+            SamlGroupName='admins',
+        )
+        session.add(org)
+    yield org
+
+
+@pytest.fixture(scope='module')
+def env(org, db):
+    with db.scoped_session() as session:
+        env = dataall.db.models.Environment(
+            organizationUri=org.organizationUri,
+            AwsAccountId=ENV_ACCOUNT,
+            region='eu-central-1',
+            label='org',
+            owner='alice',
+            tags=[],
+            description='desc',
+            SamlGroupName='admins',
+            EnvironmentDefaultIAMRoleName=ENV_ROLE_NAME,
+            EnvironmentDefaultIAMRoleArn=ENV_ROLE_ARN,
+            CDKRoleArn=f'arn:aws:iam::{ENV_ACCOUNT}:role/EnvRole',
+            environmentUri='mytest',
+        )
+        session.add(env)
+        session.commit()
+        env_group = dataall.db.models.EnvironmentGroup(
+            environmentUri=env.environmentUri,
+            groupUri='bobTeam',
+            environmentIAMRoleArn=env.EnvironmentDefaultIAMRoleArn,
+            environmentIAMRoleName=env.EnvironmentDefaultIAMRoleName,
+            environmentAthenaWorkGroup='workgroup',
+        )
+        session.add(env_group)
+    yield env
+
+
+@pytest.fixture(scope='module')
+def cross_account_env(org, db):
+    with db.scoped_session() as session:
+        env = dataall.db.models.Environment(
+            organizationUri=org.organizationUri,
+            AwsAccountId=CROSS_ACCOUNT_ENV,
+            region='eu-central-1',
+            label='org',
+            owner='bob',
+            tags=[],
+            description='desc',
+            SamlGroupName='bobTeam',
+            EnvironmentDefaultIAMRoleName=CROSS_ACCOUNT_ENV_ROLE_NAME,
+            EnvironmentDefaultIAMRoleArn=CROSS_ACCOUNT_ENV_ROLE_ARN,
+            CDKRoleArn=f'arn:aws:iam::{CROSS_ACCOUNT_ENV}:role/EnvRole',
+        )
+        session.add(env)
+        session.commit()
+        env_group = dataall.db.models.EnvironmentGroup(
+            environmentUri=env.environmentUri,
+            groupUri=env.SamlGroupName,
+            environmentIAMRoleArn=env.EnvironmentDefaultIAMRoleArn,
+            environmentIAMRoleName=env.EnvironmentDefaultIAMRoleName,
+
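
# DataSharingService.refresh_shares is wired into the scheduled task above but its
# body is not part of this hunk. A plausible sketch, assuming it simply re-approves
# every approved share and reads the clean-up toggle created above; the query and
# the Parameter.get_parameter signature are assumptions, not the shipped code:
import logging
import os

from dataall.db import models
from dataall.tasks.data_sharing.data_sharing_service import DataSharingService
from dataall.utils import Parameter

log = logging.getLogger(__name__)

def refresh_shares(engine) -> bool:
    envname = os.environ.get('envname', 'local')
    # toggle controlling clean-up of deprecated LF v1 RAM shares (hypothetical accessor)
    clean_lfv1_ram = (
        Parameter().get_parameter(env=envname, path='shares/cleanlfv1ram') == 'True'
    )
    log.info(f'Clean-up of LF v1 RAM shares enabled: {clean_lfv1_ram}')
    with engine.scoped_session() as session:
        share_uris = [
            share.shareUri
            for share in session.query(models.ShareObject).filter(
                models.ShareObject.status == 'Approved'
            )
        ]
    for share_uri in share_uris:
        DataSharingService.approve_share(engine=engine, share_uri=share_uri)
    return True
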
environmentAthenaWorkGroup='workgroup', + ) + session.add(env_group) + yield env + + +@pytest.fixture(scope='module') +def dataset(org, env, db): + with db.scoped_session() as session: + dataset = dataall.db.models.Dataset( + organizationUri=org.organizationUri, + environmentUri=env.environmentUri, + label=DATASET_S3_BUCKET, + owner='alice', + SamlAdminGroupName='admins', + businessOwnerDelegationEmails=['foo@amazon.com'], + name=DATASET_S3_BUCKET, + S3BucketName=DATASET_S3_BUCKET, + GlueDatabaseName=DATASET_GLUE_DB, + KmsAlias='kmsalias', + AwsAccountId=env.AwsAccountId, + region=env.region, + IAMDatasetAdminUserArn=f'arn:aws:iam::{ENV_ACCOUNT}:user/dataset', + IAMDatasetAdminRoleArn=f'arn:aws:iam::{ENV_ACCOUNT}:role/dataset', + ) + session.add(dataset) + yield dataset + + +@pytest.fixture(scope='module') +def table(org, env, db, dataset): + with db.scoped_session() as session: + table = dataall.db.models.DatasetTable( + label=TABLE_NAME, + name=TABLE_NAME, + owner='alice', + description='test table', + tags=['a', 'b'], + datasetUri=dataset.datasetUri, + S3Prefix=TABLE_S3_PREFIX, + GlueDatabaseName=dataset.GlueDatabaseName, + GlueTableName=TABLE_NAME, + S3BucketName=dataset.S3BucketName, + AWSAccountId=dataset.AwsAccountId, + region=dataset.region, + ) + session.add(table) + yield table + + +@pytest.fixture(scope='module') +def table2(org, env, db, dataset): + with db.scoped_session() as session: + table = dataall.db.models.DatasetTable( + label='deleted_glue_table', + name='deleted_glue_table', + owner='alice', + description='test table', + tags=['a', 'b'], + datasetUri=dataset.datasetUri, + S3Prefix='s3://dataall-world-happiness-report-i6v1v1c2/', + GlueDatabaseName=dataset.GlueDatabaseName, + GlueTableName='deleted_glue_table', + S3BucketName=dataset.S3BucketName, + AWSAccountId=dataset.AwsAccountId, + region=dataset.region, + ) + session.add(table) + yield table + + +@pytest.fixture(scope='module') +def cross_account_share( + dataset: dataall.db.models.Dataset, + db: dataall.db.Engine, + cross_account_env: dataall.db.models.Environment, + table: dataall.db.models.DatasetTable, + table2: dataall.db.models.DatasetTable, +): + with db.scoped_session() as session: + share = dataall.db.models.ShareObject( + shareUri='cross', + datasetUri=dataset.datasetUri, + environmentUri=cross_account_env.environmentUri, + owner='bob', + principalId=cross_account_env.SamlGroupName, + principalType=dataall.api.constants.PrincipalType.Environment.value, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share) + session.commit() + share_item = dataall.db.models.ShareObjectItem( + shareUri=share.shareUri, + owner='alice', + itemUri=table.tableUri, + itemType=dataall.api.constants.ShareableType.Table.value, + itemName=table.GlueTableName, + GlueDatabaseName=table.GlueDatabaseName, + GlueTableName=table.GlueTableName, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share_item) + share_item = dataall.db.models.ShareObjectItem( + shareUri=share.shareUri, + owner='alice', + itemUri=table2.tableUri, + itemType=dataall.api.constants.ShareableType.Table.value, + itemName=table2.GlueTableName, + GlueDatabaseName=table2.GlueDatabaseName, + GlueTableName=table2.GlueTableName, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share_item) + session.commit() + yield share + + +@pytest.fixture(scope='module') +def same_account_share( + dataset: dataall.db.models.Dataset, + db: dataall.db.Engine, + env: 
dataall.db.models.Environment, + table: dataall.db.models.DatasetTable, +): + with db.scoped_session() as session: + share = dataall.db.models.ShareObject( + shareUri='same', + datasetUri=dataset.datasetUri, + environmentUri=env.environmentUri, + owner='bob', + principalId='bobTeam', + principalType=dataall.api.constants.PrincipalType.Group.value, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share) + session.commit() + share_item = dataall.db.models.ShareObjectItem( + shareUri=share.shareUri, + owner='alice', + itemUri=table.tableUri, + itemType=dataall.api.constants.ShareableType.Table.value, + itemName=table.GlueTableName, + GlueDatabaseName=table.GlueDatabaseName, + GlueTableName=table.GlueTableName, + status=dataall.api.constants.ShareObjectStatus.Approved.value, + ) + session.add(share_item) + yield share + + +def __update_to_rejected_status(db, share): + with db.scoped_session() as session: + share.status = dataall.api.constants.ShareObjectStatus.Rejected.value + session.merge(share) + + +def test_cross_account_sharing(db, cross_account_share, dataset, mocker): + """mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share', + return_value=True, + ) + mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share', + return_value=True, + )""" + dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share( + db, cross_account_share.shareUri + ) + + __update_to_rejected_status(db, cross_account_share) + + dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share( + db, cross_account_share.shareUri + ) + + +def test_same_account_sharing(db, same_account_share, dataset, mocker): + mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share', + return_value=True, + ) + mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share', + return_value=True, + ) + dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share( + db, same_account_share.shareUri + ) + + __update_to_rejected_status(db, same_account_share) + + dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share( + db, same_account_share.shareUri + ) + + +def test_refresh_shares(db, same_account_share, cross_account_share, dataset, mocker): + mocker.patch( + 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.refresh_shares', + return_value=True, + ) + mocker.patch('dataall.utils.Parameter.get_parameter', return_value='True') + assert dataall.tasks.data_sharing.data_sharing_service.DataSharingService.refresh_shares( + db + ) + + __update_to_rejected_status(db, same_account_share) + __update_to_rejected_status(db, cross_account_share) + + assert dataall.tasks.data_sharing.data_sharing_service.DataSharingService.refresh_shares( + db + ) From 051f7b12c43608a27b0843e7d8dd0d19752024fc Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Wed, 26 Oct 2022 05:47:28 +0200 Subject: [PATCH 22/28] Testing feedbacks --- backend/dataall/db/api/share_object.py | 27 +-- .../data_sharing/common/share_approval.py | 108 +++------- .../tasks/data_sharing/common/share_revoke.py | 198 ++++++++++++------ .../cross_account/approve_share.py | 14 +- .../cross_account/revoke_share.py | 11 +- .../data_sharing/data_sharing_service.py | 8 +- .../same_account/approve_share.py | 8 +- .../data_sharing/same_account/revoke_share.py | 5 +- 8 files changed, 199 insertions(+), 180 
deletions(-) diff --git a/backend/dataall/db/api/share_object.py b/backend/dataall/db/api/share_object.py index 7b100cf9c..97912b649 100644 --- a/backend/dataall/db/api/share_object.py +++ b/backend/dataall/db/api/share_object.py @@ -682,34 +682,35 @@ def get_share_data(session, share_uri, status): shared_tables = ( session.query(models.DatasetTable) - .join( + .join( models.ShareObjectItem, models.ShareObjectItem.itemUri == models.DatasetTable.tableUri, ) - .join( + .join( models.ShareObject, models.ShareObject.shareUri == models.ShareObjectItem.shareUri, ) - .filter( + .filter( and_( models.ShareObject.datasetUri == dataset.datasetUri, models.ShareObject.environmentUri == target_environment.environmentUri, models.ShareObject.status.in_(status), + models.ShareObject.shareUri == share_uri, ) ) - .all() + .all() ) env_group: models.EnvironmentGroup = ( session.query(models.EnvironmentGroup) - .filter( + .filter( and_( models.EnvironmentGroup.environmentUri == share.environmentUri, models.EnvironmentGroup.groupUri == share.principalId, ) ) - .first() + .first() ) if not env_group: raise Exception( @@ -726,28 +727,29 @@ def get_share_data(session, share_uri, status): ) @staticmethod - def other_approved_share_object_exists(session, environment_uri): + def other_approved_share_object_exists(session, environment_uri, dataset_uri): return ( session.query(models.ShareObject) - .filter( + .filter( and_( models.Environment.environmentUri == environment_uri, models.ShareObject.status == models.Enums.ShareObjectStatus.Approved.value, + models.ShareObject.datasetUri == dataset_uri, ) ) - .all() + .all() ) @staticmethod def is_shared_table(session, environment_uri, dataset_uri, table_name): return ( session.query(models.ShareObjectItem) - .join( + .join( models.ShareObject, models.ShareObjectItem.shareUri == models.ShareObject.shareUri, ) - .filter( + .filter( and_( models.ShareObjectItem.GlueTableName == table_name, models.ShareObject.datasetUri == dataset_uri, @@ -755,6 +757,5 @@ def is_shared_table(session, environment_uri, dataset_uri, table_name): models.ShareObject.environmentUri == environment_uri, ) ) - .first() + .first() ) - diff --git a/backend/dataall/tasks/data_sharing/common/share_approval.py b/backend/dataall/tasks/data_sharing/common/share_approval.py index ffa53ca73..2815d929e 100644 --- a/backend/dataall/tasks/data_sharing/common/share_approval.py +++ b/backend/dataall/tasks/data_sharing/common/share_approval.py @@ -1,6 +1,5 @@ import abc import logging -import uuid from botocore.exceptions import ClientError @@ -191,27 +190,11 @@ def create_resource_link(cls, **data) -> dict: ) raise e - @classmethod - def clean_shared_database( - cls, - session, - dataset: models.Dataset, - shared_tables: [models.DatasetTable], - target_environment: models.Environment, - shared_db_name: str, - ) -> [str]: + def clean_shared_database(self) -> [str]: """ After share approval verify that the shared database do not have any removed items from the share request. 
- Parameters - ---------- - session : db - dataset : models.Dataset - shared_tables : [models.DatasetTable] - target_environment : models.Environment - shared_db_name : shared database name - Returns ------- List of deleted tables from the shared database @@ -219,76 +202,44 @@ def clean_shared_database( tables_to_delete = [] shared_glue_tables = Glue.list_glue_database_tables( - accountid=target_environment.AwsAccountId, - database=shared_db_name, - region=target_environment.region, + accountid=self.target_environment.AwsAccountId, + database=self.shared_db_name, + region=self.target_environment.region, ) logger.info( - f'Shared database {shared_db_name} glue tables: {shared_glue_tables}' + f'Shared database {self.shared_db_name} glue tables: {shared_glue_tables}' ) - shared_tables = [t.GlueTableName for t in shared_tables] - logger.info(f'Share items of the share object {shared_tables}') - - aws_session = SessionHelper.remote_session(accountid=dataset.AwsAccountId) - client = aws_session.client('lakeformation', region_name=dataset.region) + shared_tables = [t.GlueTableName for t in self.shared_tables] + logger.info(f'Share items of the share object {self.shared_tables}') for table in shared_glue_tables: if table['Name'] not in shared_tables: logger.info( - f'Found a table not part of the share: {dataset.GlueDatabaseName}//{table["Name"]}' + f'Found a table not part of the share: {self.dataset.GlueDatabaseName}//{table["Name"]}' ) - is_shared = api.ShareObject.is_shared_table( - session, - target_environment.environmentUri, - dataset.datasetUri, - table['Name'], - ) - if not is_shared: - logger.info( - f'Access to table {dataset.AwsAccountId}//{dataset.GlueDatabaseName}//{table["Name"]} ' - f'will be removed for account {target_environment.AwsAccountId}' + try: + LakeFormation.revoke_source_table_access( + target_accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, + source_database=self.dataset.GlueDatabaseName, + source_table=table['Name'], + target_principal=self.env_group.environmentIAMRoleArn, + source_accountid=self.source_environment.AwsAccountId, + ) + except ClientError as e: + # error not raised due to multiple failure reasons + # cleanup failure does not impact share request items access + logger.error( + f'Revoking permission on source table failed due to: {e}' ) - if Glue.table_exists( - **{ - 'accountid': dataset.AwsAccountId, - 'region': dataset.region, - 'database': dataset.GlueDatabaseName, - 'tablename': table['Name'], - } - ): - LakeFormation.batch_revoke_permissions( - client, - target_environment.AwsAccountId, - [ - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': dataset.GlueDatabaseName, - 'Name': table['Name'], - 'ColumnWildcard': {}, - 'CatalogId': dataset.AwsAccountId, - } - }, - 'Permissions': ['DESCRIBE', 'SELECT'], - 'PermissionsWithGrantOption': [ - 'DESCRIBE', - 'SELECT', - ], - } - ], - ) tables_to_delete.append(table['Name']) Glue.batch_delete_tables( - accountid=target_environment.AwsAccountId, - region=target_environment.region, - database=shared_db_name, + accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, + database=self.shared_db_name, tables=tables_to_delete, ) @@ -299,7 +250,7 @@ def handle_share_failure( table: models.DatasetTable, share_item: models.ShareObjectItem, error: Exception, - ) -> None: + ) -> bool: """ Handles share failure by raising an 
alarm to alarmsTopic Parameters @@ -310,7 +261,7 @@ def handle_share_failure( Returns ------- - None + True if alarm published successfully """ logging.error( f'Failed to share table {table.GlueTableName} ' @@ -326,6 +277,7 @@ def handle_share_failure( AlarmService().trigger_table_sharing_failure_alarm( table, self.share, self.target_environment ) + return True def build_share_data(self, principals: [str], table: models.DatasetTable) -> dict: """ @@ -363,7 +315,7 @@ def delete_deprecated_shared_database(self) -> bool: True if delete is successful """ return Glue.delete_database( - accountid=self.dataset.AwsAccountId, - region=self.dataset.region, + accountid=self.target_environment.AwsAccountId, + region=self.target_environment.region, database=f'{self.dataset.GlueDatabaseName}shared', ) diff --git a/backend/dataall/tasks/data_sharing/common/share_revoke.py b/backend/dataall/tasks/data_sharing/common/share_revoke.py index a92896366..180b9f4a4 100644 --- a/backend/dataall/tasks/data_sharing/common/share_revoke.py +++ b/backend/dataall/tasks/data_sharing/common/share_revoke.py @@ -15,13 +15,13 @@ class ShareRevoke: def __init__( self, session, - shared_db_name, - env_group, - dataset, - share, - shared_tables, - source_environment, - target_environment, + shared_db_name: str, + dataset: models.Dataset, + share: models.ShareObject, + shared_tables: [models.DatasetTable], + source_environment: models.Environment, + target_environment: models.Environment, + env_group: models.EnvironmentGroup, ): self.session = session self.env_group = env_group @@ -36,20 +36,13 @@ def __init__( def revoke_share(self): return NotImplementedError - def revoke_resource_links_access(self) -> [dict]: + def revoke_shared_tables_access(self) -> bool: """ Loops through share request items and revokes access on LF Returns ------- - List of revoke entries + True if revoke is successful """ - aws_session = SessionHelper.remote_session( - accountid=self.target_environment.AwsAccountId - ) - client = aws_session.client( - 'lakeformation', region_name=self.target_environment.region - ) - revoke_entries = [] for table in self.shared_tables: share_item = api.ShareObject.find_share_item_by_table( @@ -63,38 +56,12 @@ def revoke_resource_links_access(self) -> [dict]: ) try: - data = { - 'accountid': self.target_environment.AwsAccountId, - 'region': self.target_environment.region, - 'database': self.shared_db_name, - 'tablename': table.GlueTableName, - } - log.info(f'Starting revoke for: {data}') - - if Glue.table_exists(**data): - revoke_entries.append( - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': self.env_group.environmentIAMRoleArn - }, - 'Resource': { - 'Table': { - 'DatabaseName': self.shared_db_name, - 'Name': table.GlueTableName, - 'CatalogId': self.target_environment.AwsAccountId, - } - }, - 'Permissions': ['DESCRIBE', 'SELECT'], - } - ) + log.info(f'Starting revoke access for table: {table.GlueTableName}') - log.info(f'Revoking permissions for entries : {revoke_entries}') + self.revoke_table_resource_link_access(table) - LakeFormation.batch_revoke_permissions( - client, self.target_environment.AwsAccountId, revoke_entries - ) + self.revoke_source_table_access(table) api.ShareObject.update_share_item_status( self.session, @@ -103,21 +70,101 @@ def revoke_resource_links_access(self) -> [dict]: ) except Exception as e: - logging.error( - f'Failed to revoke LF permissions to table share {table.GlueTableName} ' - f'on target account 
{self.target_environment.AwsAccountId}/{self.target_environment.region}'
-                    f'due to: {e}'
-                )
-                api.ShareObject.update_share_item_status(
-                    self.session,
-                    share_item,
-                    models.ShareObjectStatus.Revoke_Share_Failed.value,
-                )
-                AlarmService().trigger_revoke_sharing_failure_alarm(
-                    table, self.share, self.target_environment
-                )
+                self.handle_revoke_failure(table, share_item, e)
+
+        return True
+
+    def revoke_table_resource_link_access(self, table: models.DatasetTable):
+        """
+        Revokes access to glue table resource link
+        Parameters
+        ----------
+        table : models.DatasetTable
+
+        Returns
+        -------
+        True if revoke is successful
+        """
+        if not Glue.table_exists(
+            accountid=self.target_environment.AwsAccountId,
+            region=self.target_environment.region,
+            database=self.shared_db_name,
+            tablename=table.GlueTableName,
+        ):
+            log.info(
+                f'Resource link could not be found '
+                f'on {self.target_environment.AwsAccountId}/{self.shared_db_name}/{table.GlueTableName} '
+                f'skipping revoke actions...'
+            )
+            return True
+
+        log.info(
+            f'Revoking resource link access '
+            f'on {self.target_environment.AwsAccountId}/{self.shared_db_name}/{table.GlueTableName} '
+            f'for principal {self.env_group.environmentIAMRoleArn}'
+        )
+        LakeFormation.batch_revoke_permissions(
+            SessionHelper.remote_session(self.target_environment.AwsAccountId).client(
+                'lakeformation', region_name=self.target_environment.region
+            ),
+            self.target_environment.AwsAccountId,
+            [
+                {
+                    'Id': str(uuid.uuid4()),
+                    'Principal': {
+                        'DataLakePrincipalIdentifier': self.env_group.environmentIAMRoleArn
+                    },
+                    'Resource': {
+                        'Table': {
+                            'DatabaseName': self.shared_db_name,
+                            'Name': table.GlueTableName,
+                            'CatalogId': self.target_environment.AwsAccountId,
+                        }
+                    },
+                    'Permissions': ['DESCRIBE'],
+                }
+            ],
+        )
+        return True
 
-        return revoke_entries
+    def revoke_source_table_access(self, table):
+        """
+        Revokes access to the source glue table
+        Parameters
+        ----------
+        table : models.DatasetTable
+
+        Returns
+        -------
+        True if revoke is successful
+        """
+        if not Glue.table_exists(
+            accountid=self.source_environment.AwsAccountId,
+            region=self.source_environment.region,
+            database=self.dataset.GlueDatabaseName,
+            tablename=table.GlueTableName,
+        ):
+            log.info(
+                f'Source table could not be found '
+                f'on {self.source_environment.AwsAccountId}/{self.dataset.GlueDatabaseName}/{table.GlueTableName} '
+                f'skipping revoke actions...'
+            )
+            return True
+
+        log.info(
+            f'Revoking source table access '
+            f'on {self.source_environment.AwsAccountId}/{self.dataset.GlueDatabaseName}/{table.GlueTableName} '
+            f'for principal {self.env_group.environmentIAMRoleArn}'
+        )
+        LakeFormation.revoke_source_table_access(
+            target_accountid=self.target_environment.AwsAccountId,
+            region=self.target_environment.region,
+            source_database=self.dataset.GlueDatabaseName,
+            source_table=table.GlueTableName,
+            target_principal=self.env_group.environmentIAMRoleArn,
+            source_accountid=self.source_environment.AwsAccountId,
+        )
+        return True
 
     def delete_shared_database(self) -> bool:
         """
@@ -163,3 +210,36 @@ def check_share_item_exists_on_glue_catalog(
                 f' but its correspondent Glue table {table.GlueTableName} does not exist.'
), ) + + def handle_revoke_failure( + self, + table: models.DatasetTable, + share_item: models.ShareObjectItem, + error: Exception, + ) -> bool: + """ + Handles revoke failure by raising an alarm to alarmsTopic + Parameters + ---------- + table : dataset table + share_item : failed item + error : share error + + Returns + ------- + True if alarm published successfully + """ + logging.error( + f'Failed to revoke LF permissions to table share {table.GlueTableName} ' + f'on target account {self.target_environment.AwsAccountId}/{self.target_environment.region}' + f'due to: {error}' + ) + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_Share_Failed.value, + ) + AlarmService().trigger_revoke_sharing_failure_alarm( + table, self.share, self.target_environment + ) + return True diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py index f2c9b47df..d40993f21 100644 --- a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py +++ b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py @@ -68,12 +68,6 @@ def approve_share( share_item = api.ShareObject.find_share_item_by_table( self.session, self.share, table ) - if not share_item: - log.warning( - f'Share Item not found for {self.share.shareUri} ' - f'and Dataset Table {table.GlueTableName} continuing loop...' - ) - continue api.ShareObject.update_share_item_status( self.session, @@ -109,13 +103,7 @@ def approve_share( except Exception as e: self.handle_share_failure(table, share_item, e) - self.clean_shared_database( - self.session, - self.dataset, - self.shared_tables, - self.target_environment, - self.shared_db_name, - ) + self.clean_shared_database() self.delete_deprecated_shared_database() diff --git a/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py b/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py index fcb1afa43..54624266e 100644 --- a/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py +++ b/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py @@ -37,19 +37,22 @@ def revoke_share(self) -> bool: """ Revokes a share cross account 1) revoke resource link access on target account - 2) delete shared database on target account - 3) revoke resource link access on source account + 2) revoke table access on source account + 3) delete shared database on target account + 4) revoke external account sharing on source account Returns ------- True if revoke is successful """ - self.revoke_resource_links_access() + self.revoke_shared_tables_access() self.delete_shared_database() if not api.ShareObject.other_approved_share_object_exists( - self.session, self.target_environment.environmentUri + self.session, + self.target_environment.environmentUri, + self.dataset.datasetUri, ): self.revoke_external_account_access_on_source_account() diff --git a/backend/dataall/tasks/data_sharing/data_sharing_service.py b/backend/dataall/tasks/data_sharing/data_sharing_service.py index 8fda17e2a..99399d428 100644 --- a/backend/dataall/tasks/data_sharing/data_sharing_service.py +++ b/backend/dataall/tasks/data_sharing/data_sharing_service.py @@ -48,7 +48,7 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: shared_tables, source_environment, target_environment, - ) = api.ShareObject.get_share_data(session, share_uri, [models.Enums.ShareObjectStatus.Approved.value]) + ) = api.ShareObject.get_share_data(session, share_uri, 
['Approved']) shared_db_name = cls.build_shared_db_name(dataset, share) @@ -107,7 +107,7 @@ def reject_share(cls, engine: Engine, share_uri: str): shared_tables, source_environment, target_environment, - ) = api.ShareObject.get_share_data(session, share_uri, [models.Enums.ShareObjectStatus.Rejected.value]) + ) = api.ShareObject.get_share_data(session, share_uri, ['Rejected']) log.info(f'Revoking permissions for tables : {shared_tables}') @@ -123,23 +123,23 @@ def reject_share(cls, engine: Engine, share_uri: str): return CrossAccountShareRevoke( session, shared_db_name, - env_group, dataset, share, shared_tables, source_environment, target_environment, + env_group, ).revoke_share() return SameAccountShareRevoke( session, shared_db_name, - env_group, dataset, share, shared_tables, source_environment, target_environment, + env_group, ).revoke_share() @classmethod diff --git a/backend/dataall/tasks/data_sharing/same_account/approve_share.py b/backend/dataall/tasks/data_sharing/same_account/approve_share.py index ec40d12d1..04e6179b4 100644 --- a/backend/dataall/tasks/data_sharing/same_account/approve_share.py +++ b/backend/dataall/tasks/data_sharing/same_account/approve_share.py @@ -89,13 +89,7 @@ def approve_share(self) -> bool: except Exception as e: self.handle_share_failure(table, share_item, e) - self.clean_shared_database( - self.session, - self.dataset, - self.shared_tables, - self.target_environment, - self.shared_db_name, - ) + self.clean_shared_database() self.delete_deprecated_shared_database() diff --git a/backend/dataall/tasks/data_sharing/same_account/revoke_share.py b/backend/dataall/tasks/data_sharing/same_account/revoke_share.py index b3cfe6a6d..c35eef32c 100644 --- a/backend/dataall/tasks/data_sharing/same_account/revoke_share.py +++ b/backend/dataall/tasks/data_sharing/same_account/revoke_share.py @@ -33,13 +33,14 @@ def revoke_share(self) -> bool: """ Revokes a share on same account 1) revoke resource link access - 2) delete shared database on target account + 2) revoke source table access + 3) delete shared database Returns ------- True if revoke is successful """ - self.revoke_resource_links_access() + self.revoke_shared_tables_access() self.delete_shared_database() From 85fdaf0cb4837b0fbd56647df27f459049730da9 Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Wed, 26 Oct 2022 06:04:32 +0200 Subject: [PATCH 23/28] fix lint issues --- backend/dataall/db/api/share_object.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/backend/dataall/db/api/share_object.py b/backend/dataall/db/api/share_object.py index 97912b649..cb6dcc8be 100644 --- a/backend/dataall/db/api/share_object.py +++ b/backend/dataall/db/api/share_object.py @@ -682,15 +682,15 @@ def get_share_data(session, share_uri, status): shared_tables = ( session.query(models.DatasetTable) - .join( + .join( models.ShareObjectItem, models.ShareObjectItem.itemUri == models.DatasetTable.tableUri, ) - .join( + .join( models.ShareObject, models.ShareObject.shareUri == models.ShareObjectItem.shareUri, ) - .filter( + .filter( and_( models.ShareObject.datasetUri == dataset.datasetUri, models.ShareObject.environmentUri @@ -699,18 +699,18 @@ def get_share_data(session, share_uri, status): models.ShareObject.shareUri == share_uri, ) ) - .all() + .all() ) env_group: models.EnvironmentGroup = ( session.query(models.EnvironmentGroup) - .filter( + .filter( and_( models.EnvironmentGroup.environmentUri == share.environmentUri, models.EnvironmentGroup.groupUri == share.principalId, ) ) - 
.first() + .first() ) if not env_group: raise Exception( @@ -730,7 +730,7 @@ def get_share_data(session, share_uri, status): def other_approved_share_object_exists(session, environment_uri, dataset_uri): return ( session.query(models.ShareObject) - .filter( + .filter( and_( models.Environment.environmentUri == environment_uri, models.ShareObject.status @@ -738,18 +738,18 @@ def other_approved_share_object_exists(session, environment_uri, dataset_uri): models.ShareObject.datasetUri == dataset_uri, ) ) - .all() + .all() ) @staticmethod def is_shared_table(session, environment_uri, dataset_uri, table_name): return ( session.query(models.ShareObjectItem) - .join( + .join( models.ShareObject, models.ShareObjectItem.shareUri == models.ShareObject.shareUri, ) - .filter( + .filter( and_( models.ShareObjectItem.GlueTableName == table_name, models.ShareObject.datasetUri == dataset_uri, @@ -757,5 +757,5 @@ def is_shared_table(session, environment_uri, dataset_uri, table_name): models.ShareObject.environmentUri == environment_uri, ) ) - .first() + .first() ) From cf625017ea7345793e9ce7e6997da8a371789594 Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Wed, 26 Oct 2022 06:11:24 +0200 Subject: [PATCH 24/28] fix it test --- tests/tasks/test_share_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/tasks/test_share_manager.py b/tests/tasks/test_share_manager.py index ab6bfbaee..2eec795d6 100644 --- a/tests/tasks/test_share_manager.py +++ b/tests/tasks/test_share_manager.py @@ -253,14 +253,14 @@ def __update_to_rejected_status(db, share): def test_cross_account_sharing(db, cross_account_share, dataset, mocker): - """mocker.patch( + mocker.patch( 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share', return_value=True, ) mocker.patch( 'dataall.tasks.data_sharing.data_sharing_service.DataSharingService.reject_share', return_value=True, - )""" + ) dataall.tasks.data_sharing.data_sharing_service.DataSharingService.approve_share( db, cross_account_share.shareUri ) From e195ce924d2daf85ba05c385d28e12dbea865780 Mon Sep 17 00:00:00 2001 From: "mamallem@amazon.com" Date: Wed, 26 Oct 2022 07:06:59 +0200 Subject: [PATCH 25/28] remove empty f strings + put back waiter for cross account sharing --- backend/dataall/aws/handlers/lakeformation.py | 104 +++++++++++++++++- .../cross_account/approve_share.py | 2 + 2 files changed, 102 insertions(+), 4 deletions(-) diff --git a/backend/dataall/aws/handlers/lakeformation.py b/backend/dataall/aws/handlers/lakeformation.py index 5b0de94a1..051c76641 100644 --- a/backend/dataall/aws/handlers/lakeformation.py +++ b/backend/dataall/aws/handlers/lakeformation.py @@ -12,6 +12,26 @@ class LakeFormation: def __init__(self): pass + @staticmethod + def describe_resource(resource_arn, accountid, region): + """ + Describes a LF data location + """ + try: + session = SessionHelper.remote_session(accountid) + lf_client = session.client('lakeformation', region_name=region) + + response = lf_client.describe_resource(ResourceArn=resource_arn) + + log.debug(f'LF data location already registered: {response}') + + return response['ResourceInfo'] + + except ClientError as e: + log.error( + f'LF data location for resource {resource_arn} not found due to {e}' + ) + @staticmethod def grant_pivot_role_all_database_permissions(accountid, region, database): LakeFormation.grant_permissions_to_database( @@ -141,6 +161,7 @@ def batch_revoke_permissions(client, accountid, entries): :param entries: :return: """ + log.info(f'Batch 
Revoking {entries}')
         entries_chunks: list = [entries[i : i + 20] for i in range(0, len(entries), 20)]
         failures = []
         try:
             for entries_chunk in entries_chunks:
                 response = client.batch_revoke_permissions(
                     CatalogId=accountid, Entries=entries_chunk
                 )
-                log.info(f'Batch Revoke {entries_chunk} response: {response}')
+                log.info(f'Batch Revoke response: {response}')
                 failures.extend(response.get('Failures'))
-        except ClientError as e:
+
             for failure in failures:
                 if not (
                     failure['Error']['ErrorCode'] == 'InvalidInputException'
                     and (
                         'Grantee has no permissions' in failure['Error']['ErrorMessage']
                         or 'No permissions revoked' in failure['Error']['ErrorMessage']
+                        or 'not found' in failure['Error']['ErrorMessage']
                     )
                 ):
-                    log.warning(f'Batch Revoke ended with failures: {failures}')
-                    raise e
+                    raise ClientError(
+                        error_response={
+                            'Error': {
+                                'Code': 'LakeFormation.batch_revoke_permissions',
+                                'Message': f'Operation ended with failures: {failures}',
+                            }
+                        },
+                        operation_name='LakeFormation.batch_revoke_permissions',
+                    )
+
+        except ClientError as e:
+            log.warning(f'Batch Revoke ended with failures: {failures}')
+            raise e
 
     @staticmethod
     def grant_resource_link_permission_on_target(client, source, target):
@@ -221,3 +254,66 @@ def grant_resource_link_permission(client, source, target, target_database):
                 f'due to: {e}'
             )
             raise e
+
+    @staticmethod
+    def revoke_source_table_access(**data):
+        """
+        Revokes permissions for a principal in a cross account sharing setup
+        Parameters
+        ----------
+        data :
+
+        Returns
+        -------
+
+        """
+        logging.info(f'Revoking source table access: {data} ...')
+        target_accountid = data['target_accountid']
+        region = data['region']
+        target_principal = data['target_principal']
+        source_database = data['source_database']
+        source_table = data['source_table']
+        source_accountid = data['source_accountid']
+
+        try:
+            aws_session = SessionHelper.remote_session(target_accountid)
+            lakeformation = aws_session.client('lakeformation', region_name=region)
+
+            logging.info('Revoking DESCRIBE permission...')
+            lakeformation.revoke_permissions(
+                Principal=dict(DataLakePrincipalIdentifier=target_principal),
+                Resource=dict(
+                    Table=dict(
+                        CatalogId=source_accountid,
+                        DatabaseName=source_database,
+                        Name=source_table,
+                    )
+                ),
+                Permissions=['DESCRIBE'],
+                PermissionsWithGrantOption=[],
+            )
+            logging.info('Successfully revoked DESCRIBE permissions')
+
+            logging.info('Revoking SELECT permission...')
+            lakeformation.revoke_permissions(
+                Principal=dict(DataLakePrincipalIdentifier=target_principal),
+                Resource=dict(
+                    TableWithColumns=dict(
+                        CatalogId=source_accountid,
+                        DatabaseName=source_database,
+                        Name=source_table,
+                        ColumnWildcard={},
+                    )
+                ),
+                Permissions=['SELECT'],
+                PermissionsWithGrantOption=[],
+            )
+            logging.info('Successfully revoked SELECT permissions')
+
+        except ClientError as e:
+            logging.error(
+                f'Failed to revoke permissions for {target_principal} '
+                f'on source table {source_accountid}/{source_database}/{source_table} '
+                f'due to: {e}'
+            )
+            raise e
diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
index d40993f21..9276e9e6e 100644
--- a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
+++ b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
@@ -135,6 +135,7 @@ def share_table_with_target_account(cls, **data):
             data['source']['database'],
             data['source']['tablename'],
         )
+        time(1)
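+        # waiter for cross-account sharing: give the preceding Lake Formation
+        # grant a moment to propagate before granting table permissions below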
LakeFormation.grant_permissions_to_table(
             source_lf_client,
@@ -144,6 +145,7 @@ def share_table_with_target_account(cls, **data):
             ['DESCRIBE', 'SELECT'],
             ['DESCRIBE', 'SELECT'],
         )
+        time(2)
 
         log.info(
             f"Granted access to table {data['source']['tablename']} "

From 3ef336a13b1c42062f91eb98d5be029a84436b93 Mon Sep 17 00:00:00 2001
From: "mamallem@amazon.com"
Date: Wed, 26 Oct 2022 07:08:08 +0200
Subject: [PATCH 26/28] time sleep

---
 .../dataall/tasks/data_sharing/cross_account/approve_share.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
index 9276e9e6e..6dd715efc 100644
--- a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
+++ b/backend/dataall/tasks/data_sharing/cross_account/approve_share.py
@@ -135,7 +135,7 @@ def share_table_with_target_account(cls, **data):
             data['source']['database'],
             data['source']['tablename'],
         )
-        time(1)
+        time.sleep(1)
 
         LakeFormation.grant_permissions_to_table(
             source_lf_client,
@@ -145,7 +145,7 @@ def share_table_with_target_account(cls, **data):
             ['DESCRIBE', 'SELECT'],
             ['DESCRIBE', 'SELECT'],
         )
-        time(2)
+        time.sleep(2)

From b7321b125bce627b6ad01a10923656ecfd1903e0 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Thu, 27 Oct 2022 15:15:20 +0200
Subject: [PATCH 27/28] First draft - refactoring to include S3 management

---
 backend/dataall/db/api/share_object.py        |  59 +++
 ...share_approval.py => lf_share_approval.py} |   2 +-
 .../{share_revoke.py => lf_share_revoke.py}   |   2 +-
 .../data_sharing/common/s3_approve_share.py   | 453 ++++++++++++++++++
 .../data_sharing/common/s3_revoke_share.py    | 245 ++++++++++
 .../data_sharing/data_sharing_service.py      |  43 +-
 .../__init__.py                               |   0
 .../approve_share.py                          |   4 +-
 .../revoke_share.py                           |   4 +-
 .../__init__.py                               |   0
 .../approve_share.py                          |   4 +-
 .../revoke_share.py                           |   4 +-
 12 files changed, 806 insertions(+), 14 deletions(-)
 rename backend/dataall/tasks/data_sharing/common/{share_approval.py => lf_share_approval.py} (99%)
 rename backend/dataall/tasks/data_sharing/common/{share_revoke.py => lf_share_revoke.py} (99%)
 create mode 100644 backend/dataall/tasks/data_sharing/common/s3_approve_share.py
 create mode 100644 backend/dataall/tasks/data_sharing/common/s3_revoke_share.py
 rename backend/dataall/tasks/data_sharing/{cross_account => lf_cross_account}/__init__.py (100%)
 rename backend/dataall/tasks/data_sharing/{cross_account => lf_cross_account}/approve_share.py (98%)
 rename backend/dataall/tasks/data_sharing/{cross_account => lf_cross_account}/revoke_share.py (97%)
 rename backend/dataall/tasks/data_sharing/{same_account => lf_same_account}/__init__.py (100%)
 rename backend/dataall/tasks/data_sharing/{same_account => lf_same_account}/approve_share.py (96%)
 rename backend/dataall/tasks/data_sharing/{same_account => lf_same_account}/revoke_share.py (91%)

diff --git a/backend/dataall/db/api/share_object.py b/backend/dataall/db/api/share_object.py
index cb6dcc8be..45a47cfce 100644
--- a/backend/dataall/db/api/share_object.py
+++ b/backend/dataall/db/api/share_object.py
@@ -658,6 +658,24 @@ def find_share_item_by_table(
         )
         return share_item
 
+    @staticmethod
+    def find_share_item_by_folder(
+        session,
+        share: models.ShareObject,
+        folder: models.DatasetStorageLocation,
+    ) -> models.ShareObjectItem:
+        share_item: models.ShareObjectItem = (
+            session.query(models.ShareObjectItem)
+            .filter(
+                and_(
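+                    # the item row ties this folder to this share request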
models.ShareObjectItem.itemUri == folder.locationUri, + models.ShareObjectItem.shareUri == share.shareUri, + ) + ) + .first() + ) + return share_item + @staticmethod def get_share_data(session, share_uri, status): share: models.ShareObject = session.query(models.ShareObject).get(share_uri) @@ -702,6 +720,28 @@ def get_share_data(session, share_uri, status): .all() ) + shared_folders = ( + session.query(models.DatasetStorageLocation) + .join( + models.ShareObjectItem, + models.ShareObjectItem.itemUri == models.DatasetStorageLocation.locationUri, + ) + .join( + models.ShareObject, + models.ShareObject.shareUri == models.ShareObjectItem.shareUri, + ) + .filter( + and_( + models.ShareObject.datasetUri == dataset.datasetUri, + models.ShareObject.environmentUri + == target_environment.environmentUri, + models.ShareObject.status.in_(status), + models.ShareObject.shareUri == share_uri, + ) + ) + .all() + ) + env_group: models.EnvironmentGroup = ( session.query(models.EnvironmentGroup) .filter( @@ -717,11 +757,30 @@ def get_share_data(session, share_uri, status): f'Share object Team {share.principalId} is not a member of the ' f'environment {target_environment.name}/{target_environment.AwsAccountId}' ) + + source_env_group: models.EnvironmentGroup = ( + session.query(models.EnvironmentGroup) + .filter( + and_( + models.EnvironmentGroup.environmentUri == dataset.environmentUri, + models.EnvironmentGroup.groupUri == dataset.SamlAdminGroupName, + ) + ) + .first() + ) + if not source_env_group: + raise Exception( + f'Share object Team {dataset.SamlAdminGroupName} is not a member of the ' + f'environment {dataset.environmentUri}' + ) + return ( + source_env_group, env_group, dataset, share, shared_tables, + shared_folders, source_environment, target_environment, ) diff --git a/backend/dataall/tasks/data_sharing/common/share_approval.py b/backend/dataall/tasks/data_sharing/common/lf_share_approval.py similarity index 99% rename from backend/dataall/tasks/data_sharing/common/share_approval.py rename to backend/dataall/tasks/data_sharing/common/lf_share_approval.py index 2815d929e..e99b69335 100644 --- a/backend/dataall/tasks/data_sharing/common/share_approval.py +++ b/backend/dataall/tasks/data_sharing/common/lf_share_approval.py @@ -13,7 +13,7 @@ logger = logging.getLogger(__name__) -class ShareApproval: +class LFShareApproval: def __init__( self, session, diff --git a/backend/dataall/tasks/data_sharing/common/share_revoke.py b/backend/dataall/tasks/data_sharing/common/lf_share_revoke.py similarity index 99% rename from backend/dataall/tasks/data_sharing/common/share_revoke.py rename to backend/dataall/tasks/data_sharing/common/lf_share_revoke.py index 180b9f4a4..060b2d31e 100644 --- a/backend/dataall/tasks/data_sharing/common/share_revoke.py +++ b/backend/dataall/tasks/data_sharing/common/lf_share_revoke.py @@ -11,7 +11,7 @@ log = logging.getLogger(__name__) -class ShareRevoke: +class LFShareRevoke: def __init__( self, session, diff --git a/backend/dataall/tasks/data_sharing/common/s3_approve_share.py b/backend/dataall/tasks/data_sharing/common/s3_approve_share.py new file mode 100644 index 000000000..2a67f5046 --- /dev/null +++ b/backend/dataall/tasks/data_sharing/common/s3_approve_share.py @@ -0,0 +1,453 @@ +import logging +import json + + +from ....db import models, api +from ....aws.handlers.sts import SessionHelper +from ....aws.handlers.s3 import S3 +from ....aws.handlers.kms import KMS +from ....aws.handlers.iam import IAM + +from ....utils.alarm_service import AlarmService + +log = 
logging.getLogger(__name__)
+
+
+class S3ShareApproval:
+    def __init__(
+        self,
+        session,
+        dataset: models.Dataset,
+        share: models.ShareObject,
+        shared_folders: [models.DatasetStorageLocation],
+        source_environment: models.Environment,
+        target_environment: models.Environment,
+        source_env_group: models.EnvironmentGroup,
+        env_group: models.EnvironmentGroup,
+    ):
+        self.session = session
+        self.source_env_group = source_env_group
+        self.env_group = env_group
+        self.dataset = dataset
+        self.share = share
+        self.shared_folders = shared_folders
+        self.source_environment = source_environment
+        self.target_environment = target_environment
+
+
+    def approve_share(
+        self,
+    ) -> bool:
+        """
+        1) Shares folders, for each shared folder:
+        a) ....
+        2) Cleans un-shared folders
+
+        Returns
+        -------
+        True if share is approved successfully
+        """
+        self.share_folders(
+            self.session,
+            self.share,
+            self.source_env_group,
+            self.env_group,
+            self.target_environment,
+            self.shared_folders,
+            self.dataset,
+        )
+
+        self.clean_shared_folders(
+            self.session,
+            self.share,
+            self.source_env_group,
+            self.env_group,
+            self.target_environment,
+            self.dataset,
+            self.shared_folders,
+        )
+
+        return True
+
+    @classmethod
+    def share_folders(
+        cls,
+        session,
+        share: models.ShareObject,
+        source_env_group: models.EnvironmentGroup,
+        target_env_group: models.EnvironmentGroup,
+        target_environment: models.Environment,
+        shared_folders: [models.DatasetStorageLocation],
+        dataset: models.Dataset,
+    ):
+        for folder in shared_folders:
+            share_item = api.ShareObject.find_share_item_by_folder(
+                session, share, folder
+            )
+
+            api.ShareObject.update_share_item_status(
+                session,
+                share_item,
+                models.ShareObjectStatus.Share_In_Progress.value,
+            )
+
+            source_account_id = folder.AWSAccountId
+            access_point_name = share_item.S3AccessPointName
+            bucket_name = folder.S3BucketName
+            target_account_id = target_environment.AwsAccountId
+            source_env_admin = source_env_group.environmentIAMRoleArn
+            dataset_admin = dataset.IAMDatasetAdminRoleArn
+            target_env_admin = target_env_group.environmentIAMRoleName
+            s3_prefix = folder.S3Prefix
+
+            try:
+                S3ShareApproval.manage_bucket_policy(
+                    dataset_admin,
+                    source_account_id,
+                    bucket_name,
+                    source_env_admin,
+                )
+
+                S3ShareApproval.grant_target_role_access_policy(
+                    bucket_name,
+                    access_point_name,
+                    target_account_id,
+                    target_env_admin,
+                    dataset,
+                )
+                S3ShareApproval.manage_access_point_and_policy(
+                    dataset_admin,
+                    source_account_id,
+                    target_account_id,
+                    source_env_admin,
+                    target_env_admin,
+                    bucket_name,
+                    s3_prefix,
+                    access_point_name,
+                )
+
+                S3ShareApproval.update_dataset_bucket_key_policy(
+                    source_account_id,
+                    target_account_id,
+                    target_env_admin,
+                    dataset
+                )
+
+                api.ShareObject.update_share_item_status(
+                    session,
+                    share_item,
+                    models.ShareObjectStatus.Share_Succeeded.value,
+                )
+            except Exception as e:
+                S3ShareApproval.handle_share_failure(folder, share_item, e)
+
+    @classmethod
+    def clean_shared_folders(
+        cls,
+        session,
+        share: models.ShareObject,
+        source_env_group: models.EnvironmentGroup,
+        target_env_group: models.EnvironmentGroup,
+        target_environment: models.Environment,
+        dataset: models.Dataset,
+        shared_folders: [models.DatasetStorageLocation],
+    ):
+        source_account_id = dataset.AwsAccountId
+        access_point_name = f"{dataset.datasetUri}-{share.principalId}".lower()
+        target_account_id = target_environment.AwsAccountId
+        target_env_admin = target_env_group.environmentIAMRoleName
+        access_point_policy = S3.get_access_point_policy(source_account_id,
access_point_name) + if access_point_policy: + policy = json.loads(access_point_policy) + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + statements = {item["Sid"]: item for item in policy["Statement"]} + if f"{target_env_admin_id}0" in statements.keys(): + prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] + if isinstance(prefix_list, str): + prefix_list = [prefix_list] + prefix_list = [prefix[:-2] for prefix in prefix_list] + shared_prefix = [folder.S3Prefix for folder in shared_folders] + removed_prefixes = [prefix for prefix in prefix_list if prefix not in shared_prefix] + for prefix in removed_prefixes: + bucket_name = dataset.S3BucketName + try: + S3ShareApproval.delete_access_point_policy( + source_account_id, + target_account_id, + access_point_name, + target_env_admin, + prefix, + ) + cleanup = S3ShareApproval.delete_access_point(source_account_id, access_point_name) + if cleanup: + S3ShareApproval.delete_target_role_access_policy( + target_account_id, + target_env_admin, + bucket_name, + access_point_name, + dataset, + ) + S3ShareApproval.delete_dataset_bucket_key_policy( + source_account_id, + target_account_id, + target_env_admin, + dataset, + ) + except Exception as e: + log.error( + f'Failed to revoke folder {prefix} ' + f'from source account {dataset.AwsAccountId}//{dataset.region} ' + f'with target account {target_account_id}//{target_environment.region} ' + f'due to: {e}' + ) + location = db.api.DatasetStorageLocation.get_location_by_s3_prefix( + session, + prefix, + dataset.AwsAccountId, + dataset.region, + ) + AlarmService().trigger_revoke_folder_sharing_failure_alarm( + location, share, target_environment + ) + + @staticmethod + def manage_bucket_policy( + dataset_admin: str, + source_account_id: str, + bucket_name: str, + source_env_admin: str, + ): + ''' + This function will manage bucket policy by grant admin access to dataset admin, pivot role + and environment admin. All of the policies will only be added once. 
+        '''
+        bucket_policy = json.loads(S3.get_bucket_policy(source_account_id, bucket_name))
+        for statement in bucket_policy["Statement"]:
+            if statement.get("Sid") in ["AllowAllToAdmin", "DelegateAccessToAccessPoint"]:
+                return
+        exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids(
+            source_account_id,
+            [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)]
+        )]
+        allow_owner_access = {
+            "Sid": "AllowAllToAdmin",
+            "Effect": "Allow",
+            "Principal": "*",
+            "Action": "s3:*",
+            "Resource": [
+                f"arn:aws:s3:::{bucket_name}",
+                f"arn:aws:s3:::{bucket_name}/*"
+            ],
+            "Condition": {
+                "StringLike": {
+                    "aws:userId": exceptions_roleId
+                }
+            }
+        }
+        delegated_to_accesspoint = {
+            "Sid": "DelegateAccessToAccessPoint",
+            "Effect": "Allow",
+            "Principal": "*",
+            "Action": "s3:*",
+            "Resource": [
+                f"arn:aws:s3:::{bucket_name}",
+                f"arn:aws:s3:::{bucket_name}/*"
+            ],
+            "Condition": {
+                "StringEquals": {
+                    "s3:DataAccessPointAccount": f"{source_account_id}"
+                }
+            }
+        }
+        bucket_policy["Statement"].append(allow_owner_access)
+        bucket_policy["Statement"].append(delegated_to_accesspoint)
+        S3.create_bucket_policy(source_account_id, bucket_name, json.dumps(bucket_policy))
+
+    @staticmethod
+    def grant_target_role_access_policy(
+        bucket_name: str,
+        access_point_name: str,
+        target_account_id: str,
+        target_env_admin: str,
+        dataset: models.Dataset,
+    ):
+        existing_policy = IAM.get_role_policy(
+            target_account_id,
+            target_env_admin,
+            "targetDatasetAccessControlPolicy",
+        )
+        if existing_policy:  # type dict
+            if bucket_name not in ",".join(existing_policy["Statement"][0]["Resource"]):
+                target_resources = [
+                    f"arn:aws:s3:::{bucket_name}",
+                    f"arn:aws:s3:::{bucket_name}/*",
+                    f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}",
+                    f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*"
+                ]
+                existing_policy["Statement"][0]["Resource"].extend(target_resources)
+                policy = existing_policy
+            else:
+                return
+        else:
+            policy = {
+                "Version": "2012-10-17",
+                "Statement": [
+                    {
+                        "Effect": "Allow",
+                        "Action": [
+                            "s3:*"
+                        ],
+                        "Resource": [
+                            f"arn:aws:s3:::{bucket_name}",
+                            f"arn:aws:s3:::{bucket_name}/*",
+                            f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}",
+                            f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*"
+                        ]
+                    }
+                ]
+            }
+        IAM.update_role_policy(
+            target_account_id,
+            target_env_admin,
+            "targetDatasetAccessControlPolicy",
+            json.dumps(policy),
+        )
+
+    @staticmethod
+    def manage_access_point_and_policy(
+        dataset_admin: str,
+        source_account_id: str,
+        target_account_id: str,
+        source_env_admin: str,
+        target_env_admin: str,
+        bucket_name: str,
+        s3_prefix: str,
+        access_point_name: str,
+    ):
+        access_point_arn = S3.get_bucket_access_point_arn(source_account_id, access_point_name)
+        if not access_point_arn:
+            access_point_arn = S3.create_bucket_access_point(source_account_id, bucket_name, access_point_name)
+        existing_policy = S3.get_access_point_policy(source_account_id, access_point_name)
+        # requester will use this role to access resources
+        target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin)
+        if existing_policy:
+            # Update existing access point policy
+            existing_policy = json.loads(existing_policy)
+            statements = {item["Sid"]: item for item in existing_policy["Statement"]}
+            if f"{target_env_admin_id}0" in statements.keys():
+                prefix_list =
statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] + if isinstance(prefix_list, str): + prefix_list = [prefix_list] + if f"{s3_prefix}/*" not in prefix_list: + prefix_list.append(f"{s3_prefix}/*") + statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] = prefix_list + resource_list = statements[f"{target_env_admin_id}1"]["Resource"] + if isinstance(resource_list, str): + resource_list = [resource_list] + if f"{access_point_arn}/object/{s3_prefix}/*" not in resource_list: + resource_list.append(f"{access_point_arn}/object/{s3_prefix}/*") + statements[f"{target_env_admin_id}1"]["Resource"] = resource_list + existing_policy["Statement"] = list(statements.values()) + else: + additional_policy = S3.generate_access_point_policy_template( + target_env_admin_id, + access_point_arn, + s3_prefix, + ) + existing_policy["Statement"].extend(additional_policy["Statement"]) + access_point_policy = existing_policy + else: + # First time to create access point policy + access_point_policy = S3.generate_access_point_policy_template( + target_env_admin_id, + access_point_arn, + s3_prefix, + ) + exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( + source_account_id, + [dataset_admin, source_env_admin, SessionHelper.get_delegation_role_arn(source_account_id)] + )] + admin_statement = { + "Sid": "AllowAllToAdmin", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": f"{access_point_arn}", + "Condition": { + "StringLike": { + "aws:userId": exceptions_roleId + } + } + } + access_point_policy["Statement"].append(admin_statement) + S3.attach_access_point_policy(source_account_id, access_point_name, json.dumps(access_point_policy)) + + @staticmethod + def update_dataset_bucket_key_policy( + source_account_id: str, + target_account_id: str, + target_env_admin: str, + dataset: models.Dataset, + ): + key_alias = f"alias/{dataset.KmsAlias}" + kms_keyId = KMS.get_key_id(source_account_id, key_alias) + existing_policy = KMS.get_key_policy(source_account_id, kms_keyId, "default") + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + if existing_policy and f'{target_env_admin_id}:*' not in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"].append( + { + "Sid": f"{target_env_admin_id}", + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": { + "StringLike": { + "aws:userId": f"{target_env_admin_id}:*" + } + } + } + ) + KMS.put_key_policy( + source_account_id, + kms_keyId, + "default", + json.dumps(policy) + ) + + def handle_share_failure( + self, + folder: models.DatasetStorageLocation, + share_item: models.ShareObjectItem, + error: Exception, + ) -> bool: + """ + Handles share failure by raising an alarm to alarmsTopic + Parameters + ---------- + folder : dataset folder + share_item : failed item + error : share error + + Returns + ------- + True if alarm published successfully + """ + logging.error( + f'Failed to share folder {folder.S3Prefix} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region}' + f'due to: {error}' + ) + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Share_Failed.value, + ) + AlarmService().trigger_folder_sharing_failure_alarm( + folder, self.share, self.target_environment + ) + return True + 
diff --git a/backend/dataall/tasks/data_sharing/common/s3_revoke_share.py b/backend/dataall/tasks/data_sharing/common/s3_revoke_share.py
new file mode 100644
index 000000000..b5a28449f
--- /dev/null
+++ b/backend/dataall/tasks/data_sharing/common/s3_revoke_share.py
@@ -0,0 +1,245 @@
+import logging
+import json
+
+
+from ....db import models, api
+from ....aws.handlers.sts import SessionHelper
+from ....aws.handlers.s3 import S3
+from ....aws.handlers.kms import KMS
+from ....aws.handlers.iam import IAM
+
+from ....utils.alarm_service import AlarmService
+
+log = logging.getLogger(__name__)
+
+
+class S3ShareRevoke:
+    def __init__(
+        self,
+        session,
+        dataset: models.Dataset,
+        share: models.ShareObject,
+        rejected_folders: [models.DatasetStorageLocation],
+        source_environment: models.Environment,
+        target_environment: models.Environment,
+        source_env_group: models.EnvironmentGroup,
+        env_group: models.EnvironmentGroup,
+    ):
+        self.session = session
+        self.source_env_group = source_env_group
+        self.env_group = env_group
+        self.dataset = dataset
+        self.share = share
+        self.rejected_folders = rejected_folders
+        self.source_environment = source_environment
+        self.target_environment = target_environment
+
+
+    def revoke_share(
+        self,
+    ) -> bool:
+        """
+        1) Revokes access to each rejected folder:
+        a) ....
+
+        Returns
+        -------
+        True if revoke is successful
+        """
+        self.revoke_shared_folders(
+            self.session,
+            self.share,
+            self.source_env_group,
+            self.env_group,
+            self.target_environment,
+            self.rejected_folders,
+            self.dataset,
+        )
+
+        return True
+
+
+    @classmethod
+    def revoke_shared_folders(
+        cls,
+        session,
+        share: models.ShareObject,
+        source_env_group: models.EnvironmentGroup,
+        target_env_group: models.EnvironmentGroup,
+        target_environment: models.Environment,
+        rejected_folders: [models.DatasetStorageLocation],
+        dataset: models.Dataset,
+    ):
+        for folder in rejected_folders:
+            rejected_item = api.ShareObject.find_share_item_by_folder(
+                session, share, folder
+            )
+
+            api.ShareObject.update_share_item_status(
+                session,
+                rejected_item,
+                models.ShareObjectStatus.Revoke_In_Progress.value
+            )
+
+            source_account_id = folder.AWSAccountId
+            access_point_name = rejected_item.S3AccessPointName
+            bucket_name = folder.S3BucketName
+            target_account_id = target_environment.AwsAccountId
+            target_env_admin = target_env_group.environmentIAMRoleName
+            s3_prefix = folder.S3Prefix
+
+            try:
+                S3ShareRevoke.delete_access_point_policy(
+                    source_account_id,
+                    target_account_id,
+                    access_point_name,
+                    target_env_admin,
+                    s3_prefix,
+                )
+                cleanup = S3ShareRevoke.delete_access_point(source_account_id, access_point_name)
+                if cleanup:
+                    S3ShareRevoke.delete_target_role_access_policy(
+                        target_account_id,
+                        target_env_admin,
+                        bucket_name,
+                        access_point_name,
+                        dataset,
+                    )
+                    S3ShareRevoke.delete_dataset_bucket_key_policy(
+                        source_account_id,
+                        target_account_id,
+                        target_env_admin,
+                        dataset,
+                    )
+                api.ShareObject.update_share_item_status(
+                    session,
+                    rejected_item,
+                    models.ShareObjectStatus.Revoke_Share_Succeeded.value,
+                )
+            except Exception as e:
+                S3ShareRevoke.handle_share_failure(folder, rejected_item, e)
+
+
+
+    @staticmethod
+    def delete_access_point_policy(
+        source_account_id: str,
+        target_account_id: str,
+        access_point_name: str,
+        target_env_admin: str,
+        s3_prefix: str,
+    ):
+        access_point_policy = json.loads(S3.get_access_point_policy(source_account_id, access_point_name))
+        access_point_arn = S3.get_bucket_access_point_arn(source_account_id,
access_point_name) + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + statements = {item["Sid"]: item for item in access_point_policy["Statement"]} + if f"{target_env_admin_id}0" in statements.keys(): + prefix_list = statements[f"{target_env_admin_id}0"]["Condition"]["StringLike"]["s3:prefix"] + if isinstance(prefix_list, list) and f"{s3_prefix}/*" in prefix_list: + prefix_list.remove(f"{s3_prefix}/*") + statements[f"{target_env_admin_id}1"]["Resource"].remove(f"{access_point_arn}/object/{s3_prefix}/*") + access_point_policy["Statement"] = list(statements.values()) + else: + access_point_policy["Statement"].remove(statements[f"{target_env_admin_id}0"]) + access_point_policy["Statement"].remove(statements[f"{target_env_admin_id}1"]) + S3.attach_access_point_policy(source_account_id, access_point_name, json.dumps(access_point_policy)) + + @staticmethod + def delete_access_point(source_account_id: str, access_point_name: str): + access_point_policy = json.loads(S3.get_access_point_policy(source_account_id, access_point_name)) + if len(access_point_policy["Statement"]) <= 1: + # At least we have the 'AllowAllToAdmin' statement + S3.delete_bucket_access_point(source_account_id, access_point_name) + return True + else: + return False + + @staticmethod + def delete_target_role_access_policy( + target_account_id: str, + target_env_admin: str, + bucket_name: str, + access_point_name: str, + dataset: models.Dataset, + ): + existing_policy = IAM.get_role_policy( + target_account_id, + target_env_admin, + "targetDatasetAccessControlPolicy", + ) + if existing_policy: + if bucket_name in ",".join(existing_policy["Statement"][0]["Resource"]): + target_resources = [ + f"arn:aws:s3:::{bucket_name}", + f"arn:aws:s3:::{bucket_name}/*", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" + ] + for item in target_resources: + existing_policy["Statement"][0]["Resource"].remove(item) + if not existing_policy["Statement"][0]["Resource"]: + IAM.delete_role_policy(target_account_id, target_env_admin, "targetDatasetAccessControlPolicy") + else: + IAM.update_role_policy( + target_account_id, + target_env_admin, + "targetDatasetAccessControlPolicy", + json.dumps(existing_policy), + ) + + @staticmethod + def delete_dataset_bucket_key_policy( + source_account_id: str, + target_account_id: str, + target_env_admin: str, + dataset: models.Dataset, + ): + key_alias = f"alias/{dataset.KmsAlias}" + kms_keyId = KMS.get_key_id(source_account_id, key_alias) + existing_policy = KMS.get_key_policy(source_account_id, kms_keyId, "default") + target_env_admin_id = SessionHelper.get_role_id(target_account_id, target_env_admin) + if existing_policy and f'{target_env_admin_id}:*' in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"] = [item for item in policy["Statement"] if item["Sid"] != f"{target_env_admin_id}"] + KMS.put_key_policy( + source_account_id, + kms_keyId, + "default", + json.dumps(policy) + ) + + + def handle_share_failure( + self, + folder: models.DatasetStorageLocation, + share_item: models.ShareObjectItem, + error: Exception, + ) -> bool: + """ + Handles share failure by raising an alarm to alarmsTopic + Parameters + ---------- + folder : dataset folder + share_item : failed item + error : share error + + Returns + ------- + True if alarm published successfully + """ + logging.error( + f'Failed to revoke S3 permissions to folder 
{folder.S3Prefix} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region}' + f'due to: {error}' + ) + api.ShareObject.update_share_item_status( + self.session, + share_item, + models.ShareObjectStatus.Revoke_Share_Failed.value, + ) + AlarmService().trigger_revoke_folder_sharing_failure_alarm( + folder, self.share, self.target_environment + ) + return True diff --git a/backend/dataall/tasks/data_sharing/data_sharing_service.py b/backend/dataall/tasks/data_sharing/data_sharing_service.py index 99399d428..8db80e996 100644 --- a/backend/dataall/tasks/data_sharing/data_sharing_service.py +++ b/backend/dataall/tasks/data_sharing/data_sharing_service.py @@ -1,16 +1,25 @@ import logging import os -from .cross_account.approve_share import ( +from .lf_cross_account.approve_share import ( CrossAccountShareApproval, ) -from .cross_account.revoke_share import ( +from .lf_cross_account.revoke_share import ( CrossAccountShareRevoke, ) -from .same_account.approve_share import ( +from .lf_same_account.approve_share import ( SameAccountShareApproval, ) -from .same_account.revoke_share import SameAccountShareRevoke +from .lf_same_account.revoke_share import ( + SameAccountShareRevoke, +) +from .common.s3_approve_share import ( + S3ShareApproval +) +from .common.s3_revoke_share import ( + S3ShareRevoke +) + from ...aws.handlers.lakeformation import LakeFormation from ...aws.handlers.ram import Ram from ...aws.handlers.sts import SessionHelper @@ -42,10 +51,12 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: """ with engine.scoped_session() as session: ( + source_env_group, env_group, dataset, share, shared_tables, + shared_folders, source_environment, target_environment, ) = api.ShareObject.get_share_data(session, share_uri, ['Approved']) @@ -81,6 +92,17 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: env_group, ).approve_share() + S3ShareApproval( + session, + dataset, + share, + shared_folders, + source_environment, + target_environment, + source_env_group, + env_group, + ).approve_share() + @classmethod def reject_share(cls, engine: Engine, share_uri: str): """ @@ -101,10 +123,12 @@ def reject_share(cls, engine: Engine, share_uri: str): with engine.scoped_session() as session: ( + source_env_group, env_group, dataset, share, shared_tables, + shared_folders, source_environment, target_environment, ) = api.ShareObject.get_share_data(session, share_uri, ['Rejected']) @@ -142,6 +166,17 @@ def reject_share(cls, engine: Engine, share_uri: str): env_group, ).revoke_share() + S3ShareRevoke( + session, + dataset, + share, + shared_folders, + source_environment, + target_environment, + source_env_group, + env_group, + ).revoke_share() + @classmethod def build_shared_db_name( cls, dataset: models.Dataset, share: models.ShareObject diff --git a/backend/dataall/tasks/data_sharing/cross_account/__init__.py b/backend/dataall/tasks/data_sharing/lf_cross_account/__init__.py similarity index 100% rename from backend/dataall/tasks/data_sharing/cross_account/__init__.py rename to backend/dataall/tasks/data_sharing/lf_cross_account/__init__.py diff --git a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py b/backend/dataall/tasks/data_sharing/lf_cross_account/approve_share.py similarity index 98% rename from backend/dataall/tasks/data_sharing/cross_account/approve_share.py rename to 
backend/dataall/tasks/data_sharing/lf_cross_account/approve_share.py index 6dd715efc..b35db7ddd 100644 --- a/backend/dataall/tasks/data_sharing/cross_account/approve_share.py +++ b/backend/dataall/tasks/data_sharing/lf_cross_account/approve_share.py @@ -3,7 +3,7 @@ from botocore.exceptions import ClientError -from ..common.share_approval import ShareApproval +from ..common.lf_share_approval import LFShareApproval from ....aws.handlers.lakeformation import LakeFormation from ....aws.handlers.ram import Ram from ....aws.handlers.sts import SessionHelper @@ -12,7 +12,7 @@ log = logging.getLogger(__name__) -class CrossAccountShareApproval(ShareApproval): +class CrossAccountShareApproval(LFShareApproval): def __init__( self, session, diff --git a/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py b/backend/dataall/tasks/data_sharing/lf_cross_account/revoke_share.py similarity index 97% rename from backend/dataall/tasks/data_sharing/cross_account/revoke_share.py rename to backend/dataall/tasks/data_sharing/lf_cross_account/revoke_share.py index 54624266e..c60126dd7 100644 --- a/backend/dataall/tasks/data_sharing/cross_account/revoke_share.py +++ b/backend/dataall/tasks/data_sharing/lf_cross_account/revoke_share.py @@ -1,7 +1,7 @@ import logging import uuid -from ..common.share_revoke import ShareRevoke +from ..common.lf_share_revoke import LFShareRevoke from ....aws.handlers.lakeformation import LakeFormation from ....aws.handlers.ram import Ram from ....aws.handlers.sts import SessionHelper @@ -10,7 +10,7 @@ log = logging.getLogger(__name__) -class CrossAccountShareRevoke(ShareRevoke): +class CrossAccountShareRevoke(LFShareRevoke): def __init__( self, session, diff --git a/backend/dataall/tasks/data_sharing/same_account/__init__.py b/backend/dataall/tasks/data_sharing/lf_same_account/__init__.py similarity index 100% rename from backend/dataall/tasks/data_sharing/same_account/__init__.py rename to backend/dataall/tasks/data_sharing/lf_same_account/__init__.py diff --git a/backend/dataall/tasks/data_sharing/same_account/approve_share.py b/backend/dataall/tasks/data_sharing/lf_same_account/approve_share.py similarity index 96% rename from backend/dataall/tasks/data_sharing/same_account/approve_share.py rename to backend/dataall/tasks/data_sharing/lf_same_account/approve_share.py index 04e6179b4..c7f283592 100644 --- a/backend/dataall/tasks/data_sharing/same_account/approve_share.py +++ b/backend/dataall/tasks/data_sharing/lf_same_account/approve_share.py @@ -1,12 +1,12 @@ import logging -from ..common.share_approval import ShareApproval +from ..common.lf_share_approval import LFShareApproval from ....db import models, api log = logging.getLogger(__name__) -class SameAccountShareApproval(ShareApproval): +class SameAccountShareApproval(LFShareApproval): def __init__( self, session, diff --git a/backend/dataall/tasks/data_sharing/same_account/revoke_share.py b/backend/dataall/tasks/data_sharing/lf_same_account/revoke_share.py similarity index 91% rename from backend/dataall/tasks/data_sharing/same_account/revoke_share.py rename to backend/dataall/tasks/data_sharing/lf_same_account/revoke_share.py index c35eef32c..984c160e7 100644 --- a/backend/dataall/tasks/data_sharing/same_account/revoke_share.py +++ b/backend/dataall/tasks/data_sharing/lf_same_account/revoke_share.py @@ -1,12 +1,12 @@ import logging -from ..common.share_revoke import ShareRevoke +from ..common.lf_share_revoke import LFShareRevoke from ....db import models log = logging.getLogger(__name__) -class 
SameAccountShareRevoke(ShareRevoke): +class SameAccountShareRevoke(LFShareRevoke): def __init__( self, session, From a6efa671ccfa8b5c0ab7acbfc94e8015f6f8e860 Mon Sep 17 00:00:00 2001 From: Yuri Liang Date: Fri, 28 Oct 2022 11:16:15 +0800 Subject: [PATCH 28/28] fix an invoke error in s3_approve_share --- backend/dataall/tasks/data_sharing/common/s3_approve_share.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/dataall/tasks/data_sharing/common/s3_approve_share.py b/backend/dataall/tasks/data_sharing/common/s3_approve_share.py index 2a67f5046..30b97a981 100644 --- a/backend/dataall/tasks/data_sharing/common/s3_approve_share.py +++ b/backend/dataall/tasks/data_sharing/common/s3_approve_share.py @@ -34,7 +34,6 @@ def __init__( self.source_environment = source_environment self.target_environment = target_environment - def approve_share( self, ) -> bool: @@ -200,7 +199,7 @@ def clean_shared_folders( f'with target account {target_account_id}//{target_environment.region} ' f'due to: {e}' ) - location = db.api.DatasetStorageLocation.get_location_by_s3_prefix( + location = api.DatasetStorageLocation.get_location_by_s3_prefix( session, prefix, dataset.AwsAccountId, @@ -450,4 +449,3 @@ def handle_share_failure( folder, self.share, self.target_environment ) return True -
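Taken together, the series leaves DataSharingService driving two engines per share: Lake
Formation for tables and S3 access points for folders. A condensed sketch of the approve
path as wired in data_sharing_service.py; the import paths and keyword names are inferred
from the renames and the constructor signatures above, and the same/cross-account branch
condition is an assumption based on the class names, so treat this as a reading aid rather
than the exact service code:

    # assumed imports, mirroring the module layout after this series
    from dataall.db import api
    from dataall.tasks.data_sharing.common.s3_approve_share import S3ShareApproval
    from dataall.tasks.data_sharing.lf_cross_account.approve_share import CrossAccountShareApproval
    from dataall.tasks.data_sharing.lf_same_account.approve_share import SameAccountShareApproval


    def approve_share_sketch(session, share_uri, shared_db_name):
        # get_share_data now returns the eight-element tuple introduced in patch 27
        (
            source_env_group,
            env_group,
            dataset,
            share,
            shared_tables,
            shared_folders,
            source_environment,
            target_environment,
        ) = api.ShareObject.get_share_data(session, share_uri, ['Approved'])

        # Lake Formation approval for tables: same- vs cross-account presumably
        # chosen by comparing the two AWS account ids
        lf_approval_class = (
            SameAccountShareApproval
            if source_environment.AwsAccountId == target_environment.AwsAccountId
            else CrossAccountShareApproval
        )
        lf_approval_class(
            session=session,
            shared_db_name=shared_db_name,
            env_group=env_group,
            dataset=dataset,
            share=share,
            shared_tables=shared_tables,
            source_environment=source_environment,
            target_environment=target_environment,
        ).approve_share()

        # S3 approval for folders, via access points on the source bucket
        return S3ShareApproval(
            session,
            dataset,
            share,
            shared_folders,
            source_environment,
            target_environment,
            source_env_group,
            env_group,
        ).approve_share()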