From 6d3c0161e6e54958df4a859cea67413e5cde464f Mon Sep 17 00:00:00 2001 From: dlpzx Date: Fri, 27 Oct 2023 08:59:00 +0200 Subject: [PATCH 01/21] Merge branch 'main' into v2m1m0 # Conflicts: # deploy/stacks/backend_stack.py # deploy/stacks/backend_stage.py # deploy/stacks/lambda_api.py # deploy/stacks/pipeline.py # template_cdk.json --- CONTRIBUTING.md | 2 +- backend/api_handler.py | 53 +++++- .../dataall/base/cdkproxy/requirements.txt | 2 +- deploy/configs/frontend_config.py | 7 +- deploy/stacks/backend_stack.py | 4 + deploy/stacks/backend_stage.py | 2 + deploy/stacks/lambda_api.py | 4 +- deploy/stacks/param_store_stack.py | 10 +- deploy/stacks/pipeline.py | 2 + frontend/package.json | 4 + frontend/src/App.js | 7 +- .../contexts/CognitoAuthContext.js | 37 +++- frontend/src/index.js | 12 +- .../components/ReAuthModal.js | 82 +++++++++ .../src/reauthentication/components/index.js | 1 + .../contexts/RequestContext.js | 169 ++++++++++++++++++ .../src/reauthentication/contexts/index.js | 1 + frontend/src/reauthentication/index.js | 2 + frontend/src/routes.js | 2 + frontend/src/services/hooks/useClient.js | 43 +++-- template_cdk.json | 4 + tests/requirements.txt | 2 +- 22 files changed, 418 insertions(+), 34 deletions(-) create mode 100644 frontend/src/reauthentication/components/ReAuthModal.js create mode 100644 frontend/src/reauthentication/components/index.js create mode 100644 frontend/src/reauthentication/contexts/RequestContext.js create mode 100644 frontend/src/reauthentication/contexts/index.js create mode 100644 frontend/src/reauthentication/index.js diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 45a929495..f62378a36 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -68,6 +68,6 @@ If you discover a potential security issue in this project we ask that you notif ## Licensing -See the [LICENSE](https://github.com/awslabs/aws-dataall/blob/master/LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. +See the [LICENSE](./LICENCE.txt) file for our project's licensing. We will ask you to confirm the licensing of your contribution. We may ask you to sign a [Contributor License Agreement (CLA)](http://en.wikipedia.org/wiki/Contributor_License_Agreement) for larger changes. 
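The file summary above shows this patch threading a re-authentication (ReAuth) flow through `backend/api_handler.py` and the frontend: GraphQL operations listed in an SSM parameter are rejected with a `REAUTH` error unless the caller's Cognito `auth_time` claim falls inside a configurable TTL. As a rough, self-contained sketch of that freshness check — the claim value below is made up for illustration, and in the patch the TTL comes from the `REAUTH_TTL` environment variable:

```python
import datetime

REAUTH_TTL = 5  # minutes; the handler in this patch reads this from the REAUTH_TTL environment variable


def requires_reauth(claims: dict, ttl_minutes: int = REAUTH_TTL) -> bool:
    """Return True when the user's last interactive login is older than the allowed TTL."""
    auth_time = datetime.datetime.fromtimestamp(int(claims['auth_time']), tz=datetime.timezone.utc)
    now = datetime.datetime.now(datetime.timezone.utc)
    return auth_time + datetime.timedelta(minutes=ttl_minutes) < now


# A token issued 10 minutes ago fails a 5-minute ReAuth window,
# which is the case where the API handler answers 401 with extensions.code = "REAUTH".
stale_login = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta(minutes=10)
print(requires_reauth({'auth_time': int(stale_login.timestamp())}))  # True
```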
diff --git a/backend/api_handler.py b/backend/api_handler.py index 1e9168971..ca872e3d3 100644 --- a/backend/api_handler.py +++ b/backend/api_handler.py @@ -1,6 +1,7 @@ import json import logging import os +import datetime from argparse import Namespace from time import perf_counter @@ -12,6 +13,7 @@ from dataall.base.api import bootstrap as bootstrap_schema, get_executable_schema from dataall.core.tasks.service_handlers import Worker from dataall.base.aws.sqs import SqsQueue +from dataall.base.aws.parameter_store import ParameterStoreManager from dataall.base.context import set_context, dispose_context, RequestContext from dataall.core.permissions.db import save_permissions_with_tenant from dataall.core.permissions.db.tenant_policy_repositories import TenantPolicy @@ -30,6 +32,7 @@ load_modules(modes={ImportMode.API}) SCHEMA = bootstrap_schema() TYPE_DEFS = gql(SCHEMA.gql(with_directives=False)) +REAUTH_TTL = int(os.environ.get('REAUTH_TTL', '5')) ENVNAME = os.getenv('envname', 'local') ENGINE = get_engine(envname=ENVNAME) Worker.queue = SqsQueue.send @@ -114,9 +117,15 @@ def handler(event, context): } if 'authorizer' in event['requestContext']: - username = event['requestContext']['authorizer']['claims']['email'] + if 'claims' not in event['requestContext']['authorizer']: + claims = event['requestContext']['authorizer'] + else: + claims = event['requestContext']['authorizer']['claims'] + username = claims['email'] + log.debug('username is %s', username) try: - groups = get_groups(event['requestContext']['authorizer']['claims']) + groups = get_groups(claims) + log.debug('groups are %s', ",".join(groups)) with ENGINE.scoped_session() as session: for group in groups: policy = TenantPolicy.find_tenant_policy( @@ -146,10 +155,50 @@ def handler(event, context): 'schema': SCHEMA } + # Determine if there are any Operations that Require ReAuth From SSM Parameter + try: + reauth_apis = ParameterStoreManager.get_parameter_value(region=os.getenv('AWS_REGION', 'eu-west-1'), parameter_path=f"/dataall/{ENVNAME}/reauth/apis").split(',') + except Exception as e: + log.info("No ReAuth APIs Found in SSM") + reauth_apis = None else: raise Exception(f'Could not initialize user context from event {event}') query = json.loads(event.get('body')) + + # If The Operation is a ReAuth Operation - Ensure A Non-Expired Session or Return Error + if reauth_apis and query.get('operationName', None) in reauth_apis: + now = datetime.datetime.now(datetime.timezone.utc) + try: + auth_time_datetime = datetime.datetime.fromtimestamp(int(claims["auth_time"]), tz=datetime.timezone.utc) + if auth_time_datetime + datetime.timedelta(minutes=REAUTH_TTL) < now: + raise Exception("ReAuth") + except Exception as e: + log.info(f'ReAuth Required for User {username} on Operation {query.get("operationName", "")}, Error: {e}') + response = { + "data": {query.get('operationName', 'operation') : None}, + "errors": [ + { + "message": f"ReAuth Required To Perform This Action {query.get('operationName', '')}", + "locations": None, + "path": [query.get('operationName', '')], + "extensions": { + "code": "REAUTH" + } + } + ] + } + return { + 'statusCode': 401, + 'headers': { + 'content-type': 'application/json', + 'Access-Control-Allow-Origin': '*', + 'Access-Control-Allow-Headers': '*', + 'Access-Control-Allow-Methods': '*', + }, + 'body': json.dumps(response) + } + success, response = graphql_sync( schema=executable_schema, data=query, context_value=app_context ) diff --git a/backend/dataall/base/cdkproxy/requirements.txt 
b/backend/dataall/base/cdkproxy/requirements.txt index 974962a27..74eb975e2 100644 --- a/backend/dataall/base/cdkproxy/requirements.txt +++ b/backend/dataall/base/cdkproxy/requirements.txt @@ -12,7 +12,7 @@ requests==2.31.0 tabulate==0.8.9 uvicorn==0.15.0 jinja2==3.1.2 -werkzeug==2.3.3 +werkzeug==3.0.1 constructs>=10.0.0,<11.0.0 git-remote-codecommit==1.16 aws-ddk==0.5.1 diff --git a/deploy/configs/frontend_config.py b/deploy/configs/frontend_config.py index a32002099..80b5882d2 100644 --- a/deploy/configs/frontend_config.py +++ b/deploy/configs/frontend_config.py @@ -10,7 +10,8 @@ def create_react_env_file( resource_prefix, internet_facing='True', custom_domain='False', - cw_rum_enabled='False' + cw_rum_enabled='False', + reauth_ttl='5' ): ssm = boto3.client('ssm', region_name=region) user_pool_id = ssm.get_parameter(Name=f'/dataall/{envname}/cognito/userpool')[ @@ -69,6 +70,7 @@ def create_react_env_file( REACT_APP_COGNITO_REDIRECT_SIGNOUT=https://{signin_singout_link} REACT_APP_USERGUIDE_LINK=https://{user_guide_link} REACT_APP_ENABLE_PIVOT_ROLE_AUTO_CREATE={pivot_role_auto_create} +REACT_APP_REAUTH_TTL={reauth_ttl} """ print('.env content: \n', file_content) f.write(file_content) @@ -125,6 +127,7 @@ def create_react_env_file( custom_domain = os.environ.get('custom_domain', 'False') region = os.environ.get('deployment_region', 'eu-west-1') enable_cw_rum = os.environ.get('enable_cw_rum', 'False') + reauth_ttl = os.environ.get('reauth_ttl', '5') print( f'Creating React .env file with params: ' f'(region={region},envname={envname},resource_prefix={resource_prefix}' @@ -132,6 +135,6 @@ def create_react_env_file( f'cw_rum_enabled={enable_cw_rum})' ) create_react_env_file( - region, envname, resource_prefix, internet_facing, custom_domain, enable_cw_rum + region, envname, resource_prefix, internet_facing, custom_domain, enable_cw_rum, reauth_ttl ) print(f'React .env created successfully') diff --git a/deploy/stacks/backend_stack.py b/deploy/stacks/backend_stack.py index c1508872f..76346326a 100644 --- a/deploy/stacks/backend_stack.py +++ b/deploy/stacks/backend_stack.py @@ -54,6 +54,7 @@ def __init__( enable_opensearch_serverless=False, codeartifact_domain_name=None, codeartifact_pip_repo_name=None, + reauth_config=None, cognito_user_session_timeout_inmins=43200, **kwargs, ): @@ -91,6 +92,7 @@ def __init__( shared_dashboard_sessions=shared_dashboard_sessions, enable_pivot_role_auto_create=enable_pivot_role_auto_create, pivot_role_name=self.pivot_role_name, + reauth_apis=reauth_config.get("reauth_apis", None) if reauth_config else None, **kwargs, ) if enable_cw_canaries: @@ -118,6 +120,7 @@ def __init__( internet_facing=internet_facing, tooling_account_id=tooling_account_id, enable_cw_rum=enable_cw_rum, + vpc=vpc, cognito_user_session_timeout_inmins=cognito_user_session_timeout_inmins, **kwargs, ) @@ -158,6 +161,7 @@ def __init__( prod_sizing=prod_sizing, user_pool=cognito_stack.user_pool, pivot_role_name=self.pivot_role_name, + reauth_ttl=reauth_config.get("ttl", 5) if reauth_config else 5, email_notification_sender_email_id=email_sender, email_custom_domain = ses_stack.ses_identity.email_identity_name if ses_stack != None else None, ses_configuration_set = ses_stack.configuration_set.configuration_set_name if ses_stack != None else None, diff --git a/deploy/stacks/backend_stage.py b/deploy/stacks/backend_stage.py index 9e8144f97..240f4bf54 100644 --- a/deploy/stacks/backend_stage.py +++ b/deploy/stacks/backend_stage.py @@ -32,6 +32,7 @@ def __init__( enable_pivot_role_auto_create=False, 
codeartifact_domain_name=None, codeartifact_pip_repo_name=None, + reauth_config=None, cognito_user_session_timeout_inmins=43200, **kwargs, ): @@ -62,6 +63,7 @@ def __init__( enable_pivot_role_auto_create=enable_pivot_role_auto_create, codeartifact_domain_name=codeartifact_domain_name, codeartifact_pip_repo_name=codeartifact_pip_repo_name, + reauth_config=reauth_config, cognito_user_session_timeout_inmins=cognito_user_session_timeout_inmins, **kwargs, ) diff --git a/deploy/stacks/lambda_api.py b/deploy/stacks/lambda_api.py index 433cf88f9..b93ab350b 100644 --- a/deploy/stacks/lambda_api.py +++ b/deploy/stacks/lambda_api.py @@ -47,6 +47,7 @@ def __init__( prod_sizing=False, user_pool=None, pivot_role_name=None, + reauth_ttl=5, email_notification_sender_email_id=None, email_custom_domain=None, ses_configuration_set=None, @@ -97,7 +98,7 @@ def __init__( security_groups=[api_handler_sg], memory_size=3008 if prod_sizing else 1024, timeout=Duration.minutes(15), - environment={'envname': envname, 'LOG_LEVEL': 'INFO'}, + environment={'envname': envname, 'LOG_LEVEL': 'INFO', 'REAUTH_TTL': str(reauth_ttl)}, dead_letter_queue_enabled=True, dead_letter_queue=self.api_handler_dlq, on_failure=lambda_destination.SqsDestination(self.api_handler_dlq), @@ -544,6 +545,7 @@ def set_up_graphql_api_gateway( request_validator=request_validator, request_models={'application/json': graphql_validation_model}, ) + search_integration = apigw.LambdaIntegration(elasticsearch_proxy_handler) search = gw.root.add_resource(path_part='search') search_validation_model = apigw.Model( diff --git a/deploy/stacks/param_store_stack.py b/deploy/stacks/param_store_stack.py index b2991495b..7be190538 100644 --- a/deploy/stacks/param_store_stack.py +++ b/deploy/stacks/param_store_stack.py @@ -23,6 +23,7 @@ def __init__( shared_dashboard_sessions='anonymous', enable_pivot_role_auto_create=False, pivot_role_name='dataallPivotRole', + reauth_apis=None, **kwargs, ): super().__init__(scope, id, **kwargs) @@ -80,6 +81,13 @@ def __init__( parameter_name=f'/dataall/{envname}/quicksightmonitoring/DashboardId', string_value='updateme', ) + if reauth_apis: + aws_ssm.StringParameter( + self, + f'ReAuthAPIs{envname}', + parameter_name=f'/dataall/{envname}/reauth/apis', + string_value=','.join(reauth_apis), + ) aws_ssm.StringParameter( self, @@ -148,4 +156,4 @@ def _get_external_id_value(envname, account_id, region): def _generate_external_id(): allowed_chars = string.ascii_uppercase + string.ascii_lowercase + string.digits - return ''.join(random.choice(allowed_chars) for i in range(32)) \ No newline at end of file + return ''.join(random.choice(allowed_chars) for i in range(32)) diff --git a/deploy/stacks/pipeline.py b/deploy/stacks/pipeline.py index b1e34700f..462680ea5 100644 --- a/deploy/stacks/pipeline.py +++ b/deploy/stacks/pipeline.py @@ -635,6 +635,7 @@ def set_backend_stage(self, target_env, repository_name): enable_pivot_role_auto_create=target_env.get('enable_pivot_role_auto_create', False), codeartifact_domain_name=self.codeartifact.codeartifact_domain_name, codeartifact_pip_repo_name=self.codeartifact.codeartifact_pip_repo_name, + reauth_config = target_env.get('reauth_config', None), cognito_user_session_timeout_inmins=target_env.get('cognito_user_session_timeout_inmins', 43200) ) ) @@ -732,6 +733,7 @@ def set_cloudfront_stage(self, target_env): f'export deployment_region={target_env.get("region", self.region)}', f'export enable_cw_rum={target_env.get("enable_cw_rum", False)}', f'export resource_prefix={self.resource_prefix}', + 
f'export reauth_ttl={str(target_env.get("reauth_config", {}).get("ttl", 5))}', 'mkdir ~/.aws/ && touch ~/.aws/config', 'echo "[profile buildprofile]" > ~/.aws/config', f'echo "role_arn = arn:aws:iam::{target_env["account"]}:role/{self.resource_prefix}-{target_env["envname"]}-S3DeploymentRole" >> ~/.aws/config', diff --git a/frontend/package.json b/frontend/package.json index 0e98c8426..d233f5b2e 100644 --- a/frontend/package.json +++ b/frontend/package.json @@ -102,6 +102,10 @@ "authentication", "./src/authentication" ], + [ + "reauthentication", + "./src/reauthentication" + ], [ "design", "./src/design" diff --git a/frontend/src/App.js b/frontend/src/App.js index c8483fef9..c1d61a569 100644 --- a/frontend/src/App.js +++ b/frontend/src/App.js @@ -1,5 +1,4 @@ import { ThemeProvider } from '@mui/material'; -import { SnackbarProvider } from 'notistack'; import { useRoutes } from 'react-router-dom'; import { useAuth } from './authentication'; import { @@ -26,10 +25,8 @@ export const App = () => { return ( - - - {auth.isInitialized ? content : } - + + {auth.isInitialized ? content : } ); }; diff --git a/frontend/src/authentication/contexts/CognitoAuthContext.js b/frontend/src/authentication/contexts/CognitoAuthContext.js index 55097a405..5ad3f7cf6 100644 --- a/frontend/src/authentication/contexts/CognitoAuthContext.js +++ b/frontend/src/authentication/contexts/CognitoAuthContext.js @@ -26,7 +26,9 @@ Auth.configure({ const initialState = { isAuthenticated: false, isInitialized: false, - user: null + user: null, + reAuthStatus: false, + requestInfo: null }; const handlers = { @@ -37,7 +39,8 @@ const handlers = { ...state, isAuthenticated, isInitialized: true, - user + user, + reAuthStatus: false }; }, LOGIN: (state, action) => { @@ -53,7 +56,16 @@ const handlers = { ...state, isAuthenticated: false, user: null - }) + }), + REAUTH: (state, action) => { + const { reAuthStatus, requestInfo } = action.payload; + + return { + ...state, + reAuthStatus, + requestInfo + }; + } }; const reducer = (state, action) => @@ -63,7 +75,8 @@ export const CognitoAuthContext = createContext({ ...initialState, platform: 'Amplify', login: () => Promise.resolve(), - logout: () => Promise.resolve() + logout: () => Promise.resolve(), + reauth: () => Promise.resolve() }); export const CognitoAuthProvider = (props) => { @@ -118,6 +131,19 @@ export const CognitoAuthProvider = (props) => { }); }; + const reauth = async () => { + await Auth.signOut(); + dispatch({ + type: 'REAUTH', + payload: { + reAuthStatus: false, + requestInfo: null + } + }).catch((e) => { + console.error('Failed to reauth user', e); + }); + }; + const logout = async () => { await Auth.signOut(); dispatch({ @@ -132,7 +158,8 @@ export const CognitoAuthProvider = (props) => { dispatch, platform: 'Amplify', login, - logout + logout, + reauth }} > {children} diff --git a/frontend/src/index.js b/frontend/src/index.js index d62a2a212..31d61211f 100644 --- a/frontend/src/index.js +++ b/frontend/src/index.js @@ -13,6 +13,8 @@ import { SettingsProvider } from './design'; import { store } from './globalErrors'; import { reportWebVitals } from './reportWebVitals'; import * as serviceWorker from './serviceWorker'; +import { SnackbarProvider } from 'notistack'; +import { RequestContextProvider } from './reauthentication'; ReactDOM.render( @@ -22,9 +24,13 @@ ReactDOM.render( - - - + + + + + + + diff --git a/frontend/src/reauthentication/components/ReAuthModal.js b/frontend/src/reauthentication/components/ReAuthModal.js new file mode 100644 index 000000000..60352054c 
--- /dev/null +++ b/frontend/src/reauthentication/components/ReAuthModal.js @@ -0,0 +1,82 @@ +import { Box, CardContent, Dialog, Typography, Button } from '@mui/material'; +import { useAuth } from 'authentication'; +import { useRequestContext } from 'reauthentication'; +import { useLocation } from 'react-router-dom'; +import React, { useEffect } from 'react'; + +export const ReAuthModal = () => { + const { user, reAuthStatus, requestInfo, reauth, dispatch } = useAuth(); + const { storeRequestInfo, clearRequestInfo } = useRequestContext(); + const location = useLocation(); + + const continueSession = async () => { + clearRequestInfo(); + dispatch({ + type: 'REAUTH', + payload: { + reAuthStatus: false, + requestInfo: null + } + }); + }; + + useEffect(() => { + if (reAuthStatus && requestInfo) { + const timestamp = new Date(); + const pathname = location.pathname; + const username = user.name; + storeRequestInfo({ requestInfo, timestamp, pathname, username }); + } + }, [reAuthStatus, requestInfo]); + + return ( + + + + ReAuth Credentials + + + + + + In order to perform this action you are required to log in again + to the data.all UI. Click the below button to be redirected to log + back in before proceeding further or Click away to continue with + other data.all operations. + + + + + + + + + + ); +}; diff --git a/frontend/src/reauthentication/components/index.js b/frontend/src/reauthentication/components/index.js new file mode 100644 index 000000000..a0107d756 --- /dev/null +++ b/frontend/src/reauthentication/components/index.js @@ -0,0 +1 @@ +export * from './ReAuthModal'; diff --git a/frontend/src/reauthentication/contexts/RequestContext.js b/frontend/src/reauthentication/contexts/RequestContext.js new file mode 100644 index 000000000..5aedd1bfd --- /dev/null +++ b/frontend/src/reauthentication/contexts/RequestContext.js @@ -0,0 +1,169 @@ +import React, { createContext, useContext, useEffect, useState } from 'react'; +import PropTypes from 'prop-types'; +import { useClient } from 'services'; +import { gql } from '@apollo/client'; +import { print } from 'graphql/language'; +import { useNavigate } from 'react-router'; +import { useSnackbar } from 'notistack'; +import { Auth } from 'aws-amplify'; + +// Create a context for API request headers +const RequestContext = createContext(); + +// Create a custom hook to access the context +export const useRequestContext = () => { + return useContext(RequestContext); +}; + +const REQUEST_INFO_KEY = 'requestInfo'; +const REAUTH_TTL = process.env.REACT_APP_REAUTH_TTL + ? 
parseInt(process.env.REACT_APP_REAUTH_TTL, 10) + : 5; + +export const storeRequestInfoStorage = (requestInfo) => { + window.localStorage.setItem(REQUEST_INFO_KEY, JSON.stringify(requestInfo)); +}; + +export const restoreRetryRequest = () => { + try { + const storedRequestInfo = window.localStorage.getItem(REQUEST_INFO_KEY); + if (storedRequestInfo != null) { + return JSON.parse(storedRequestInfo); + } + return null; + } catch (err) { + console.error(err); + return null; + } +}; + +export const retrieveCurrentUsername = async () => { + try { + const user = await Auth.currentAuthenticatedUser(); + return user.attributes.email; + } catch (err) { + console.error(err); + return null; + } +}; + +export const RequestContextProvider = (props) => { + const { children } = props; + const [requestInfo, setRequestInfo] = useState(null); + const navigate = useNavigate(); + const client = useClient(); + const { enqueueSnackbar } = useSnackbar(); + const storeRequestInfo = (info) => { + setRequestInfo(info); + storeRequestInfoStorage(info); + }; + + const clearRequestInfo = () => { + setRequestInfo(null); + window.localStorage.removeItem('requestInfo'); + }; + + useEffect(() => { + if (client) { + const restoredRequestInfo = restoreRetryRequest(); + // If request info is restored from previous user session + if (restoredRequestInfo && restoredRequestInfo.timestamp) { + const currentTime = new Date(); + const reauthTime = new Date( + restoredRequestInfo.timestamp.replace(/\s/g, '') + ); + // If the user waited too long to reAuth - clear storage + // Else retry the ReAuth API Request + if (currentTime - reauthTime <= REAUTH_TTL * 60 * 1000) { + retryRequest(restoredRequestInfo) + .then((r) => { + if (r && !r.errors) { + enqueueSnackbar( + `ReAuth Retry Operation Successful ${restoredRequestInfo.requestInfo.operationName}`, + { + anchorOrigin: { + horizontal: 'right', + vertical: 'top' + }, + variant: 'success' + } + ); + if ( + restoredRequestInfo.requestInfo.query.definitions[0] + .operation === 'query' + ) { + navigate(restoredRequestInfo.pathname); + } + } else if (r) { + enqueueSnackbar( + `ReAuth Retry Operation Failed ${restoredRequestInfo.requestInfo.operationName} with error ${r.errors[0].message}`, + { + anchorOrigin: { + horizontal: 'right', + vertical: 'top' + }, + variant: 'error' + } + ); + } + }) + .finally(() => clearRequestInfo()); + } else { + enqueueSnackbar( + `ReAuth Retry Operation Failed ${restoredRequestInfo.requestInfo.operationName} - waited over ${REAUTH_TTL} minutes`, + { + anchorOrigin: { + horizontal: 'right', + vertical: 'top' + }, + variant: 'error' + } + ); + clearRequestInfo(); + } + } + } + }, [client]); + + const retryRequest = async (restoredInfo) => { + const gqlTemplateLiteral = gql(print(restoredInfo.requestInfo.query)); + const username = await retrieveCurrentUsername(); + console.error(username); + if (username !== restoredInfo.username) { + return null; + } else if ( + restoredInfo.requestInfo.query.definitions[0].operation === 'query' + ) { + const response = await client.query({ + query: gqlTemplateLiteral, + variables: restoredInfo.requestInfo.variables + }); + return response; + } else if ( + restoredInfo.requestInfo.query.definitions[0].operation === 'mutation' + ) { + const response = await client.mutate({ + mutation: gqlTemplateLiteral, + variables: restoredInfo.requestInfo.variables + }); + return response; + } + }; + + return ( + + {children} + + ); +}; + +RequestContextProvider.propTypes = { + children: PropTypes.node.isRequired +}; diff --git 
a/frontend/src/reauthentication/contexts/index.js b/frontend/src/reauthentication/contexts/index.js new file mode 100644 index 000000000..008c8e723 --- /dev/null +++ b/frontend/src/reauthentication/contexts/index.js @@ -0,0 +1 @@ +export * from './RequestContext'; diff --git a/frontend/src/reauthentication/index.js b/frontend/src/reauthentication/index.js new file mode 100644 index 000000000..b09270bb0 --- /dev/null +++ b/frontend/src/reauthentication/index.js @@ -0,0 +1,2 @@ +export * from './components'; +export * from './contexts'; diff --git a/frontend/src/routes.js b/frontend/src/routes.js index 31ec55c10..22f9050b6 100644 --- a/frontend/src/routes.js +++ b/frontend/src/routes.js @@ -1,5 +1,6 @@ import { lazy, Suspense } from 'react'; import { AuthGuard, GuestGuard } from 'authentication'; +import { ReAuthModal } from 'reauthentication'; import { DefaultLayout, LoadingScreen } from 'design'; import { ModuleNames, isModuleEnabled } from 'utils'; @@ -174,6 +175,7 @@ const routes = [ element: ( + ), children: [ diff --git a/frontend/src/services/hooks/useClient.js b/frontend/src/services/hooks/useClient.js index a1a9e2f01..df68a6dec 100644 --- a/frontend/src/services/hooks/useClient.js +++ b/frontend/src/services/hooks/useClient.js @@ -7,7 +7,7 @@ import { InMemoryCache } from 'apollo-boost'; import { useEffect, useState } from 'react'; -import { useToken } from 'authentication'; +import { useToken, useAuth } from 'authentication'; import { SET_ERROR, useDispatch } from 'globalErrors'; const defaultOptions = { @@ -29,6 +29,17 @@ export const useClient = () => { const dispatch = useDispatch(); const [client, setClient] = useState(null); const token = useToken(); + const auth = useAuth(); + + const setReAuth = async (requestInfo) => { + auth.dispatch({ + type: 'REAUTH', + payload: { + reAuthStatus: true, + requestInfo: requestInfo + } + }); + }; useEffect(() => { const initClient = async () => { @@ -36,6 +47,7 @@ export const useClient = () => { const httpLink = new HttpLink({ uri: process.env.REACT_APP_GRAPHQL_API }); + const authLink = new ApolloLink((operation, forward) => { operation.setContext({ headers: { @@ -49,20 +61,27 @@ export const useClient = () => { }); return forward(operation); }); - const errorLink = onError(({ graphQLErrors, networkError }) => { - if (graphQLErrors) { - graphQLErrors.forEach(({ message, locations, path }) => { - console.error( - `[GraphQL error]: Message: ${message}, Location: ${locations}, Path: ${path}` + const errorLink = onError( + ({ graphQLErrors, networkError, operation, forward }) => { + if (graphQLErrors) { + graphQLErrors.forEach( + ({ message, locations, path, extensions }) => { + console.error( + `[GraphQL error]: Message: ${message}, Location: ${locations}, Path: ${path}` + ); + if (extensions?.code === 'REAUTH') { + setReAuth(operation); + } + } ); - }); - } + } - if (networkError) { - console.error(`[Network error]: ${networkError}`); - dispatch({ type: SET_ERROR, error: 'Network error occurred' }); + if (networkError) { + console.error(`[Network error]: ${networkError}`); + dispatch({ type: SET_ERROR, error: 'Network error occurred' }); + } } - }); + ); const apolloClient = new ApolloClient({ link: from([errorLink, authLink, httpLink]), diff --git a/template_cdk.json b/template_cdk.json index a77c5fc91..c9fac19bb 100644 --- a/template_cdk.json +++ b/template_cdk.json @@ -39,6 +39,10 @@ "enable_opensearch_serverless": "boolean_USE_OPENSEARCH_SERVERLESS|DEFAULT=false", "enable_pivot_role_auto_create": 
"boolean_ENABLE_PIVOT_ROLE_AUTO_CREATE_IN_ENVIRONMENT|DEFAULT=false", "enable_update_dataall_stacks_in_cicd_pipeline": "boolean_ENABLE_UPDATE_DATAALL_STACKS_IN_CICD_PIPELINE|DEFAULT=false", + "reauth_config": { + "reauth_apis": "list_of_strings_OPERATION_NAMES_TO_REQUIRE_REAUTH_ON|DEFAULT=None", + "ttl": "int_TIME_IN_MINUTES_TO_ALLOW_USER_TO_PERFORM_SENSITIVE_APIS_BEFORE_FORCING_REAUTH|DEFAULT=5" + }, "cognito_user_session_timeout_inmins": "integer_COGNITO_USER_SESSION_TIMEOUT_INMINS|DEFAULT=43200" } ] diff --git a/tests/requirements.txt b/tests/requirements.txt index 681f68094..852f411a2 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -3,5 +3,5 @@ pytest==7.3.1 pytest-cov==3.0.0 pytest-mock==3.6.1 pytest-dependency==0.5.1 -werkzeug==2.2.3 +werkzeug==3.0.1 deprecated==1.2.13 \ No newline at end of file From 7912a24b0c4ba7fa8ddc850978866226bc7f37a3 Mon Sep 17 00:00:00 2001 From: dlpzx <71252798+dlpzx@users.noreply.github.com> Date: Fri, 27 Oct 2023 09:28:43 +0200 Subject: [PATCH 02/21] Feat: pivot role limit kms (#830) ### Feature or Bugfix - Feature ### Detail - read KMS keys with an alias prefixed by the environment resource prefix - read KMS keys imported in imported datasets - restrict pivot role policies to the KMS keys created by data.all and those imported in the imported datasets - move kms client from data_sharing to base as it is used in environments and datasets ### Relates - #580 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). This PR restricts the IAM policies of the pivot role, following the least privilege permissions principle - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../dataall/base/utils/iam_policy_utils.py | 136 +++++++++++------- .../cdk/pivot_role_core_policies/kms.py | 32 +++-- .../share_managers/s3_share_manager.py | 4 +- .../cdk/env_role_dataset_s3_policy.py | 2 +- .../cdk/pivot_role_datasets_policy.py | 49 +++++-- .../datasets/services/dataset_service.py | 6 +- documentation/userguide/docs/datasets.md | 36 +++++ 7 files changed, 191 insertions(+), 74 deletions(-) diff --git a/backend/dataall/base/utils/iam_policy_utils.py b/backend/dataall/base/utils/iam_policy_utils.py index 5a3b8bd9f..4b0876d6a 100644 --- a/backend/dataall/base/utils/iam_policy_utils.py +++ b/backend/dataall/base/utils/iam_policy_utils.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Callable import logging from aws_cdk import aws_iam as iam @@ -43,61 +43,101 @@ def split_policy_statements_in_chunks(statements: List): return chunks -def split_policy_with_resources_in_statements(base_sid, effect, actions, resources): +def split_policy_with_resources_in_statements(base_sid: str, effect: iam.Effect, actions: List[str], resources: List[str]): """ - Splitter used for IAM policy statements with an undefined number of resources. - - Ensures that the size of the IAM statement is below the POLICY LIMIT - - If it exceeds the POLICY LIMIT, it breaks the statement in multiple statements with a subset of resources - - Note the POLICY_HEADERS_BUFFER to account for the headers of the policy which usually take around ~60chars + The variable part of the policy is in the resources parameter of the PolicyStatement """ - statement_without_resources = iam.PolicyStatement( - sid=base_sid, - effect=effect, - actions=actions, - resources=["*"] - ) - resources_str = '" ," '.join(r for r in resources) - number_resources = len(resources) - max_length = len(max(resources, key=len)) - base_length = len(str(statement_without_resources.to_json())) - total_length = base_length + len(resources_str) - logger.info(f"Policy base length = {base_length}") - logger.info(f"Number of resources = {number_resources}, resource maximum length = {max_length}") - logger.info(f"Resources as string length = {len(resources_str)}") - logger.info(f"Total length approximated as base length + resources string length = {total_length}") + def _build_statement(split, subset): + return iam.PolicyStatement( + sid=base_sid + str(split), + effect=effect, + actions=actions, + resources=subset + ) + + total_length, base_length = _policy_analyzer(resources, _build_statement) + extra_chars = len('" ," ') if total_length < POLICY_LIMIT - POLICY_HEADERS_BUFFER: logger.info("Not exceeding policy limit, returning statement ...") - resulting_statement = iam.PolicyStatement( - sid=base_sid, + resulting_statement = _build_statement(1, resources) + return [resulting_statement] + else: + logger.info("Exceeding policy limit, splitting statement ...") + resulting_statements = _policy_splitter(base_length=base_length, resources=resources, extra_chars=extra_chars, statement_builder=_build_statement) + return resulting_statements + + +def split_policy_with_mutiple_value_condition_in_statements(base_sid: str, effect: iam.Effect, actions: List[str], resources: List[str], condition_dict: dict): + """ + The variable part of the policy is in the conditions parameter of the PolicyStatement + conditions_dict passes the different components of the condition mapping + """ + def _build_statement(split, subset): + return iam.PolicyStatement( + sid=base_sid + str(split), effect=effect, actions=actions, - resources=resources + 
resources=resources, + conditions={ + condition_dict.get('key'): { + condition_dict.get('resource'): subset + } + } ) + + total_length, base_length = _policy_analyzer(condition_dict.get('values'), _build_statement) + extra_chars = len(str(f'"Condition": {{ "{condition_dict.get("key")}": {{"{condition_dict.get("resource")}": }} }}')) + + if total_length < POLICY_LIMIT - POLICY_HEADERS_BUFFER: + logger.info("Not exceeding policy limit, returning statement ...") + resulting_statement = _build_statement(1, condition_dict.get("values")) return [resulting_statement] else: - logger.info("Exceeding policy limit, splitting statement ...") - index = 0 - split = 0 - resulting_statements = [] - while index < len(resources): - # Iterating until all resources are defined in a policy statement. - # "index" represents the position of the resource in the resources list - size = 0 - res = [] - while index < len(resources) and (size + len(resources[index]) + 5) < POLICY_LIMIT - POLICY_HEADERS_BUFFER - base_length: - # Appending a resource to the "res" list until we reach the maximum size for the resources section - # It compares: current size of resources versus the allowed size of the resource section in a statement - res.append(resources[index]) - size += (len(resources[index]) + 5) # +5 for the 4 extra characters (", ") around each resource, plus additional ones [] - index += 1 - resulting_statement = iam.PolicyStatement( - sid=base_sid + str(split), - effect=effect, - actions=actions, - resources=res - ) - split += 1 - resulting_statements.append(resulting_statement) - logger.info(f"Statement divided into {split+1} smaller statements") + logger.info("Exceeding policy limit, splitting values ...") + resulting_statements = _policy_splitter(base_length=base_length, resources=condition_dict.get("values"), extra_chars=extra_chars, statement_builder=_build_statement) + + return resulting_statements + + +def _policy_analyzer(resources: List[str], statement_builder: Callable[[int, List[str]], iam.PolicyStatement]): + """ + Calculates the policy size with the resources (total_length) and without resources (base_length) + """ + statement_without_resources = statement_builder(1, ["*"]) + resources_str = '" ," '.join(r for r in resources) + base_length = len(str(statement_without_resources.to_json())) + total_length = base_length + len(resources_str) + logger.info(f"Policy base length = {base_length}") + logger.info(f"Resources as string length = {len(resources_str)}") + logger.info(f"Total length approximated as base length + resources string length = {total_length}") + + return total_length, base_length + + +def _policy_splitter(base_length: int, resources: List[str], extra_chars: int, statement_builder: Callable[[int, List[str]], iam.PolicyStatement]): + """ + Splitter used for IAM policy statements with an undefined number of resources one of the parameters of the policy. + - Ensures that the size of the IAM statement is below the POLICY LIMIT + - If it exceeds the POLICY LIMIT, it breaks the statement in multiple statements with a subset of resources + - Note the POLICY_HEADERS_BUFFER to account for the headers of the policy which usually take around ~60chars + """ + index = 0 + split = 0 + resulting_statements = [] + while index < len(resources): + # Iterating until all values are defined in a policy statement. 
+ # "index" represents the position of the value in the values list + size = 0 + subset = [] + while index < len(resources) and (size + len(resources[index]) + extra_chars) < POLICY_LIMIT - POLICY_HEADERS_BUFFER - base_length: + # Appending a resource to the subset list until we reach the maximum size for the condition section + # It compares: current size of subset versus the allowed size of the condition section in a statement + subset.append(resources[index]) + size += (len(resources[index]) + extra_chars) + index += 1 + resulting_statement = statement_builder(split=split, subset=subset) + split += 1 + resulting_statements.append(resulting_statement) + logger.info(f"Statement divided into {split+1} smaller statements") return resulting_statements diff --git a/backend/dataall/core/environment/cdk/pivot_role_core_policies/kms.py b/backend/dataall/core/environment/cdk/pivot_role_core_policies/kms.py index 4d47b6afc..f38bb2ab0 100644 --- a/backend/dataall/core/environment/cdk/pivot_role_core_policies/kms.py +++ b/backend/dataall/core/environment/cdk/pivot_role_core_policies/kms.py @@ -6,32 +6,40 @@ class KMSPivotRole(PivotRoleStatementSet): """ Class including all permissions needed by the pivot role to work with AWS KMS. It allows pivot role to: + list and Describe KMS keys + manage data.all alias KMS keys - .... """ def get_statements(self): statements = [ iam.PolicyStatement( - sid='KMS', + sid='KMSList', + effect=iam.Effect.ALLOW, + actions=[ + 'kms:List*', + 'kms:DescribeKey', + ], + resources=['*'], + ), + iam.PolicyStatement( + sid='KMSDataAllAlias', effect=iam.Effect.ALLOW, actions=[ 'kms:Decrypt', 'kms:Encrypt', 'kms:GenerateDataKey*', + 'kms:GetKeyPolicy', 'kms:PutKeyPolicy', 'kms:ReEncrypt*', 'kms:TagResource', 'kms:UntagResource', ], - resources=['*'], - ), - iam.PolicyStatement( - sid='KMSList', - effect=iam.Effect.ALLOW, - actions=[ - 'kms:List*', - 'kms:DescribeKey', - ], - resources=['*'], - ), + resources=[f"arn:aws:kms:{self.region}:{self.account}:key/*"], + conditions={ + 'ForAnyValue:StringLike': { + 'kms:ResourceAliases': [f"alias/{self.env_resource_prefix}*"] + } + }, + ) ] return statements diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py index 5de4c01f3..644dbe360 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py @@ -281,7 +281,7 @@ def update_dataset_bucket_key_policy(self): 'Updating dataset Bucket KMS key policy...' ) key_alias = f"alias/{self.dataset.KmsAlias}" - kms_client = KmsClient(self.source_account_id, self.source_environment.region) + kms_client = KmsClient(account_id=self.source_account_id, region=self.source_environment.region) kms_key_id = kms_client.get_key_id(key_alias) existing_policy = kms_client.get_key_policy(kms_key_id) target_requester_id = SessionHelper.get_role_id(self.target_account_id, self.target_requester_IAMRoleName) @@ -392,7 +392,7 @@ def delete_dataset_bucket_key_policy( 'Deleting dataset bucket KMS key policy...' 
) key_alias = f"alias/{dataset.KmsAlias}" - kms_client = KmsClient(dataset.AwsAccountId, dataset.region) + kms_client = KmsClient(account_id=dataset.AwsAccountId, region=dataset.region) kms_key_id = kms_client.get_key_id(key_alias) existing_policy = kms_client.get_key_policy(kms_key_id) target_requester_id = SessionHelper.get_role_id(target_environment.AwsAccountId, share.principalIAMRoleName) diff --git a/backend/dataall/modules/datasets/cdk/env_role_dataset_s3_policy.py b/backend/dataall/modules/datasets/cdk/env_role_dataset_s3_policy.py index 03e3d7232..b59843cd1 100644 --- a/backend/dataall/modules/datasets/cdk/env_role_dataset_s3_policy.py +++ b/backend/dataall/modules/datasets/cdk/env_role_dataset_s3_policy.py @@ -75,7 +75,7 @@ def _set_allowed_kms_keys_statements(datasets): dataset: Dataset for dataset in datasets: if dataset.imported and dataset.importedKmsKey: - key_id = KmsClient(dataset.AwsAccountId, dataset.region).get_key_id( + key_id = KmsClient(account_id=dataset.AwsAccountId, region=dataset.region).get_key_id( key_alias=f"alias/{dataset.KmsAlias}" ) if key_id: diff --git a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py index 8ca401506..46c34ea58 100644 --- a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py +++ b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py @@ -1,6 +1,6 @@ import os from dataall.base import db -from dataall.base.utils.iam_policy_utils import split_policy_with_resources_in_statements +from dataall.base.utils.iam_policy_utils import split_policy_with_resources_in_statements, split_policy_with_mutiple_value_condition_in_statements from dataall.core.environment.cdk.pivot_role_stack import PivotRoleStatementSet from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository from dataall.modules.datasets_base.db.dataset_models import Dataset @@ -10,8 +10,13 @@ class DatasetsPivotRole(PivotRoleStatementSet): """ Class including all permissions needed by the pivot role to work with Datasets based in S3 and Glue databases - It allows pivot role to: - - .... 
+ It allows pivot role access to: + - Athena workgroups for the environment teams + - All Glue catalog resources (governed by Lake Formation) + - Lake Formation + - Glue ETL for environment resources + - Imported Datasets' buckets + - Imported KMS keys alias """ def get_statements(self): statements = [ @@ -126,7 +131,11 @@ def get_statements(self): } ) ] - allowed_buckets = [] + # Adding permissions for Imported Dataset S3 Buckets, created bucket permissions are added in core S3 permissions + # Adding permissions for Imported KMS keys + imported_buckets = [] + imported_kms_alias = [] + engine = db.get_engine(envname=os.environ.get('envname', 'local')) with engine.scoped_session() as session: datasets = DatasetRepository.query_environment_imported_datasets( @@ -135,10 +144,11 @@ def get_statements(self): if datasets: dataset: Dataset for dataset in datasets: - allowed_buckets.append(f'arn:aws:s3:::{dataset.S3BucketName}') + imported_buckets.append(f'arn:aws:s3:::{dataset.S3BucketName}') + if dataset.importedKmsKey: + imported_kms_alias.append(f'alias/{dataset.KmsAlias}') - if allowed_buckets: - # Imported Dataset S3 Buckets, created bucket permissions are added in core S3 permissions + if imported_buckets: dataset_statement = split_policy_with_resources_in_statements( base_sid='ImportedDatasetBuckets', effect=iam.Effect.ALLOW, @@ -152,7 +162,30 @@ def get_statements(self): 's3:PutObjectAcl', 's3:PutBucketOwnershipControls', ], - resources=allowed_buckets + resources=imported_buckets ) statements.extend(dataset_statement) + if imported_kms_alias: + kms_statement = split_policy_with_mutiple_value_condition_in_statements( + base_sid='KMSImportedDataset', + effect=iam.Effect.ALLOW, + actions=[ + 'kms:Decrypt', + 'kms:Encrypt', + 'kms:GenerateDataKey*', + 'kms:GetKeyPolicy', + 'kms:PutKeyPolicy', + 'kms:ReEncrypt*', + 'kms:TagResource', + 'kms:UntagResource', + ], + resources=[f"arn:aws:kms:{self.region}:{self.account}:key/*"], + condition_dict={ + "key": 'ForAnyValue:StringLike', + "resource": 'kms:ResourceAliases', + "values": imported_kms_alias + }, + ) + statements.extend(kms_statement) + return statements diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index be6a4ed47..707e9fb91 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -5,6 +5,7 @@ from dataall.base.db import exceptions from dataall.core.tasks.service_handlers import Worker from dataall.base.aws.sts import SessionHelper +from dataall.modules.dataset_sharing.aws.kms_client import KmsClient from dataall.base.context import get_context from dataall.core.environment.env_permission_checker import has_group_permission from dataall.core.environment.services.environment_service import EnvironmentService @@ -17,7 +18,6 @@ from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.modules.vote.db.vote_repositories import VoteRepository from dataall.base.db.exceptions import AWSResourceNotFound, UnauthorizedOperation -from dataall.modules.dataset_sharing.aws.kms_client import KmsClient from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_APPROVER @@ -54,7 +54,7 @@ def check_dataset_account(session, environment): def 
check_imported_resources(environment, data): kms_alias = data.get('KmsKeyAlias') if kms_alias not in [None, "Undefined", "", "SSE-S3"]: - key_exists = KmsClient(environment.AwsAccountId, environment.region).check_key_exists( + key_exists = KmsClient(account_id=environment.AwsAccountId, region=environment.region).check_key_exists( key_alias=f"alias/{kms_alias}" ) if not key_exists: @@ -63,7 +63,7 @@ def check_imported_resources(environment, data): message=f'KMS key with alias={kms_alias} cannot be found - Please check if KMS Key Alias exists in account {environment.AwsAccountId}', ) - key_id = KmsClient(environment.AwsAccountId, environment.region).get_key_id( + key_id = KmsClient(account_id=environment.AwsAccountId, region=environment.region).get_key_id( key_alias=f"alias/{kms_alias}" ) if not key_id: diff --git a/documentation/userguide/docs/datasets.md b/documentation/userguide/docs/datasets.md index 09605d3f3..8a6016a40 100644 --- a/documentation/userguide/docs/datasets.md +++ b/documentation/userguide/docs/datasets.md @@ -101,6 +101,42 @@ the fields of a newly created dataset you have to specify the S3 bucket and opti is left empty, data.all will create a Glue database pointing at the S3 Bucket. As for the KMS key Alias, data.all assumes that if nothing is specified the S3 Bucket is encrypted with SSE-S3 encryption. +!!! danger "Imported KMS key and S3 Bucket policies requirements" + Data.all pivot role will handle data sharing on the imported Bucket and KMS key (if imported). Make sure that + the resource policies allow the pivot role to manage them. For the KMS key policy, explicit permissions are needed. See an example below. + + +### KMS key policy +In the KMS key policy we need to grant explicit permission to the pivot role. Note that this block is needed even if +permissions for the principal `"AWS": "arn:aws:iam::111122223333:root"` are given. + +``` +{ + "Sid": "Enable Pivot Role Permissions", + "Effect": "Allow", + "Principal": { + "AWS": "arn:aws:iam::111122223333:role/dataallPivotRole-cdk" + }, + "Action": [ + "kms:Decrypt", + "kms:Encrypt", + "kms:GenerateDataKey*", + "kms:PutKeyPolicy", + "kms:GetKeyPolicy", + "kms:ReEncrypt*", + "kms:TagResource", + "kms:UntagResource" + ], + "Resource": "*" +} + +``` + + + + + + | Field | Description | Required | Editable |Example |------------------------|-------------------------------------------------------------------------------------------------|----------|----------|------------- | Amazon S3 bucket name | Name of the S3 bucket you want to import | Yes | No |DOC-EXAMPLE-BUCKET From 55c579b81ba29aed6b0fdcd35dbc6da508960983 Mon Sep 17 00:00:00 2001 From: Daniel Lorch <98748454+lorchda@users.noreply.github.com> Date: Fri, 27 Oct 2023 10:27:59 +0200 Subject: [PATCH 03/21] Make hosted_zone_id optional, code update (#812) ### Feature or Bugfix - Bugfix ### Detail - Make `hosted_zone_id` optional, code update ### Relates - #797 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? N/A - Is the input sanitized? N/A - What precautions are you taking before deserializing the data you consume? N/A - Is injection prevented by parametrizing queries? N/A - Have you ensured no `eval` or similar functions are used? N/A - Does this PR introduce any functionality or component that requires authorization? 
N/A - How have you ensured it respects the existing AuthN/AuthZ mechanisms? N/A - Are you logging failed auth attempts? N/A - Are you using or adding any cryptographic features? N/A - Do you use a standard proven implementations? N/A - Are the used keys controlled by the customer? Where are they stored? N/A - Are you introducing any new policies/roles/users? N/A - Have you used the least-privilege principle? How? N/A By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. YES ### Description Make `hosted_zone_id` optional and provide `HostedZoneId` and `DNSName` in CloudFormation Stack Output, so users can create their own [Route53 AliasTarget](https://docs.aws.amazon.com/Route53/latest/APIReference/API_AliasTarget.html). Following validation checks in `ecs_patterns.ApplicationLoadBalancedFargateService` were considered: * `frontend_alternate_domain` and `userguide_alternate_domain` have to be `None` when the `hosted_zone` is `None`, see checks in [multiple-target-groups-service-base.ts#L463](https://github.com/aws/aws-cdk/blob/c445b8cc6e20d17e4a536f17262646b291a0fe36/packages/aws-cdk-lib/aws-ecs-patterns/lib/base/network-multiple-target-groups-service-base.ts#L463), or else a `A Route53 hosted domain zone name is required to configure the specified domain name` error is raised * for a HTTPS ALB listener, only the `certificate` is ultimately required, and not the `domainName` or `domainZone`, as per evaluation logic in [application-load-balanced-service-base.ts#L509](https://github.com/aws/aws-cdk/blob/c445b8cc6e20d17e4a536f17262646b291a0fe36/packages/aws-cdk-lib/aws-ecs-patterns/lib/base/application-load-balanced-service-base.ts#L509) --- deploy/stacks/albfront_stack.py | 55 +++++++++++++++++++++++++++------ 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/deploy/stacks/albfront_stack.py b/deploy/stacks/albfront_stack.py index 3bc20f574..52e2b28c9 100644 --- a/deploy/stacks/albfront_stack.py +++ b/deploy/stacks/albfront_stack.py @@ -10,6 +10,7 @@ aws_elasticloadbalancing as elb, aws_s3 as s3, Stack, + CfnOutput, Duration, RemovalPolicy, Fn, @@ -131,20 +132,24 @@ def __init__( logs_bucket.grant_put(iam.ServicePrincipal('delivery.logs.amazonaws.com')) logs_bucket.grant_read(iam.ServicePrincipal('delivery.logs.amazonaws.com')) - frontend_alternate_domain = custom_domain['hosted_zone_name'] - userguide_alternate_domain = 'userguide.' + custom_domain['hosted_zone_name'] - - hosted_zone = route53.HostedZone.from_hosted_zone_attributes( - self, - 'CustomDomainHostedZone', - hosted_zone_id=custom_domain['hosted_zone_id'], - zone_name=custom_domain['hosted_zone_name'], - ) + if custom_domain and custom_domain.get('hosted_zone_id'): + hosted_zone = route53.HostedZone.from_hosted_zone_attributes( + self, + 'CustomDomainHostedZone', + hosted_zone_id=custom_domain['hosted_zone_id'], + zone_name=custom_domain['hosted_zone_name'], + ) + frontend_alternate_domain = custom_domain['hosted_zone_name'] + userguide_alternate_domain = 'userguide.' 
+ custom_domain['hosted_zone_name'] + else: + hosted_zone = None + frontend_alternate_domain = None + userguide_alternate_domain = None if custom_domain and custom_domain.get('certificate_arn'): certificate = acm.Certificate.from_certificate_arn(self, "CustomDomainCertificate", custom_domain.get('certificate_arn')) - else: + elif custom_domain and custom_domain.get('hosted_zone_name'): certificate = acm.Certificate( self, 'CustomDomainCertificate', @@ -152,6 +157,8 @@ def __init__( subject_alternative_names=[f'*.{custom_domain["hosted_zone_name"]}'], validation=acm.CertificateValidation.from_dns(hosted_zone=hosted_zone), ) + else: + raise ValueError("Configuration parameter custom_domain['hosted_zone_name'] in cdk.json is REQUIRED when internet_facing=false") frontend_sg = ec2.SecurityGroup( self, @@ -273,6 +280,34 @@ def __init__( ) self.allow_alb_access(userguide_alb, ip_ranges, vpc) + CfnOutput( + self, + f'FrontEndService{envname}Arn', + export_name=f'frontend-{envname}-arn', + value=frontend_alb.load_balancer.load_balancer_arn, + ) + + CfnOutput( + self, + f'FrontEndService{envname}HostedZoneId', + export_name=f'frontend-{envname}-hostedzoneid', + value=frontend_alb.load_balancer.load_balancer_canonical_hosted_zone_id, + ) + + CfnOutput( + self, + f'UserGuideService{envname}Arn', + export_name=f'userguide-{envname}-arn', + value=userguide_alb.load_balancer.load_balancer_arn, + ) + + CfnOutput( + self, + f'UserGuideService{envname}HostedZoneId', + export_name=f'userguide-{envname}-hostedzoneid', + value=userguide_alb.load_balancer.load_balancer_canonical_hosted_zone_id, + ) + def create_log_group(self, envname, resource_prefix, log_group_name): log_group = logs.LogGroup( self, From 92d4324097568af1b35a66da6c0853013e4cc212 Mon Sep 17 00:00:00 2001 From: dlpzx <71252798+dlpzx@users.noreply.github.com> Date: Mon, 30 Oct 2023 17:40:29 +0100 Subject: [PATCH 04/21] Clean-up for v2.1 (#843) ### Feature or Bugfix - Bugfix ### Detail - Clean up prints and show better exception message when custom_domain is not provided for SES ### Relates - v2.1.0 ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use a standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
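Picking up the Route53 AliasTarget note from the previous patch: when the hosted zone is left out of the stack, an operator can point their own DNS record at the load balancer attributes exported above. A hedged boto3 sketch of that step — every identifier below is a placeholder, not a value taken from the patch:

```python
import boto3

# Placeholder values: substitute the ALB attributes exported by the stack
# (canonical hosted zone id and DNS name) and your own public hosted zone.
ALB_HOSTED_ZONE_ID = "Z32O12XQLNTSW2"                                   # hypothetical ALB canonical hosted zone id
ALB_DNS_NAME = "internal-frontend-123456.eu-west-1.elb.amazonaws.com"   # hypothetical ALB DNS name
PUBLIC_HOSTED_ZONE_ID = "Z0123456789ABCDEFGH"                           # hypothetical Route53 hosted zone
RECORD_NAME = "dataall.example.com"                                     # hypothetical custom domain

route53 = boto3.client("route53")
route53.change_resource_record_sets(
    HostedZoneId=PUBLIC_HOSTED_ZONE_ID,
    ChangeBatch={
        "Comment": "Alias the data.all frontend domain to the ALB",
        "Changes": [
            {
                "Action": "UPSERT",
                "ResourceRecordSet": {
                    "Name": RECORD_NAME,
                    "Type": "A",
                    "AliasTarget": {
                        "HostedZoneId": ALB_HOSTED_ZONE_ID,
                        "DNSName": ALB_DNS_NAME,
                        "EvaluateTargetHealth": False,
                    },
                },
            }
        ],
    },
)
```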
--- backend/api_handler.py | 2 +- backend/dataall/base/cdkproxy/cdk_cli_wrapper.py | 6 +----- deploy/stacks/backend_stack.py | 2 +- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/backend/api_handler.py b/backend/api_handler.py index ca872e3d3..5840dfac7 100644 --- a/backend/api_handler.py +++ b/backend/api_handler.py @@ -152,7 +152,7 @@ def handler(event, context): 'engine': ENGINE, 'username': username, 'groups': groups, - 'schema': SCHEMA + 'schema': SCHEMA, } # Determine if there are any Operations that Require ReAuth From SSM Parameter diff --git a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py index 5c1e8d730..e12454ba1 100644 --- a/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py +++ b/backend/dataall/base/cdkproxy/cdk_cli_wrapper.py @@ -46,17 +46,13 @@ def aws_configure(profile_name='default'): print(' Running configure ') print('..............................................') AWS_CONTAINER_CREDENTIALS_RELATIVE_URI = os.getenv('AWS_CONTAINER_CREDENTIALS_RELATIVE_URI') - print(f"AWS_CONTAINER_CREDENTIALS_RELATIVE_URI: {AWS_CONTAINER_CREDENTIALS_RELATIVE_URI}") cmd = ['curl', f'169.254.170.2{AWS_CONTAINER_CREDENTIALS_RELATIVE_URI}'] process = subprocess.run(cmd, text=True, shell=False, encoding='utf-8', capture_output=True) creds = None if process.returncode == 0: creds = ast.literal_eval(process.stdout) - print(f"Successfully curled credentials: {str(process.stdout)}, credentials = {creds}") else: - print( - f'Failed clean curl credentials due to {str(process.stderr)}' - ) + logger.error(f'Failed clean curl credentials due to {str(process.stderr)}') return creds diff --git a/deploy/stacks/backend_stack.py b/deploy/stacks/backend_stack.py index 76346326a..f2777ba03 100644 --- a/deploy/stacks/backend_stack.py +++ b/deploy/stacks/backend_stack.py @@ -368,7 +368,7 @@ def __init__( @run_if(["modules.datasets.features.share_notifications.email.active"]) def create_ses_stack(self, custom_domain, envname, kwargs, resource_prefix): - if None in [custom_domain, custom_domain.get('hosted_zone_name'), custom_domain.get('hosted_zone_id')]: + if custom_domain is None or None in [custom_domain.get('hosted_zone_name', None), custom_domain.get('hosted_zone_id', None)]: raise Exception("Cannot Create SES Stack For email notification as Custom Domain is not present or is missing hosted_zone_id or name. 
Either Disable Email Notification Config or add Custom Domain") return SesStack( From 5fb7cf86022473e16b2fadcc0bd891026d928d54 Mon Sep 17 00:00:00 2001 From: Anushka Singh Date: Mon, 30 Oct 2023 21:09:11 -0400 Subject: [PATCH 05/21] feat: Enabling S3 bucket share --- .../modules/dataset_sharing/__init__.py | 6 +- .../modules/dataset_sharing/api/enums.py | 1 + .../modules/dataset_sharing/api/resolvers.py | 1 + .../modules/dataset_sharing/api/types.py | 1 + .../dataset_sharing/aws/glue_client.py | 40 + .../modules/dataset_sharing/aws/kms_client.py | 26 +- .../modules/dataset_sharing/aws/s3_client.py | 44 + .../modules/dataset_sharing/db/enums.py | 1 + .../db/share_object_repositories.py | 53 +- .../services/data_sharing_service.py | 67 +- .../services/dataset_alarm_service.py | 45 +- .../services/share_managers/__init__.py | 3 +- .../share_managers/lf_share_manager.py | 54 +- ...er.py => s3_access_point_share_manager.py} | 136 +- .../share_managers/s3_bucket_share_manager.py | 443 +++++ .../share_managers/share_manager_utils.py | 64 + .../lf_process_cross_account_share.py | 36 +- .../lf_process_same_account_share.py | 10 + ...re.py => s3_access_point_process_share.py} | 44 +- .../s3_bucket_process_share.py | 171 ++ .../datasets/api/dataset/input_types.py | 4 +- .../cdk/pivot_role_datasets_policy.py | 17 + .../db/dataset_bucket_repositories.py | 41 + .../datasets/services/dataset_service.py | 19 +- .../tasks/dataset_subscription_task.py | 10 +- .../datasets_base/db/dataset_models.py | 20 + .../8c79fb896983_add_table_for_buckets.py | 206 +++ .../Shares/components/AddShareItemModal.js | 3 +- .../components/RevokeShareItemsModal.js | 3 +- frontend/src/utils/share.js | 10 + tests/modules/datasets/conftest.py | 5 +- tests/modules/datasets/tasks/conftest.py | 58 +- .../datasets/tasks/test_lf_share_manager.py | 2 +- ... 
=> test_s3_access_point_share_manager.py} | 134 +- .../tasks/test_s3_bucket_share_manager.py | 1614 +++++++++++++++++ tests/modules/datasets/test_share.py | 4 + 36 files changed, 3199 insertions(+), 197 deletions(-) rename backend/dataall/modules/dataset_sharing/services/share_managers/{s3_share_manager.py => s3_access_point_share_manager.py} (79%) create mode 100644 backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py create mode 100644 backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py rename backend/dataall/modules/dataset_sharing/services/share_processors/{s3_process_share.py => s3_access_point_process_share.py} (84%) create mode 100644 backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py create mode 100644 backend/dataall/modules/datasets/db/dataset_bucket_repositories.py create mode 100644 backend/migrations/versions/8c79fb896983_add_table_for_buckets.py create mode 100644 frontend/src/utils/share.js rename tests/modules/datasets/tasks/{test_s3_share_manager.py => test_s3_access_point_share_manager.py} (90%) create mode 100644 tests/modules/datasets/tasks/test_s3_bucket_share_manager.py diff --git a/backend/dataall/modules/dataset_sharing/__init__.py b/backend/dataall/modules/dataset_sharing/__init__.py index 99dd6c01e..d98a13dbc 100644 --- a/backend/dataall/modules/dataset_sharing/__init__.py +++ b/backend/dataall/modules/dataset_sharing/__init__.py @@ -17,8 +17,7 @@ def is_supported(modes: Set[ImportMode]) -> bool: @staticmethod def depends_on() -> List[Type['ModuleInterface']]: - from dataall.modules.notifications import NotificationsModuleInterface - return [DatasetBaseModuleInterface, NotificationsModuleInterface] + return [DatasetBaseModuleInterface] def __init__(self): from dataall.modules.dataset_sharing import api @@ -36,8 +35,7 @@ def is_supported(modes: List[ImportMode]): @staticmethod def depends_on() -> List[Type['ModuleInterface']]: - from dataall.modules.notifications import NotificationsModuleInterface - return [DatasetBaseModuleInterface, NotificationsModuleInterface] + return [DatasetBaseModuleInterface] def __init__(self): import dataall.modules.dataset_sharing.handlers diff --git a/backend/dataall/modules/dataset_sharing/api/enums.py b/backend/dataall/modules/dataset_sharing/api/enums.py index 37aa5022a..9fb593f18 100644 --- a/backend/dataall/modules/dataset_sharing/api/enums.py +++ b/backend/dataall/modules/dataset_sharing/api/enums.py @@ -5,6 +5,7 @@ class ShareableType(GraphQLEnumMapper): Table = 'DatasetTable' StorageLocation = 'DatasetStorageLocation' View = 'View' + S3Bucket = 'S3Bucket' class ShareObjectPermission(GraphQLEnumMapper): diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py index ecb567ed9..d0ae3a568 100644 --- a/backend/dataall/modules/dataset_sharing/api/resolvers.py +++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py @@ -191,6 +191,7 @@ def resolve_consumption_data(context: Context, source: ShareObject, **kwargs): return { 's3AccessPointName': S3AccessPointName, 'sharedGlueDatabase': (ds.GlueDatabaseName + '_shared_' + source.shareUri)[:254] if ds else 'Not created', + 's3bucketName': ds.S3BucketName, } diff --git a/backend/dataall/modules/dataset_sharing/api/types.py b/backend/dataall/modules/dataset_sharing/api/types.py index 6e41512be..b7e3b06bf 100644 --- a/backend/dataall/modules/dataset_sharing/api/types.py +++ 
b/backend/dataall/modules/dataset_sharing/api/types.py @@ -107,6 +107,7 @@ fields=[ gql.Field(name='s3AccessPointName', type=gql.String), gql.Field(name='sharedGlueDatabase', type=gql.String), + gql.Field(name='s3bucketName', type=gql.String), ], ) diff --git a/backend/dataall/modules/dataset_sharing/aws/glue_client.py b/backend/dataall/modules/dataset_sharing/aws/glue_client.py index f110d0f89..c296025ce 100644 --- a/backend/dataall/modules/dataset_sharing/aws/glue_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/glue_client.py @@ -130,3 +130,43 @@ def delete_database(self): f'due to: {e}' ) raise e + + def remove_create_table_default_permissions(self): + """ + When upgrading to LF tables and database can still have Create Table Default Permissions turned on. + Unless this setting is removed, the table or database + can not be shared using LakeFormation. + :return: + """ + try: + account_id = self._account_id + database = self._database + + log.info( + f'Removing CreateTableDefaultPermissions in database {database}' + ) + + response = self._client.get_database(CatalogId=account_id, Name=database) + existing_database_parameters = response['Database'] + existing_database_parameters['CreateTableDefaultPermissions'] = [] + + if 'CreateTime' in existing_database_parameters: + del existing_database_parameters['CreateTime'] + if 'CatalogId' in existing_database_parameters: + del existing_database_parameters['CatalogId'] + + response = self._client.update_database( + CatalogId=account_id, + Name=database, + DatabaseInput=existing_database_parameters + ) + + log.info( + f'Successfully removed Create Table Default Permissions and Create Database Default Permissions ' + f'| {response}') + + except ClientError as e: + log.error( + f'Could not remove CreateDatabaseDefaultPermissions and/or CreateTableDefaultPermissions ' + f'permission on database in {database} due to {e}' + ) diff --git a/backend/dataall/modules/dataset_sharing/aws/kms_client.py b/backend/dataall/modules/dataset_sharing/aws/kms_client.py index 5642a9013..bdb9e2e91 100644 --- a/backend/dataall/modules/dataset_sharing/aws/kms_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/kms_client.py @@ -53,19 +53,21 @@ def get_key_id(self, key_alias: str): else: return response['KeyMetadata']['KeyId'] - def check_key_exists(self, key_alias: str): + def add_tags_to_key(self, key_id: str, tags: list): + """ + Add tags to an existing AWS KMS key. + :param key_id: The ID of the KMS key to add tags to. + :param tags: A list of dictionaries containing the tags to be added. 
For example: + [{'TagKey': 'Purpose', 'TagValue': 'Test'}] + :return: None + """ try: - key_exist = False - paginator = self._client.get_paginator('list_aliases') - for page in paginator.paginate(): - key_aliases = [alias["AliasName"] for alias in page['Aliases']] - if key_alias in key_aliases: - key_exist = True - break + self._client.tag_resource( + KeyId=key_id, + Tags=tags, + ) except Exception as e: log.error( - f'Failed to list kms key aliases in account {self._account_id}: {e}' + f'Failed to add tags to kms key {key_id} : {e}' ) - return None - else: - return key_exist + raise e diff --git a/backend/dataall/modules/dataset_sharing/aws/s3_client.py b/backend/dataall/modules/dataset_sharing/aws/s3_client.py index 78b0296ce..9cd3a9a24 100755 --- a/backend/dataall/modules/dataset_sharing/aws/s3_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/s3_client.py @@ -121,6 +121,50 @@ def generate_access_point_policy_template( } return policy + @staticmethod + def generate_default_bucket_policy( + s3_bucket_name: str, + owner_roleId: list + ): + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowAllToAdmin", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": owner_roleId + } + } + }, + { + "Effect": "Deny", + "Principal": { + "AWS": "*" + }, + "Sid": "RequiredSecureTransport", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "Bool": { + "aws:SecureTransport": "false" + } + } + } + ] + } + return policy + class S3Client: def __init__(self, account_id, region): diff --git a/backend/dataall/modules/dataset_sharing/db/enums.py b/backend/dataall/modules/dataset_sharing/db/enums.py index 233991fad..7db0be34a 100644 --- a/backend/dataall/modules/dataset_sharing/db/enums.py +++ b/backend/dataall/modules/dataset_sharing/db/enums.py @@ -57,6 +57,7 @@ class ShareableType(Enum): Table = 'DatasetTable' StorageLocation = 'DatasetStorageLocation' View = 'View' + S3Bucket = 'S3Bucket' class PrincipalType(Enum): diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py index 7a6d1a70b..f90d5c330 100644 --- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py +++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py @@ -12,7 +12,7 @@ ShareItemStatus, ShareableType, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository -from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset +from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket logger = logging.getLogger(__name__) @@ -356,6 +356,8 @@ def get_share_item(session, item_type, item_uri): return session.query(DatasetTable).get(item_uri) if item_type == ShareableType.StorageLocation.value: return session.query(DatasetStorageLocation).get(item_uri) + if item_type == ShareableType.S3Bucket.value: + return session.query(DatasetBucket).get(item_uri) @staticmethod def get_share_by_uri(session, uri): @@ -525,7 +527,33 @@ def list_shareable_items(session, share, states, data): if states: locations = 
locations.filter(ShareObjectItem.status.in_(states)) - shareable_objects = tables.union(locations).subquery('shareable_objects') + s3_buckets = ( + session.query( + DatasetBucket.bucketUri.label('itemUri'), + func.coalesce('S3Bucket').label('itemType'), + DatasetBucket.S3BucketName.label('itemName'), + DatasetBucket.description.label('description'), + ShareObjectItem.shareItemUri.label('shareItemUri'), + ShareObjectItem.status.label('status'), + case( + [(ShareObjectItem.shareItemUri.isnot(None), True)], + else_=False, + ).label('isShared'), + ) + .outerjoin( + ShareObjectItem, + and_( + ShareObjectItem.shareUri == share.shareUri, + DatasetBucket.bucketUri + == ShareObjectItem.itemUri, + ), + ) + .filter(DatasetBucket.datasetUri == share.datasetUri) + ) + if states: + s3_buckets = s3_buckets.filter(ShareObjectItem.status.in_(states)) + + shareable_objects = tables.union(locations, s3_buckets).subquery('shareable_objects') query = session.query(shareable_objects) if data: @@ -732,9 +760,14 @@ def get_share_data_items(session, share_uri, status): session, share, status, DatasetStorageLocation, DatasetStorageLocation.locationUri ) + s3_buckets = ShareObjectRepository._find_all_share_item( + session, share, status, DatasetBucket, DatasetBucket.bucketUri + ) + return ( tables, folders, + s3_buckets, ) @staticmethod @@ -774,23 +807,17 @@ def find_all_share_items(session, share_uri, share_type): ) @staticmethod - def other_approved_share_item_table_exists(session, environment_uri, item_uri, share_item_uri): - share_item_shared_states = ShareItemSM.get_share_item_shared_states() + def other_approved_share_object_exists(session, environment_uri, dataset_uri): return ( session.query(ShareObject) - .join( - ShareObjectItem, - ShareObject.shareUri == ShareObjectItem.shareUri, - ) .filter( and_( - ShareObject.environmentUri == environment_uri, - ShareObjectItem.itemUri == item_uri, - ShareObjectItem.shareItemUri != share_item_uri, - ShareObjectItem.status.in_(share_item_shared_states), + Environment.environmentUri == environment_uri, + ShareObject.status == ShareObjectStatus.Approved.value, + ShareObject.datasetUri == dataset_uri, ) ) - .first() + .all() ) @staticmethod diff --git a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py index 3e93d894a..14412abca 100644 --- a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py @@ -1,12 +1,17 @@ import logging -from dataall.modules.dataset_sharing.services.share_processors.lf_process_cross_account_share import ProcessLFCrossAccountShare -from dataall.modules.dataset_sharing.services.share_processors.lf_process_same_account_share import ProcessLFSameAccountShare -from dataall.modules.dataset_sharing.services.share_processors.s3_process_share import ProcessS3Share +from dataall.modules.dataset_sharing.services.share_processors.lf_process_cross_account_share import \ + ProcessLFCrossAccountShare +from dataall.modules.dataset_sharing.services.share_processors.lf_process_same_account_share import \ + ProcessLFSameAccountShare +from dataall.modules.dataset_sharing.services.share_processors.s3_access_point_process_share import \ + ProcessS3AccessPointShare +from dataall.modules.dataset_sharing.services.share_processors.s3_bucket_process_share import ProcessS3BucketShare from dataall.base.db import Engine from dataall.modules.dataset_sharing.db.enums import 
ShareObjectActions, ShareItemStatus, ShareableType -from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectSM, ShareObjectRepository, ShareItemSM +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectSM, ShareObjectRepository, \ + ShareItemSM log = logging.getLogger(__name__) @@ -21,8 +26,9 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: 1) Updates share object State Machine with the Action: Start 2) Retrieves share data and items in Share_Approved state 3) Calls sharing folders processor to grant share - 4) Calls sharing tables processor for same or cross account sharing to grant share - 5) Updates share object State Machine with the Action: Finish + 4) Calls sharing buckets processor to grant share + 5) Calls sharing tables processor for same or cross account sharing to grant share + 6) Updates share object State Machine with the Action: Finish Parameters ---------- @@ -50,12 +56,13 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: ( shared_tables, - shared_folders + shared_folders, + shared_buckets ) = ShareObjectRepository.get_share_data_items(session, share_uri, ShareItemStatus.Share_Approved.value) log.info(f'Granting permissions to folders: {shared_folders}') - approved_folders_succeed = ProcessS3Share.process_approved_shares( + approved_folders_succeed = ProcessS3AccessPointShare.process_approved_shares( session, dataset, share, @@ -67,6 +74,20 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: ) log.info(f'sharing folders succeeded = {approved_folders_succeed}') + log.info('Granting permissions to S3 buckets') + + approved_s3_buckets_succeed = ProcessS3BucketShare.process_approved_shares( + session, + dataset, + share, + shared_buckets, + source_environment, + target_environment, + source_env_group, + env_group + ) + log.info(f'sharing s3 buckets succeeded = {approved_s3_buckets_succeed}') + if source_environment.AwsAccountId != target_environment.AwsAccountId: processor = ProcessLFCrossAccountShare( session, @@ -97,7 +118,7 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share, new_share_state) - return approved_tables_succeed if approved_folders_succeed else False + return approved_folders_succeed and approved_s3_buckets_succeed and approved_tables_succeed @classmethod def revoke_share(cls, engine: Engine, share_uri: str): @@ -108,7 +129,8 @@ def revoke_share(cls, engine: Engine, share_uri: str): 4) Checks if remaining folders are shared and effectuates clean up with folders processor 5) Calls sharing tables processor for same or cross account sharing to revoke share 6) Checks if remaining tables are shared and effectuates clean up with tables processor - 7) Updates share object State Machine with the Action: Finish + 7) Calls sharing buckets processor to revoke share + 8) Updates share object State Machine with the Action: Finish Parameters ---------- @@ -139,7 +161,8 @@ def revoke_share(cls, engine: Engine, share_uri: str): ( revoked_tables, - revoked_folders + revoked_folders, + revoked_buckets ) = ShareObjectRepository.get_share_data_items(session, share_uri, ShareItemStatus.Revoke_Approved.value) new_state = revoked_item_sm.run_transition(ShareObjectActions.Start.value) @@ -147,7 +170,7 @@ def revoke_share(cls, engine: Engine, share_uri: str): log.info(f'Revoking permissions to folders: {revoked_folders}') - revoked_folders_succeed = 
ProcessS3Share.process_revoked_shares( + revoked_folders_succeed = ProcessS3AccessPointShare.process_revoked_shares( session, dataset, share, @@ -166,13 +189,27 @@ def revoke_share(cls, engine: Engine, share_uri: str): log.info(f'Still remaining S3 resources shared = {existing_shared_items}') if not existing_shared_items and revoked_folders: log.info("Clean up S3 access points...") - clean_up_folders = ProcessS3Share.clean_up_share( + clean_up_folders = ProcessS3AccessPointShare.clean_up_share( dataset=dataset, share=share, target_environment=target_environment ) log.info(f"Clean up S3 successful = {clean_up_folders}") + log.info('Revoking permissions to S3 buckets') + + revoked_s3_buckets_succeed = ProcessS3BucketShare.process_revoked_shares( + session, + dataset, + share, + revoked_buckets, + source_environment, + target_environment, + source_env_group, + env_group, + ) + log.info(f'revoking s3 buckets succeeded = {revoked_s3_buckets_succeed}') + if source_environment.AwsAccountId != target_environment.AwsAccountId: processor = ProcessLFCrossAccountShare( session, @@ -207,7 +244,7 @@ def revoke_share(cls, engine: Engine, share_uri: str): log.info(f'Still remaining LF resources shared = {existing_shared_items}') if not existing_shared_items and revoked_tables: log.info("Clean up LF remaining resources...") - clean_up_tables = processor.delete_shared_database() + clean_up_tables = processor.clean_up_share() log.info(f"Clean up LF successful = {clean_up_tables}") existing_pending_items = ShareObjectRepository.check_pending_share_items(session, share_uri) @@ -217,4 +254,4 @@ def revoke_share(cls, engine: Engine, share_uri: str): new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share, new_share_state) - return revoked_tables_succeed and revoked_folders_succeed + return revoked_folders_succeed and revoked_s3_buckets_succeed and revoked_tables_succeed diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py index ae225f99f..d568dd4d8 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py @@ -3,7 +3,7 @@ from dataall.core.environment.db.environment_models import Environment from dataall.modules.dataset_sharing.db.share_object_models import ShareObject -from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation +from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation, DatasetBucket from dataall.base.utils.alarm_service import AlarmService log = logging.getLogger(__name__) @@ -147,5 +147,48 @@ def trigger_revoke_folder_sharing_failure_alarm( Share Target - AWS Account: {target_environment.AwsAccountId} - Region: {target_environment.region} +""" + return self.publish_message_to_alarms_topic(subject, message) + + def trigger_s3_bucket_sharing_failure_alarm( + self, + bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + ): + alarm_type = "Share" + return self.handle_bucket_sharing_failure(bucket, share, target_environment, alarm_type) + + def trigger_revoke_s3_bucket_sharing_failure_alarm( + self, + bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + ): + alarm_type = "Sharing Revoke" + return self.handle_bucket_sharing_failure(bucket, share, target_environment, alarm_type) + + 
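Both trigger helpers above delegate to handle_bucket_sharing_failure (defined next), which formats a subject and message and hands them to publish_message_to_alarms_topic. As a minimal illustrative sketch only — the topic ARN, region and message values below are placeholders, and the real helper's implementation is not shown in this patch — the final publish step to an SNS alarms topic with boto3 could look like:

import boto3

def publish_alarm(subject: str, message: str, topic_arn: str, region: str) -> str:
    # Publish a share-failure notification to the configured SNS alarms topic.
    sns = boto3.client("sns", region_name=region)
    # SNS rejects subjects longer than 100 characters, so trim defensively.
    response = sns.publish(TopicArn=topic_arn, Subject=subject[:100], Message=message)
    return response["MessageId"]

# Example invocation with placeholder values.
publish_alarm(
    subject="ALARM: DATAALL S3 Bucket example-bucket Share Failure Notification",
    message="Bucket share failed, see task logs for details.",
    topic_arn="arn:aws:sns:eu-west-1:111122223333:dataall-alarms-topic",
    region="eu-west-1",
)
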
def handle_bucket_sharing_failure(self, bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + alarm_type: str): + log.info(f'Triggering {alarm_type} failure alarm...') + subject = ( + f'ALARM: DATAALL S3 Bucket {bucket.S3BucketName} {alarm_type} Failure Notification' + ) + message = f""" +You are receiving this email because your DATAALL {self.envname} environment in the {self.region} region has entered the ALARM state, because it failed to {alarm_type} the S3 Bucket {bucket.S3BucketName}. +Alarm Details: + - State Change: OK -> ALARM + - Reason for State Change: S3 Bucket {alarm_type} failure + - Timestamp: {datetime.now()} + Share Source + - Dataset URI: {share.datasetUri} + - AWS Account: {bucket.AwsAccountId} + - Region: {bucket.region} + - S3 Bucket: {bucket.S3BucketName} + Share Target + - AWS Account: {target_environment.AwsAccountId} + - Region: {target_environment.region} """ return self.publish_message_to_alarms_topic(subject, message) diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py b/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py index f8c7a4347..df0af76bf 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py @@ -1,2 +1,3 @@ -from .s3_share_manager import S3ShareManager +from .s3_access_point_share_manager import S3AccessPointShareManager from .lf_share_manager import LFShareManager +from .s3_bucket_share_manager import S3BucketShareManager diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py index d1e92e43b..754ceaf07 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py @@ -51,6 +51,10 @@ def process_approved_shares(self) -> [str]: def process_revoked_shares(self) -> [str]: return NotImplementedError + @abc.abstractmethod + def clean_up_share(self): + return NotImplementedError + def get_share_principals(self) -> [str]: """ Builds list of principals of the share request @@ -390,6 +394,9 @@ def share_table_with_target_account(cls, **data): data['source']['database'], data['source']['tablename'], ) + + glue_client = GlueClient(source_accountid, source_region, data['source']['database']) + glue_client.remove_create_table_default_permissions() time.sleep(1) LakeFormationClient.grant_permissions_to_table( @@ -417,7 +424,7 @@ def share_table_with_target_account(cls, **data): ) raise e - def revoke_external_account_access_on_source_account(self, db_name, table_name) -> [dict]: + def revoke_external_account_access_on_source_account(self) -> [dict]: """ 1) Revokes access to external account if dataset is not shared with any other team from the same workspace @@ -436,28 +443,29 @@ def revoke_external_account_access_on_source_account(self, db_name, table_name) client = aws_session.client( 'lakeformation', region_name=self.source_environment.region ) - revoke_entries = [ - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': self.target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': db_name, - 'Name': table_name, - 'ColumnWildcard': {}, - 'CatalogId': self.source_environment.AwsAccountId, - } - }, - 'Permissions': ['DESCRIBE', 'SELECT'], - 
'PermissionsWithGrantOption': ['DESCRIBE', 'SELECT'], - } - ] - - LakeFormationClient.batch_revoke_permissions( - client, self.source_environment.AwsAccountId, revoke_entries - ) + revoke_entries = [] + for table in self.revoked_tables: + revoke_entries.append( + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 'DataLakePrincipalIdentifier': self.target_environment.AwsAccountId + }, + 'Resource': { + 'TableWithColumns': { + 'DatabaseName': table.GlueDatabaseName, + 'Name': table.GlueTableName, + 'ColumnWildcard': {}, + 'CatalogId': self.source_environment.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + 'PermissionsWithGrantOption': ['DESCRIBE', 'SELECT'], + } + ) + LakeFormationClient.batch_revoke_permissions( + client, self.source_environment.AwsAccountId, revoke_entries + ) return revoke_entries def handle_share_failure( diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py similarity index 79% rename from backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py rename to backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py index 644dbe360..eda4d58d5 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py @@ -12,15 +12,17 @@ from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository +from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareManagerUtils from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset logger = logging.getLogger(__name__) ACCESS_POINT_CREATION_TIME = 30 ACCESS_POINT_CREATION_RETRIES = 5 +IAM_ACCESS_POINT_ROLE_POLICY = "targetDatasetAccessControlPolicy" -class S3ShareManager: +class S3AccessPointShareManager: def __init__( self, session, @@ -91,7 +93,7 @@ def manage_bucket_policy(self): s3_client = S3Client(self.source_account_id, self.source_environment.region) bucket_policy = json.loads(s3_client.get_bucket_policy(self.bucket_name)) for statement in bucket_policy["Statement"]: - if statement.get("Sid") in ["AllowAllToAdmin", "DelegateAccessToAccessPoint"]: + if statement.get("Sid") in ["DelegateAccessToAccessPoint"]: return exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( self.source_account_id, @@ -140,34 +142,53 @@ def grant_target_role_access_policy(self): logger.info( f'Grant target role {self.target_requester_IAMRoleName} access policy' ) + key_alias = f"alias/{self.dataset.KmsAlias}" + kms_client = KmsClient(self.dataset_account_id, self.source_environment.region) + kms_key_id = kms_client.get_key_id(key_alias) + existing_policy = IAM.get_role_policy( self.target_account_id, self.target_requester_IAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, ) if existing_policy: # type dict - if self.bucket_name not in ",".join(existing_policy["Statement"][0]["Resource"]): - logger.info( - f'targetDatasetAccessControlPolicy exists for IAM role {self.target_requester_IAMRoleName}, ' - f'but S3 Access point {self.access_point_name} is not included, updating...' 
- ) - target_resources = [ + s3_target_resources = [ f"arn:aws:s3:::{self.bucket_name}", f"arn:aws:s3:::{self.bucket_name}/*", f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}", f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}/*" ] - existing_policy["Statement"][0]["Resource"].extend(target_resources) - policy = existing_policy - else: - logger.info( - f'targetDatasetAccessControlPolicy exists for IAM role {self.target_requester_IAMRoleName} ' - f'and S3 Access point {self.access_point_name} is included, skipping...' - ) - return + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.add_missing_resources_to_policy_statement( + self.bucket_name, + s3_target_resources, + existing_policy["Statement"][0], + IAM_ACCESS_POINT_ROLE_POLICY + ) + + kms_target_resources = [ + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}", + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}/*" + ] + share_manager.add_missing_resources_to_policy_statement( + kms_key_id, + kms_target_resources, + existing_policy["Statement"][1], + IAM_ACCESS_POINT_ROLE_POLICY + ) + + policy = existing_policy else: logger.info( - f'targetDatasetAccessControlPolicy does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' + f'{IAM_ACCESS_POINT_ROLE_POLICY} does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' ) policy = { "Version": "2012-10-17", @@ -183,13 +204,23 @@ def grant_target_role_access_policy(self): f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}", f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}/*" ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}", + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}/*" + ] } ] } IAM.update_role_policy( self.target_account_id, self.target_requester_IAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, json.dumps(policy), ) @@ -281,7 +312,7 @@ def update_dataset_bucket_key_policy(self): 'Updating dataset Bucket KMS key policy...' ) key_alias = f"alias/{self.dataset.KmsAlias}" - kms_client = KmsClient(account_id=self.source_account_id, region=self.source_environment.region) + kms_client = KmsClient(self.source_account_id, self.source_environment.region) kms_key_id = kms_client.get_key_id(key_alias) existing_policy = kms_client.get_key_policy(kms_key_id) target_requester_id = SessionHelper.get_role_id(self.target_account_id, self.target_requester_IAMRoleName) @@ -333,7 +364,7 @@ def delete_access_point( share: ShareObject, dataset: Dataset, ): - access_point_name = S3ShareManager.build_access_point_name(share) + access_point_name = S3AccessPointShareManager.build_access_point_name(share) logger.info( f'Deleting access point {access_point_name}...' ) @@ -356,31 +387,52 @@ def delete_target_role_access_policy( logger.info( 'Deleting target role IAM policy...' 
) - access_point_name = S3ShareManager.build_access_point_name(share) + access_point_name = S3AccessPointShareManager.build_access_point_name(share) existing_policy = IAM.get_role_policy( target_environment.AwsAccountId, share.principalIAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, ) + key_alias = f"alias/{dataset.KmsAlias}" + kms_client = KmsClient(dataset.AwsAccountId, dataset.region) + kms_key_id = kms_client.get_key_id(key_alias) if existing_policy: - if dataset.S3BucketName in ",".join(existing_policy["Statement"][0]["Resource"]): - target_resources = [ - f"arn:aws:s3:::{dataset.S3BucketName}", - f"arn:aws:s3:::{dataset.S3BucketName}/*", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" - ] - for item in target_resources: - existing_policy["Statement"][0]["Resource"].remove(item) - if not existing_policy["Statement"][0]["Resource"]: - IAM.delete_role_policy(target_environment.AwsAccountId, share.principalIAMRoleName, "targetDatasetAccessControlPolicy") - else: - IAM.update_role_policy( - target_environment.AwsAccountId, - share.principalIAMRoleName, - "targetDatasetAccessControlPolicy", - json.dumps(existing_policy), - ) + s3_target_resources = [ + f"arn:aws:s3:::{dataset.S3BucketName}", + f"arn:aws:s3:::{dataset.S3BucketName}/*", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" + ] + ShareManagerUtils.remove_resource_from_statement( + existing_policy["Statement"][0], + s3_target_resources + ) + + kms_target_resources = [ + f"arn:aws:kms:{dataset.region}:{dataset.AwsAccountId}:key/{kms_key_id}", + f"arn:aws:kms:{dataset.region}:{dataset.AwsAccountId}:key/{kms_key_id}/*" + ] + ShareManagerUtils.remove_resource_from_statement( + existing_policy["Statement"][1], + kms_target_resources + ) + policy_statements = [] + for statement in existing_policy["Statement"]: + if len(statement["Resource"]) != 0: + policy_statements.append(statement) + + existing_policy["Statement"] = policy_statements + if len(existing_policy["Statement"]) == 0: + IAM.delete_role_policy(target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_ACCESS_POINT_ROLE_POLICY) + else: + IAM.update_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_ACCESS_POINT_ROLE_POLICY, + json.dumps(existing_policy), + ) @staticmethod def delete_dataset_bucket_key_policy( @@ -392,7 +444,7 @@ def delete_dataset_bucket_key_policy( 'Deleting dataset bucket KMS key policy...' 
) key_alias = f"alias/{dataset.KmsAlias}" - kms_client = KmsClient(account_id=dataset.AwsAccountId, region=dataset.region) + kms_client = KmsClient(dataset.AwsAccountId, dataset.region) kms_key_id = kms_client.get_key_id(key_alias) existing_policy = kms_client.get_key_policy(kms_key_id) target_requester_id = SessionHelper.get_role_id(target_environment.AwsAccountId, share.principalIAMRoleName) diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py new file mode 100644 index 000000000..a120749b3 --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py @@ -0,0 +1,443 @@ +import abc +import json +import logging +from itertools import count + +from dataall.base.aws.iam import IAM +from dataall.base.aws.sts import SessionHelper +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.aws.kms_client import KmsClient +from dataall.modules.dataset_sharing.aws.s3_client import S3ControlClient, S3Client +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareManagerUtils +from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository + +logger = logging.getLogger(__name__) + +DATAALL_READ_ONLY_SID = "DataAll-Bucket-ReadOnly" +DATAALL_ALLOW_OWNER_SID = "AllowAllToAdmin" +IAM_S3BUCKET_ROLE_POLICY = "dataall-targetDatasetS3Bucket-AccessControlPolicy" + + +class S3BucketShareManager: + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + target_bucket: DatasetBucket, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + self.session = session + self.source_env_group = source_env_group + self.env_group = env_group + self.dataset = dataset + self.share = share + self.target_bucket = target_bucket + self.source_environment = source_environment + self.target_environment = target_environment + self.share_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + target_bucket.bucketUri, + ) + self.source_account_id = target_bucket.AwsAccountId + self.target_account_id = target_environment.AwsAccountId + self.source_env_admin = source_env_group.environmentIAMRoleArn + self.target_requester_IAMRoleName = share.principalIAMRoleName + self.bucket_name = target_bucket.S3BucketName + self.dataset_admin = dataset.IAMDatasetAdminRoleArn + self.bucket_region = target_bucket.region + + @abc.abstractmethod + def process_approved_shares(self, *kwargs) -> bool: + raise NotImplementedError + + @abc.abstractmethod + def process_revoked_shares(self, *kwargs) -> bool: + raise NotImplementedError + + def grant_s3_iam_access(self): + """ + Updates requester IAM role policy to include requested S3 bucket and kms key + :return: + """ + logger.info( + f'Grant target role {self.target_requester_IAMRoleName} access policy' + ) + existing_policy = IAM.get_role_policy( + self.target_account_id, + self.target_requester_IAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + ) + key_alias = f"alias/{self.target_bucket.KmsAlias}" 
+ kms_client = KmsClient(self.source_account_id, self.source_environment.region) + kms_key_id = kms_client.get_key_id(key_alias) + + if existing_policy: # type dict + s3_target_resources = [ + f"arn:aws:s3:::{self.bucket_name}", + f"arn:aws:s3:::{self.bucket_name}/*" + ] + + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.add_missing_resources_to_policy_statement( + resource_type=self.bucket_name, + target_resources=s3_target_resources, + existing_policy_statement=existing_policy["Statement"][0], + iam_role_policy_name=IAM_S3BUCKET_ROLE_POLICY + ) + + kms_target_resources = [ + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}", + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}/*" + ] + + share_manager.add_missing_resources_to_policy_statement( + resource_type=kms_key_id, + target_resources=kms_target_resources, + existing_policy_statement=existing_policy["Statement"][1], + iam_role_policy_name=IAM_S3BUCKET_ROLE_POLICY + ) + + policy = existing_policy + else: + logger.info( + f'{IAM_S3BUCKET_ROLE_POLICY} does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' + ) + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{self.bucket_name}", + f"arn:aws:s3:::{self.bucket_name}/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}", + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}/*" + ] + } + ] + } + IAM.update_role_policy( + self.target_account_id, + self.target_requester_IAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + json.dumps(policy), + ) + + def get_bucket_policy_or_default(self): + """ + Fetches the existing bucket policy for the S3 bucket if one exists otherwise returns the default bucket policy + :return: + """ + s3_client = S3Client(self.source_account_id, self.source_environment.region) + bucket_policy = s3_client.get_bucket_policy(self.bucket_name) + if bucket_policy: + logger.info( + f'There is already an existing policy for bucket {self.bucket_name}, will be updating policy...' + ) + bucket_policy = json.loads(bucket_policy) + else: + logger.info( + f'Bucket policy for {self.bucket_name} does not exist, generating default policy...' + ) + exceptions_roleId = self.get_bucket_owner_roleid() + bucket_policy = S3ControlClient.generate_default_bucket_policy(self.bucket_name, exceptions_roleId) + return bucket_policy + + def get_bucket_owner_roleid(self): + exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( + self.source_account_id, + [self.dataset_admin, self.source_env_admin, SessionHelper.get_delegation_role_arn(self.source_account_id)] + )] + return exceptions_roleId + + def grant_role_bucket_policy(self): + """ + This function will update bucket policy by granting admin access to dataset admin, pivot role + and environment admin along with read only access to accepted share roles. All the policies will only be added + once. 
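The body of grant_role_bucket_policy below indexes the existing bucket-policy statements by their Sid, falling back to a throwaway counter for unnamed statements, so that the DataAll-Bucket-ReadOnly and AllowAllToAdmin statements can be inserted or refreshed without disturbing anything else. A standalone sketch of that merge pattern; the account id, role ARN and bucket name are placeholders:

import json
from itertools import count

def upsert_statement(bucket_policy: dict, new_statement: dict) -> dict:
    # Key statements by Sid; statements without a Sid get a fresh integer key
    # so they are carried over unchanged.
    counter = count()
    statements = {s.get("Sid", next(counter)): s for s in bucket_policy.get("Statement", [])}
    # Insert or overwrite the managed statement under its Sid.
    statements[new_statement["Sid"]] = new_statement
    bucket_policy["Statement"] = list(statements.values())
    return bucket_policy

existing_policy = {
    "Version": "2012-10-17",
    "Statement": [
        {   # unnamed statement: preserved as-is by the merge
            "Effect": "Deny",
            "Principal": {"AWS": "*"},
            "Action": "s3:*",
            "Resource": "*",
            "Condition": {"Bool": {"aws:SecureTransport": "false"}},
        }
    ],
}
read_only = {
    "Sid": "DataAll-Bucket-ReadOnly",
    "Effect": "Allow",
    "Principal": {"AWS": ["arn:aws:iam::111122223333:role/requester-team-role"]},
    "Action": ["s3:List*", "s3:GetObject"],
    "Resource": ["arn:aws:s3:::example-shared-bucket", "arn:aws:s3:::example-shared-bucket/*"],
}
print(json.dumps(upsert_statement(existing_policy, read_only), indent=2))
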
+ :return: + """ + logger.info( + f'Granting access via Bucket policy for {self.bucket_name}' + ) + try: + target_requester_arn = self.get_role_arn(self.target_account_id, self.target_requester_IAMRoleName) + bucket_policy = self.get_bucket_policy_or_default() + counter = count() + statements = {item.get("Sid", next(counter)): item for item in bucket_policy.get("Statement", {})} + if DATAALL_READ_ONLY_SID in statements.keys(): + logger.info(f'Bucket policy contains share statement {DATAALL_READ_ONLY_SID}, updating the current one') + statements[DATAALL_READ_ONLY_SID] = self.add_target_arn_to_statement_principal(statements[DATAALL_READ_ONLY_SID], target_requester_arn) + else: + logger.info(f'Bucket policy does not contain share statement {DATAALL_READ_ONLY_SID}, generating a new one') + statements[DATAALL_READ_ONLY_SID] = self.generate_default_bucket_read_policy_statement(self.bucket_name, target_requester_arn) + + if DATAALL_ALLOW_OWNER_SID not in statements.keys(): + statements[DATAALL_ALLOW_OWNER_SID] = self.generate_owner_access_statement(self.bucket_name, self.get_bucket_owner_roleid()) + + bucket_policy["Statement"] = list(statements.values()) + s3_client = S3Client(self.source_account_id, self.source_environment.region) + s3_client.create_bucket_policy(self.bucket_name, json.dumps(bucket_policy)) + except Exception as e: + logger.exception( + f'Failed during bucket policy management {e}' + ) + raise e + + def add_target_arn_to_statement_principal(self, statement, target_requester_arn): + principal_list = self.get_principal_list(statement) + if f"{target_requester_arn}" not in principal_list: + principal_list.append(f"{target_requester_arn}") + statement["Principal"]["AWS"] = principal_list + return statement + + @staticmethod + def generate_owner_access_statement(s3_bucket_name, owner_roleId): + owner_policy_statement = { + "Sid": "AllowAllToAdmin", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": owner_roleId + } + } + } + return owner_policy_statement + + @staticmethod + def get_principal_list(statement): + principal_list = statement["Principal"]["AWS"] + if isinstance(principal_list, str): + principal_list = [principal_list] + return principal_list + + def grant_dataset_bucket_key_policy(self): + if (self.target_bucket.imported and self.target_bucket.importedKmsKey) or not self.target_bucket.imported: + logger.info( + 'Updating dataset Bucket KMS key policy...' + ) + key_alias = f"alias/{self.target_bucket.KmsAlias}" + kms_client = KmsClient(self.source_account_id, self.source_environment.region) + kms_key_id = kms_client.get_key_id(key_alias) + existing_policy = kms_client.get_key_policy(kms_key_id) + target_requester_id = SessionHelper.get_role_id(self.target_account_id, self.target_requester_IAMRoleName) + if existing_policy and f'{target_requester_id}:*' not in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"].append( + { + "Sid": f"{target_requester_id}", + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": { + "StringLike": { + "aws:userId": f"{target_requester_id}:*" + } + } + } + ) + kms_client.put_key_policy( + kms_key_id, + json.dumps(policy) + ) + + def delete_target_role_bucket_policy(self): + logger.info( + f'Deleting target role from bucket policy for bucket {self.bucket_name}...' 
+ ) + try: + s3_client = S3Client(self.source_account_id, self.source_environment.region) + bucket_policy = json.loads(s3_client.get_bucket_policy(self.bucket_name)) + target_requester_arn = self.get_role_arn(self.target_account_id, self.target_requester_IAMRoleName) + counter = count() + statements = {item.get("Sid", next(counter)): item for item in bucket_policy.get("Statement", {})} + if DATAALL_READ_ONLY_SID in statements.keys(): + principal_list = self.get_principal_list(statements[DATAALL_READ_ONLY_SID]) + if f"{target_requester_arn}" in principal_list: + principal_list.remove(f"{target_requester_arn}") + if len(principal_list) == 0: + statements.pop(DATAALL_READ_ONLY_SID) + else: + statements[DATAALL_READ_ONLY_SID]["Principal"]["AWS"] = principal_list + bucket_policy["Statement"] = list(statements.values()) + s3_client.create_bucket_policy(self.bucket_name, json.dumps(bucket_policy)) + except Exception as e: + logger.exception( + f'Failed during bucket policy management {e}' + ) + raise e + + def delete_target_role_access_policy( + self, + share: ShareObject, + target_bucket: DatasetBucket, + target_environment: Environment, + ): + logger.info( + 'Deleting target role IAM policy...' + ) + existing_policy = IAM.get_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + ) + key_alias = f"alias/{target_bucket.KmsAlias}" + kms_client = KmsClient(target_bucket.AwsAccountId, target_bucket.region) + kms_key_id = kms_client.get_key_id(key_alias) + if existing_policy: + s3_target_resources = [ + f"arn:aws:s3:::{target_bucket.S3BucketName}", + f"arn:aws:s3:::{target_bucket.S3BucketName}/*" + ] + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.remove_resource_from_statement(existing_policy["Statement"][0], s3_target_resources) + + kms_target_resources = [ + f"arn:aws:kms:{target_bucket.region}:{target_bucket.AwsAccountId}:key/{kms_key_id}", + f"arn:aws:kms:{target_bucket.region}:{target_bucket.AwsAccountId}:key/{kms_key_id}/*", + ] + share_manager.remove_resource_from_statement(existing_policy["Statement"][1], kms_target_resources) + + policy_statements = [] + for statement in existing_policy["Statement"]: + if len(statement["Resource"]) != 0: + policy_statements.append(statement) + + existing_policy["Statement"] = policy_statements + if len(existing_policy["Statement"]) == 0: + IAM.delete_role_policy(target_environment.AwsAccountId, share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY) + else: + IAM.update_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + json.dumps(existing_policy), + ) + + @staticmethod + def delete_target_role_bucket_key_policy( + share: ShareObject, + target_bucket: DatasetBucket, + target_environment: Environment, + ): + if (target_bucket.imported and target_bucket.importedKmsKey) or not target_bucket.imported: + logger.info( + 'Deleting target role from dataset bucket KMS key policy...' 
+ ) + key_alias = f"alias/{target_bucket.KmsAlias}" + kms_client = KmsClient(target_bucket.AwsAccountId, target_bucket.region) + kms_key_id = kms_client.get_key_id(key_alias) + existing_policy = kms_client.get_key_policy(kms_key_id) + target_requester_id = SessionHelper.get_role_id(target_environment.AwsAccountId, share.principalIAMRoleName) + if existing_policy and f'{target_requester_id}:*' in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"] = [item for item in policy["Statement"] if item.get("Sid", None) != f"{target_requester_id}"] + kms_client.put_key_policy( + kms_key_id, + json.dumps(policy) + ) + + def handle_share_failure(self, error: Exception) -> bool: + """ + Handles share failure by raising an alarm to alarmsTopic + Returns + ------- + True if alarm published successfully + """ + logger.error( + f'Failed to share bucket {self.target_bucket.S3BucketName} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' + f'due to: {error}' + ) + DatasetAlarmService().trigger_s3_bucket_sharing_failure_alarm( + self.target_bucket, self.share, self.target_environment + ) + return True + + def handle_revoke_failure(self, error: Exception) -> bool: + """ + Handles share failure by raising an alarm to alarmsTopic + Returns + ------- + True if alarm published successfully + """ + logger.error( + f'Failed to revoke S3 permissions to bucket {self.bucket_name} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' + f'due to: {error}' + ) + DatasetAlarmService().trigger_revoke_folder_sharing_failure_alarm( + self.target_bucket, self.share, self.target_environment + ) + return True + + @staticmethod + def get_role_arn(target_account_id, target_requester_IAMRoleName): + return f"arn:aws:iam::{target_account_id}:role/{target_requester_IAMRoleName}" + + @staticmethod + def generate_default_bucket_read_policy_statement(s3_bucket_name, target_requester_arn): + return { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + f"{target_requester_arn}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ] + } diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py b/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py new file mode 100644 index 000000000..305d8c5e7 --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py @@ -0,0 +1,64 @@ +import abc +import logging + +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.datasets_base.db.dataset_models import Dataset + + +logger = logging.getLogger(__name__) + + +class ShareManagerUtils: + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + self.target_requester_IAMRoleName = share.principalIAMRoleName + self.session = session + self.dataset = dataset + self.share = share + 
self.source_environment = source_environment + self.target_environment = target_environment + self.source_env_group = source_env_group + self.env_group = env_group + + def add_missing_resources_to_policy_statement( + self, + resource_type, + target_resources, + existing_policy_statement, + iam_role_policy_name + ): + """ + Checks if the resources are in the existing policy. Otherwise, it will add it. + :param resource_type: str + :param target_resources: list + :param existing_policy_statement: dict + :param iam_role_policy_name: str + :return + """ + for target_resource in target_resources: + if target_resource not in existing_policy_statement["Resource"]: + logger.info( + f'{iam_role_policy_name} exists for IAM role {self.target_requester_IAMRoleName}, ' + f'but {resource_type} is not included, updating...' + ) + existing_policy_statement["Resource"].extend([target_resource]) + else: + logger.info( + f'{iam_role_policy_name} exists for IAM role {self.target_requester_IAMRoleName} ' + f'and {resource_type} is included, skipping...' + ) + + @staticmethod + def remove_resource_from_statement(policy_statement, target_resources): + for target_resource in target_resources: + if target_resource in policy_statement["Resource"]: + policy_statement["Resource"].remove(target_resource) diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py index 51ba97cc7..d28340cd6 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py @@ -124,7 +124,7 @@ def process_revoked_shares(self) -> bool: a) update its status to REVOKE_IN_PROGRESS with Action Start b) check if item exists on glue catalog raise error if not and flag item status to failed c) revoke table resource link: undo grant permission to resource link table for team role in target account - d) revoke source table access: undo grant permission to table for team role in source account (and for QS Group if no other shares present for table) + d) revoke source table access: undo grant permission to table for team role in source account e) delete resource link table h) update share item status to REVOKE_SUCCESSFUL with Action Success @@ -157,23 +157,10 @@ def process_revoked_shares(self) -> bool: self.revoke_table_resource_link_access(table, principals) - other_table_shares_in_env = False - if ShareObjectRepository.other_approved_share_item_table_exists( - self.session, - self.target_environment.environmentUri, - share_item.itemUri, - share_item.shareItemUri - ): - other_table_shares_in_env = True - principals = [p for p in principals if "arn:aws:quicksight" not in p] - self.revoke_source_table_access(table, principals) self.delete_resource_link_table(table) - if not other_table_shares_in_env: - self.revoke_external_account_access_on_source_account(table.GlueDatabaseName, table.GlueTableName) - new_state = revoked_item_SM.run_transition(ShareItemActions.Success.value) revoked_item_SM.update_state_single_item(self.session, share_item, new_state) @@ -184,3 +171,24 @@ def process_revoked_shares(self) -> bool: success = False return success + + def clean_up_share(self) -> bool: + """" + 1) deletes deprecated shared db in target account + 2) checks if there are other share objects from this source account to this target account. 
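When no other approved share object remains, this clean-up falls through to revoke_external_account_access_on_source_account (reworked earlier in this patch), which revokes the target account's DESCRIBE/SELECT grants with a single Lake Formation batch call. A condensed boto3 sketch for one table; account ids, region, database and table names are placeholders:

import uuid
import boto3

def revoke_cross_account_table_access(source_account_id, source_region, target_account_id, database_name, table_name):
    lf = boto3.client("lakeformation", region_name=source_region)
    entry = {
        "Id": str(uuid.uuid4()),
        "Principal": {"DataLakePrincipalIdentifier": target_account_id},
        "Resource": {
            "TableWithColumns": {
                "DatabaseName": database_name,
                "Name": table_name,
                "ColumnWildcard": {},
                "CatalogId": source_account_id,
            }
        },
        "Permissions": ["DESCRIBE", "SELECT"],
        "PermissionsWithGrantOption": ["DESCRIBE", "SELECT"],
    }
    # Any entry that cannot be revoked is reported in the response's Failures list.
    return lf.batch_revoke_permissions(CatalogId=source_account_id, Entries=[entry])

# Example invocation with placeholder values.
revoke_cross_account_table_access(
    source_account_id="111122223333",
    source_region="eu-west-1",
    target_account_id="444455556666",
    database_name="example_glue_db",
    table_name="example_table",
)
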
+ If not, it revokes external account access of the target account to the source account. + Returns + ------- + True if clean-up succeeds + """ + + self.delete_shared_database() + + if not ShareObjectRepository.other_approved_share_object_exists( + self.session, + self.target_environment.environmentUri, + self.dataset.datasetUri, + ): + self.revoke_external_account_access_on_source_account() + + return True diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py index 54df2d900..270538a0b 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py @@ -157,3 +157,13 @@ def process_revoked_shares(self) -> bool: success = False return success + + def clean_up_share(self) -> bool: + """" + 1) deletes deprecated shared db in target account + Returns + ------- + True if clean-up succeeds + """ + self.delete_shared_database() + return True diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py similarity index 84% rename from backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py rename to backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py index 8e2f6cf38..043b9a6b4 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py @@ -1,7 +1,7 @@ import logging from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup -from ..share_managers import S3ShareManager +from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset from dataall.modules.dataset_sharing.db.enums import ShareItemStatus, ShareObjectActions, ShareItemActions from dataall.modules.dataset_sharing.db.share_object_models import ShareObject @@ -10,7 +10,7 @@ log = logging.getLogger(__name__) -class ProcessS3Share(S3ShareManager): +class ProcessS3AccessPointShare(S3AccessPointShareManager): def __init__( self, session, @@ -21,6 +21,7 @@ def __init__( target_environment: Environment, source_env_group: EnvironmentGroup, env_group: EnvironmentGroup, + existing_shared_buckets: bool = False ): super().__init__( @@ -164,11 +165,18 @@ def process_revoked_shares( return success - @staticmethod + @classmethod def clean_up_share( + cls, + session, dataset: Dataset, share: ShareObject, - target_environment: Environment + folder: DatasetStorageLocation, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + existing_shared_buckets: bool = False ): """ 1) deletes S3 access point for this share in this Dataset S3 Bucket @@ -179,21 +187,33 @@ def clean_up_share( ------- True if share is cleaned-up successfully """ - - clean_up = S3ShareManager.delete_access_point( + clean_up_folder = cls( + session, + dataset, + share, + folder, + source_environment, + target_environment, + source_env_group, + env_group, + existing_shared_buckets + ) + clean_up = 
clean_up_folder.delete_access_point( share=share, dataset=dataset ) + if clean_up: - S3ShareManager.delete_target_role_access_policy( - share=share, - dataset=dataset, - target_environment=target_environment - ) - S3ShareManager.delete_dataset_bucket_key_policy( + clean_up_folder.delete_target_role_access_policy( share=share, dataset=dataset, target_environment=target_environment ) + if not existing_shared_buckets: + clean_up_folder.delete_dataset_bucket_key_policy( + share=share, + dataset=dataset, + target_environment=target_environment + ) return True diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py new file mode 100644 index 000000000..57e8e069f --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py @@ -0,0 +1,171 @@ +import logging + +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket +from dataall.modules.dataset_sharing.db.enums import ShareItemStatus, ShareObjectActions, ShareItemActions +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository, ShareItemSM + + +log = logging.getLogger(__name__) + + +class ProcessS3BucketShare(S3BucketShareManager): + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + s3_bucket: DatasetBucket, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + + super().__init__( + session, + dataset, + share, + s3_bucket, + source_environment, + target_environment, + source_env_group, + env_group, + ) + + @classmethod + def process_approved_shares( + cls, + session, + dataset: Dataset, + share: ShareObject, + shared_buckets: [DatasetBucket], + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup + ) -> bool: + """ + 1) update_share_item_status with Start action + 2) manage_bucket_policy - grants permission in the bucket policy + 3) grant_target_role_access_policy == done + 4) update_dataset_bucket_key_policy == done + 5) update_share_item_status with Finish action == done + + Returns + ------- + True if share is granted successfully + """ + log.info( + '##### Starting S3 bucket share #######' + ) + success = True + for shared_bucket in shared_buckets: + sharing_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + shared_bucket.bucketUri, + ) + shared_item_SM = ShareItemSM(ShareItemStatus.Share_Approved.value) + new_state = shared_item_SM.run_transition(ShareObjectActions.Start.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + + sharing_bucket = cls( + session, + dataset, + share, + shared_bucket, + source_environment, + target_environment, + source_env_group, + env_group + ) + try: + sharing_bucket.grant_role_bucket_policy() + sharing_bucket.grant_s3_iam_access() + sharing_bucket.grant_dataset_bucket_key_policy() + new_state = shared_item_SM.run_transition(ShareItemActions.Success.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + + except 
Exception as e: + sharing_bucket.handle_share_failure(e) + new_state = shared_item_SM.run_transition(ShareItemActions.Failure.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + success = False + return success + + @classmethod + def process_revoked_shares( + cls, + session, + dataset: Dataset, + share: ShareObject, + revoked_buckets: [DatasetBucket], + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + existing_shared_folders: bool = False + ) -> bool: + """ + 1) update_share_item_status with Start action + 2) remove access from bucket policy + 3) remove access from key policy + 4) remove access from IAM role policy + 5) update_share_item_status with Finish action + + Returns + ------- + True if share is revoked successfully + False if revoke fails + """ + + log.info( + '##### Starting Revoking S3 bucket share #######' + ) + success = True + for revoked_bucket in revoked_buckets: + removing_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + revoked_bucket.bucketUri, + ) + + revoked_item_SM = ShareItemSM(ShareItemStatus.Revoke_Approved.value) + new_state = revoked_item_SM.run_transition(ShareObjectActions.Start.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + removing_bucket = cls( + session, + dataset, + share, + revoked_bucket, + source_environment, + target_environment, + source_env_group, + env_group + ) + try: + removing_bucket.delete_target_role_bucket_policy() + removing_bucket.delete_target_role_access_policy( + share=share, + target_bucket=revoked_bucket, + target_environment=target_environment + ) + if not existing_shared_folders: + removing_bucket.delete_target_role_bucket_key_policy( + share=share, + target_bucket=revoked_bucket, + target_environment=target_environment + ) + new_state = revoked_item_SM.run_transition(ShareItemActions.Success.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + + except Exception as e: + removing_bucket.handle_revoke_failure(e) + new_state = revoked_item_SM.run_transition(ShareItemActions.Failure.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + success = False + + return success diff --git a/backend/dataall/modules/datasets/api/dataset/input_types.py b/backend/dataall/modules/datasets/api/dataset/input_types.py index d238a8103..4310fb3b4 100644 --- a/backend/dataall/modules/datasets/api/dataset/input_types.py +++ b/backend/dataall/modules/datasets/api/dataset/input_types.py @@ -20,7 +20,7 @@ name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String) ), gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), - gql.Argument(name='stewards', type=gql.String), + gql.Argument(name='stewards', type=gql.String) ], ) @@ -102,6 +102,6 @@ name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String) ), gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), - gql.Argument(name='stewards', type=gql.String), + gql.Argument(name='stewards', type=gql.String) ], ) diff --git a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py index 46c34ea58..b44fdfbdc 100644 --- a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py +++ b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py @@ -20,6 +20,23 @@ class 
DatasetsPivotRole(PivotRoleStatementSet): """ def get_statements(self): statements = [ + # S3 Imported Buckets - restrict resources via bucket policies + iam.PolicyStatement( + sid='ImportedBuckets', + effect=iam.Effect.ALLOW, + actions=[ + 's3:List*', + 's3:GetBucket*', + 's3:GetLifecycleConfiguration', + 's3:GetObject', + 's3:PutBucketPolicy', + 's3:PutBucketTagging', + 's3:PutObject', + 's3:PutObjectAcl', + 's3:PutBucketOwnershipControls', + ], + resources=['arn:aws:s3:::*'], + ), # For dataset preview iam.PolicyStatement( sid='AthenaWorkgroupsDataset', diff --git a/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py new file mode 100644 index 000000000..31cfa2cdd --- /dev/null +++ b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py @@ -0,0 +1,41 @@ +import logging + +from dataall.modules.datasets_base.db.dataset_models import DatasetBucket, Dataset + +logger = logging.getLogger(__name__) + + +class DatasetBucketRepository: + + @staticmethod + def create_dataset_bucket( + session, + dataset: Dataset, + data: dict = None + ) -> DatasetBucket: + bucket = DatasetBucket( + datasetUri=dataset.datasetUri, + label=data.get('label'), + description=data.get('description', 'No description provided'), + tags=data.get('tags', []), + S3BucketName=dataset.S3BucketName, + AwsAccountId=dataset.AwsAccountId, + owner=dataset.owner, + region=dataset.region, + KmsAlias=dataset.KmsAlias, + imported=dataset.imported, + importedKmsKey=dataset.importedKmsKey, + ) + session.add(bucket) + session.commit() + return bucket + + @staticmethod + def delete_dataset_buckets(session, dataset_uri) -> bool: + buckets = ( + session.query(DatasetBucket) + .filter(DatasetBucket.datasetUri == dataset_uri) + .all() + ) + for bucket in buckets: + session.delete(bucket) diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 707e9fb91..09611bcc9 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -5,7 +5,6 @@ from dataall.base.db import exceptions from dataall.core.tasks.service_handlers import Worker from dataall.base.aws.sts import SessionHelper -from dataall.modules.dataset_sharing.aws.kms_client import KmsClient from dataall.base.context import get_context from dataall.core.environment.env_permission_checker import has_group_permission from dataall.core.environment.services.environment_service import EnvironmentService @@ -16,8 +15,10 @@ from dataall.core.stacks.db.stack_repositories import Stack from dataall.core.tasks.db.task_models import Task from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository +from dataall.modules.datasets.db.dataset_bucket_repositories import DatasetBucketRepository from dataall.modules.vote.db.vote_repositories import VoteRepository from dataall.base.db.exceptions import AWSResourceNotFound, UnauthorizedOperation +from dataall.modules.dataset_sharing.aws.kms_client import KmsClient from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_APPROVER @@ -54,22 +55,13 @@ def check_dataset_account(session, environment): def check_imported_resources(environment, data): kms_alias = data.get('KmsKeyAlias') if 
kms_alias not in [None, "Undefined", "", "SSE-S3"]: - key_exists = KmsClient(account_id=environment.AwsAccountId, region=environment.region).check_key_exists( - key_alias=f"alias/{kms_alias}" - ) - if not key_exists: - raise exceptions.AWSResourceNotFound( - action=IMPORT_DATASET, - message=f'KMS key with alias={kms_alias} cannot be found - Please check if KMS Key Alias exists in account {environment.AwsAccountId}', - ) - - key_id = KmsClient(account_id=environment.AwsAccountId, region=environment.region).get_key_id( + key_id = KmsClient(environment.AwsAccountId, environment.region).get_key_id( key_alias=f"alias/{kms_alias}" ) if not key_id: raise exceptions.AWSResourceNotFound( action=IMPORT_DATASET, - message=f'Data.all Environment Pivot Role does not have kms:DescribeKey Permission to KMS key with alias={kms_alias}', + message=f'KMS key with alias={kms_alias} cannot be found', ) return True @@ -92,6 +84,8 @@ def create_dataset(uri, admin_group, data: dict): data=data, ) + DatasetBucketRepository.create_dataset_bucket(session, dataset, data) + ResourcePolicy.attach_resource_policy( session=session, group=data['SamlAdminGroupName'], @@ -380,6 +374,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False): DatasetService.delete_dataset_term_links(session, uri) DatasetTableRepository.delete_dataset_tables(session, dataset.datasetUri) DatasetLocationRepository.delete_dataset_locations(session, dataset.datasetUri) + DatasetBucketRepository.delete_dataset_buckets(session, dataset.datasetUri) KeyValueTag.delete_key_value_tags(session, dataset.datasetUri, 'dataset') VoteRepository.delete_votes(session, dataset.datasetUri, 'dataset') diff --git a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py b/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py index d0c874a4c..6a83fc5c8 100644 --- a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py +++ b/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py @@ -25,8 +25,6 @@ root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -# TODO: review this task usage and remove if not needed - class DatasetSubscriptionService: def __init__(self, engine): @@ -148,12 +146,12 @@ def publish_sns_message( response = sns_client.publish_dataset_message(message) log.info(f'SNS update publish response {response}') - notifications = ShareNotificationService( + notifications = ShareNotificationService.notify_new_data_available_from_owners( session=session, dataset=dataset, - share=share_object - ).notify_new_data_available_from_owners(s3_prefix=prefix) - + share=share_object, + s3_prefix=prefix, + ) log.info(f'Notifications for share owners {notifications}') except ClientError as e: diff --git a/backend/dataall/modules/datasets_base/db/dataset_models.py b/backend/dataall/modules/datasets_base/db/dataset_models.py index a5fcf1260..dd12746ad 100644 --- a/backend/dataall/modules/datasets_base/db/dataset_models.py +++ b/backend/dataall/modules/datasets_base/db/dataset_models.py @@ -141,3 +141,23 @@ class Dataset(Resource, Base): @classmethod def uri(cls): return cls.datasetUri + + +class DatasetBucket(Resource, Base): + __tablename__ = 'dataset_bucket' + datasetUri = Column(String, nullable=False) + bucketUri = Column(String, primary_key=True, default=utils.uuid('bucket')) + AwsAccountId = Column(String, nullable=False) + S3BucketName = Column(String, nullable=False) + region = Column(String, default='eu-west-1') + partition = Column(String, default='aws') + KmsAlias = 
Column(String, nullable=False) + imported = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + userRoleForStorageBucket = query_expression() + projectPermission = query_expression() + environmentEndPoint = query_expression() + + @classmethod + def uri(cls): + return cls.bucketUri diff --git a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py new file mode 100644 index 000000000..9142418f8 --- /dev/null +++ b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py @@ -0,0 +1,206 @@ +"""add table for buckets + +Revision ID: 8c79fb896983 +Revises: 5781fdf1f877 +Create Date: 2023-09-06 12:01:53.841149 + +""" +import os +from sqlalchemy import orm, Column, String, Boolean, ForeignKey, DateTime, and_, inspect +from sqlalchemy.orm import query_expression +from sqlalchemy.ext.declarative import declarative_base +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +from dataall.base.db import get_engine, has_table +from dataall.base.db import utils, Resource +from dataall.modules.dataset_sharing.db.enums import ShareObjectStatus, ShareableType +from datetime import datetime + +# revision identifiers, used by Alembic. +revision = '8c79fb896983' +down_revision = '5781fdf1f877' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class Dataset(Resource, Base): + __tablename__ = 'dataset' + environmentUri = Column(String, ForeignKey("environment.environmentUri"), nullable=False) + organizationUri = Column(String, nullable=False) + datasetUri = Column(String, primary_key=True, default=utils.uuid('dataset')) + region = Column(String, default='eu-west-1') + AwsAccountId = Column(String, nullable=False) + S3BucketName = Column(String, nullable=False) + GlueDatabaseName = Column(String, nullable=False) + GlueCrawlerName = Column(String) + GlueCrawlerSchedule = Column(String) + GlueProfilingJobName = Column(String) + GlueProfilingTriggerSchedule = Column(String) + GlueProfilingTriggerName = Column(String) + GlueDataQualityJobName = Column(String) + GlueDataQualitySchedule = Column(String) + GlueDataQualityTriggerName = Column(String) + IAMDatasetAdminRoleArn = Column(String, nullable=False) + IAMDatasetAdminUserArn = Column(String, nullable=False) + KmsAlias = Column(String, nullable=False) + userRoleForDataset = query_expression() + userRoleInEnvironment = query_expression() + isPublishedInEnvironment = query_expression() + projectPermission = query_expression() + language = Column(String, nullable=False, default='English') + topics = Column(postgresql.ARRAY(String), nullable=True) + confidentiality = Column(String, nullable=False, default='Unclassified') + tags = Column(postgresql.ARRAY(String)) + inProject = query_expression() + + bucketCreated = Column(Boolean, default=False) + glueDatabaseCreated = Column(Boolean, default=False) + iamAdminRoleCreated = Column(Boolean, default=False) + iamAdminUserCreated = Column(Boolean, default=False) + kmsAliasCreated = Column(Boolean, default=False) + lakeformationLocationCreated = Column(Boolean, default=False) + bucketPolicyCreated = Column(Boolean, default=False) + + # bookmarked = Column(Integer, default=0) + # upvotes=Column(Integer, default=0) + + businessOwnerEmail = Column(String, nullable=True) + businessOwnerDelegationEmails = Column(postgresql.ARRAY(String), nullable=True) + stewards = Column(String, nullable=True) + + SamlAdminGroupName = Column(String, nullable=True) 
+ + importedS3Bucket = Column(Boolean, default=False) + importedGlueDatabase = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + importedAdminRole = Column(Boolean, default=False) + imported = Column(Boolean, default=False) + + +class DatasetBucket(Resource, Base): + __tablename__ = 'dataset_bucket' + datasetUri = Column(String, nullable=False) + bucketUri = Column(String, primary_key=True, default=utils.uuid('bucket')) + AwsAccountId = Column(String, nullable=False) + S3BucketName = Column(String, nullable=False) + region = Column(String, default='eu-west-1') + partition = Column(String, default='aws') + KmsAlias = Column(String, nullable=False) + imported = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + userRoleForStorageBucket = query_expression() + projectPermission = query_expression() + environmentEndPoint = query_expression() + + @classmethod + def uri(cls): + return cls.bucketUri + + +class ShareObjectItem(Base): + __tablename__ = 'share_object_item' + shareUri = Column(String, nullable=False) + shareItemUri = Column( + String, default=utils.uuid('shareitem'), nullable=False, primary_key=True + ) + itemType = Column(String, nullable=False) + itemUri = Column(String, nullable=False) + itemName = Column(String, nullable=False) + permission = Column(String, nullable=True) + created = Column(DateTime, nullable=False, default=datetime.now) + updated = Column(DateTime, nullable=True, onupdate=datetime.now) + deleted = Column(DateTime, nullable=True) + owner = Column(String, nullable=False) + GlueDatabaseName = Column(String, nullable=True) + GlueTableName = Column(String, nullable=True) + S3AccessPointName = Column(String, nullable=True) + status = Column(String, nullable=False, default=ShareObjectStatus.Draft.value) + action = Column(String, nullable=True) + + +def upgrade(): + try: + envname = os.getenv('envname', 'local') + print('ENVNAME', envname) + engine = get_engine(envname=envname).engine + bind = op.get_bind() + session = orm.Session(bind=bind) + datasets: [Dataset] = session.query(Dataset).all() + if not has_table('dataset_bucket', engine): + op.create_table( + 'dataset_bucket', + sa.Column('bucketUri', sa.String(), nullable=False), + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('KmsAlias', sa.String(), nullable=False), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.Column('importedKmsKey', sa.Boolean(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('partition', sa.String(), nullable=False, default='aws'), + sa.ForeignKeyConstraint(columns=['datasetUri'], refcolumns=['dataset.datasetUri']), + sa.PrimaryKeyConstraint('bucketUri'), + ) + print('Creating a new dataset_bucket row for each existing dataset...') + for dataset in datasets: + dataset_bucket = DatasetBucket( + name=dataset.S3BucketName, + datasetUri=dataset.datasetUri, + AwsAccountId=dataset.AwsAccountId, + S3BucketName=dataset.S3BucketName, + 
region=dataset.region, + label=dataset.label, + description=dataset.label, + tags=dataset.tags, + owner=dataset.owner, + KmsAlias=dataset.KmsAlias, + imported=dataset.imported, + importedKmsKey=dataset.importedKmsKey, + ) + session.add(dataset_bucket) + session.flush() # flush to get the bucketUri + + for dataset in datasets: + shared_bucket_object: ShareObjectItem = session.query(ShareObjectItem).filter( + and_( + ShareObjectItem.itemType == ShareableType.S3Bucket.value, + ShareObjectItem.itemUri == dataset.datasetUri, + ) + ).first() + dataset_bucket: DatasetBucket = session.query(DatasetBucket).filter( + DatasetBucket.datasetUri == dataset.datasetUri + ).first() + if shared_bucket_object is not None: + shared_bucket_object.itemUri = dataset_bucket.bucketUri + shared_bucket_object.itemName = dataset_bucket.S3BucketName + + if column_exists('dataset', 'dataSharingModel'): + op.drop_column('dataset', 'dataSharingModel') + session.commit() + + except Exception as exception: + print('Failed to upgrade due to:', exception) + raise exception + + +def column_exists(table_name, column_name): + bind = op.get_context().bind + insp = inspect(bind) + columns = insp.get_columns(table_name) + return any(c["name"] == column_name for c in columns) + + +def downgrade(): + op.drop_table('dataset_bucket') diff --git a/frontend/src/modules/Shares/components/AddShareItemModal.js b/frontend/src/modules/Shares/components/AddShareItemModal.js index a21b55d13..9ee016c33 100644 --- a/frontend/src/modules/Shares/components/AddShareItemModal.js +++ b/frontend/src/modules/Shares/components/AddShareItemModal.js @@ -20,6 +20,7 @@ import { Defaults, Pager, Scrollbar } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; import { useClient } from 'services'; import { addSharedItem, getShareObject } from '../services'; +import { generateShareItemLabel } from '../../../utils/share'; export const AddShareItemModal = (props) => { const client = useClient(); @@ -144,7 +145,7 @@ export const AddShareItemModal = (props) => { sharedItems.nodes.map((item) => ( - {item.itemType === 'Table' ? 'Table' : 'Folder'} + {generateShareItemLabel(item.itemType)} {item.itemName} diff --git a/frontend/src/modules/Shares/components/RevokeShareItemsModal.js b/frontend/src/modules/Shares/components/RevokeShareItemsModal.js index 2aa066df8..ded95ce8c 100644 --- a/frontend/src/modules/Shares/components/RevokeShareItemsModal.js +++ b/frontend/src/modules/Shares/components/RevokeShareItemsModal.js @@ -10,6 +10,7 @@ import { Defaults } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; import { useClient } from 'services'; import { getShareObject, revokeItemsShareObject } from '../services'; +import { generateShareItemLabel } from '../../../utils/share'; export const RevokeShareItemsModal = (props) => { const client = useClient(); @@ -40,7 +41,7 @@ export const RevokeShareItemsModal = (props) => { response.data.getShareObject.items.nodes.map((item) => ({ id: item.shareItemUri, name: item.itemName, - type: item.itemType === 'StorageLocation' ? 
'Folder' : 'Table', + type: generateShareItemLabel(item.itemType), status: item.status })) ); diff --git a/frontend/src/utils/share.js b/frontend/src/utils/share.js new file mode 100644 index 000000000..c52099d0e --- /dev/null +++ b/frontend/src/utils/share.js @@ -0,0 +1,10 @@ +export const generateShareItemLabel = (itemType): string => { + switch (itemType) { + case 'Table': + return 'Table'; + case 'S3Bucket': + return 'S3Bucket'; + case 'StorageLocation': + return 'Folder'; + } +}; diff --git a/tests/modules/datasets/conftest.py b/tests/modules/datasets/conftest.py index a4bc39a4d..efec9ab6e 100644 --- a/tests/modules/datasets/conftest.py +++ b/tests/modules/datasets/conftest.py @@ -6,13 +6,12 @@ from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup from dataall.core.organizations.db.organization_models import Organization from dataall.core.permissions.db.resource_policy_repositories import ResourcePolicy -from dataall.core.stacks.db.stack_models import Stack from dataall.modules.dataset_sharing.db.enums import ShareableType, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_REQUESTER, SHARE_OBJECT_APPROVER from dataall.modules.datasets.api.dataset.enums import ConfidentialityClassification from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ -from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation, DatasetBucket @pytest.fixture(scope='module', autouse=True) @@ -244,7 +243,7 @@ def dataset_model(db): def factory( organization: Organization, environment: Environment, - label: str, + label: str ) -> Dataset: with db.scoped_session() as session: dataset = Dataset( diff --git a/tests/modules/datasets/tasks/conftest.py b/tests/modules/datasets/tasks/conftest.py index 43f888fe6..7503660fc 100644 --- a/tests/modules/datasets/tasks/conftest.py +++ b/tests/modules/datasets/tasks/conftest.py @@ -1,11 +1,10 @@ import pytest -from dataall.core.cognito_groups.db.cognito_group_models import Group from dataall.core.organizations.db.organization_models import Organization from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup from dataall.modules.dataset_sharing.db.enums import ShareableType, ShareItemStatus, ShareObjectStatus, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject -from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset +from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket @pytest.fixture(scope="module") @@ -14,6 +13,7 @@ def factory( organization: Organization, environment: Environment, label: str, + imported: bool = False ) -> Dataset: with db.scoped_session() as session: dataset = Dataset( @@ -31,6 +31,8 @@ def factory( region=environment.region, IAMDatasetAdminUserArn=f"arn:aws:iam::{environment.AwsAccountId}:user/dataset", IAMDatasetAdminRoleArn=f"arn:aws:iam::{environment.AwsAccountId}:role/dataset", + imported=imported, + importedKmsKey=imported ) session.add(dataset) session.commit() @@ -83,6 +85,35 @@ def factory(dataset: Dataset, label: str) -> DatasetTable: yield factory +@pytest.fixture(scope='module', 
autouse=True) +def bucket(db): + cache = {} + + def factory(dataset: Dataset, name) -> DatasetBucket: + key = f'{dataset.datasetUri}-{name}' + if cache.get(key): + return cache.get(key) + with db.scoped_session() as session: + bucket = DatasetBucket( + name=name, + label=name, + owner=dataset.owner, + datasetUri=dataset.datasetUri, + region=dataset.region, + AwsAccountId=dataset.AwsAccountId, + S3BucketName=dataset.S3BucketName, + KmsAlias=dataset.KmsAlias, + imported=dataset.imported, + importedKmsKey=dataset.importedKmsKey, + ) + session.add(bucket) + session.commit() + + return bucket + + yield factory + + @pytest.fixture(scope="module") def share(db): def factory( @@ -99,6 +130,7 @@ def factory( principalType=PrincipalType.Group.value, principalIAMRoleName=env_group.environmentIAMRoleName, status=ShareObjectStatus.Approved.value, + groupUri=env_group.groupUri, ) session.add(share) session.commit() @@ -150,3 +182,25 @@ def factory( return share_item yield factory + + +@pytest.fixture(scope="module") +def share_item_bucket(db): + def factory( + share: ShareObject, + bucket: DatasetBucket, + ) -> ShareObjectItem: + with db.scoped_session() as session: + share_item = ShareObjectItem( + shareUri=share.shareUri, + owner="alice", + itemUri=bucket.bucketUri, + itemType=ShareableType.StorageLocation.value, + itemName=bucket.name, + status=ShareItemStatus.Share_Approved.value, + ) + session.add(share_item) + session.commit() + return share_item + + yield factory diff --git a/tests/modules/datasets/tasks/test_lf_share_manager.py b/tests/modules/datasets/tasks/test_lf_share_manager.py index fd76ba0b1..78a289d9f 100644 --- a/tests/modules/datasets/tasks/test_lf_share_manager.py +++ b/tests/modules/datasets/tasks/test_lf_share_manager.py @@ -660,7 +660,7 @@ def test_revoke_external_account_access_on_source_account( return_value=boto3.Session(), ) - processor_cross_account.revoke_external_account_access_on_source_account(table1.GlueDatabaseName, table1.GlueTableName) + processor_cross_account.revoke_external_account_access_on_source_account() # Then lf_mock.assert_called_once() diff --git a/tests/modules/datasets/tasks/test_s3_share_manager.py b/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py similarity index 90% rename from tests/modules/datasets/tasks/test_s3_share_manager.py rename to tests/modules/datasets/tasks/test_s3_access_point_share_manager.py index febea47f9..598c06f2c 100644 --- a/tests/modules/datasets/tasks/test_s3_share_manager.py +++ b/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py @@ -11,7 +11,7 @@ from dataall.modules.dataset_sharing.aws.s3_client import S3ControlClient from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem -from dataall.modules.dataset_sharing.services.share_managers import S3ShareManager +from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset SOURCE_ENV_ACCOUNT = "111111111111" @@ -127,7 +127,7 @@ def admin_ap_delegation_bucket_policy(): "Resource": "arn:aws:s3:::dataall-iris-test-120922-4s47wv71", }, { - "Sid": "AllowAllToAdmin", + "Sid": "DelegateAccessToAccessPoint", "Effect": "Allow", "Principal": "*", "Action": "s3:*", @@ -143,7 +143,7 @@ def admin_ap_delegation_bucket_policy(): def mock_s3_client(mocker): mock_client = MagicMock() mocker.patch( - 'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.S3Client', + 
'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.S3Client', mock_client ) mock_client.create_bucket_policy.return_value = None @@ -153,7 +153,7 @@ def mock_s3_client(mocker): def mock_s3_control_client(mocker): mock_client = MagicMock() mocker.patch( - 'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.S3ControlClient', + 'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.S3ControlClient', mock_client ) @@ -170,7 +170,7 @@ def mock_s3_control_client(mocker): def mock_kms_client(mocker): mock_client = MagicMock() mocker.patch( - 'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.KmsClient', + 'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.KmsClient', mock_client ) mock_client.put_key_policy.return_value = None @@ -192,6 +192,16 @@ def target_dataset_access_control_policy(request): f"arn:aws:s3:datasetregion:{request.param[1]}:accesspoint/{request.param[2]}", f"arn:aws:s3:datasetregion:{request.param[1]}:accesspoint/{request.param[2]}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*" + ] } ], } @@ -229,7 +239,7 @@ def test_manage_bucket_policy_no_policy( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -250,7 +260,7 @@ def test_manage_bucket_policy_no_policy( # Then print(f"Bucket policy generated {created_bucket_policy}") - sid_list = [statement.get("Sid") for statement in + sid_list = [statement.get("Sid") for statement in created_bucket_policy["Statement"] if statement.get("Sid")] assert "AllowAllToAdmin" in sid_list @@ -278,7 +288,7 @@ def test_manage_bucket_policy_existing_policy( s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -296,7 +306,7 @@ def test_manage_bucket_policy_existing_policy( s3_client.create_bucket_policy.assert_not_called() -@pytest.mark.parametrize("target_dataset_access_control_policy", +@pytest.mark.parametrize("target_dataset_access_control_policy", ([("bucketname", "aws_account_id", "access_point_name")]), indirect=True) def test_grant_target_role_access_policy_existing_policy_bucket_not_included( @@ -326,8 +336,11 @@ def test_grant_target_role_access_policy_existing_policy_bucket_not_included( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -349,6 +362,9 @@ def test_grant_target_role_access_policy_existing_policy_bucket_not_included( # Assert that bucket_name is inside the resource array of policy object assert location1.S3BucketName in ",".join(policy_object["Statement"][0]["Resource"]) + assert f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key" in \ + iam_policy["Statement"][1]["Resource"] \ + and "kms:*" in iam_policy["Statement"][1]["Action"] @pytest.mark.parametrize("target_dataset_access_control_policy", ([("dataset1", SOURCE_ENV_ACCOUNT, "test")]), indirect=True) @@ -379,8 +395,11 @@ def test_grant_target_role_access_policy_existing_policy_bucket_included( return_value=None, ) + kms_client = 
mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -395,7 +414,7 @@ def test_grant_target_role_access_policy_existing_policy_bucket_included( manager.grant_target_role_access_policy() # Then - iam_update_role_policy_mock.assert_not_called() + iam_update_role_policy_mock.assert_called() def test_grant_target_role_access_policy_test_no_policy( @@ -434,12 +453,25 @@ def test_grant_target_role_access_policy_test_no_policy( f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{share_item_folder1.S3AccessPointName}", f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{share_item_folder1.S3AccessPointName}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key/*" + ] } ], } + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -455,7 +487,7 @@ def test_grant_target_role_access_policy_test_no_policy( # Then iam_update_role_policy_mock.assert_called_with( - target_environment.AwsAccountId, share1.principalIAMRoleName, + target_environment.AwsAccountId, share1.principalIAMRoleName, "targetDatasetAccessControlPolicy", json.dumps(expected_policy) ) @@ -498,7 +530,7 @@ def test_update_dataset_bucket_key_policy_with_env_admin( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -614,7 +646,7 @@ def test_update_dataset_bucket_key_policy_without_env_admin( } with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -672,7 +704,7 @@ def test_manage_access_point_and_policy_1( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -740,7 +772,7 @@ def test_manage_access_point_and_policy_2( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -805,7 +837,7 @@ def test_manage_access_point_and_policy_3( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -869,7 +901,7 @@ def test_delete_access_point_policy_with_env_admin_one_prefix( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -928,7 +960,7 @@ def test_delete_access_point_policy_with_env_admin_multiple_prefix( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -974,7 +1006,7 @@ def test_dont_delete_access_point_with_policy( s3_control_client().get_access_point_policy.return_value = json.dumps(existing_ap_policy) # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1014,7 +1046,7 @@ def test_delete_access_point_without_policy( # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1055,9 +1087,19 
@@ def test_delete_target_role_access_policy_no_remaining_statement( "Resource": [ f"arn:aws:s3:::{location1.S3BucketName}", f"arn:aws:s3:::{location1.S3BucketName}/*", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}/*", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key/*" + ] } ], } @@ -1077,9 +1119,12 @@ def test_delete_target_role_access_policy_no_remaining_statement( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1122,9 +1167,21 @@ def test_delete_target_role_access_policy_with_remaining_statement( "arn:aws:s3:::UNRELATED_BUCKET_ARN", f"arn:aws:s3:::{location1.S3BucketName}", f"arn:aws:s3:::{location1.S3BucketName}/*", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}/*", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}/*", + ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key/*" + ] } ], } @@ -1136,6 +1193,16 @@ def test_delete_target_role_access_policy_with_remaining_statement( "Effect": "Allow", "Action": ["s3:*"], "Resource": ["arn:aws:s3:::UNRELATED_BUCKET_ARN"], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*", + ] } ], } @@ -1155,9 +1222,12 @@ def test_delete_target_role_access_policy_with_remaining_statement( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1245,7 +1315,7 @@ def test_delete_dataset_bucket_key_policy_existing_policy_with_additional_target ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1312,7 +1382,7 @@ def test_delete_dataset_bucket_key_policy_existing_policy_with_no_additional_tar ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1331,4 +1401,4 @@ def 
test_delete_dataset_bucket_key_policy_existing_policy_with_no_additional_tar kms_client().put_key_policy.assert_called_with( kms_client().get_key_id.return_value, json.dumps(remaining_policy) - ) + ) \ No newline at end of file diff --git a/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py b/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py new file mode 100644 index 000000000..0eb35aa2f --- /dev/null +++ b/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py @@ -0,0 +1,1614 @@ +import pytest +import json +from unittest.mock import MagicMock + +from typing import Callable + +from dataall.core.cognito_groups.db.cognito_group_models import Group +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.core.organizations.db.organization_models import Organization +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket + +SOURCE_ENV_ACCOUNT = "111111111111" +SOURCE_ENV_ROLE_NAME = "dataall-ProducerEnvironment-i6v1v1c2" + +TARGET_ACCOUNT_ENV = "222222222222" +TARGET_ACCOUNT_ENV_ROLE_NAME = "dataall-ConsumersEnvironment-r71ucp4m" + +DATAALL_READ_ONLY_SID = "DataAll-Bucket-ReadOnly" +DATAALL_ALLOW_ALL_ADMINS_SID = "AllowAllToAdmin" + + +@pytest.fixture(scope="module") +def source_environment(env: Callable, org_fixture: Organization, group: Group): + source_environment = env( + org=org_fixture, + account=SOURCE_ENV_ACCOUNT, + envname="source_environment", + owner=group.owner, + group=group.name, + role=SOURCE_ENV_ROLE_NAME, + ) + yield source_environment + + +@pytest.fixture(scope="module") +def source_environment_group(environment_group: Callable, source_environment: Environment, group: Group): + source_environment_group = environment_group(source_environment, group.name) + yield source_environment_group + + +@pytest.fixture(scope="module") +def target_environment(env: Callable, org_fixture: Organization, group2: Group): + target_environment = env( + org=org_fixture, + account=TARGET_ACCOUNT_ENV, + envname="target_environment", + owner=group2.owner, + group=group2.name, + role=TARGET_ACCOUNT_ENV_ROLE_NAME, + ) + yield target_environment + + +@pytest.fixture(scope="module") +def target_environment_group(environment_group: Callable, target_environment: Environment, group2: Group): + target_environment_group = environment_group(target_environment, group2.name) + yield target_environment_group + + +@pytest.fixture(scope="module") +def dataset_imported(create_dataset: Callable, org_fixture: Organization, source_environment: Environment): + dataset_imported = create_dataset(org_fixture, source_environment, "dataset_imported", True) + yield dataset_imported + + +@pytest.fixture(scope="module") +def dataset2(create_dataset: Callable, org_fixture: Organization, source_environment: Organization): + dataset2 = create_dataset(org_fixture, source_environment, "dataset2") + yield dataset2 + + +@pytest.fixture(scope="module") +def bucket2(bucket: Callable, dataset2: Dataset) -> DatasetBucket: + yield bucket(dataset2, "bucket2") + + +@pytest.fixture(scope="module") +def bucket3(bucket: Callable, dataset_imported: Dataset) -> DatasetBucket: + yield bucket(dataset_imported, "bucket3") + + +@pytest.fixture(scope="module") +def share2(share: Callable, dataset2: Dataset, + target_environment: Environment, + target_environment_group: 
EnvironmentGroup) -> ShareObject: + share2 = share(dataset2, target_environment, target_environment_group) + yield share2 + + +@pytest.fixture(scope="module") +def share3(share: Callable, dataset_imported: Dataset, + target_environment: Environment, + target_environment_group: EnvironmentGroup) -> ShareObject: + share3 = share(dataset_imported, target_environment, target_environment_group) + yield share3 + + +@pytest.fixture(scope="function") +def base_bucket_policy(dataset2): + bucket_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Principal": {"AWS": "*"}, + "Action": "s3:*", + "Resource": [f"arn:aws:s3:::{dataset2.S3BucketName}", f"arn:aws:s3:::{dataset2.S3BucketName}/*"], + "Condition": {"Bool": {"aws:SecureTransport": "false"}}, + } + ], + } + return bucket_policy + + +def base_kms_key_policy(target_environment_samlGrpName: str): + kms_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": f"{target_environment_samlGrpName}", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": {"StringLike": {"aws:userId": f"{target_environment_samlGrpName}:*"}}, + } + ], + } + return kms_policy + + +def complete_access_bucket_policy(target_requester_arn, s3_bucket_name, owner_roleId): + bucket_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Principal": { + "AWS": "*" + }, + "Sid": "RequiredSecureTransport", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "Bool": { + "aws:SecureTransport": "false" + } + } + }, + { + "Sid": f"{DATAALL_ALLOW_ALL_ADMINS_SID}", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": owner_roleId + } + } + }, + { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + f"{target_requester_arn}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ] + } + ] + } + + return bucket_policy + + +def mock_s3_client(mocker): + mock_client = MagicMock() + mocker.patch( + 'dataall.modules.dataset_sharing.services.share_managers.s3_bucket_share_manager.S3Client', + mock_client + ) + mock_client.create_bucket_policy.return_value = None + return mock_client + + +def mock_kms_client(mocker): + mock_client = MagicMock() + mocker.patch( + 'dataall.modules.dataset_sharing.services.share_managers.s3_bucket_share_manager.KmsClient', + mock_client + ) + mock_client.put_key_policy.return_value = None + return mock_client + + +# For below test cases, dataset2, share2, src, target env and src group , env group remain the same +def test_grant_role_bucket_policy_with_no_policy_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + bucket2, + db, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # No Bucket policy. 
A Default bucket policy should be formed with DataAll-Bucket-ReadOnly, AllowAllToAdmin & RequiredSecureTransport Sids + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = None + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_ids", + return_value=[1, 2, 3], + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy and it should be the same + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + # Check all the Sids are present + # Check that the S3 bucket resources are also present + assert f"{DATAALL_ALLOW_ALL_ADMINS_SID}" in modified_bucket_policy["Statement"][0]["Sid"] + assert modified_bucket_policy["Statement"][0]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}', + f'arn:aws:s3:::{dataset2.S3BucketName}/*'] + assert modified_bucket_policy["Statement"][0]["Condition"]["StringLike"]["aws:userId"] == ['1:*', '2:*', '3:*'] + assert "RequiredSecureTransport" in modified_bucket_policy["Statement"][1]["Sid"] + assert modified_bucket_policy["Statement"][1]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}', + f'arn:aws:s3:::{dataset2.S3BucketName}/*'] + assert f"{DATAALL_READ_ONLY_SID}" in modified_bucket_policy["Statement"][2]["Sid"] + assert modified_bucket_policy["Statement"][2]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}', + f'arn:aws:s3:::{dataset2.S3BucketName}/*'] + assert modified_bucket_policy["Statement"][2]["Principal"]["AWS"] == [ + f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}"] + + +def test_grant_role_bucket_policy_with_default_complete_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # Bucket Policy containing required "AllowAllToAdmin" and "DataAll-Bucket-ReadOnly" Sid's + # Bucket Policy shouldn't be modified after calling "grant_role_bucket_policy" function + + target_arn = f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + + bucket_policy = complete_access_bucket_policy(target_arn, + dataset2.S3BucketName, "ABNCSJ81982393") + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy and it should be the same + created_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # Check if nothing is removed from the policy and is the policy remains the same + for policy in created_bucket_policy["Statement"]: + assert policy["Sid"] in json.dumps(bucket_policy) + + +def test_grant_role_bucket_policy_with_policy_and_no_allow_owner_sid_and_no_read_only_sid( + mocker, + source_environment_group, + 
target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # base bucket policy + # Check if both "AllowAllToAdmin" and "DataAll-Bucket-ReadOnly" Sid's Statements are added to the policy + + bucket_policy = base_bucket_policy + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_ids", + return_value=[1, 2, 3], + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # AllowToAdmin, DataAll-Bucket-ReadOnly Sid's should be attached now + for policy in modified_bucket_policy["Statement"]: + if "Sid" in policy: + assert policy["Sid"] in [f"{DATAALL_ALLOW_ALL_ADMINS_SID}", f"{DATAALL_READ_ONLY_SID}"] + + +def test_grant_role_bucket_policy_with_another_read_only_role( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given base bucket policy with "DataAll-Bucket-ReadOnly" + bucket_policy = base_bucket_policy + + target_arn = f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + + # Append a policy for read only role + bucket_policy["Statement"].append( + { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + "SomeTargetResourceArn" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::someS3Bucket", + f"arn:aws:s3:::someS3Bucket/*" + ] + }) + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_ids", + return_value=[1, 2, 3], + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy and it should be the same + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # AllowToAdmin Sid should be attached now. 
Also DataAll-Bucket-ReadOnly Sid should be present + for policy in modified_bucket_policy["Statement"]: + if "Sid" in policy: + assert policy["Sid"] in [f"{DATAALL_ALLOW_ALL_ADMINS_SID}", f"{DATAALL_READ_ONLY_SID}"] + + # Check if the principal was appended and not overridden into the DataAll-Bucket-ReadOnly + assert len(modified_bucket_policy["Statement"][1]["Principal"]["AWS"]) == 2 + assert modified_bucket_policy["Statement"][1]["Principal"]["AWS"][0] == "SomeTargetResourceArn" + + +def test_grant_s3_iam_access_with_no_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # There is not existing IAM policy in the requesters account for the dataset's S3bucket + # Check if the update_role_policy func is called and policy statements are added + + mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=None) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_s3_iam_access() + + iam_update_role_policy_mock.assert_called() + + iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + # Assert if the IAM role policy with S3 and KMS permissions was created + assert len(iam_policy["Statement"]) == 2 + assert len(iam_policy["Statement"][0]["Resource"]) == 2 + assert len(iam_policy["Statement"][1]["Resource"]) == 2 + assert f"arn:aws:s3:::{dataset2.S3BucketName}" in iam_policy["Statement"][0]["Resource"] and "s3:*" in iam_policy["Statement"][0]["Action"] + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" in \ + iam_policy["Statement"][1]["Resource"] \ + and "kms:*" in iam_policy["Statement"][1]["Action"] + + +def test_grant_s3_iam_access_with_policy_and_target_resources_not_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given policy with some other bucket as resource + # Check if the correct resource is attached/appended + + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::S3Bucket", + f"arn:aws:s3:::S3Bucket/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:12121121121:key/some-kms-key", + f"arn:aws:kms:us-east-1:12121121121:key/some-kms-key/*" + ] + } + ] + } + + mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=policy) + + assert len(policy["Statement"]) == 2 + assert len(policy["Statement"][0]["Resource"]) == 2 + assert len(policy["Statement"][1]["Resource"]) == 2 + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + 
target_environment_group, + ) + + manager.grant_s3_iam_access() + + iam_update_role_policy_mock.assert_called() + + iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + # Assert that new resources were appended + assert len(policy["Statement"]) == 2 + assert len(iam_policy["Statement"][0]["Resource"]) == 4 + assert f'arn:aws:s3:::{dataset2.S3BucketName}' in iam_policy["Statement"][0]["Resource"] + assert len(iam_policy["Statement"][1]["Resource"]) == 4 + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" in iam_policy["Statement"][1]["Resource"] + + +# Tests to check if +def test_grant_s3_iam_access_with_complete_policy_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given complete policy present with required target resources + # Check if policy created after calling function and the existing Policy is same + + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key/*" + ] + } + ] + } + + mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=policy) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_s3_iam_access() + + # Assert that the IAM Policy is the same as the existing complete policy + iam_update_role_policy_mock.assert_called() + + created_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + assert len(created_iam_policy["Statement"]) == 2 + assert policy["Statement"][0]["Resource"] == created_iam_policy["Statement"][0]["Resource"] and policy["Statement"][0]["Action"] == created_iam_policy["Statement"][0]["Action"] + assert policy["Statement"][1]["Resource"] == created_iam_policy["Statement"][1]["Resource"] and policy["Statement"][1]["Action"] == \ + created_iam_policy["Statement"][1]["Action"] + + +def test_grant_dataset_bucket_key_policy_with_complete_policy_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given complete existing policy + # Check if KMS.put_key_policy is not called + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, 
+ target_environment_group, + ) + + manager.grant_dataset_bucket_key_policy() + + kms_client().put_key_policy.assert_not_called() + + +def test_grant_dataset_bucket_key_policy_with_target_requester_id_absent( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given policy where target_requester is not present + # Check if KMS.put_key_policy is called and check if the policy is modified + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + existing_key_policy = base_kms_key_policy("OtherTargetSamlId") + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_dataset_bucket_key_policy() + + kms_client().put_key_policy.assert_called() + + kms_key_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(kms_key_policy["Statement"]) == 2 + assert kms_key_policy["Statement"][1]["Sid"] == target_environment.SamlGroupName + assert kms_key_policy["Statement"][1]["Action"] == "kms:Decrypt" + assert target_environment.SamlGroupName in kms_key_policy["Statement"][1]["Condition"]["StringLike"]["aws:userId"] + +# Test Case to check if the IAM Role is updated +def test_grant_dataset_bucket_key_policy_and_default_bucket_key_policy( + mocker, + source_environment_group, + target_environment_group, + dataset_imported, + db, + share3: ShareObject, + bucket3, + source_environment: Environment, + target_environment: Environment + ): + # Given + # Dataset is imported and it doesn't have Imported KMS Key + # Mocking KMS key function - > Check if not called + # Mocking KMS Tags Functions -> Check if not called + + existing_key_policy = base_kms_key_policy("OtherTargetSamlId") + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + # dataset2 should not have importedKey to simulate that while importing the dataset a key was not added + bucket3.importedKmsKey = False + session.add(bucket3) + + manager.grant_dataset_bucket_key_policy() + + # Assert that when a dataset is imported and doesn't have importedKey, kms policy function are not triggered + kms_client().get_key_policy.assert_not_called() + kms_client().put_key_policy.assert_not_called() + + bucket3.importedKmsKey = True + session.add(bucket3) + + +def test_grant_dataset_bucket_key_policy_with_imported( + mocker, + source_environment_group, + target_environment_group, + dataset_imported, + bucket3, + db, + share3: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # Dataset is imported and it has Imported KMS Key + # Mocking KMS key function + # Mocking KMS Tags 
Functions + # Check if the bucket policy is modified and the targetResource is added + + existing_key_policy = base_kms_key_policy("OtherTargetSamlId") + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_dataset_bucket_key_policy() + + # Assert that when a dataset is imported and has importedKey + # policy is fetched and the target requester id SID is attached to it + kms_client().get_key_policy.assert_called() + kms_client().put_key_policy.assert_called() + updated_bucket_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(updated_bucket_policy["Statement"]) == 2 + assert updated_bucket_policy["Statement"][1]["Sid"] == target_environment.SamlGroupName + assert target_environment.SamlGroupName in updated_bucket_policy["Statement"][1]["Condition"]["StringLike"][ + "aws:userId"] + + +def test_delete_target_role_bucket_policy_with_no_read_only_sid( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base Bucket Policy with no DataAll-Bucket-ReadOnly Sid + # S3 function to update bucket policy (create_bucket_policy) should not trigger + + bucket_policy = base_bucket_policy + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_not_called() + + +def test_delete_target_role_bucket_policy_with_multiple_principals_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base Bucket Policy with DataAll-Bucket-ReadOnly Sid And Multiple Principals + # Check if the appropriate AWS arn is removed and 'SomeotherArn' is retained + + bucket_policy = base_bucket_policy + + addition_to_policy = { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + "SomeotherArn", + f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*" + ] + } + + bucket_policy["Statement"].append(addition_to_policy) + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + 
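# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original patch: the pruning behaviour this
# test exercises. The helper below is hypothetical (the real logic lives in
# S3BucketShareManager.delete_target_role_bucket_policy); it only shows the idea of
# removing the requester's role ARN from the "DataAll-Bucket-ReadOnly" statement
# while keeping any other principals intact.
def _prune_read_only_principals_sketch(policy: dict, read_only_sid: str, target_arn: str) -> dict:
    for statement in policy.get("Statement", []):
        if statement.get("Sid") == read_only_sid:
            principals = statement["Principal"]["AWS"]
            if isinstance(principals, str):
                principals = [principals]
            # Keep every principal except the role ARN being revoked
            statement["Principal"]["AWS"] = [arn for arn in principals if arn != target_arn]
    return policy
# ---------------------------------------------------------------------------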
manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # Check if the 'DataAll-Bucket-ReadOnly' Sid is still present + # Check if the 'someOtherArn' is still present and the target arn is removed + assert modified_bucket_policy["Statement"][1]["Sid"] == f"{DATAALL_READ_ONLY_SID}" + assert len(modified_bucket_policy["Statement"][1]["Principal"]["AWS"]) == 1 + assert 'SomeotherArn' in modified_bucket_policy["Statement"][1]["Principal"]["AWS"] + assert f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" not in \ + modified_bucket_policy["Statement"][1]["Principal"]["AWS"] + + +def test_delete_target_role_bucket_policy_with_one_principal_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base Bucket Policy with DataAll-Bucket-ReadOnly Sid And Single target Principals + # Bucket Policy should not have the DataAll-Bucket-ReadOnly Sid after delete_target_role_bucket_policy is called + + bucket_policy = base_bucket_policy + + addition_to_policy = { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*" + ] + } + + bucket_policy["Statement"].append(addition_to_policy) + + assert len(bucket_policy["Statement"]) == 2 + + sid_list = [statement["Sid"] for statement in bucket_policy["Statement"] if "Sid" in statement] + assert f"{DATAALL_READ_ONLY_SID}" in sid_list + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # Check if the 'DataAll-Bucket-ReadOnly' Sid is removed completely + assert len(modified_bucket_policy["Statement"]) == 1 + sid_list = [statement["Sid"] for statement in modified_bucket_policy["Statement"] if "Sid" in statement] + assert f"{DATAALL_READ_ONLY_SID}" not in sid_list + + +def test_delete_target_role_access_policy_no_resource_of_datasets_s3_bucket( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM Policy which doesn't contain target S3 bucket resources + # IAM.delete_role_policy & IAM.update_role_policy should not be called + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::someOtherBucket", + f"arn:aws:s3:::someOtherBucket/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + 
f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*" + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_called() + iam_delete_role_policy_mock.assert_not_called() + + # Get the updated IAM policy and compare it with the existing one + updated_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + assert len(updated_iam_policy["Statement"]) == 2 + assert "arn:aws:s3:::someOtherBucket,arn:aws:s3:::someOtherBucket/*" == ",".join(updated_iam_policy["Statement"][0]["Resource"]) + assert "arn:aws:kms:us-east-1:121231131212:key/some-key-2112,arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*" == ",".join( + updated_iam_policy["Statement"][1]["Resource"]) + + +def test_delete_target_role_access_policy_with_multiple_s3_buckets_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM Policy with multiple bucket resources along with target environments bucket resources + # Check if the IAM.update_policy is called and it only updates / deletes the target env bucket resources + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::someOtherBucket", + f"arn:aws:s3:::someOtherBucket/*", + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*", + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key/*" + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_called() + iam_delete_role_policy_mock.assert_not_called() + + updated_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + assert 
f"arn:aws:s3:::{dataset2.S3BucketName}" not in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::{dataset2.S3BucketName}/*" not in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::someOtherBucket" in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::someOtherBucket/*" in updated_iam_policy["Statement"][0]["Resource"] + + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" not in updated_iam_policy["Statement"][1]["Resource"] + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key/*" not in updated_iam_policy["Statement"][1]["Resource"] + assert f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112" in updated_iam_policy["Statement"][1]["Resource"] + assert f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*" in updated_iam_policy["Statement"][1]["Resource"] + + +def test_delete_target_role_access_policy_with_one_s3_bucket_and_one_kms_resource_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM Policy with target environments bucket resources only + # Check if the IAM.delete_policy is called + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*", + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key/*" + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_not_called() + iam_delete_role_policy_mock.assert_called() + + +def test_delete_target_role_bucket_key_policy_with_no_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # complete existing KMS key policy with no target requester id in it + # Check if KMS.put_key_policy is not called + + existing_key_policy = base_kms_key_policy("Some_other_requester_id") + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + 
source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_not_called() + + +def test_delete_target_role_bucket_key_policy_with_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given complete existing KMS key policy with target requester id in it + # Check if KMS.put_key_policy is called and the statement corresponding to target Sid should be removed + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 0 + + +def test_delete_target_role_bucket_key_policy_with_multiple_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given complete existing KMS key policy with multiple target requester ids + # Check if KMS.put_key_policy is called and the statement corresponding to target Sid should be removed + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + existing_key_policy["Statement"].append( + { + "Sid": "some_other_target_sid", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": {"StringLike": {"aws:userId": "some_other_target_sid:*"}} + } + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 1 + assert new_kms_policy["Statement"][0]["Sid"] == "some_other_target_sid" + assert target_environment.SamlGroupName not in json.dumps(new_kms_policy) + + +# Test for delete_target_role_bucket_key_policy when dataset is imported +def test_delete_target_role_bucket_key_policy_with_target_requester_id_and_imported_dataset( 
+        mocker,
+        source_environment_group,
+        target_environment_group,
+        dataset_imported,
+        db,
+        bucket3,
+        share3: ShareObject,
+        source_environment: Environment,
+        target_environment: Environment
+    ):
+    # Given complete existing KMS key policy with target requester id in it
+    # and that the dataset is imported and has an imported KMS key
+    # Check if KMS.put_key_policy is called and the statement corresponding to the target Sid is removed
+
+    existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName)
+
+    kms_client = mock_kms_client(mocker)
+    kms_client().get_key_id.return_value = "kms-key"
+
+    kms_client().get_key_policy.return_value = json.dumps(existing_key_policy)
+
+    mocker.patch(
+        "dataall.base.aws.sts.SessionHelper.get_role_id",
+        return_value=target_environment.SamlGroupName,
+    )
+
+    with db.scoped_session() as session:
+        manager = S3BucketShareManager(
+            session,
+            dataset_imported,
+            share3,
+            bucket3,
+            source_environment,
+            target_environment,
+            source_environment_group,
+            target_environment_group,
+        )
+
+        manager.delete_target_role_bucket_key_policy(
+            share=share3,
+            target_bucket=bucket3,
+            target_environment=target_environment
+        )
+
+        kms_client().put_key_policy.assert_called()
+
+        new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1])
+
+        assert len(new_kms_policy["Statement"]) == 0
+
+
+# Test for delete_target_role_bucket_key_policy when the dataset is imported and the imported KMS key is missing
+def test_delete_target_role_bucket_key_policy_with_target_requester_id_and_imported_dataset_with_no_imported_kms_key(
+        mocker,
+        source_environment_group,
+        target_environment_group,
+        dataset_imported,
+        db,
+        bucket3,
+        share3: ShareObject,
+        source_environment: Environment,
+        target_environment: Environment
+    ):
+    # Given complete existing KMS key policy with target requester id in it
+    # and the dataset is imported but doesn't contain importedKey
+    # In that case the KMS.put_key_policy should not be called
+
+    existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName)
+
+    kms_client = mock_kms_client(mocker)
+    kms_client().get_key_id.return_value = "kms-key"
+
+    kms_client().get_key_policy.return_value = json.dumps(existing_key_policy)
+
+    mocker.patch(
+        "dataall.base.aws.sts.SessionHelper.get_role_id",
+        return_value=target_environment.SamlGroupName,
+    )
+
+    with db.scoped_session() as session:
+        manager = S3BucketShareManager(
+            session,
+            dataset_imported,
+            share3,
+            bucket3,
+            source_environment,
+            target_environment,
+            source_environment_group,
+            target_environment_group,
+        )
+
+        # bucket3 should not have importedKmsKey, to simulate that no key was added while importing the dataset
+        bucket3.importedKmsKey = False
+        session.add(dataset_imported)
+
+        manager.delete_target_role_bucket_key_policy(
+            share=share3,
+            target_bucket=bucket3,
+            target_environment=target_environment
+        )
+
+        kms_client().put_key_policy.assert_not_called()
+
+        bucket3.importedKmsKey = True
+        session.add(dataset_imported)
+
+
+def test_delete_target_role_bucket_key_policy_missing_sid(
+    mocker,
+    source_environment_group,
+    target_environment_group,
+    dataset2,
+    db,
+    bucket2,
+    share2: ShareObject,
+    source_environment: Environment,
+    target_environment: Environment,
+):
+    # Given an existing KMS key policy with the target requester Sid plus an additional statement without a Sid
+    # Check if KMS.put_key_policy is called and only the statement corresponding to the target Sid is removed
+
+    existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName)
+
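# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original patch: base_kms_key_policy is a test
# fixture that is not shown in this hunk. Judging from how these tests read the key
# policy back (a single statement whose Sid is the requester id, kms:Decrypt, and an
# aws:userId StringLike condition), it plausibly returns something like the
# hypothetical helper below.
def _base_kms_key_policy_sketch(target_requester_id: str) -> dict:
    return {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": target_requester_id,
                "Effect": "Allow",
                "Principal": {"AWS": "*"},
                "Action": "kms:Decrypt",
                "Resource": "*",
                "Condition": {"StringLike": {"aws:userId": f"{target_requester_id}:*"}},
            }
        ],
    }
# ---------------------------------------------------------------------------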
missing_sid_statement = { + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": {"StringLike": {"aws:userId": "some_other_target_sid:*"}} + } + existing_key_policy["Statement"].append( + missing_sid_statement + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 1 + assert new_kms_policy["Statement"][0] == missing_sid_statement + assert target_environment.SamlGroupName not in json.dumps(new_kms_policy) diff --git a/tests/modules/datasets/test_share.py b/tests/modules/datasets/test_share.py index 60909a65b..5ff64b965 100644 --- a/tests/modules/datasets/test_share.py +++ b/tests/modules/datasets/test_share.py @@ -401,6 +401,10 @@ def create_share_object(client, username, group, groupUri, environmentUri, datas userRoleForShareObject requestPurpose rejectPurpose + dataset { + datasetUri + datasetName + } } } """ From cf9afc1574bf77e1a781ba564e8da146d878c961 Mon Sep 17 00:00:00 2001 From: Anushka Singh Date: Mon, 30 Oct 2023 21:11:20 -0400 Subject: [PATCH 06/21] feat: Enabling S3 bucket share --- .../modules/dataset_sharing/__init__.py | 6 +- .../modules/dataset_sharing/api/enums.py | 1 + .../modules/dataset_sharing/api/resolvers.py | 1 + .../modules/dataset_sharing/api/types.py | 1 + .../dataset_sharing/aws/glue_client.py | 40 + .../modules/dataset_sharing/aws/kms_client.py | 26 +- .../modules/dataset_sharing/aws/s3_client.py | 44 + .../modules/dataset_sharing/db/enums.py | 1 + .../db/share_object_repositories.py | 53 +- .../services/data_sharing_service.py | 67 +- .../services/dataset_alarm_service.py | 45 +- .../services/share_managers/__init__.py | 3 +- .../share_managers/lf_share_manager.py | 54 +- ...er.py => s3_access_point_share_manager.py} | 136 +- .../share_managers/s3_bucket_share_manager.py | 443 +++++ .../share_managers/share_manager_utils.py | 64 + .../lf_process_cross_account_share.py | 36 +- .../lf_process_same_account_share.py | 10 + ...re.py => s3_access_point_process_share.py} | 44 +- .../s3_bucket_process_share.py | 171 ++ .../datasets/api/dataset/input_types.py | 4 +- .../cdk/pivot_role_datasets_policy.py | 17 + .../db/dataset_bucket_repositories.py | 41 + .../datasets/services/dataset_service.py | 19 +- .../tasks/dataset_subscription_task.py | 10 +- .../datasets_base/db/dataset_models.py | 20 + .../8c79fb896983_add_table_for_buckets.py | 206 +++ .../Shares/components/AddShareItemModal.js | 3 +- .../components/RevokeShareItemsModal.js | 3 +- frontend/src/utils/share.js | 10 + tests/modules/datasets/conftest.py | 5 +- tests/modules/datasets/tasks/conftest.py | 58 +- .../datasets/tasks/test_lf_share_manager.py | 2 +- ... 
=> test_s3_access_point_share_manager.py} | 134 +- .../tasks/test_s3_bucket_share_manager.py | 1614 +++++++++++++++++ tests/modules/datasets/test_share.py | 4 + 36 files changed, 3199 insertions(+), 197 deletions(-) rename backend/dataall/modules/dataset_sharing/services/share_managers/{s3_share_manager.py => s3_access_point_share_manager.py} (79%) create mode 100644 backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py create mode 100644 backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py rename backend/dataall/modules/dataset_sharing/services/share_processors/{s3_process_share.py => s3_access_point_process_share.py} (84%) create mode 100644 backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py create mode 100644 backend/dataall/modules/datasets/db/dataset_bucket_repositories.py create mode 100644 backend/migrations/versions/8c79fb896983_add_table_for_buckets.py create mode 100644 frontend/src/utils/share.js rename tests/modules/datasets/tasks/{test_s3_share_manager.py => test_s3_access_point_share_manager.py} (90%) create mode 100644 tests/modules/datasets/tasks/test_s3_bucket_share_manager.py diff --git a/backend/dataall/modules/dataset_sharing/__init__.py b/backend/dataall/modules/dataset_sharing/__init__.py index 99dd6c01e..d98a13dbc 100644 --- a/backend/dataall/modules/dataset_sharing/__init__.py +++ b/backend/dataall/modules/dataset_sharing/__init__.py @@ -17,8 +17,7 @@ def is_supported(modes: Set[ImportMode]) -> bool: @staticmethod def depends_on() -> List[Type['ModuleInterface']]: - from dataall.modules.notifications import NotificationsModuleInterface - return [DatasetBaseModuleInterface, NotificationsModuleInterface] + return [DatasetBaseModuleInterface] def __init__(self): from dataall.modules.dataset_sharing import api @@ -36,8 +35,7 @@ def is_supported(modes: List[ImportMode]): @staticmethod def depends_on() -> List[Type['ModuleInterface']]: - from dataall.modules.notifications import NotificationsModuleInterface - return [DatasetBaseModuleInterface, NotificationsModuleInterface] + return [DatasetBaseModuleInterface] def __init__(self): import dataall.modules.dataset_sharing.handlers diff --git a/backend/dataall/modules/dataset_sharing/api/enums.py b/backend/dataall/modules/dataset_sharing/api/enums.py index 37aa5022a..9fb593f18 100644 --- a/backend/dataall/modules/dataset_sharing/api/enums.py +++ b/backend/dataall/modules/dataset_sharing/api/enums.py @@ -5,6 +5,7 @@ class ShareableType(GraphQLEnumMapper): Table = 'DatasetTable' StorageLocation = 'DatasetStorageLocation' View = 'View' + S3Bucket = 'S3Bucket' class ShareObjectPermission(GraphQLEnumMapper): diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py index ecb567ed9..d0ae3a568 100644 --- a/backend/dataall/modules/dataset_sharing/api/resolvers.py +++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py @@ -191,6 +191,7 @@ def resolve_consumption_data(context: Context, source: ShareObject, **kwargs): return { 's3AccessPointName': S3AccessPointName, 'sharedGlueDatabase': (ds.GlueDatabaseName + '_shared_' + source.shareUri)[:254] if ds else 'Not created', + 's3bucketName': ds.S3BucketName, } diff --git a/backend/dataall/modules/dataset_sharing/api/types.py b/backend/dataall/modules/dataset_sharing/api/types.py index 6e41512be..b7e3b06bf 100644 --- a/backend/dataall/modules/dataset_sharing/api/types.py +++ 
b/backend/dataall/modules/dataset_sharing/api/types.py @@ -107,6 +107,7 @@ fields=[ gql.Field(name='s3AccessPointName', type=gql.String), gql.Field(name='sharedGlueDatabase', type=gql.String), + gql.Field(name='s3bucketName', type=gql.String), ], ) diff --git a/backend/dataall/modules/dataset_sharing/aws/glue_client.py b/backend/dataall/modules/dataset_sharing/aws/glue_client.py index f110d0f89..c296025ce 100644 --- a/backend/dataall/modules/dataset_sharing/aws/glue_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/glue_client.py @@ -130,3 +130,43 @@ def delete_database(self): f'due to: {e}' ) raise e + + def remove_create_table_default_permissions(self): + """ + When upgrading to LF tables and database can still have Create Table Default Permissions turned on. + Unless this setting is removed, the table or database + can not be shared using LakeFormation. + :return: + """ + try: + account_id = self._account_id + database = self._database + + log.info( + f'Removing CreateTableDefaultPermissions in database {database}' + ) + + response = self._client.get_database(CatalogId=account_id, Name=database) + existing_database_parameters = response['Database'] + existing_database_parameters['CreateTableDefaultPermissions'] = [] + + if 'CreateTime' in existing_database_parameters: + del existing_database_parameters['CreateTime'] + if 'CatalogId' in existing_database_parameters: + del existing_database_parameters['CatalogId'] + + response = self._client.update_database( + CatalogId=account_id, + Name=database, + DatabaseInput=existing_database_parameters + ) + + log.info( + f'Successfully removed Create Table Default Permissions and Create Database Default Permissions ' + f'| {response}') + + except ClientError as e: + log.error( + f'Could not remove CreateDatabaseDefaultPermissions and/or CreateTableDefaultPermissions ' + f'permission on database in {database} due to {e}' + ) diff --git a/backend/dataall/modules/dataset_sharing/aws/kms_client.py b/backend/dataall/modules/dataset_sharing/aws/kms_client.py index 5642a9013..bdb9e2e91 100644 --- a/backend/dataall/modules/dataset_sharing/aws/kms_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/kms_client.py @@ -53,19 +53,21 @@ def get_key_id(self, key_alias: str): else: return response['KeyMetadata']['KeyId'] - def check_key_exists(self, key_alias: str): + def add_tags_to_key(self, key_id: str, tags: list): + """ + Add tags to an existing AWS KMS key. + :param key_id: The ID of the KMS key to add tags to. + :param tags: A list of dictionaries containing the tags to be added. 
For example: + [{'TagKey': 'Purpose', 'TagValue': 'Test'}] + :return: None + """ try: - key_exist = False - paginator = self._client.get_paginator('list_aliases') - for page in paginator.paginate(): - key_aliases = [alias["AliasName"] for alias in page['Aliases']] - if key_alias in key_aliases: - key_exist = True - break + self._client.tag_resource( + KeyId=key_id, + Tags=tags, + ) except Exception as e: log.error( - f'Failed to list kms key aliases in account {self._account_id}: {e}' + f'Failed to add tags to kms key {key_id} : {e}' ) - return None - else: - return key_exist + raise e diff --git a/backend/dataall/modules/dataset_sharing/aws/s3_client.py b/backend/dataall/modules/dataset_sharing/aws/s3_client.py index 78b0296ce..9cd3a9a24 100755 --- a/backend/dataall/modules/dataset_sharing/aws/s3_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/s3_client.py @@ -121,6 +121,50 @@ def generate_access_point_policy_template( } return policy + @staticmethod + def generate_default_bucket_policy( + s3_bucket_name: str, + owner_roleId: list + ): + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "AllowAllToAdmin", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": owner_roleId + } + } + }, + { + "Effect": "Deny", + "Principal": { + "AWS": "*" + }, + "Sid": "RequiredSecureTransport", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "Bool": { + "aws:SecureTransport": "false" + } + } + } + ] + } + return policy + class S3Client: def __init__(self, account_id, region): diff --git a/backend/dataall/modules/dataset_sharing/db/enums.py b/backend/dataall/modules/dataset_sharing/db/enums.py index 233991fad..7db0be34a 100644 --- a/backend/dataall/modules/dataset_sharing/db/enums.py +++ b/backend/dataall/modules/dataset_sharing/db/enums.py @@ -57,6 +57,7 @@ class ShareableType(Enum): Table = 'DatasetTable' StorageLocation = 'DatasetStorageLocation' View = 'View' + S3Bucket = 'S3Bucket' class PrincipalType(Enum): diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py index 7a6d1a70b..f90d5c330 100644 --- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py +++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py @@ -12,7 +12,7 @@ ShareItemStatus, ShareableType, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository -from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset +from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket logger = logging.getLogger(__name__) @@ -356,6 +356,8 @@ def get_share_item(session, item_type, item_uri): return session.query(DatasetTable).get(item_uri) if item_type == ShareableType.StorageLocation.value: return session.query(DatasetStorageLocation).get(item_uri) + if item_type == ShareableType.S3Bucket.value: + return session.query(DatasetBucket).get(item_uri) @staticmethod def get_share_by_uri(session, uri): @@ -525,7 +527,33 @@ def list_shareable_items(session, share, states, data): if states: locations = 
locations.filter(ShareObjectItem.status.in_(states)) - shareable_objects = tables.union(locations).subquery('shareable_objects') + s3_buckets = ( + session.query( + DatasetBucket.bucketUri.label('itemUri'), + func.coalesce('S3Bucket').label('itemType'), + DatasetBucket.S3BucketName.label('itemName'), + DatasetBucket.description.label('description'), + ShareObjectItem.shareItemUri.label('shareItemUri'), + ShareObjectItem.status.label('status'), + case( + [(ShareObjectItem.shareItemUri.isnot(None), True)], + else_=False, + ).label('isShared'), + ) + .outerjoin( + ShareObjectItem, + and_( + ShareObjectItem.shareUri == share.shareUri, + DatasetBucket.bucketUri + == ShareObjectItem.itemUri, + ), + ) + .filter(DatasetBucket.datasetUri == share.datasetUri) + ) + if states: + s3_buckets = s3_buckets.filter(ShareObjectItem.status.in_(states)) + + shareable_objects = tables.union(locations, s3_buckets).subquery('shareable_objects') query = session.query(shareable_objects) if data: @@ -732,9 +760,14 @@ def get_share_data_items(session, share_uri, status): session, share, status, DatasetStorageLocation, DatasetStorageLocation.locationUri ) + s3_buckets = ShareObjectRepository._find_all_share_item( + session, share, status, DatasetBucket, DatasetBucket.bucketUri + ) + return ( tables, folders, + s3_buckets, ) @staticmethod @@ -774,23 +807,17 @@ def find_all_share_items(session, share_uri, share_type): ) @staticmethod - def other_approved_share_item_table_exists(session, environment_uri, item_uri, share_item_uri): - share_item_shared_states = ShareItemSM.get_share_item_shared_states() + def other_approved_share_object_exists(session, environment_uri, dataset_uri): return ( session.query(ShareObject) - .join( - ShareObjectItem, - ShareObject.shareUri == ShareObjectItem.shareUri, - ) .filter( and_( - ShareObject.environmentUri == environment_uri, - ShareObjectItem.itemUri == item_uri, - ShareObjectItem.shareItemUri != share_item_uri, - ShareObjectItem.status.in_(share_item_shared_states), + Environment.environmentUri == environment_uri, + ShareObject.status == ShareObjectStatus.Approved.value, + ShareObject.datasetUri == dataset_uri, ) ) - .first() + .all() ) @staticmethod diff --git a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py index 3e93d894a..14412abca 100644 --- a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py @@ -1,12 +1,17 @@ import logging -from dataall.modules.dataset_sharing.services.share_processors.lf_process_cross_account_share import ProcessLFCrossAccountShare -from dataall.modules.dataset_sharing.services.share_processors.lf_process_same_account_share import ProcessLFSameAccountShare -from dataall.modules.dataset_sharing.services.share_processors.s3_process_share import ProcessS3Share +from dataall.modules.dataset_sharing.services.share_processors.lf_process_cross_account_share import \ + ProcessLFCrossAccountShare +from dataall.modules.dataset_sharing.services.share_processors.lf_process_same_account_share import \ + ProcessLFSameAccountShare +from dataall.modules.dataset_sharing.services.share_processors.s3_access_point_process_share import \ + ProcessS3AccessPointShare +from dataall.modules.dataset_sharing.services.share_processors.s3_bucket_process_share import ProcessS3BucketShare from dataall.base.db import Engine from dataall.modules.dataset_sharing.db.enums import 
ShareObjectActions, ShareItemStatus, ShareableType -from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectSM, ShareObjectRepository, ShareItemSM +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectSM, ShareObjectRepository, \ + ShareItemSM log = logging.getLogger(__name__) @@ -21,8 +26,9 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: 1) Updates share object State Machine with the Action: Start 2) Retrieves share data and items in Share_Approved state 3) Calls sharing folders processor to grant share - 4) Calls sharing tables processor for same or cross account sharing to grant share - 5) Updates share object State Machine with the Action: Finish + 4) Calls sharing buckets processor to grant share + 5) Calls sharing tables processor for same or cross account sharing to grant share + 6) Updates share object State Machine with the Action: Finish Parameters ---------- @@ -50,12 +56,13 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: ( shared_tables, - shared_folders + shared_folders, + shared_buckets ) = ShareObjectRepository.get_share_data_items(session, share_uri, ShareItemStatus.Share_Approved.value) log.info(f'Granting permissions to folders: {shared_folders}') - approved_folders_succeed = ProcessS3Share.process_approved_shares( + approved_folders_succeed = ProcessS3AccessPointShare.process_approved_shares( session, dataset, share, @@ -67,6 +74,20 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: ) log.info(f'sharing folders succeeded = {approved_folders_succeed}') + log.info('Granting permissions to S3 buckets') + + approved_s3_buckets_succeed = ProcessS3BucketShare.process_approved_shares( + session, + dataset, + share, + shared_buckets, + source_environment, + target_environment, + source_env_group, + env_group + ) + log.info(f'sharing s3 buckets succeeded = {approved_s3_buckets_succeed}') + if source_environment.AwsAccountId != target_environment.AwsAccountId: processor = ProcessLFCrossAccountShare( session, @@ -97,7 +118,7 @@ def approve_share(cls, engine: Engine, share_uri: str) -> bool: new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share, new_share_state) - return approved_tables_succeed if approved_folders_succeed else False + return approved_folders_succeed and approved_s3_buckets_succeed and approved_tables_succeed @classmethod def revoke_share(cls, engine: Engine, share_uri: str): @@ -108,7 +129,8 @@ def revoke_share(cls, engine: Engine, share_uri: str): 4) Checks if remaining folders are shared and effectuates clean up with folders processor 5) Calls sharing tables processor for same or cross account sharing to revoke share 6) Checks if remaining tables are shared and effectuates clean up with tables processor - 7) Updates share object State Machine with the Action: Finish + 7) Calls sharing buckets processor to revoke share + 8) Updates share object State Machine with the Action: Finish Parameters ---------- @@ -139,7 +161,8 @@ def revoke_share(cls, engine: Engine, share_uri: str): ( revoked_tables, - revoked_folders + revoked_folders, + revoked_buckets ) = ShareObjectRepository.get_share_data_items(session, share_uri, ShareItemStatus.Revoke_Approved.value) new_state = revoked_item_sm.run_transition(ShareObjectActions.Start.value) @@ -147,7 +170,7 @@ def revoke_share(cls, engine: Engine, share_uri: str): log.info(f'Revoking permissions to folders: {revoked_folders}') - revoked_folders_succeed = 
ProcessS3Share.process_revoked_shares( + revoked_folders_succeed = ProcessS3AccessPointShare.process_revoked_shares( session, dataset, share, @@ -166,13 +189,27 @@ def revoke_share(cls, engine: Engine, share_uri: str): log.info(f'Still remaining S3 resources shared = {existing_shared_items}') if not existing_shared_items and revoked_folders: log.info("Clean up S3 access points...") - clean_up_folders = ProcessS3Share.clean_up_share( + clean_up_folders = ProcessS3AccessPointShare.clean_up_share( dataset=dataset, share=share, target_environment=target_environment ) log.info(f"Clean up S3 successful = {clean_up_folders}") + log.info('Revoking permissions to S3 buckets') + + revoked_s3_buckets_succeed = ProcessS3BucketShare.process_revoked_shares( + session, + dataset, + share, + revoked_buckets, + source_environment, + target_environment, + source_env_group, + env_group, + ) + log.info(f'revoking s3 buckets succeeded = {revoked_s3_buckets_succeed}') + if source_environment.AwsAccountId != target_environment.AwsAccountId: processor = ProcessLFCrossAccountShare( session, @@ -207,7 +244,7 @@ def revoke_share(cls, engine: Engine, share_uri: str): log.info(f'Still remaining LF resources shared = {existing_shared_items}') if not existing_shared_items and revoked_tables: log.info("Clean up LF remaining resources...") - clean_up_tables = processor.delete_shared_database() + clean_up_tables = processor.clean_up_share() log.info(f"Clean up LF successful = {clean_up_tables}") existing_pending_items = ShareObjectRepository.check_pending_share_items(session, share_uri) @@ -217,4 +254,4 @@ def revoke_share(cls, engine: Engine, share_uri: str): new_share_state = share_sm.run_transition(ShareObjectActions.Finish.value) share_sm.update_state(session, share, new_share_state) - return revoked_tables_succeed and revoked_folders_succeed + return revoked_folders_succeed and revoked_s3_buckets_succeed and revoked_tables_succeed diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py index ae225f99f..d568dd4d8 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py @@ -3,7 +3,7 @@ from dataall.core.environment.db.environment_models import Environment from dataall.modules.dataset_sharing.db.share_object_models import ShareObject -from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation +from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation, DatasetBucket from dataall.base.utils.alarm_service import AlarmService log = logging.getLogger(__name__) @@ -147,5 +147,48 @@ def trigger_revoke_folder_sharing_failure_alarm( Share Target - AWS Account: {target_environment.AwsAccountId} - Region: {target_environment.region} +""" + return self.publish_message_to_alarms_topic(subject, message) + + def trigger_s3_bucket_sharing_failure_alarm( + self, + bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + ): + alarm_type = "Share" + return self.handle_bucket_sharing_failure(bucket, share, target_environment, alarm_type) + + def trigger_revoke_s3_bucket_sharing_failure_alarm( + self, + bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + ): + alarm_type = "Sharing Revoke" + return self.handle_bucket_sharing_failure(bucket, share, target_environment, alarm_type) + + 
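# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original patch: how a caller such as the S3
# bucket share processor might plausibly use the two alarm hooks above. The wrapper
# function, its name, and the alarm_service parameter are assumptions for
# illustration; only the S3BucketShareManager grant methods and the alarm method
# signature come from this change set.
def _grant_bucket_share_or_alarm_sketch(manager, bucket, share, target_environment, alarm_service) -> bool:
    try:
        manager.grant_role_bucket_policy()
        manager.grant_s3_iam_access()
        manager.grant_dataset_bucket_key_policy()
        return True
    except Exception:
        # On failure, notify operators through the alarm service defined in this module
        alarm_service.trigger_s3_bucket_sharing_failure_alarm(bucket, share, target_environment)
        return False
# ---------------------------------------------------------------------------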
def handle_bucket_sharing_failure(self, bucket: DatasetBucket, + share: ShareObject, + target_environment: Environment, + alarm_type: str): + log.info(f'Triggering {alarm_type} failure alarm...') + subject = ( + f'ALARM: DATAALL S3 Bucket {bucket.S3BucketName} {alarm_type} Failure Notification' + ) + message = f""" +You are receiving this email because your DATAALL {self.envname} environment in the {self.region} region has entered the ALARM state, because it failed to {alarm_type} the S3 Bucket {bucket.S3BucketName}. +Alarm Details: + - State Change: OK -> ALARM + - Reason for State Change: S3 Bucket {alarm_type} failure + - Timestamp: {datetime.now()} + Share Source + - Dataset URI: {share.datasetUri} + - AWS Account: {bucket.AwsAccountId} + - Region: {bucket.region} + - S3 Bucket: {bucket.S3BucketName} + Share Target + - AWS Account: {target_environment.AwsAccountId} + - Region: {target_environment.region} """ return self.publish_message_to_alarms_topic(subject, message) diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py b/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py index f8c7a4347..df0af76bf 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/__init__.py @@ -1,2 +1,3 @@ -from .s3_share_manager import S3ShareManager +from .s3_access_point_share_manager import S3AccessPointShareManager from .lf_share_manager import LFShareManager +from .s3_bucket_share_manager import S3BucketShareManager diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py index d1e92e43b..754ceaf07 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py @@ -51,6 +51,10 @@ def process_approved_shares(self) -> [str]: def process_revoked_shares(self) -> [str]: return NotImplementedError + @abc.abstractmethod + def clean_up_share(self): + return NotImplementedError + def get_share_principals(self) -> [str]: """ Builds list of principals of the share request @@ -390,6 +394,9 @@ def share_table_with_target_account(cls, **data): data['source']['database'], data['source']['tablename'], ) + + glue_client = GlueClient(source_accountid, source_region, data['source']['database']) + glue_client.remove_create_table_default_permissions() time.sleep(1) LakeFormationClient.grant_permissions_to_table( @@ -417,7 +424,7 @@ def share_table_with_target_account(cls, **data): ) raise e - def revoke_external_account_access_on_source_account(self, db_name, table_name) -> [dict]: + def revoke_external_account_access_on_source_account(self) -> [dict]: """ 1) Revokes access to external account if dataset is not shared with any other team from the same workspace @@ -436,28 +443,29 @@ def revoke_external_account_access_on_source_account(self, db_name, table_name) client = aws_session.client( 'lakeformation', region_name=self.source_environment.region ) - revoke_entries = [ - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': self.target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': db_name, - 'Name': table_name, - 'ColumnWildcard': {}, - 'CatalogId': self.source_environment.AwsAccountId, - } - }, - 'Permissions': ['DESCRIBE', 'SELECT'], - 
'PermissionsWithGrantOption': ['DESCRIBE', 'SELECT'], - } - ] - - LakeFormationClient.batch_revoke_permissions( - client, self.source_environment.AwsAccountId, revoke_entries - ) + revoke_entries = [] + for table in self.revoked_tables: + revoke_entries.append( + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 'DataLakePrincipalIdentifier': self.target_environment.AwsAccountId + }, + 'Resource': { + 'TableWithColumns': { + 'DatabaseName': table.GlueDatabaseName, + 'Name': table.GlueTableName, + 'ColumnWildcard': {}, + 'CatalogId': self.source_environment.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + 'PermissionsWithGrantOption': ['DESCRIBE', 'SELECT'], + } + ) + LakeFormationClient.batch_revoke_permissions( + client, self.source_environment.AwsAccountId, revoke_entries + ) return revoke_entries def handle_share_failure( diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py similarity index 79% rename from backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py rename to backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py index 644dbe360..eda4d58d5 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py @@ -12,15 +12,17 @@ from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository +from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareManagerUtils from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset logger = logging.getLogger(__name__) ACCESS_POINT_CREATION_TIME = 30 ACCESS_POINT_CREATION_RETRIES = 5 +IAM_ACCESS_POINT_ROLE_POLICY = "targetDatasetAccessControlPolicy" -class S3ShareManager: +class S3AccessPointShareManager: def __init__( self, session, @@ -91,7 +93,7 @@ def manage_bucket_policy(self): s3_client = S3Client(self.source_account_id, self.source_environment.region) bucket_policy = json.loads(s3_client.get_bucket_policy(self.bucket_name)) for statement in bucket_policy["Statement"]: - if statement.get("Sid") in ["AllowAllToAdmin", "DelegateAccessToAccessPoint"]: + if statement.get("Sid") in ["DelegateAccessToAccessPoint"]: return exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( self.source_account_id, @@ -140,34 +142,53 @@ def grant_target_role_access_policy(self): logger.info( f'Grant target role {self.target_requester_IAMRoleName} access policy' ) + key_alias = f"alias/{self.dataset.KmsAlias}" + kms_client = KmsClient(self.dataset_account_id, self.source_environment.region) + kms_key_id = kms_client.get_key_id(key_alias) + existing_policy = IAM.get_role_policy( self.target_account_id, self.target_requester_IAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, ) if existing_policy: # type dict - if self.bucket_name not in ",".join(existing_policy["Statement"][0]["Resource"]): - logger.info( - f'targetDatasetAccessControlPolicy exists for IAM role {self.target_requester_IAMRoleName}, ' - f'but S3 Access point {self.access_point_name} is not included, updating...' 
- ) - target_resources = [ + s3_target_resources = [ f"arn:aws:s3:::{self.bucket_name}", f"arn:aws:s3:::{self.bucket_name}/*", f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}", f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}/*" ] - existing_policy["Statement"][0]["Resource"].extend(target_resources) - policy = existing_policy - else: - logger.info( - f'targetDatasetAccessControlPolicy exists for IAM role {self.target_requester_IAMRoleName} ' - f'and S3 Access point {self.access_point_name} is included, skipping...' - ) - return + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.add_missing_resources_to_policy_statement( + self.bucket_name, + s3_target_resources, + existing_policy["Statement"][0], + IAM_ACCESS_POINT_ROLE_POLICY + ) + + kms_target_resources = [ + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}", + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}/*" + ] + share_manager.add_missing_resources_to_policy_statement( + kms_key_id, + kms_target_resources, + existing_policy["Statement"][1], + IAM_ACCESS_POINT_ROLE_POLICY + ) + + policy = existing_policy else: logger.info( - f'targetDatasetAccessControlPolicy does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' + f'{IAM_ACCESS_POINT_ROLE_POLICY} does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' ) policy = { "Version": "2012-10-17", @@ -183,13 +204,23 @@ def grant_target_role_access_policy(self): f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}", f"arn:aws:s3:{self.dataset_region}:{self.dataset_account_id}:accesspoint/{self.access_point_name}/*" ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}", + f"arn:aws:kms:{self.dataset_region}:{self.dataset_account_id}:key/{kms_key_id}/*" + ] } ] } IAM.update_role_policy( self.target_account_id, self.target_requester_IAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, json.dumps(policy), ) @@ -281,7 +312,7 @@ def update_dataset_bucket_key_policy(self): 'Updating dataset Bucket KMS key policy...' ) key_alias = f"alias/{self.dataset.KmsAlias}" - kms_client = KmsClient(account_id=self.source_account_id, region=self.source_environment.region) + kms_client = KmsClient(self.source_account_id, self.source_environment.region) kms_key_id = kms_client.get_key_id(key_alias) existing_policy = kms_client.get_key_policy(kms_key_id) target_requester_id = SessionHelper.get_role_id(self.target_account_id, self.target_requester_IAMRoleName) @@ -333,7 +364,7 @@ def delete_access_point( share: ShareObject, dataset: Dataset, ): - access_point_name = S3ShareManager.build_access_point_name(share) + access_point_name = S3AccessPointShareManager.build_access_point_name(share) logger.info( f'Deleting access point {access_point_name}...' ) @@ -356,31 +387,52 @@ def delete_target_role_access_policy( logger.info( 'Deleting target role IAM policy...' 
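# For reference, a sketch of the inline policy document grant_target_role_access_policy
# converges on. Statement order matters: index 0 carries the S3 bucket/access point ARNs
# and index 1 the KMS key ARNs, and the revoke path below relies on that ordering when it
# strips resources. Account id, region, bucket, access point and key id are placeholders.
ACCESS_POINT_SHARE_POLICY_SKETCH = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": ["s3:*"],
            "Resource": [
                "arn:aws:s3:::example-bucket",
                "arn:aws:s3:::example-bucket/*",
                "arn:aws:s3:eu-west-1:111111111111:accesspoint/example-access-point",
                "arn:aws:s3:eu-west-1:111111111111:accesspoint/example-access-point/*",
            ],
        },
        {
            "Effect": "Allow",
            "Action": ["kms:*"],
            "Resource": [
                "arn:aws:kms:eu-west-1:111111111111:key/0000aaaa-bbbb-cccc-dddd-eeee1111ffff",
                "arn:aws:kms:eu-west-1:111111111111:key/0000aaaa-bbbb-cccc-dddd-eeee1111ffff/*",
            ],
        },
    ],
}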
) - access_point_name = S3ShareManager.build_access_point_name(share) + access_point_name = S3AccessPointShareManager.build_access_point_name(share) existing_policy = IAM.get_role_policy( target_environment.AwsAccountId, share.principalIAMRoleName, - "targetDatasetAccessControlPolicy", + IAM_ACCESS_POINT_ROLE_POLICY, ) + key_alias = f"alias/{dataset.KmsAlias}" + kms_client = KmsClient(dataset.AwsAccountId, dataset.region) + kms_key_id = kms_client.get_key_id(key_alias) if existing_policy: - if dataset.S3BucketName in ",".join(existing_policy["Statement"][0]["Resource"]): - target_resources = [ - f"arn:aws:s3:::{dataset.S3BucketName}", - f"arn:aws:s3:::{dataset.S3BucketName}/*", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", - f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" - ] - for item in target_resources: - existing_policy["Statement"][0]["Resource"].remove(item) - if not existing_policy["Statement"][0]["Resource"]: - IAM.delete_role_policy(target_environment.AwsAccountId, share.principalIAMRoleName, "targetDatasetAccessControlPolicy") - else: - IAM.update_role_policy( - target_environment.AwsAccountId, - share.principalIAMRoleName, - "targetDatasetAccessControlPolicy", - json.dumps(existing_policy), - ) + s3_target_resources = [ + f"arn:aws:s3:::{dataset.S3BucketName}", + f"arn:aws:s3:::{dataset.S3BucketName}/*", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}", + f"arn:aws:s3:{dataset.region}:{dataset.AwsAccountId}:accesspoint/{access_point_name}/*" + ] + ShareManagerUtils.remove_resource_from_statement( + existing_policy["Statement"][0], + s3_target_resources + ) + + kms_target_resources = [ + f"arn:aws:kms:{dataset.region}:{dataset.AwsAccountId}:key/{kms_key_id}", + f"arn:aws:kms:{dataset.region}:{dataset.AwsAccountId}:key/{kms_key_id}/*" + ] + ShareManagerUtils.remove_resource_from_statement( + existing_policy["Statement"][1], + kms_target_resources + ) + policy_statements = [] + for statement in existing_policy["Statement"]: + if len(statement["Resource"]) != 0: + policy_statements.append(statement) + + existing_policy["Statement"] = policy_statements + if len(existing_policy["Statement"]) == 0: + IAM.delete_role_policy(target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_ACCESS_POINT_ROLE_POLICY) + else: + IAM.update_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_ACCESS_POINT_ROLE_POLICY, + json.dumps(existing_policy), + ) @staticmethod def delete_dataset_bucket_key_policy( @@ -392,7 +444,7 @@ def delete_dataset_bucket_key_policy( 'Deleting dataset bucket KMS key policy...' 
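# A standalone sketch of the statement-editing behaviour used above and defined later in
# this patch in share_manager_utils.py: missing ARNs are appended to a statement's
# Resource list, present ARNs are removed, and statements left with no resources are
# pruned before the inline policy is updated or deleted. ARNs below are placeholders.
def add_missing_resources(statement: dict, target_resources: list) -> None:
    for arn in target_resources:
        if arn not in statement["Resource"]:
            statement["Resource"].append(arn)


def remove_resources(statement: dict, target_resources: list) -> None:
    for arn in target_resources:
        if arn in statement["Resource"]:
            statement["Resource"].remove(arn)


if __name__ == '__main__':
    statement = {"Effect": "Allow", "Action": ["s3:*"], "Resource": ["arn:aws:s3:::example-bucket"]}
    add_missing_resources(statement, ["arn:aws:s3:::example-bucket", "arn:aws:s3:::example-bucket/*"])
    assert statement["Resource"] == ["arn:aws:s3:::example-bucket", "arn:aws:s3:::example-bucket/*"]

    remove_resources(statement, ["arn:aws:s3:::example-bucket", "arn:aws:s3:::example-bucket/*"])
    assert statement["Resource"] == []  # an empty statement would be dropped before saving the policy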
) key_alias = f"alias/{dataset.KmsAlias}" - kms_client = KmsClient(account_id=dataset.AwsAccountId, region=dataset.region) + kms_client = KmsClient(dataset.AwsAccountId, dataset.region) kms_key_id = kms_client.get_key_id(key_alias) existing_policy = kms_client.get_key_policy(kms_key_id) target_requester_id = SessionHelper.get_role_id(target_environment.AwsAccountId, share.principalIAMRoleName) diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py new file mode 100644 index 000000000..a120749b3 --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py @@ -0,0 +1,443 @@ +import abc +import json +import logging +from itertools import count + +from dataall.base.aws.iam import IAM +from dataall.base.aws.sts import SessionHelper +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.aws.kms_client import KmsClient +from dataall.modules.dataset_sharing.aws.s3_client import S3ControlClient, S3Client +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareManagerUtils +from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository + +logger = logging.getLogger(__name__) + +DATAALL_READ_ONLY_SID = "DataAll-Bucket-ReadOnly" +DATAALL_ALLOW_OWNER_SID = "AllowAllToAdmin" +IAM_S3BUCKET_ROLE_POLICY = "dataall-targetDatasetS3Bucket-AccessControlPolicy" + + +class S3BucketShareManager: + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + target_bucket: DatasetBucket, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + self.session = session + self.source_env_group = source_env_group + self.env_group = env_group + self.dataset = dataset + self.share = share + self.target_bucket = target_bucket + self.source_environment = source_environment + self.target_environment = target_environment + self.share_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + target_bucket.bucketUri, + ) + self.source_account_id = target_bucket.AwsAccountId + self.target_account_id = target_environment.AwsAccountId + self.source_env_admin = source_env_group.environmentIAMRoleArn + self.target_requester_IAMRoleName = share.principalIAMRoleName + self.bucket_name = target_bucket.S3BucketName + self.dataset_admin = dataset.IAMDatasetAdminRoleArn + self.bucket_region = target_bucket.region + + @abc.abstractmethod + def process_approved_shares(self, *kwargs) -> bool: + raise NotImplementedError + + @abc.abstractmethod + def process_revoked_shares(self, *kwargs) -> bool: + raise NotImplementedError + + def grant_s3_iam_access(self): + """ + Updates requester IAM role policy to include requested S3 bucket and kms key + :return: + """ + logger.info( + f'Grant target role {self.target_requester_IAMRoleName} access policy' + ) + existing_policy = IAM.get_role_policy( + self.target_account_id, + self.target_requester_IAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + ) + key_alias = f"alias/{self.target_bucket.KmsAlias}" 
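# KmsClient.get_key_id is defined elsewhere in this module; a minimal sketch of how a KMS
# alias is typically resolved to a key id with boto3, assuming the client is a thin
# wrapper around DescribeKey called with credentials for the dataset (source) account.
# The region and alias below are placeholders.
import boto3


def resolve_kms_key_id(alias_name: str, region: str = 'eu-west-1') -> str:
    """Return the key id behind a KMS alias, e.g. 'alias/example-dataset-key'."""
    kms = boto3.client('kms', region_name=region)
    response = kms.describe_key(KeyId=alias_name)
    return response['KeyMetadata']['KeyId']


if __name__ == '__main__':
    print(resolve_kms_key_id('alias/example-dataset-key'))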
+ kms_client = KmsClient(self.source_account_id, self.source_environment.region) + kms_key_id = kms_client.get_key_id(key_alias) + + if existing_policy: # type dict + s3_target_resources = [ + f"arn:aws:s3:::{self.bucket_name}", + f"arn:aws:s3:::{self.bucket_name}/*" + ] + + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.add_missing_resources_to_policy_statement( + resource_type=self.bucket_name, + target_resources=s3_target_resources, + existing_policy_statement=existing_policy["Statement"][0], + iam_role_policy_name=IAM_S3BUCKET_ROLE_POLICY + ) + + kms_target_resources = [ + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}", + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}/*" + ] + + share_manager.add_missing_resources_to_policy_statement( + resource_type=kms_key_id, + target_resources=kms_target_resources, + existing_policy_statement=existing_policy["Statement"][1], + iam_role_policy_name=IAM_S3BUCKET_ROLE_POLICY + ) + + policy = existing_policy + else: + logger.info( + f'{IAM_S3BUCKET_ROLE_POLICY} does not exists for IAM role {self.target_requester_IAMRoleName}, creating...' + ) + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{self.bucket_name}", + f"arn:aws:s3:::{self.bucket_name}/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}", + f"arn:aws:kms:{self.bucket_region}:{self.source_account_id}:key/{kms_key_id}/*" + ] + } + ] + } + IAM.update_role_policy( + self.target_account_id, + self.target_requester_IAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + json.dumps(policy), + ) + + def get_bucket_policy_or_default(self): + """ + Fetches the existing bucket policy for the S3 bucket if one exists otherwise returns the default bucket policy + :return: + """ + s3_client = S3Client(self.source_account_id, self.source_environment.region) + bucket_policy = s3_client.get_bucket_policy(self.bucket_name) + if bucket_policy: + logger.info( + f'There is already an existing policy for bucket {self.bucket_name}, will be updating policy...' + ) + bucket_policy = json.loads(bucket_policy) + else: + logger.info( + f'Bucket policy for {self.bucket_name} does not exist, generating default policy...' + ) + exceptions_roleId = self.get_bucket_owner_roleid() + bucket_policy = S3ControlClient.generate_default_bucket_policy(self.bucket_name, exceptions_roleId) + return bucket_policy + + def get_bucket_owner_roleid(self): + exceptions_roleId = [f'{item}:*' for item in SessionHelper.get_role_ids( + self.source_account_id, + [self.dataset_admin, self.source_env_admin, SessionHelper.get_delegation_role_arn(self.source_account_id)] + )] + return exceptions_roleId + + def grant_role_bucket_policy(self): + """ + This function will update bucket policy by granting admin access to dataset admin, pivot role + and environment admin along with read only access to accepted share roles. All the policies will only be added + once. 
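# How the ':*' suffix built by get_bucket_owner_roleid works: for an assumed IAM role the
# aws:userId context key evaluates to '<role unique id>:<session name>' (role unique ids
# start with 'AROA'), so a StringLike match on '<role unique id>:*' covers every session
# of that role. SessionHelper.get_role_ids is assumed to return those unique ids; the
# values below are placeholders.
OWNER_EXCEPTION_CONDITION_SKETCH = {
    "Condition": {
        "StringLike": {
            "aws:userId": [
                "AROAEXAMPLEDATASETADM1:*",  # dataset admin role (placeholder unique id)
                "AROAEXAMPLEENVADMIN012:*",  # source environment admin role (placeholder unique id)
                "AROAEXAMPLEPIVOTROLE34:*",  # data.all pivot/delegation role (placeholder unique id)
            ]
        }
    }
}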
+ :return: + """ + logger.info( + f'Granting access via Bucket policy for {self.bucket_name}' + ) + try: + target_requester_arn = self.get_role_arn(self.target_account_id, self.target_requester_IAMRoleName) + bucket_policy = self.get_bucket_policy_or_default() + counter = count() + statements = {item.get("Sid", next(counter)): item for item in bucket_policy.get("Statement", {})} + if DATAALL_READ_ONLY_SID in statements.keys(): + logger.info(f'Bucket policy contains share statement {DATAALL_READ_ONLY_SID}, updating the current one') + statements[DATAALL_READ_ONLY_SID] = self.add_target_arn_to_statement_principal(statements[DATAALL_READ_ONLY_SID], target_requester_arn) + else: + logger.info(f'Bucket policy does not contain share statement {DATAALL_READ_ONLY_SID}, generating a new one') + statements[DATAALL_READ_ONLY_SID] = self.generate_default_bucket_read_policy_statement(self.bucket_name, target_requester_arn) + + if DATAALL_ALLOW_OWNER_SID not in statements.keys(): + statements[DATAALL_ALLOW_OWNER_SID] = self.generate_owner_access_statement(self.bucket_name, self.get_bucket_owner_roleid()) + + bucket_policy["Statement"] = list(statements.values()) + s3_client = S3Client(self.source_account_id, self.source_environment.region) + s3_client.create_bucket_policy(self.bucket_name, json.dumps(bucket_policy)) + except Exception as e: + logger.exception( + f'Failed during bucket policy management {e}' + ) + raise e + + def add_target_arn_to_statement_principal(self, statement, target_requester_arn): + principal_list = self.get_principal_list(statement) + if f"{target_requester_arn}" not in principal_list: + principal_list.append(f"{target_requester_arn}") + statement["Principal"]["AWS"] = principal_list + return statement + + @staticmethod + def generate_owner_access_statement(s3_bucket_name, owner_roleId): + owner_policy_statement = { + "Sid": "AllowAllToAdmin", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": owner_roleId + } + } + } + return owner_policy_statement + + @staticmethod + def get_principal_list(statement): + principal_list = statement["Principal"]["AWS"] + if isinstance(principal_list, str): + principal_list = [principal_list] + return principal_list + + def grant_dataset_bucket_key_policy(self): + if (self.target_bucket.imported and self.target_bucket.importedKmsKey) or not self.target_bucket.imported: + logger.info( + 'Updating dataset Bucket KMS key policy...' + ) + key_alias = f"alias/{self.target_bucket.KmsAlias}" + kms_client = KmsClient(self.source_account_id, self.source_environment.region) + kms_key_id = kms_client.get_key_id(key_alias) + existing_policy = kms_client.get_key_policy(kms_key_id) + target_requester_id = SessionHelper.get_role_id(self.target_account_id, self.target_requester_IAMRoleName) + if existing_policy and f'{target_requester_id}:*' not in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"].append( + { + "Sid": f"{target_requester_id}", + "Effect": "Allow", + "Principal": { + "AWS": "*" + }, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": { + "StringLike": { + "aws:userId": f"{target_requester_id}:*" + } + } + } + ) + kms_client.put_key_policy( + kms_key_id, + json.dumps(policy) + ) + + def delete_target_role_bucket_policy(self): + logger.info( + f'Deleting target role from bucket policy for bucket {self.bucket_name}...' 
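# grant_role_bucket_policy above keys existing statements by Sid, handing statements
# without a Sid a throwaway integer key from itertools.count() so they survive the
# rebuild. A sketch of the bucket policy it converges on for a single approved requester
# role; account ids, bucket name, role name and role unique id are placeholders.
BUCKET_SHARE_POLICY_SKETCH = {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "AllowAllToAdmin",
            "Effect": "Allow",
            "Principal": "*",
            "Action": "s3:*",
            "Resource": ["arn:aws:s3:::example-bucket", "arn:aws:s3:::example-bucket/*"],
            "Condition": {"StringLike": {"aws:userId": ["AROAEXAMPLEOWNERROLE12:*"]}},
        },
        {
            "Sid": "DataAll-Bucket-ReadOnly",
            "Effect": "Allow",
            "Principal": {"AWS": ["arn:aws:iam::222222222222:role/example-requester-role"]},
            "Action": ["s3:List*", "s3:GetObject"],
            "Resource": ["arn:aws:s3:::example-bucket", "arn:aws:s3:::example-bucket/*"],
        },
    ],
}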
+ ) + try: + s3_client = S3Client(self.source_account_id, self.source_environment.region) + bucket_policy = json.loads(s3_client.get_bucket_policy(self.bucket_name)) + target_requester_arn = self.get_role_arn(self.target_account_id, self.target_requester_IAMRoleName) + counter = count() + statements = {item.get("Sid", next(counter)): item for item in bucket_policy.get("Statement", {})} + if DATAALL_READ_ONLY_SID in statements.keys(): + principal_list = self.get_principal_list(statements[DATAALL_READ_ONLY_SID]) + if f"{target_requester_arn}" in principal_list: + principal_list.remove(f"{target_requester_arn}") + if len(principal_list) == 0: + statements.pop(DATAALL_READ_ONLY_SID) + else: + statements[DATAALL_READ_ONLY_SID]["Principal"]["AWS"] = principal_list + bucket_policy["Statement"] = list(statements.values()) + s3_client.create_bucket_policy(self.bucket_name, json.dumps(bucket_policy)) + except Exception as e: + logger.exception( + f'Failed during bucket policy management {e}' + ) + raise e + + def delete_target_role_access_policy( + self, + share: ShareObject, + target_bucket: DatasetBucket, + target_environment: Environment, + ): + logger.info( + 'Deleting target role IAM policy...' + ) + existing_policy = IAM.get_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + ) + key_alias = f"alias/{target_bucket.KmsAlias}" + kms_client = KmsClient(target_bucket.AwsAccountId, target_bucket.region) + kms_key_id = kms_client.get_key_id(key_alias) + if existing_policy: + s3_target_resources = [ + f"arn:aws:s3:::{target_bucket.S3BucketName}", + f"arn:aws:s3:::{target_bucket.S3BucketName}/*" + ] + share_manager = ShareManagerUtils( + self.session, + self.dataset, + self.share, + self.source_environment, + self.target_environment, + self.source_env_group, + self.env_group + ) + share_manager.remove_resource_from_statement(existing_policy["Statement"][0], s3_target_resources) + + kms_target_resources = [ + f"arn:aws:kms:{target_bucket.region}:{target_bucket.AwsAccountId}:key/{kms_key_id}", + f"arn:aws:kms:{target_bucket.region}:{target_bucket.AwsAccountId}:key/{kms_key_id}/*", + ] + share_manager.remove_resource_from_statement(existing_policy["Statement"][1], kms_target_resources) + + policy_statements = [] + for statement in existing_policy["Statement"]: + if len(statement["Resource"]) != 0: + policy_statements.append(statement) + + existing_policy["Statement"] = policy_statements + if len(existing_policy["Statement"]) == 0: + IAM.delete_role_policy(target_environment.AwsAccountId, share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY) + else: + IAM.update_role_policy( + target_environment.AwsAccountId, + share.principalIAMRoleName, + IAM_S3BUCKET_ROLE_POLICY, + json.dumps(existing_policy), + ) + + @staticmethod + def delete_target_role_bucket_key_policy( + share: ShareObject, + target_bucket: DatasetBucket, + target_environment: Environment, + ): + if (target_bucket.imported and target_bucket.importedKmsKey) or not target_bucket.imported: + logger.info( + 'Deleting target role from dataset bucket KMS key policy...' 
+ ) + key_alias = f"alias/{target_bucket.KmsAlias}" + kms_client = KmsClient(target_bucket.AwsAccountId, target_bucket.region) + kms_key_id = kms_client.get_key_id(key_alias) + existing_policy = kms_client.get_key_policy(kms_key_id) + target_requester_id = SessionHelper.get_role_id(target_environment.AwsAccountId, share.principalIAMRoleName) + if existing_policy and f'{target_requester_id}:*' in existing_policy: + policy = json.loads(existing_policy) + policy["Statement"] = [item for item in policy["Statement"] if item.get("Sid", None) != f"{target_requester_id}"] + kms_client.put_key_policy( + kms_key_id, + json.dumps(policy) + ) + + def handle_share_failure(self, error: Exception) -> bool: + """ + Handles share failure by raising an alarm to alarmsTopic + Returns + ------- + True if alarm published successfully + """ + logger.error( + f'Failed to share bucket {self.target_bucket.S3BucketName} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' + f'due to: {error}' + ) + DatasetAlarmService().trigger_s3_bucket_sharing_failure_alarm( + self.target_bucket, self.share, self.target_environment + ) + return True + + def handle_revoke_failure(self, error: Exception) -> bool: + """ + Handles share failure by raising an alarm to alarmsTopic + Returns + ------- + True if alarm published successfully + """ + logger.error( + f'Failed to revoke S3 permissions to bucket {self.bucket_name} ' + f'from source account {self.source_environment.AwsAccountId}//{self.source_environment.region} ' + f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' + f'due to: {error}' + ) + DatasetAlarmService().trigger_revoke_folder_sharing_failure_alarm( + self.target_bucket, self.share, self.target_environment + ) + return True + + @staticmethod + def get_role_arn(target_account_id, target_requester_IAMRoleName): + return f"arn:aws:iam::{target_account_id}:role/{target_requester_IAMRoleName}" + + @staticmethod + def generate_default_bucket_read_policy_statement(s3_bucket_name, target_requester_arn): + return { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + f"{target_requester_arn}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ] + } diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py b/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py new file mode 100644 index 000000000..305d8c5e7 --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/share_manager_utils.py @@ -0,0 +1,64 @@ +import abc +import logging + +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.datasets_base.db.dataset_models import Dataset + + +logger = logging.getLogger(__name__) + + +class ShareManagerUtils: + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + self.target_requester_IAMRoleName = share.principalIAMRoleName + self.session = session + self.dataset = dataset + self.share = share + 
self.source_environment = source_environment + self.target_environment = target_environment + self.source_env_group = source_env_group + self.env_group = env_group + + def add_missing_resources_to_policy_statement( + self, + resource_type, + target_resources, + existing_policy_statement, + iam_role_policy_name + ): + """ + Checks if the resources are in the existing policy. Otherwise, it will add it. + :param resource_type: str + :param target_resources: list + :param existing_policy_statement: dict + :param iam_role_policy_name: str + :return + """ + for target_resource in target_resources: + if target_resource not in existing_policy_statement["Resource"]: + logger.info( + f'{iam_role_policy_name} exists for IAM role {self.target_requester_IAMRoleName}, ' + f'but {resource_type} is not included, updating...' + ) + existing_policy_statement["Resource"].extend([target_resource]) + else: + logger.info( + f'{iam_role_policy_name} exists for IAM role {self.target_requester_IAMRoleName} ' + f'and {resource_type} is included, skipping...' + ) + + @staticmethod + def remove_resource_from_statement(policy_statement, target_resources): + for target_resource in target_resources: + if target_resource in policy_statement["Resource"]: + policy_statement["Resource"].remove(target_resource) diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py index 51ba97cc7..d28340cd6 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py @@ -124,7 +124,7 @@ def process_revoked_shares(self) -> bool: a) update its status to REVOKE_IN_PROGRESS with Action Start b) check if item exists on glue catalog raise error if not and flag item status to failed c) revoke table resource link: undo grant permission to resource link table for team role in target account - d) revoke source table access: undo grant permission to table for team role in source account (and for QS Group if no other shares present for table) + d) revoke source table access: undo grant permission to table for team role in source account e) delete resource link table h) update share item status to REVOKE_SUCCESSFUL with Action Success @@ -157,23 +157,10 @@ def process_revoked_shares(self) -> bool: self.revoke_table_resource_link_access(table, principals) - other_table_shares_in_env = False - if ShareObjectRepository.other_approved_share_item_table_exists( - self.session, - self.target_environment.environmentUri, - share_item.itemUri, - share_item.shareItemUri - ): - other_table_shares_in_env = True - principals = [p for p in principals if "arn:aws:quicksight" not in p] - self.revoke_source_table_access(table, principals) self.delete_resource_link_table(table) - if not other_table_shares_in_env: - self.revoke_external_account_access_on_source_account(table.GlueDatabaseName, table.GlueTableName) - new_state = revoked_item_SM.run_transition(ShareItemActions.Success.value) revoked_item_SM.update_state_single_item(self.session, share_item, new_state) @@ -184,3 +171,24 @@ def process_revoked_shares(self) -> bool: success = False return success + + def clean_up_share(self) -> bool: + """" + 1) deletes deprecated shared db in target account + 2) checks if there are other share objects from this source account to this target account. 
+ If not, it revokes external account access of the target account to the source account. + Returns + ------- + True if clean-up succeeds + """ + + self.delete_shared_database() + + if not ShareObjectRepository.other_approved_share_object_exists( + self.session, + self.target_environment.environmentUri, + self.dataset.datasetUri, + ): + self.revoke_external_account_access_on_source_account() + + return True diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py index 54df2d900..270538a0b 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py @@ -157,3 +157,13 @@ def process_revoked_shares(self) -> bool: success = False return success + + def clean_up_share(self) -> bool: + """" + 1) deletes deprecated shared db in target account + Returns + ------- + True if clean-up succeeds + """ + self.delete_shared_database() + return True diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py similarity index 84% rename from backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py rename to backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py index 8e2f6cf38..043b9a6b4 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_process_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py @@ -1,7 +1,7 @@ import logging from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup -from ..share_managers import S3ShareManager +from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset from dataall.modules.dataset_sharing.db.enums import ShareItemStatus, ShareObjectActions, ShareItemActions from dataall.modules.dataset_sharing.db.share_object_models import ShareObject @@ -10,7 +10,7 @@ log = logging.getLogger(__name__) -class ProcessS3Share(S3ShareManager): +class ProcessS3AccessPointShare(S3AccessPointShareManager): def __init__( self, session, @@ -21,6 +21,7 @@ def __init__( target_environment: Environment, source_env_group: EnvironmentGroup, env_group: EnvironmentGroup, + existing_shared_buckets: bool = False ): super().__init__( @@ -164,11 +165,18 @@ def process_revoked_shares( return success - @staticmethod + @classmethod def clean_up_share( + cls, + session, dataset: Dataset, share: ShareObject, - target_environment: Environment + folder: DatasetStorageLocation, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + existing_shared_buckets: bool = False ): """ 1) deletes S3 access point for this share in this Dataset S3 Bucket @@ -179,21 +187,33 @@ def clean_up_share( ------- True if share is cleaned-up successfully """ - - clean_up = S3ShareManager.delete_access_point( + clean_up_folder = cls( + session, + dataset, + share, + folder, + source_environment, + target_environment, + source_env_group, + env_group, + existing_shared_buckets + ) + clean_up = 
clean_up_folder.delete_access_point( share=share, dataset=dataset ) + if clean_up: - S3ShareManager.delete_target_role_access_policy( - share=share, - dataset=dataset, - target_environment=target_environment - ) - S3ShareManager.delete_dataset_bucket_key_policy( + clean_up_folder.delete_target_role_access_policy( share=share, dataset=dataset, target_environment=target_environment ) + if not existing_shared_buckets: + clean_up_folder.delete_dataset_bucket_key_policy( + share=share, + dataset=dataset, + target_environment=target_environment + ) return True diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py new file mode 100644 index 000000000..57e8e069f --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py @@ -0,0 +1,171 @@ +import logging + +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket +from dataall.modules.dataset_sharing.db.enums import ShareItemStatus, ShareObjectActions, ShareItemActions +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository, ShareItemSM + + +log = logging.getLogger(__name__) + + +class ProcessS3BucketShare(S3BucketShareManager): + def __init__( + self, + session, + dataset: Dataset, + share: ShareObject, + s3_bucket: DatasetBucket, + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + ): + + super().__init__( + session, + dataset, + share, + s3_bucket, + source_environment, + target_environment, + source_env_group, + env_group, + ) + + @classmethod + def process_approved_shares( + cls, + session, + dataset: Dataset, + share: ShareObject, + shared_buckets: [DatasetBucket], + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup + ) -> bool: + """ + 1) update_share_item_status with Start action + 2) manage_bucket_policy - grants permission in the bucket policy + 3) grant_target_role_access_policy == done + 4) update_dataset_bucket_key_policy == done + 5) update_share_item_status with Finish action == done + + Returns + ------- + True if share is granted successfully + """ + log.info( + '##### Starting S3 bucket share #######' + ) + success = True + for shared_bucket in shared_buckets: + sharing_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + shared_bucket.bucketUri, + ) + shared_item_SM = ShareItemSM(ShareItemStatus.Share_Approved.value) + new_state = shared_item_SM.run_transition(ShareObjectActions.Start.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + + sharing_bucket = cls( + session, + dataset, + share, + shared_bucket, + source_environment, + target_environment, + source_env_group, + env_group + ) + try: + sharing_bucket.grant_role_bucket_policy() + sharing_bucket.grant_s3_iam_access() + sharing_bucket.grant_dataset_bucket_key_policy() + new_state = shared_item_SM.run_transition(ShareItemActions.Success.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + + except 
Exception as e: + sharing_bucket.handle_share_failure(e) + new_state = shared_item_SM.run_transition(ShareItemActions.Failure.value) + shared_item_SM.update_state_single_item(session, sharing_item, new_state) + success = False + return success + + @classmethod + def process_revoked_shares( + cls, + session, + dataset: Dataset, + share: ShareObject, + revoked_buckets: [DatasetBucket], + source_environment: Environment, + target_environment: Environment, + source_env_group: EnvironmentGroup, + env_group: EnvironmentGroup, + existing_shared_folders: bool = False + ) -> bool: + """ + 1) update_share_item_status with Start action + 2) remove access from bucket policy + 3) remove access from key policy + 4) remove access from IAM role policy + 5) update_share_item_status with Finish action + + Returns + ------- + True if share is revoked successfully + False if revoke fails + """ + + log.info( + '##### Starting Revoking S3 bucket share #######' + ) + success = True + for revoked_bucket in revoked_buckets: + removing_item = ShareObjectRepository.find_sharable_item( + session, + share.shareUri, + revoked_bucket.bucketUri, + ) + + revoked_item_SM = ShareItemSM(ShareItemStatus.Revoke_Approved.value) + new_state = revoked_item_SM.run_transition(ShareObjectActions.Start.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + removing_bucket = cls( + session, + dataset, + share, + revoked_bucket, + source_environment, + target_environment, + source_env_group, + env_group + ) + try: + removing_bucket.delete_target_role_bucket_policy() + removing_bucket.delete_target_role_access_policy( + share=share, + target_bucket=revoked_bucket, + target_environment=target_environment + ) + if not existing_shared_folders: + removing_bucket.delete_target_role_bucket_key_policy( + share=share, + target_bucket=revoked_bucket, + target_environment=target_environment + ) + new_state = revoked_item_SM.run_transition(ShareItemActions.Success.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + + except Exception as e: + removing_bucket.handle_revoke_failure(e) + new_state = revoked_item_SM.run_transition(ShareItemActions.Failure.value) + revoked_item_SM.update_state_single_item(session, removing_item, new_state) + success = False + + return success diff --git a/backend/dataall/modules/datasets/api/dataset/input_types.py b/backend/dataall/modules/datasets/api/dataset/input_types.py index d238a8103..4310fb3b4 100644 --- a/backend/dataall/modules/datasets/api/dataset/input_types.py +++ b/backend/dataall/modules/datasets/api/dataset/input_types.py @@ -20,7 +20,7 @@ name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String) ), gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), - gql.Argument(name='stewards', type=gql.String), + gql.Argument(name='stewards', type=gql.String) ], ) @@ -102,6 +102,6 @@ name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String) ), gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), - gql.Argument(name='stewards', type=gql.String), + gql.Argument(name='stewards', type=gql.String) ], ) diff --git a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py index 46c34ea58..b44fdfbdc 100644 --- a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py +++ b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py @@ -20,6 +20,23 @@ class 
DatasetsPivotRole(PivotRoleStatementSet): """ def get_statements(self): statements = [ + # S3 Imported Buckets - restrict resources via bucket policies + iam.PolicyStatement( + sid='ImportedBuckets', + effect=iam.Effect.ALLOW, + actions=[ + 's3:List*', + 's3:GetBucket*', + 's3:GetLifecycleConfiguration', + 's3:GetObject', + 's3:PutBucketPolicy', + 's3:PutBucketTagging', + 's3:PutObject', + 's3:PutObjectAcl', + 's3:PutBucketOwnershipControls', + ], + resources=['arn:aws:s3:::*'], + ), # For dataset preview iam.PolicyStatement( sid='AthenaWorkgroupsDataset', diff --git a/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py new file mode 100644 index 000000000..31cfa2cdd --- /dev/null +++ b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py @@ -0,0 +1,41 @@ +import logging + +from dataall.modules.datasets_base.db.dataset_models import DatasetBucket, Dataset + +logger = logging.getLogger(__name__) + + +class DatasetBucketRepository: + + @staticmethod + def create_dataset_bucket( + session, + dataset: Dataset, + data: dict = None + ) -> DatasetBucket: + bucket = DatasetBucket( + datasetUri=dataset.datasetUri, + label=data.get('label'), + description=data.get('description', 'No description provided'), + tags=data.get('tags', []), + S3BucketName=dataset.S3BucketName, + AwsAccountId=dataset.AwsAccountId, + owner=dataset.owner, + region=dataset.region, + KmsAlias=dataset.KmsAlias, + imported=dataset.imported, + importedKmsKey=dataset.importedKmsKey, + ) + session.add(bucket) + session.commit() + return bucket + + @staticmethod + def delete_dataset_buckets(session, dataset_uri) -> bool: + buckets = ( + session.query(DatasetBucket) + .filter(DatasetBucket.datasetUri == dataset_uri) + .all() + ) + for bucket in buckets: + session.delete(bucket) diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 707e9fb91..09611bcc9 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -5,7 +5,6 @@ from dataall.base.db import exceptions from dataall.core.tasks.service_handlers import Worker from dataall.base.aws.sts import SessionHelper -from dataall.modules.dataset_sharing.aws.kms_client import KmsClient from dataall.base.context import get_context from dataall.core.environment.env_permission_checker import has_group_permission from dataall.core.environment.services.environment_service import EnvironmentService @@ -16,8 +15,10 @@ from dataall.core.stacks.db.stack_repositories import Stack from dataall.core.tasks.db.task_models import Task from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository +from dataall.modules.datasets.db.dataset_bucket_repositories import DatasetBucketRepository from dataall.modules.vote.db.vote_repositories import VoteRepository from dataall.base.db.exceptions import AWSResourceNotFound, UnauthorizedOperation +from dataall.modules.dataset_sharing.aws.kms_client import KmsClient from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_APPROVER @@ -54,22 +55,13 @@ def check_dataset_account(session, environment): def check_imported_resources(environment, data): kms_alias = data.get('KmsKeyAlias') if 
kms_alias not in [None, "Undefined", "", "SSE-S3"]: - key_exists = KmsClient(account_id=environment.AwsAccountId, region=environment.region).check_key_exists( - key_alias=f"alias/{kms_alias}" - ) - if not key_exists: - raise exceptions.AWSResourceNotFound( - action=IMPORT_DATASET, - message=f'KMS key with alias={kms_alias} cannot be found - Please check if KMS Key Alias exists in account {environment.AwsAccountId}', - ) - - key_id = KmsClient(account_id=environment.AwsAccountId, region=environment.region).get_key_id( + key_id = KmsClient(environment.AwsAccountId, environment.region).get_key_id( key_alias=f"alias/{kms_alias}" ) if not key_id: raise exceptions.AWSResourceNotFound( action=IMPORT_DATASET, - message=f'Data.all Environment Pivot Role does not have kms:DescribeKey Permission to KMS key with alias={kms_alias}', + message=f'KMS key with alias={kms_alias} cannot be found', ) return True @@ -92,6 +84,8 @@ def create_dataset(uri, admin_group, data: dict): data=data, ) + DatasetBucketRepository.create_dataset_bucket(session, dataset, data) + ResourcePolicy.attach_resource_policy( session=session, group=data['SamlAdminGroupName'], @@ -380,6 +374,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False): DatasetService.delete_dataset_term_links(session, uri) DatasetTableRepository.delete_dataset_tables(session, dataset.datasetUri) DatasetLocationRepository.delete_dataset_locations(session, dataset.datasetUri) + DatasetBucketRepository.delete_dataset_buckets(session, dataset.datasetUri) KeyValueTag.delete_key_value_tags(session, dataset.datasetUri, 'dataset') VoteRepository.delete_votes(session, dataset.datasetUri, 'dataset') diff --git a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py b/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py index d0c874a4c..6a83fc5c8 100644 --- a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py +++ b/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py @@ -25,8 +25,6 @@ root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) -# TODO: review this task usage and remove if not needed - class DatasetSubscriptionService: def __init__(self, engine): @@ -148,12 +146,12 @@ def publish_sns_message( response = sns_client.publish_dataset_message(message) log.info(f'SNS update publish response {response}') - notifications = ShareNotificationService( + notifications = ShareNotificationService.notify_new_data_available_from_owners( session=session, dataset=dataset, - share=share_object - ).notify_new_data_available_from_owners(s3_prefix=prefix) - + share=share_object, + s3_prefix=prefix, + ) log.info(f'Notifications for share owners {notifications}') except ClientError as e: diff --git a/backend/dataall/modules/datasets_base/db/dataset_models.py b/backend/dataall/modules/datasets_base/db/dataset_models.py index a5fcf1260..dd12746ad 100644 --- a/backend/dataall/modules/datasets_base/db/dataset_models.py +++ b/backend/dataall/modules/datasets_base/db/dataset_models.py @@ -141,3 +141,23 @@ class Dataset(Resource, Base): @classmethod def uri(cls): return cls.datasetUri + + +class DatasetBucket(Resource, Base): + __tablename__ = 'dataset_bucket' + datasetUri = Column(String, nullable=False) + bucketUri = Column(String, primary_key=True, default=utils.uuid('bucket')) + AwsAccountId = Column(String, nullable=False) + S3BucketName = Column(String, nullable=False) + region = Column(String, default='eu-west-1') + partition = Column(String, default='aws') + KmsAlias = 
Column(String, nullable=False) + imported = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + userRoleForStorageBucket = query_expression() + projectPermission = query_expression() + environmentEndPoint = query_expression() + + @classmethod + def uri(cls): + return cls.bucketUri diff --git a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py new file mode 100644 index 000000000..9142418f8 --- /dev/null +++ b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py @@ -0,0 +1,206 @@ +"""add table for buckets + +Revision ID: 8c79fb896983 +Revises: 5781fdf1f877 +Create Date: 2023-09-06 12:01:53.841149 + +""" +import os +from sqlalchemy import orm, Column, String, Boolean, ForeignKey, DateTime, and_, inspect +from sqlalchemy.orm import query_expression +from sqlalchemy.ext.declarative import declarative_base +import sqlalchemy as sa +from alembic import op +from sqlalchemy.dialects import postgresql + +from dataall.base.db import get_engine, has_table +from dataall.base.db import utils, Resource +from dataall.modules.dataset_sharing.db.enums import ShareObjectStatus, ShareableType +from datetime import datetime + +# revision identifiers, used by Alembic. +revision = '8c79fb896983' +down_revision = '5781fdf1f877' +branch_labels = None +depends_on = None + +Base = declarative_base() + + +class Dataset(Resource, Base): + __tablename__ = 'dataset' + environmentUri = Column(String, ForeignKey("environment.environmentUri"), nullable=False) + organizationUri = Column(String, nullable=False) + datasetUri = Column(String, primary_key=True, default=utils.uuid('dataset')) + region = Column(String, default='eu-west-1') + AwsAccountId = Column(String, nullable=False) + S3BucketName = Column(String, nullable=False) + GlueDatabaseName = Column(String, nullable=False) + GlueCrawlerName = Column(String) + GlueCrawlerSchedule = Column(String) + GlueProfilingJobName = Column(String) + GlueProfilingTriggerSchedule = Column(String) + GlueProfilingTriggerName = Column(String) + GlueDataQualityJobName = Column(String) + GlueDataQualitySchedule = Column(String) + GlueDataQualityTriggerName = Column(String) + IAMDatasetAdminRoleArn = Column(String, nullable=False) + IAMDatasetAdminUserArn = Column(String, nullable=False) + KmsAlias = Column(String, nullable=False) + userRoleForDataset = query_expression() + userRoleInEnvironment = query_expression() + isPublishedInEnvironment = query_expression() + projectPermission = query_expression() + language = Column(String, nullable=False, default='English') + topics = Column(postgresql.ARRAY(String), nullable=True) + confidentiality = Column(String, nullable=False, default='Unclassified') + tags = Column(postgresql.ARRAY(String)) + inProject = query_expression() + + bucketCreated = Column(Boolean, default=False) + glueDatabaseCreated = Column(Boolean, default=False) + iamAdminRoleCreated = Column(Boolean, default=False) + iamAdminUserCreated = Column(Boolean, default=False) + kmsAliasCreated = Column(Boolean, default=False) + lakeformationLocationCreated = Column(Boolean, default=False) + bucketPolicyCreated = Column(Boolean, default=False) + + # bookmarked = Column(Integer, default=0) + # upvotes=Column(Integer, default=0) + + businessOwnerEmail = Column(String, nullable=True) + businessOwnerDelegationEmails = Column(postgresql.ARRAY(String), nullable=True) + stewards = Column(String, nullable=True) + + SamlAdminGroupName = Column(String, nullable=True) 
+ + importedS3Bucket = Column(Boolean, default=False) + importedGlueDatabase = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + importedAdminRole = Column(Boolean, default=False) + imported = Column(Boolean, default=False) + + +class DatasetBucket(Resource, Base): + __tablename__ = 'dataset_bucket' + datasetUri = Column(String, nullable=False) + bucketUri = Column(String, primary_key=True, default=utils.uuid('bucket')) + AwsAccountId = Column(String, nullable=False) + S3BucketName = Column(String, nullable=False) + region = Column(String, default='eu-west-1') + partition = Column(String, default='aws') + KmsAlias = Column(String, nullable=False) + imported = Column(Boolean, default=False) + importedKmsKey = Column(Boolean, default=False) + userRoleForStorageBucket = query_expression() + projectPermission = query_expression() + environmentEndPoint = query_expression() + + @classmethod + def uri(cls): + return cls.bucketUri + + +class ShareObjectItem(Base): + __tablename__ = 'share_object_item' + shareUri = Column(String, nullable=False) + shareItemUri = Column( + String, default=utils.uuid('shareitem'), nullable=False, primary_key=True + ) + itemType = Column(String, nullable=False) + itemUri = Column(String, nullable=False) + itemName = Column(String, nullable=False) + permission = Column(String, nullable=True) + created = Column(DateTime, nullable=False, default=datetime.now) + updated = Column(DateTime, nullable=True, onupdate=datetime.now) + deleted = Column(DateTime, nullable=True) + owner = Column(String, nullable=False) + GlueDatabaseName = Column(String, nullable=True) + GlueTableName = Column(String, nullable=True) + S3AccessPointName = Column(String, nullable=True) + status = Column(String, nullable=False, default=ShareObjectStatus.Draft.value) + action = Column(String, nullable=True) + + +def upgrade(): + try: + envname = os.getenv('envname', 'local') + print('ENVNAME', envname) + engine = get_engine(envname=envname).engine + bind = op.get_bind() + session = orm.Session(bind=bind) + datasets: [Dataset] = session.query(Dataset).all() + if not has_table('dataset_bucket', engine): + op.create_table( + 'dataset_bucket', + sa.Column('bucketUri', sa.String(), nullable=False), + sa.Column('label', sa.String(), nullable=False), + sa.Column('name', sa.String(), nullable=False), + sa.Column('owner', sa.String(), nullable=False), + sa.Column('created', sa.DateTime(), nullable=True), + sa.Column('updated', sa.DateTime(), nullable=True), + sa.Column('deleted', sa.DateTime(), nullable=True), + sa.Column('description', sa.String(), nullable=True), + sa.Column('tags', postgresql.ARRAY(sa.String()), nullable=True), + sa.Column('datasetUri', sa.String(), nullable=False), + sa.Column('AwsAccountId', sa.String(), nullable=False), + sa.Column('S3BucketName', sa.String(), nullable=False), + sa.Column('KmsAlias', sa.String(), nullable=False), + sa.Column('imported', sa.Boolean(), nullable=True), + sa.Column('importedKmsKey', sa.Boolean(), nullable=True), + sa.Column('region', sa.String(), nullable=True), + sa.Column('partition', sa.String(), nullable=False, default='aws'), + sa.ForeignKeyConstraint(columns=['datasetUri'], refcolumns=['dataset.datasetUri']), + sa.PrimaryKeyConstraint('bucketUri'), + ) + print('Creating a new dataset_bucket row for each existing dataset...') + for dataset in datasets: + dataset_bucket = DatasetBucket( + name=dataset.S3BucketName, + datasetUri=dataset.datasetUri, + AwsAccountId=dataset.AwsAccountId, + S3BucketName=dataset.S3BucketName, + 
region=dataset.region, + label=dataset.label, + description=dataset.label, + tags=dataset.tags, + owner=dataset.owner, + KmsAlias=dataset.KmsAlias, + imported=dataset.imported, + importedKmsKey=dataset.importedKmsKey, + ) + session.add(dataset_bucket) + session.flush() # flush to get the bucketUri + + for dataset in datasets: + shared_bucket_object: ShareObjectItem = session.query(ShareObjectItem).filter( + and_( + ShareObjectItem.itemType == ShareableType.S3Bucket.value, + ShareObjectItem.itemUri == dataset.datasetUri, + ) + ).first() + dataset_bucket: DatasetBucket = session.query(DatasetBucket).filter( + DatasetBucket.datasetUri == dataset.datasetUri + ).first() + if shared_bucket_object is not None: + shared_bucket_object.itemUri = dataset_bucket.bucketUri + shared_bucket_object.itemName = dataset_bucket.S3BucketName + + if column_exists('dataset', 'dataSharingModel'): + op.drop_column('dataset', 'dataSharingModel') + session.commit() + + except Exception as exception: + print('Failed to upgrade due to:', exception) + raise exception + + +def column_exists(table_name, column_name): + bind = op.get_context().bind + insp = inspect(bind) + columns = insp.get_columns(table_name) + return any(c["name"] == column_name for c in columns) + + +def downgrade(): + op.drop_table('dataset_bucket') diff --git a/frontend/src/modules/Shares/components/AddShareItemModal.js b/frontend/src/modules/Shares/components/AddShareItemModal.js index a21b55d13..9ee016c33 100644 --- a/frontend/src/modules/Shares/components/AddShareItemModal.js +++ b/frontend/src/modules/Shares/components/AddShareItemModal.js @@ -20,6 +20,7 @@ import { Defaults, Pager, Scrollbar } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; import { useClient } from 'services'; import { addSharedItem, getShareObject } from '../services'; +import { generateShareItemLabel } from '../../../utils/share'; export const AddShareItemModal = (props) => { const client = useClient(); @@ -144,7 +145,7 @@ export const AddShareItemModal = (props) => { sharedItems.nodes.map((item) => ( - {item.itemType === 'Table' ? 'Table' : 'Folder'} + {generateShareItemLabel(item.itemType)} {item.itemName} diff --git a/frontend/src/modules/Shares/components/RevokeShareItemsModal.js b/frontend/src/modules/Shares/components/RevokeShareItemsModal.js index 2aa066df8..ded95ce8c 100644 --- a/frontend/src/modules/Shares/components/RevokeShareItemsModal.js +++ b/frontend/src/modules/Shares/components/RevokeShareItemsModal.js @@ -10,6 +10,7 @@ import { Defaults } from 'design'; import { SET_ERROR, useDispatch } from 'globalErrors'; import { useClient } from 'services'; import { getShareObject, revokeItemsShareObject } from '../services'; +import { generateShareItemLabel } from '../../../utils/share'; export const RevokeShareItemsModal = (props) => { const client = useClient(); @@ -40,7 +41,7 @@ export const RevokeShareItemsModal = (props) => { response.data.getShareObject.items.nodes.map((item) => ({ id: item.shareItemUri, name: item.itemName, - type: item.itemType === 'StorageLocation' ? 
'Folder' : 'Table', + type: generateShareItemLabel(item.itemType), status: item.status })) ); diff --git a/frontend/src/utils/share.js b/frontend/src/utils/share.js new file mode 100644 index 000000000..c52099d0e --- /dev/null +++ b/frontend/src/utils/share.js @@ -0,0 +1,10 @@ +export const generateShareItemLabel = (itemType): string => { + switch (itemType) { + case 'Table': + return 'Table'; + case 'S3Bucket': + return 'S3Bucket'; + case 'StorageLocation': + return 'Folder'; + } +}; diff --git a/tests/modules/datasets/conftest.py b/tests/modules/datasets/conftest.py index a4bc39a4d..efec9ab6e 100644 --- a/tests/modules/datasets/conftest.py +++ b/tests/modules/datasets/conftest.py @@ -6,13 +6,12 @@ from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup from dataall.core.organizations.db.organization_models import Organization from dataall.core.permissions.db.resource_policy_repositories import ResourcePolicy -from dataall.core.stacks.db.stack_models import Stack from dataall.modules.dataset_sharing.db.enums import ShareableType, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_REQUESTER, SHARE_OBJECT_APPROVER from dataall.modules.datasets.api.dataset.enums import ConfidentialityClassification from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ -from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation, DatasetBucket @pytest.fixture(scope='module', autouse=True) @@ -244,7 +243,7 @@ def dataset_model(db): def factory( organization: Organization, environment: Environment, - label: str, + label: str ) -> Dataset: with db.scoped_session() as session: dataset = Dataset( diff --git a/tests/modules/datasets/tasks/conftest.py b/tests/modules/datasets/tasks/conftest.py index 43f888fe6..7503660fc 100644 --- a/tests/modules/datasets/tasks/conftest.py +++ b/tests/modules/datasets/tasks/conftest.py @@ -1,11 +1,10 @@ import pytest -from dataall.core.cognito_groups.db.cognito_group_models import Group from dataall.core.organizations.db.organization_models import Organization from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup from dataall.modules.dataset_sharing.db.enums import ShareableType, ShareItemStatus, ShareObjectStatus, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject -from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset +from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket @pytest.fixture(scope="module") @@ -14,6 +13,7 @@ def factory( organization: Organization, environment: Environment, label: str, + imported: bool = False ) -> Dataset: with db.scoped_session() as session: dataset = Dataset( @@ -31,6 +31,8 @@ def factory( region=environment.region, IAMDatasetAdminUserArn=f"arn:aws:iam::{environment.AwsAccountId}:user/dataset", IAMDatasetAdminRoleArn=f"arn:aws:iam::{environment.AwsAccountId}:role/dataset", + imported=imported, + importedKmsKey=imported ) session.add(dataset) session.commit() @@ -83,6 +85,35 @@ def factory(dataset: Dataset, label: str) -> DatasetTable: yield factory +@pytest.fixture(scope='module', 
autouse=True) +def bucket(db): + cache = {} + + def factory(dataset: Dataset, name) -> DatasetBucket: + key = f'{dataset.datasetUri}-{name}' + if cache.get(key): + return cache.get(key) + with db.scoped_session() as session: + bucket = DatasetBucket( + name=name, + label=name, + owner=dataset.owner, + datasetUri=dataset.datasetUri, + region=dataset.region, + AwsAccountId=dataset.AwsAccountId, + S3BucketName=dataset.S3BucketName, + KmsAlias=dataset.KmsAlias, + imported=dataset.imported, + importedKmsKey=dataset.importedKmsKey, + ) + session.add(bucket) + session.commit() + + return bucket + + yield factory + + @pytest.fixture(scope="module") def share(db): def factory( @@ -99,6 +130,7 @@ def factory( principalType=PrincipalType.Group.value, principalIAMRoleName=env_group.environmentIAMRoleName, status=ShareObjectStatus.Approved.value, + groupUri=env_group.groupUri, ) session.add(share) session.commit() @@ -150,3 +182,25 @@ def factory( return share_item yield factory + + +@pytest.fixture(scope="module") +def share_item_bucket(db): + def factory( + share: ShareObject, + bucket: DatasetBucket, + ) -> ShareObjectItem: + with db.scoped_session() as session: + share_item = ShareObjectItem( + shareUri=share.shareUri, + owner="alice", + itemUri=bucket.bucketUri, + itemType=ShareableType.StorageLocation.value, + itemName=bucket.name, + status=ShareItemStatus.Share_Approved.value, + ) + session.add(share_item) + session.commit() + return share_item + + yield factory diff --git a/tests/modules/datasets/tasks/test_lf_share_manager.py b/tests/modules/datasets/tasks/test_lf_share_manager.py index fd76ba0b1..78a289d9f 100644 --- a/tests/modules/datasets/tasks/test_lf_share_manager.py +++ b/tests/modules/datasets/tasks/test_lf_share_manager.py @@ -660,7 +660,7 @@ def test_revoke_external_account_access_on_source_account( return_value=boto3.Session(), ) - processor_cross_account.revoke_external_account_access_on_source_account(table1.GlueDatabaseName, table1.GlueTableName) + processor_cross_account.revoke_external_account_access_on_source_account() # Then lf_mock.assert_called_once() diff --git a/tests/modules/datasets/tasks/test_s3_share_manager.py b/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py similarity index 90% rename from tests/modules/datasets/tasks/test_s3_share_manager.py rename to tests/modules/datasets/tasks/test_s3_access_point_share_manager.py index febea47f9..598c06f2c 100644 --- a/tests/modules/datasets/tasks/test_s3_share_manager.py +++ b/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py @@ -11,7 +11,7 @@ from dataall.modules.dataset_sharing.aws.s3_client import S3ControlClient from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem -from dataall.modules.dataset_sharing.services.share_managers import S3ShareManager +from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset SOURCE_ENV_ACCOUNT = "111111111111" @@ -127,7 +127,7 @@ def admin_ap_delegation_bucket_policy(): "Resource": "arn:aws:s3:::dataall-iris-test-120922-4s47wv71", }, { - "Sid": "AllowAllToAdmin", + "Sid": "DelegateAccessToAccessPoint", "Effect": "Allow", "Principal": "*", "Action": "s3:*", @@ -143,7 +143,7 @@ def admin_ap_delegation_bucket_policy(): def mock_s3_client(mocker): mock_client = MagicMock() mocker.patch( - 'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.S3Client', + 
'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.S3Client', mock_client ) mock_client.create_bucket_policy.return_value = None @@ -153,7 +153,7 @@ def mock_s3_client(mocker): def mock_s3_control_client(mocker): mock_client = MagicMock() mocker.patch( - 'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.S3ControlClient', + 'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.S3ControlClient', mock_client ) @@ -170,7 +170,7 @@ def mock_s3_control_client(mocker): def mock_kms_client(mocker): mock_client = MagicMock() mocker.patch( - 'dataall.modules.dataset_sharing.services.share_managers.s3_share_manager.KmsClient', + 'dataall.modules.dataset_sharing.services.share_managers.s3_access_point_share_manager.KmsClient', mock_client ) mock_client.put_key_policy.return_value = None @@ -192,6 +192,16 @@ def target_dataset_access_control_policy(request): f"arn:aws:s3:datasetregion:{request.param[1]}:accesspoint/{request.param[2]}", f"arn:aws:s3:datasetregion:{request.param[1]}:accesspoint/{request.param[2]}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*" + ] } ], } @@ -229,7 +239,7 @@ def test_manage_bucket_policy_no_policy( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -250,7 +260,7 @@ def test_manage_bucket_policy_no_policy( # Then print(f"Bucket policy generated {created_bucket_policy}") - sid_list = [statement.get("Sid") for statement in + sid_list = [statement.get("Sid") for statement in created_bucket_policy["Statement"] if statement.get("Sid")] assert "AllowAllToAdmin" in sid_list @@ -278,7 +288,7 @@ def test_manage_bucket_policy_existing_policy( s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -296,7 +306,7 @@ def test_manage_bucket_policy_existing_policy( s3_client.create_bucket_policy.assert_not_called() -@pytest.mark.parametrize("target_dataset_access_control_policy", +@pytest.mark.parametrize("target_dataset_access_control_policy", ([("bucketname", "aws_account_id", "access_point_name")]), indirect=True) def test_grant_target_role_access_policy_existing_policy_bucket_not_included( @@ -326,8 +336,11 @@ def test_grant_target_role_access_policy_existing_policy_bucket_not_included( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -349,6 +362,9 @@ def test_grant_target_role_access_policy_existing_policy_bucket_not_included( # Assert that bucket_name is inside the resource array of policy object assert location1.S3BucketName in ",".join(policy_object["Statement"][0]["Resource"]) + assert f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key" in \ + iam_policy["Statement"][1]["Resource"] \ + and "kms:*" in iam_policy["Statement"][1]["Action"] @pytest.mark.parametrize("target_dataset_access_control_policy", ([("dataset1", SOURCE_ENV_ACCOUNT, "test")]), indirect=True) @@ -379,8 +395,11 @@ def test_grant_target_role_access_policy_existing_policy_bucket_included( return_value=None, ) + kms_client = 
mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -395,7 +414,7 @@ def test_grant_target_role_access_policy_existing_policy_bucket_included( manager.grant_target_role_access_policy() # Then - iam_update_role_policy_mock.assert_not_called() + iam_update_role_policy_mock.assert_called() def test_grant_target_role_access_policy_test_no_policy( @@ -434,12 +453,25 @@ def test_grant_target_role_access_policy_test_no_policy( f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{share_item_folder1.S3AccessPointName}", f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{share_item_folder1.S3AccessPointName}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key/*" + ] } ], } + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -455,7 +487,7 @@ def test_grant_target_role_access_policy_test_no_policy( # Then iam_update_role_policy_mock.assert_called_with( - target_environment.AwsAccountId, share1.principalIAMRoleName, + target_environment.AwsAccountId, share1.principalIAMRoleName, "targetDatasetAccessControlPolicy", json.dumps(expected_policy) ) @@ -498,7 +530,7 @@ def test_update_dataset_bucket_key_policy_with_env_admin( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -614,7 +646,7 @@ def test_update_dataset_bucket_key_policy_without_env_admin( } with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -672,7 +704,7 @@ def test_manage_access_point_and_policy_1( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -740,7 +772,7 @@ def test_manage_access_point_and_policy_2( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -805,7 +837,7 @@ def test_manage_access_point_and_policy_3( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -869,7 +901,7 @@ def test_delete_access_point_policy_with_env_admin_one_prefix( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -928,7 +960,7 @@ def test_delete_access_point_policy_with_env_admin_multiple_prefix( ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -974,7 +1006,7 @@ def test_dont_delete_access_point_with_policy( s3_control_client().get_access_point_policy.return_value = json.dumps(existing_ap_policy) # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1014,7 +1046,7 @@ def test_delete_access_point_without_policy( # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1055,9 +1087,19 
@@ def test_delete_target_role_access_policy_no_remaining_statement( "Resource": [ f"arn:aws:s3:::{location1.S3BucketName}", f"arn:aws:s3:::{location1.S3BucketName}/*", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}/*", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}/*", ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key/*" + ] } ], } @@ -1077,9 +1119,12 @@ def test_delete_target_role_access_policy_no_remaining_statement( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1122,9 +1167,21 @@ def test_delete_target_role_access_policy_with_remaining_statement( "arn:aws:s3:::UNRELATED_BUCKET_ARN", f"arn:aws:s3:::{location1.S3BucketName}", f"arn:aws:s3:::{location1.S3BucketName}/*", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}", - f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3ShareManager.build_access_point_name(share1)}/*", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}", + f"arn:aws:s3:{dataset1.region}:{dataset1.AwsAccountId}:accesspoint/{S3AccessPointShareManager.build_access_point_name(share1)}/*", + ], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset1.region}:{dataset1.AwsAccountId}:key/kms-key/*" + ] } ], } @@ -1136,6 +1193,16 @@ def test_delete_target_role_access_policy_with_remaining_statement( "Effect": "Allow", "Action": ["s3:*"], "Resource": ["arn:aws:s3:::UNRELATED_BUCKET_ARN"], + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*", + ] } ], } @@ -1155,9 +1222,12 @@ def test_delete_target_role_access_policy_with_remaining_statement( return_value=None, ) + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + # When with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1245,7 +1315,7 @@ def test_delete_dataset_bucket_key_policy_existing_policy_with_additional_target ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1312,7 +1382,7 @@ def test_delete_dataset_bucket_key_policy_existing_policy_with_no_additional_tar ) with db.scoped_session() as session: - manager = S3ShareManager( + manager = S3AccessPointShareManager( session, dataset1, share1, @@ -1331,4 +1401,4 @@ def 
test_delete_dataset_bucket_key_policy_existing_policy_with_no_additional_tar kms_client().put_key_policy.assert_called_with( kms_client().get_key_id.return_value, json.dumps(remaining_policy) - ) + ) \ No newline at end of file diff --git a/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py b/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py new file mode 100644 index 000000000..0eb35aa2f --- /dev/null +++ b/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py @@ -0,0 +1,1614 @@ +import pytest +import json +from unittest.mock import MagicMock + +from typing import Callable + +from dataall.core.cognito_groups.db.cognito_group_models import Group +from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup +from dataall.core.organizations.db.organization_models import Organization +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject +from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager +from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket + +SOURCE_ENV_ACCOUNT = "111111111111" +SOURCE_ENV_ROLE_NAME = "dataall-ProducerEnvironment-i6v1v1c2" + +TARGET_ACCOUNT_ENV = "222222222222" +TARGET_ACCOUNT_ENV_ROLE_NAME = "dataall-ConsumersEnvironment-r71ucp4m" + +DATAALL_READ_ONLY_SID = "DataAll-Bucket-ReadOnly" +DATAALL_ALLOW_ALL_ADMINS_SID = "AllowAllToAdmin" + + +@pytest.fixture(scope="module") +def source_environment(env: Callable, org_fixture: Organization, group: Group): + source_environment = env( + org=org_fixture, + account=SOURCE_ENV_ACCOUNT, + envname="source_environment", + owner=group.owner, + group=group.name, + role=SOURCE_ENV_ROLE_NAME, + ) + yield source_environment + + +@pytest.fixture(scope="module") +def source_environment_group(environment_group: Callable, source_environment: Environment, group: Group): + source_environment_group = environment_group(source_environment, group.name) + yield source_environment_group + + +@pytest.fixture(scope="module") +def target_environment(env: Callable, org_fixture: Organization, group2: Group): + target_environment = env( + org=org_fixture, + account=TARGET_ACCOUNT_ENV, + envname="target_environment", + owner=group2.owner, + group=group2.name, + role=TARGET_ACCOUNT_ENV_ROLE_NAME, + ) + yield target_environment + + +@pytest.fixture(scope="module") +def target_environment_group(environment_group: Callable, target_environment: Environment, group2: Group): + target_environment_group = environment_group(target_environment, group2.name) + yield target_environment_group + + +@pytest.fixture(scope="module") +def dataset_imported(create_dataset: Callable, org_fixture: Organization, source_environment: Environment): + dataset_imported = create_dataset(org_fixture, source_environment, "dataset_imported", True) + yield dataset_imported + + +@pytest.fixture(scope="module") +def dataset2(create_dataset: Callable, org_fixture: Organization, source_environment: Organization): + dataset2 = create_dataset(org_fixture, source_environment, "dataset2") + yield dataset2 + + +@pytest.fixture(scope="module") +def bucket2(bucket: Callable, dataset2: Dataset) -> DatasetBucket: + yield bucket(dataset2, "bucket2") + + +@pytest.fixture(scope="module") +def bucket3(bucket: Callable, dataset_imported: Dataset) -> DatasetBucket: + yield bucket(dataset_imported, "bucket3") + + +@pytest.fixture(scope="module") +def share2(share: Callable, dataset2: Dataset, + target_environment: Environment, + target_environment_group: 
EnvironmentGroup) -> ShareObject: + share2 = share(dataset2, target_environment, target_environment_group) + yield share2 + + +@pytest.fixture(scope="module") +def share3(share: Callable, dataset_imported: Dataset, + target_environment: Environment, + target_environment_group: EnvironmentGroup) -> ShareObject: + share3 = share(dataset_imported, target_environment, target_environment_group) + yield share3 + + +@pytest.fixture(scope="function") +def base_bucket_policy(dataset2): + bucket_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Principal": {"AWS": "*"}, + "Action": "s3:*", + "Resource": [f"arn:aws:s3:::{dataset2.S3BucketName}", f"arn:aws:s3:::{dataset2.S3BucketName}/*"], + "Condition": {"Bool": {"aws:SecureTransport": "false"}}, + } + ], + } + return bucket_policy + + +def base_kms_key_policy(target_environment_samlGrpName: str): + kms_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Sid": f"{target_environment_samlGrpName}", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": {"StringLike": {"aws:userId": f"{target_environment_samlGrpName}:*"}}, + } + ], + } + return kms_policy + + +def complete_access_bucket_policy(target_requester_arn, s3_bucket_name, owner_roleId): + bucket_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Deny", + "Principal": { + "AWS": "*" + }, + "Sid": "RequiredSecureTransport", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "Bool": { + "aws:SecureTransport": "false" + } + } + }, + { + "Sid": f"{DATAALL_ALLOW_ALL_ADMINS_SID}", + "Effect": "Allow", + "Principal": "*", + "Action": "s3:*", + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ], + "Condition": { + "StringLike": { + "aws:userId": owner_roleId + } + } + }, + { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + f"{target_requester_arn}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{s3_bucket_name}", + f"arn:aws:s3:::{s3_bucket_name}/*" + ] + } + ] + } + + return bucket_policy + + +def mock_s3_client(mocker): + mock_client = MagicMock() + mocker.patch( + 'dataall.modules.dataset_sharing.services.share_managers.s3_bucket_share_manager.S3Client', + mock_client + ) + mock_client.create_bucket_policy.return_value = None + return mock_client + + +def mock_kms_client(mocker): + mock_client = MagicMock() + mocker.patch( + 'dataall.modules.dataset_sharing.services.share_managers.s3_bucket_share_manager.KmsClient', + mock_client + ) + mock_client.put_key_policy.return_value = None + return mock_client + + +# For below test cases, dataset2, share2, src, target env and src group , env group remain the same +def test_grant_role_bucket_policy_with_no_policy_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + bucket2, + db, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # No Bucket policy. 
A Default bucket policy should be formed with DataAll-Bucket-ReadOnly, AllowAllToAdmin & RequiredSecureTransport Sids + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = None + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_ids", + return_value=[1, 2, 3], + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy and it should be the same + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + # Check all the Sids are present + # Check that the S3 bucket resources are also present + assert f"{DATAALL_ALLOW_ALL_ADMINS_SID}" in modified_bucket_policy["Statement"][0]["Sid"] + assert modified_bucket_policy["Statement"][0]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}', + f'arn:aws:s3:::{dataset2.S3BucketName}/*'] + assert modified_bucket_policy["Statement"][0]["Condition"]["StringLike"]["aws:userId"] == ['1:*', '2:*', '3:*'] + assert "RequiredSecureTransport" in modified_bucket_policy["Statement"][1]["Sid"] + assert modified_bucket_policy["Statement"][1]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}', + f'arn:aws:s3:::{dataset2.S3BucketName}/*'] + assert f"{DATAALL_READ_ONLY_SID}" in modified_bucket_policy["Statement"][2]["Sid"] + assert modified_bucket_policy["Statement"][2]["Resource"] == [f'arn:aws:s3:::{dataset2.S3BucketName}', + f'arn:aws:s3:::{dataset2.S3BucketName}/*'] + assert modified_bucket_policy["Statement"][2]["Principal"]["AWS"] == [ + f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}"] + + +def test_grant_role_bucket_policy_with_default_complete_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # Bucket Policy containing required "AllowAllToAdmin" and "DataAll-Bucket-ReadOnly" Sid's + # Bucket Policy shouldn't be modified after calling "grant_role_bucket_policy" function + + target_arn = f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + + bucket_policy = complete_access_bucket_policy(target_arn, + dataset2.S3BucketName, "ABNCSJ81982393") + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy and it should be the same + created_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # Check if nothing is removed from the policy and is the policy remains the same + for policy in created_bucket_policy["Statement"]: + assert policy["Sid"] in json.dumps(bucket_policy) + + +def test_grant_role_bucket_policy_with_policy_and_no_allow_owner_sid_and_no_read_only_sid( + mocker, + source_environment_group, + 
target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # base bucket policy + # Check if both "AllowAllToAdmin" and "DataAll-Bucket-ReadOnly" Sid's Statements are added to the policy + + bucket_policy = base_bucket_policy + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_ids", + return_value=[1, 2, 3], + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # AllowToAdmin, DataAll-Bucket-ReadOnly Sid's should be attached now + for policy in modified_bucket_policy["Statement"]: + if "Sid" in policy: + assert policy["Sid"] in [f"{DATAALL_ALLOW_ALL_ADMINS_SID}", f"{DATAALL_READ_ONLY_SID}"] + + +def test_grant_role_bucket_policy_with_another_read_only_role( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given base bucket policy with "DataAll-Bucket-ReadOnly" + bucket_policy = base_bucket_policy + + target_arn = f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + + # Append a policy for read only role + bucket_policy["Statement"].append( + { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + "SomeTargetResourceArn" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::someS3Bucket", + f"arn:aws:s3:::someS3Bucket/*" + ] + }) + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_delegation_role_arn", + return_value="arn:role", + ) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_ids", + return_value=[1, 2, 3], + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + # Get the Bucket Policy and it should be the same + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # AllowToAdmin Sid should be attached now. 
Also DataAll-Bucket-ReadOnly Sid should be present + for policy in modified_bucket_policy["Statement"]: + if "Sid" in policy: + assert policy["Sid"] in [f"{DATAALL_ALLOW_ALL_ADMINS_SID}", f"{DATAALL_READ_ONLY_SID}"] + + # Check if the principal was appended and not overridden into the DataAll-Bucket-ReadOnly + assert len(modified_bucket_policy["Statement"][1]["Principal"]["AWS"]) == 2 + assert modified_bucket_policy["Statement"][1]["Principal"]["AWS"][0] == "SomeTargetResourceArn" + + +def test_grant_s3_iam_access_with_no_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # There is not existing IAM policy in the requesters account for the dataset's S3bucket + # Check if the update_role_policy func is called and policy statements are added + + mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=None) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_s3_iam_access() + + iam_update_role_policy_mock.assert_called() + + iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + # Assert if the IAM role policy with S3 and KMS permissions was created + assert len(iam_policy["Statement"]) == 2 + assert len(iam_policy["Statement"][0]["Resource"]) == 2 + assert len(iam_policy["Statement"][1]["Resource"]) == 2 + assert f"arn:aws:s3:::{dataset2.S3BucketName}" in iam_policy["Statement"][0]["Resource"] and "s3:*" in iam_policy["Statement"][0]["Action"] + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" in \ + iam_policy["Statement"][1]["Resource"] \ + and "kms:*" in iam_policy["Statement"][1]["Action"] + + +def test_grant_s3_iam_access_with_policy_and_target_resources_not_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given policy with some other bucket as resource + # Check if the correct resource is attached/appended + + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::S3Bucket", + f"arn:aws:s3:::S3Bucket/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:12121121121:key/some-kms-key", + f"arn:aws:kms:us-east-1:12121121121:key/some-kms-key/*" + ] + } + ] + } + + mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=policy) + + assert len(policy["Statement"]) == 2 + assert len(policy["Statement"][0]["Resource"]) == 2 + assert len(policy["Statement"][1]["Resource"]) == 2 + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + 
target_environment_group, + ) + + manager.grant_s3_iam_access() + + iam_update_role_policy_mock.assert_called() + + iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + # Assert that new resources were appended + assert len(policy["Statement"]) == 2 + assert len(iam_policy["Statement"][0]["Resource"]) == 4 + assert f'arn:aws:s3:::{dataset2.S3BucketName}' in iam_policy["Statement"][0]["Resource"] + assert len(iam_policy["Statement"][1]["Resource"]) == 4 + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" in iam_policy["Statement"][1]["Resource"] + + +# Tests to check if +def test_grant_s3_iam_access_with_complete_policy_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given complete policy present with required target resources + # Check if policy created after calling function and the existing Policy is same + + policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key/*" + ] + } + ] + } + + mocker.patch("dataall.base.aws.iam.IAM.get_role_policy", return_value=policy) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_s3_iam_access() + + # Assert that the IAM Policy is the same as the existing complete policy + iam_update_role_policy_mock.assert_called() + + created_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + assert len(created_iam_policy["Statement"]) == 2 + assert policy["Statement"][0]["Resource"] == created_iam_policy["Statement"][0]["Resource"] and policy["Statement"][0]["Action"] == created_iam_policy["Statement"][0]["Action"] + assert policy["Statement"][1]["Resource"] == created_iam_policy["Statement"][1]["Resource"] and policy["Statement"][1]["Action"] == \ + created_iam_policy["Statement"][1]["Action"] + + +def test_grant_dataset_bucket_key_policy_with_complete_policy_present( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given complete existing policy + # Check if KMS.put_key_policy is not called + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, 
+ target_environment_group, + ) + + manager.grant_dataset_bucket_key_policy() + + kms_client().put_key_policy.assert_not_called() + + +def test_grant_dataset_bucket_key_policy_with_target_requester_id_absent( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given policy where target_requester is not present + # Check if KMS.put_key_policy is called and check if the policy is modified + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + existing_key_policy = base_kms_key_policy("OtherTargetSamlId") + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_dataset_bucket_key_policy() + + kms_client().put_key_policy.assert_called() + + kms_key_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(kms_key_policy["Statement"]) == 2 + assert kms_key_policy["Statement"][1]["Sid"] == target_environment.SamlGroupName + assert kms_key_policy["Statement"][1]["Action"] == "kms:Decrypt" + assert target_environment.SamlGroupName in kms_key_policy["Statement"][1]["Condition"]["StringLike"]["aws:userId"] + +# Test Case to check if the IAM Role is updated +def test_grant_dataset_bucket_key_policy_and_default_bucket_key_policy( + mocker, + source_environment_group, + target_environment_group, + dataset_imported, + db, + share3: ShareObject, + bucket3, + source_environment: Environment, + target_environment: Environment + ): + # Given + # Dataset is imported and it doesn't have Imported KMS Key + # Mocking KMS key function - > Check if not called + # Mocking KMS Tags Functions -> Check if not called + + existing_key_policy = base_kms_key_policy("OtherTargetSamlId") + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + # dataset2 should not have importedKey to simulate that while importing the dataset a key was not added + bucket3.importedKmsKey = False + session.add(bucket3) + + manager.grant_dataset_bucket_key_policy() + + # Assert that when a dataset is imported and doesn't have importedKey, kms policy function are not triggered + kms_client().get_key_policy.assert_not_called() + kms_client().put_key_policy.assert_not_called() + + bucket3.importedKmsKey = True + session.add(bucket3) + + +def test_grant_dataset_bucket_key_policy_with_imported( + mocker, + source_environment_group, + target_environment_group, + dataset_imported, + bucket3, + db, + share3: ShareObject, + source_environment: Environment, + target_environment: Environment +): + # Given + # Dataset is imported and it has Imported KMS Key + # Mocking KMS key function + # Mocking KMS Tags 
Functions + # Check if the bucket policy is modified and the targetResource is added + + existing_key_policy = base_kms_key_policy("OtherTargetSamlId") + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.grant_dataset_bucket_key_policy() + + # Assert that when a dataset is imported and has importedKey + # policy is fetched and the target requester id SID is attached to it + kms_client().get_key_policy.assert_called() + kms_client().put_key_policy.assert_called() + updated_bucket_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(updated_bucket_policy["Statement"]) == 2 + assert updated_bucket_policy["Statement"][1]["Sid"] == target_environment.SamlGroupName + assert target_environment.SamlGroupName in updated_bucket_policy["Statement"][1]["Condition"]["StringLike"][ + "aws:userId"] + + +def test_delete_target_role_bucket_policy_with_no_read_only_sid( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + share2: ShareObject, + bucket2, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base Bucket Policy with no DataAll-Bucket-ReadOnly Sid + # S3 function to update bucket policy (create_bucket_policy) should not trigger + + bucket_policy = base_bucket_policy + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_not_called() + + +def test_delete_target_role_bucket_policy_with_multiple_principals_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base Bucket Policy with DataAll-Bucket-ReadOnly Sid And Multiple Principals + # Check if the appropriate AWS arn is removed and 'SomeotherArn' is retained + + bucket_policy = base_bucket_policy + + addition_to_policy = { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + "SomeotherArn", + f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*" + ] + } + + bucket_policy["Statement"].append(addition_to_policy) + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + 
manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # Check if the 'DataAll-Bucket-ReadOnly' Sid is still present + # Check if the 'someOtherArn' is still present and the target arn is removed + assert modified_bucket_policy["Statement"][1]["Sid"] == f"{DATAALL_READ_ONLY_SID}" + assert len(modified_bucket_policy["Statement"][1]["Principal"]["AWS"]) == 1 + assert 'SomeotherArn' in modified_bucket_policy["Statement"][1]["Principal"]["AWS"] + assert f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" not in \ + modified_bucket_policy["Statement"][1]["Principal"]["AWS"] + + +def test_delete_target_role_bucket_policy_with_one_principal_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, + base_bucket_policy +): + # Given + # Base Bucket Policy with DataAll-Bucket-ReadOnly Sid And Single target Principals + # Bucket Policy should not have the DataAll-Bucket-ReadOnly Sid after delete_target_role_bucket_policy is called + + bucket_policy = base_bucket_policy + + addition_to_policy = { + "Sid": f"{DATAALL_READ_ONLY_SID}", + "Effect": "Allow", + "Principal": { + "AWS": [ + f"arn:aws:iam::{target_environment.AwsAccountId}:role/{target_environment.EnvironmentDefaultIAMRoleName}" + ] + }, + "Action": [ + "s3:List*", + "s3:GetObject" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*" + ] + } + + bucket_policy["Statement"].append(addition_to_policy) + + assert len(bucket_policy["Statement"]) == 2 + + sid_list = [statement["Sid"] for statement in bucket_policy["Statement"] if "Sid" in statement] + assert f"{DATAALL_READ_ONLY_SID}" in sid_list + + s3_client = mock_s3_client(mocker) + s3_client().get_bucket_policy.return_value = json.dumps(bucket_policy) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_policy() + + s3_client().create_bucket_policy.assert_called() + + modified_bucket_policy = json.loads(s3_client().create_bucket_policy.call_args.args[1]) + + # Check if the 'DataAll-Bucket-ReadOnly' Sid is removed completely + assert len(modified_bucket_policy["Statement"]) == 1 + sid_list = [statement["Sid"] for statement in modified_bucket_policy["Statement"] if "Sid" in statement] + assert f"{DATAALL_READ_ONLY_SID}" not in sid_list + + +def test_delete_target_role_access_policy_no_resource_of_datasets_s3_bucket( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM Policy which doesn't contain target S3 bucket resources + # IAM.delete_role_policy & IAM.update_role_policy should not be called + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::someOtherBucket", + f"arn:aws:s3:::someOtherBucket/*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + 
f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*" + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_called() + iam_delete_role_policy_mock.assert_not_called() + + # Get the updated IAM policy and compare it with the existing one + updated_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + assert len(updated_iam_policy["Statement"]) == 2 + assert "arn:aws:s3:::someOtherBucket,arn:aws:s3:::someOtherBucket/*" == ",".join(updated_iam_policy["Statement"][0]["Resource"]) + assert "arn:aws:kms:us-east-1:121231131212:key/some-key-2112,arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*" == ",".join( + updated_iam_policy["Statement"][1]["Resource"]) + + +def test_delete_target_role_access_policy_with_multiple_s3_buckets_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM Policy with multiple bucket resources along with target environments bucket resources + # Check if the IAM.update_policy is called and it only updates / deletes the target env bucket resources + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::someOtherBucket", + f"arn:aws:s3:::someOtherBucket/*", + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*", + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112", + f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key/*" + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_called() + iam_delete_role_policy_mock.assert_not_called() + + updated_iam_policy = json.loads(iam_update_role_policy_mock.call_args.args[3]) + + assert 
f"arn:aws:s3:::{dataset2.S3BucketName}" not in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::{dataset2.S3BucketName}/*" not in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::someOtherBucket" in updated_iam_policy["Statement"][0]["Resource"] + assert f"arn:aws:s3:::someOtherBucket/*" in updated_iam_policy["Statement"][0]["Resource"] + + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key" not in updated_iam_policy["Statement"][1]["Resource"] + assert f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key/*" not in updated_iam_policy["Statement"][1]["Resource"] + assert f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112" in updated_iam_policy["Statement"][1]["Resource"] + assert f"arn:aws:kms:us-east-1:121231131212:key/some-key-2112/*" in updated_iam_policy["Statement"][1]["Resource"] + + +def test_delete_target_role_access_policy_with_one_s3_bucket_and_one_kms_resource_in_policy( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # IAM Policy with target environments bucket resources only + # Check if the IAM.delete_policy is called + + iam_policy = { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:*" + ], + "Resource": [ + f"arn:aws:s3:::{dataset2.S3BucketName}", + f"arn:aws:s3:::{dataset2.S3BucketName}/*", + ] + }, + { + "Effect": "Allow", + "Action": [ + "kms:*" + ], + "Resource": [ + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key", + f"arn:aws:kms:{dataset2.region}:{dataset2.AwsAccountId}:key/kms-key/*" + ] + } + ] + } + + mocker.patch( + "dataall.base.aws.iam.IAM.get_role_policy", + return_value=iam_policy, + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + iam_update_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.update_role_policy", return_value=None) + + iam_delete_role_policy_mock = mocker.patch("dataall.base.aws.iam.IAM.delete_role_policy", return_value=None) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_access_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + iam_update_role_policy_mock.assert_not_called() + iam_delete_role_policy_mock.assert_called() + + +def test_delete_target_role_bucket_key_policy_with_no_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given + # complete existing KMS key policy with no target requester id in it + # Check if KMS.put_key_policy is not called + + existing_key_policy = base_kms_key_policy("Some_other_requester_id") + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + 
source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_not_called() + + +def test_delete_target_role_bucket_key_policy_with_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given complete existing KMS key policy with target requester id in it + # Check if KMS.put_key_policy is called and the statement corresponding to target Sid should be removed + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 0 + + +def test_delete_target_role_bucket_key_policy_with_multiple_target_requester_id( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given complete existing KMS key policy with multiple target requester ids + # Check if KMS.put_key_policy is called and the statement corresponding to target Sid should be removed + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + existing_key_policy["Statement"].append( + { + "Sid": "some_other_target_sid", + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": {"StringLike": {"aws:userId": "some_other_target_sid:*"}} + } + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 1 + assert new_kms_policy["Statement"][0]["Sid"] == "some_other_target_sid" + assert target_environment.SamlGroupName not in json.dumps(new_kms_policy) + + +# Test for delete_target_role_bucket_key_policy when dataset is imported +def test_delete_target_role_bucket_key_policy_with_target_requester_id_and_imported_dataset( 
+ mocker, + source_environment_group, + target_environment_group, + dataset_imported, + db, + bucket3, + share3: ShareObject, + source_environment: Environment, + target_environment: Environment + ): + # Given complete existing KMS key policy with target requester id in it + # and that the dataset is imported and has a importedKMS key + # Check if KMS.put_key_policy is called and the statement corresponding to target Sid should be removed + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share3, + target_bucket=bucket3, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 0 + + +# Test for delete_target_role_bucket_key_policy when dataset is imported and importedKMS key is missing +def test_delete_target_role_bucket_key_policy_with_target_requester_id_and_imported_dataset_with_no_imported_kms_key( + mocker, + source_environment_group, + target_environment_group, + dataset_imported, + db, + bucket3, + share3: ShareObject, + source_environment: Environment, + target_environment: Environment + ): + # Given complete existing KMS key policy with target requester id in it + # and the dataset is imported but doens't contain importedKey + # In that case the KMS.put_key_policy should not be called + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset_imported, + share3, + bucket3, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + # dataset2 should not have importedKey to simulate that while importing the dataset a key was not added + bucket3.importedKmsKey = False + session.add(dataset_imported) + + manager.delete_target_role_bucket_key_policy( + share=share3, + target_bucket=bucket3, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_not_called() + + bucket3.importedKmsKey = True + session.add(dataset_imported) + + +def test_delete_target_role_bucket_key_policy_missing_sid( + mocker, + source_environment_group, + target_environment_group, + dataset2, + db, + bucket2, + share2: ShareObject, + source_environment: Environment, + target_environment: Environment, +): + # Given complete existing KMS key policy with multiple target requester ids + # Check if KMS.put_key_policy is called and the statement corresponding to target Sid should be removed + + existing_key_policy = base_kms_key_policy(target_environment.SamlGroupName) + 
missing_sid_statement = { + "Effect": "Allow", + "Principal": {"AWS": "*"}, + "Action": "kms:Decrypt", + "Resource": "*", + "Condition": {"StringLike": {"aws:userId": "some_other_target_sid:*"}} + } + existing_key_policy["Statement"].append( + missing_sid_statement + ) + + kms_client = mock_kms_client(mocker) + kms_client().get_key_id.return_value = "kms-key" + + kms_client().get_key_policy.return_value = json.dumps(existing_key_policy) + + mocker.patch( + "dataall.base.aws.sts.SessionHelper.get_role_id", + return_value=target_environment.SamlGroupName, + ) + + with db.scoped_session() as session: + manager = S3BucketShareManager( + session, + dataset2, + share2, + bucket2, + source_environment, + target_environment, + source_environment_group, + target_environment_group, + ) + + manager.delete_target_role_bucket_key_policy( + share=share2, + target_bucket=bucket2, + target_environment=target_environment + ) + + kms_client().put_key_policy.assert_called() + + new_kms_policy = json.loads(kms_client().put_key_policy.call_args.args[1]) + + assert len(new_kms_policy["Statement"]) == 1 + assert new_kms_policy["Statement"][0] == missing_sid_statement + assert target_environment.SamlGroupName not in json.dumps(new_kms_policy) diff --git a/tests/modules/datasets/test_share.py b/tests/modules/datasets/test_share.py index 60909a65b..5ff64b965 100644 --- a/tests/modules/datasets/test_share.py +++ b/tests/modules/datasets/test_share.py @@ -401,6 +401,10 @@ def create_share_object(client, username, group, groupUri, environmentUri, datas userRoleForShareObject requestPurpose rejectPurpose + dataset { + datasetUri + datasetName + } } } """ From 1365e92c8a150ccad63fe80115e3a2e73508bef4 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:19:16 +0100 Subject: [PATCH 07/21] Revert overwrites 2. --- backend/dataall/modules/dataset_sharing/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/__init__.py b/backend/dataall/modules/dataset_sharing/__init__.py index d98a13dbc..99dd6c01e 100644 --- a/backend/dataall/modules/dataset_sharing/__init__.py +++ b/backend/dataall/modules/dataset_sharing/__init__.py @@ -17,7 +17,8 @@ def is_supported(modes: Set[ImportMode]) -> bool: @staticmethod def depends_on() -> List[Type['ModuleInterface']]: - return [DatasetBaseModuleInterface] + from dataall.modules.notifications import NotificationsModuleInterface + return [DatasetBaseModuleInterface, NotificationsModuleInterface] def __init__(self): from dataall.modules.dataset_sharing import api @@ -35,7 +36,8 @@ def is_supported(modes: List[ImportMode]): @staticmethod def depends_on() -> List[Type['ModuleInterface']]: - return [DatasetBaseModuleInterface] + from dataall.modules.notifications import NotificationsModuleInterface + return [DatasetBaseModuleInterface, NotificationsModuleInterface] def __init__(self): import dataall.modules.dataset_sharing.handlers From bbcfbd5120dfc4463408fa89ce8aa0f2be03aac3 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:21:03 +0100 Subject: [PATCH 08/21] Revert overwrites 3. 
--- .../modules/dataset_sharing/aws/kms_client.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/backend/dataall/modules/dataset_sharing/aws/kms_client.py b/backend/dataall/modules/dataset_sharing/aws/kms_client.py index bdb9e2e91..35c21eaa4 100644 --- a/backend/dataall/modules/dataset_sharing/aws/kms_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/kms_client.py @@ -53,6 +53,23 @@ def get_key_id(self, key_alias: str): else: return response['KeyMetadata']['KeyId'] + def check_key_exists(self, key_alias: str): + try: + key_exist = False + paginator = self._client.get_paginator('list_aliases') + for page in paginator.paginate(): + key_aliases = [alias["AliasName"] for alias in page['Aliases']] + if key_alias in key_aliases: + key_exist = True + break + except Exception as e: + log.error( + f'Failed to list kms key aliases in account {self._account_id}: {e}' + ) + return None + else: + return key_exist + def add_tags_to_key(self, key_id: str, tags: list): """ Add tags to an existing AWS KMS key. From 9e8cdf1fa3de7b8c22e6091d339614817ec3f43f Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:26:26 +0100 Subject: [PATCH 09/21] Revert overwrites 4. --- .../db/share_object_repositories.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py index f90d5c330..469eb548c 100644 --- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py +++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py @@ -807,17 +807,23 @@ def find_all_share_items(session, share_uri, share_type): ) @staticmethod - def other_approved_share_object_exists(session, environment_uri, dataset_uri): + def other_approved_share_item_table_exists(session, environment_uri, item_uri, share_item_uri): + share_item_shared_states = ShareItemSM.get_share_item_shared_states() return ( session.query(ShareObject) + .join( + ShareObjectItem, + ShareObject.shareUri == ShareObjectItem.shareUri, + ) .filter( and_( - Environment.environmentUri == environment_uri, - ShareObject.status == ShareObjectStatus.Approved.value, - ShareObject.datasetUri == dataset_uri, + ShareObject.environmentUri == environment_uri, + ShareObjectItem.itemUri == item_uri, + ShareObjectItem.shareItemUri != share_item_uri, + ShareObjectItem.status.in_(share_item_shared_states), ) ) - .all() + .first() ) @staticmethod From 5d907977c4cc97fbcc98fd473df412036b27cd28 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:30:11 +0100 Subject: [PATCH 10/21] Revert overwrites 4. 
--- .../modules/dataset_sharing/services/data_sharing_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py index 14412abca..5c57d4af5 100644 --- a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py @@ -244,7 +244,7 @@ def revoke_share(cls, engine: Engine, share_uri: str): log.info(f'Still remaining LF resources shared = {existing_shared_items}') if not existing_shared_items and revoked_tables: log.info("Clean up LF remaining resources...") - clean_up_tables = processor.clean_up_share() + clean_up_tables = processor.delete_shared_database() log.info(f"Clean up LF successful = {clean_up_tables}") existing_pending_items = ShareObjectRepository.check_pending_share_items(session, share_uri) From 94be491d49593066a4d1bdbd545f603e6c2f59f2 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:34:04 +0100 Subject: [PATCH 11/21] Revert overwrites 5. --- .../share_managers/lf_share_manager.py | 54 ++++++++----------- 1 file changed, 23 insertions(+), 31 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py index 754ceaf07..d1e92e43b 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py @@ -51,10 +51,6 @@ def process_approved_shares(self) -> [str]: def process_revoked_shares(self) -> [str]: return NotImplementedError - @abc.abstractmethod - def clean_up_share(self): - return NotImplementedError - def get_share_principals(self) -> [str]: """ Builds list of principals of the share request @@ -394,9 +390,6 @@ def share_table_with_target_account(cls, **data): data['source']['database'], data['source']['tablename'], ) - - glue_client = GlueClient(source_accountid, source_region, data['source']['database']) - glue_client.remove_create_table_default_permissions() time.sleep(1) LakeFormationClient.grant_permissions_to_table( @@ -424,7 +417,7 @@ def share_table_with_target_account(cls, **data): ) raise e - def revoke_external_account_access_on_source_account(self) -> [dict]: + def revoke_external_account_access_on_source_account(self, db_name, table_name) -> [dict]: """ 1) Revokes access to external account if dataset is not shared with any other team from the same workspace @@ -443,29 +436,28 @@ def revoke_external_account_access_on_source_account(self) -> [dict]: client = aws_session.client( 'lakeformation', region_name=self.source_environment.region ) - revoke_entries = [] - for table in self.revoked_tables: - revoke_entries.append( - { - 'Id': str(uuid.uuid4()), - 'Principal': { - 'DataLakePrincipalIdentifier': self.target_environment.AwsAccountId - }, - 'Resource': { - 'TableWithColumns': { - 'DatabaseName': table.GlueDatabaseName, - 'Name': table.GlueTableName, - 'ColumnWildcard': {}, - 'CatalogId': self.source_environment.AwsAccountId, - } - }, - 'Permissions': ['DESCRIBE', 'SELECT'], - 'PermissionsWithGrantOption': ['DESCRIBE', 'SELECT'], - } - ) - LakeFormationClient.batch_revoke_permissions( - client, self.source_environment.AwsAccountId, revoke_entries - ) + revoke_entries = [ + { + 'Id': str(uuid.uuid4()), + 'Principal': { + 
'DataLakePrincipalIdentifier': self.target_environment.AwsAccountId + }, + 'Resource': { + 'TableWithColumns': { + 'DatabaseName': db_name, + 'Name': table_name, + 'ColumnWildcard': {}, + 'CatalogId': self.source_environment.AwsAccountId, + } + }, + 'Permissions': ['DESCRIBE', 'SELECT'], + 'PermissionsWithGrantOption': ['DESCRIBE', 'SELECT'], + } + ] + + LakeFormationClient.batch_revoke_permissions( + client, self.source_environment.AwsAccountId, revoke_entries + ) return revoke_entries def handle_share_failure( From cff577f00cf361b85a67dda6eb383f32eef76505 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:36:28 +0100 Subject: [PATCH 12/21] Revert overwrites 6. --- .../dataset_sharing/aws/glue_client.py | 40 ------------------- 1 file changed, 40 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/aws/glue_client.py b/backend/dataall/modules/dataset_sharing/aws/glue_client.py index c296025ce..f110d0f89 100644 --- a/backend/dataall/modules/dataset_sharing/aws/glue_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/glue_client.py @@ -130,43 +130,3 @@ def delete_database(self): f'due to: {e}' ) raise e - - def remove_create_table_default_permissions(self): - """ - When upgrading to LF tables and database can still have Create Table Default Permissions turned on. - Unless this setting is removed, the table or database - can not be shared using LakeFormation. - :return: - """ - try: - account_id = self._account_id - database = self._database - - log.info( - f'Removing CreateTableDefaultPermissions in database {database}' - ) - - response = self._client.get_database(CatalogId=account_id, Name=database) - existing_database_parameters = response['Database'] - existing_database_parameters['CreateTableDefaultPermissions'] = [] - - if 'CreateTime' in existing_database_parameters: - del existing_database_parameters['CreateTime'] - if 'CatalogId' in existing_database_parameters: - del existing_database_parameters['CatalogId'] - - response = self._client.update_database( - CatalogId=account_id, - Name=database, - DatabaseInput=existing_database_parameters - ) - - log.info( - f'Successfully removed Create Table Default Permissions and Create Database Default Permissions ' - f'| {response}') - - except ClientError as e: - log.error( - f'Could not remove CreateDatabaseDefaultPermissions and/or CreateTableDefaultPermissions ' - f'permission on database in {database} due to {e}' - ) From 5ff80fb0cb66a2c0dfe89104c10c9170b15edbc2 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:41:05 +0100 Subject: [PATCH 13/21] Revert overwrites 7. 
--- .../lf_process_cross_account_share.py | 38 ++++++++----------- 1 file changed, 15 insertions(+), 23 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py index d28340cd6..cbf2fdd0a 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py @@ -124,7 +124,7 @@ def process_revoked_shares(self) -> bool: a) update its status to REVOKE_IN_PROGRESS with Action Start b) check if item exists on glue catalog raise error if not and flag item status to failed c) revoke table resource link: undo grant permission to resource link table for team role in target account - d) revoke source table access: undo grant permission to table for team role in source account + d) revoke source table access: undo grant permission to table for team role in source account (and for QS Group if no other shares present for table) e) delete resource link table h) update share item status to REVOKE_SUCCESSFUL with Action Success @@ -157,10 +157,23 @@ def process_revoked_shares(self) -> bool: self.revoke_table_resource_link_access(table, principals) + other_table_shares_in_env = False + if ShareObjectRepository.other_approved_share_item_table_exists( + self.session, + self.target_environment.environmentUri, + share_item.itemUri, + share_item.shareItemUri + ): + other_table_shares_in_env = True + principals = [p for p in principals if "arn:aws:quicksight" not in p] + self.revoke_source_table_access(table, principals) self.delete_resource_link_table(table) + if not other_table_shares_in_env: + self.revoke_external_account_access_on_source_account(table.GlueDatabaseName, table.GlueTableName) + new_state = revoked_item_SM.run_transition(ShareItemActions.Success.value) revoked_item_SM.update_state_single_item(self.session, share_item, new_state) @@ -170,25 +183,4 @@ def process_revoked_shares(self) -> bool: revoked_item_SM.update_state_single_item(self.session, share_item, new_state) success = False - return success - - def clean_up_share(self) -> bool: - """" - 1) deletes deprecated shared db in target account - 2) checks if there are other share objects from this source account to this target account. - If not, it revokes external account access of the target account to the source account. - Returns - ------- - True if clean-up succeeds - """ - - self.delete_shared_database() - - if not ShareObjectRepository.other_approved_share_object_exists( - self.session, - self.target_environment.environmentUri, - self.dataset.datasetUri, - ): - self.revoke_external_account_access_on_source_account() - - return True + return success From 3383166ac2a22a15a51a6bb246142f2db13f31a0 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:42:38 +0100 Subject: [PATCH 14/21] Revert overwrites 7. 
--- .../share_processors/lf_process_cross_account_share.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py index cbf2fdd0a..51ba97cc7 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_cross_account_share.py @@ -159,10 +159,10 @@ def process_revoked_shares(self) -> bool: other_table_shares_in_env = False if ShareObjectRepository.other_approved_share_item_table_exists( - self.session, - self.target_environment.environmentUri, - share_item.itemUri, - share_item.shareItemUri + self.session, + self.target_environment.environmentUri, + share_item.itemUri, + share_item.shareItemUri ): other_table_shares_in_env = True principals = [p for p in principals if "arn:aws:quicksight" not in p] @@ -183,4 +183,4 @@ def process_revoked_shares(self) -> bool: revoked_item_SM.update_state_single_item(self.session, share_item, new_state) success = False - return success + return success From 7ed96afb28ba39fca176debbc59ff38202f5c72c Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:44:09 +0100 Subject: [PATCH 15/21] Revert overwrites 8. --- .../share_processors/lf_process_same_account_share.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py index 270538a0b..54df2d900 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py +++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lf_process_same_account_share.py @@ -157,13 +157,3 @@ def process_revoked_shares(self) -> bool: success = False return success - - def clean_up_share(self) -> bool: - """" - 1) deletes deprecated shared db in target account - Returns - ------- - True if clean-up succeeds - """ - self.delete_shared_database() - return True From c0518968ea6675066d0b376c19c9f50035764734 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:48:44 +0100 Subject: [PATCH 16/21] Revert overwrites 9. 
--- backend/dataall/modules/datasets/api/dataset/input_types.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/dataall/modules/datasets/api/dataset/input_types.py b/backend/dataall/modules/datasets/api/dataset/input_types.py index 4310fb3b4..d238a8103 100644 --- a/backend/dataall/modules/datasets/api/dataset/input_types.py +++ b/backend/dataall/modules/datasets/api/dataset/input_types.py @@ -20,7 +20,7 @@ name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String) ), gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), - gql.Argument(name='stewards', type=gql.String) + gql.Argument(name='stewards', type=gql.String), ], ) @@ -102,6 +102,6 @@ name='businessOwnerDelegationEmails', type=gql.ArrayType(gql.String) ), gql.Argument('confidentiality', gql.Ref('ConfidentialityClassification')), - gql.Argument(name='stewards', type=gql.String) + gql.Argument(name='stewards', type=gql.String), ], ) From f5d62d76bd286d60e8ca4ebf2b2cbba823fff7b5 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:48:53 +0100 Subject: [PATCH 17/21] Revert overwrites 10. --- .../datasets/cdk/pivot_role_datasets_policy.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py index b44fdfbdc..46c34ea58 100644 --- a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py +++ b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py @@ -20,23 +20,6 @@ class DatasetsPivotRole(PivotRoleStatementSet): """ def get_statements(self): statements = [ - # S3 Imported Buckets - restrict resources via bucket policies - iam.PolicyStatement( - sid='ImportedBuckets', - effect=iam.Effect.ALLOW, - actions=[ - 's3:List*', - 's3:GetBucket*', - 's3:GetLifecycleConfiguration', - 's3:GetObject', - 's3:PutBucketPolicy', - 's3:PutBucketTagging', - 's3:PutObject', - 's3:PutObjectAcl', - 's3:PutBucketOwnershipControls', - ], - resources=['arn:aws:s3:::*'], - ), # For dataset preview iam.PolicyStatement( sid='AthenaWorkgroupsDataset', From 3783a953c93d50f9bc1fbb30d54aa40dd704cd70 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:52:59 +0100 Subject: [PATCH 18/21] Revert overwrites 11. --- backend/dataall/modules/datasets/services/dataset_service.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 93a65a209..5a02d2e9b 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -70,7 +70,7 @@ def check_imported_resources(environment, data): if not key_id: raise exceptions.AWSResourceNotFound( action=IMPORT_DATASET, - message=f'KMS key with alias={kms_alias} cannot be found', + message=f'Data.all Environment Pivot Role does not have kms:DescribeKey Permission to KMS key with alias={kms_alias}', ) return True From dacba14a57d191df2b6774356f4c43160ce1d5d3 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:55:27 +0100 Subject: [PATCH 19/21] Revert overwrites 12. 
--- .../datasets/tasks/dataset_subscription_task.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py b/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py index 6a83fc5c8..d0c874a4c 100644 --- a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py +++ b/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py @@ -25,6 +25,8 @@ root.addHandler(logging.StreamHandler(sys.stdout)) log = logging.getLogger(__name__) +# TODO: review this task usage and remove if not needed + class DatasetSubscriptionService: def __init__(self, engine): @@ -146,12 +148,12 @@ def publish_sns_message( response = sns_client.publish_dataset_message(message) log.info(f'SNS update publish response {response}') - notifications = ShareNotificationService.notify_new_data_available_from_owners( + notifications = ShareNotificationService( session=session, dataset=dataset, - share=share_object, - s3_prefix=prefix, - ) + share=share_object + ).notify_new_data_available_from_owners(s3_prefix=prefix) + log.info(f'Notifications for share owners {notifications}') except ClientError as e: From 3b404cd860944f33eb644af8ccba0c9d4697a750 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 09:57:14 +0100 Subject: [PATCH 20/21] Revert overwrites 13. --- tests/modules/datasets/tasks/test_lf_share_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/modules/datasets/tasks/test_lf_share_manager.py b/tests/modules/datasets/tasks/test_lf_share_manager.py index 78a289d9f..fd76ba0b1 100644 --- a/tests/modules/datasets/tasks/test_lf_share_manager.py +++ b/tests/modules/datasets/tasks/test_lf_share_manager.py @@ -660,7 +660,7 @@ def test_revoke_external_account_access_on_source_account( return_value=boto3.Session(), ) - processor_cross_account.revoke_external_account_access_on_source_account() + processor_cross_account.revoke_external_account_access_on_source_account(table1.GlueDatabaseName, table1.GlueTableName) # Then lf_mock.assert_called_once() From 5d0fe687e8c7122f57a94319b0d2a5f26950f845 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 31 Oct 2023 10:00:45 +0100 Subject: [PATCH 21/21] Fix down revision for migration script --- .../migrations/versions/8c79fb896983_add_table_for_buckets.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py index 9142418f8..518f80576 100644 --- a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py +++ b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py @@ -20,7 +20,7 @@ # revision identifiers, used by Alembic. revision = '8c79fb896983' -down_revision = '5781fdf1f877' +down_revision = '4f3c1d84a628' branch_labels = None depends_on = None