Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2.6.1 Security features #1686

Merged
merged 15 commits into from
Nov 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
586 changes: 582 additions & 4 deletions .checkov.baseline

Large diffs are not rendered by default.

Binary file modified UserGuide.pdf
Binary file not shown.
6 changes: 4 additions & 2 deletions backend/api_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
attach_tenant_policy_for_groups,
check_reauth,
validate_and_block_if_maintenance_window,
redact_creds,
)
from dataall.core.tasks.service_handlers import Worker
from dataall.base.aws.sqs import SqsQueue
Expand Down Expand Up @@ -83,6 +84,7 @@ def handler(event, context):
Return doc: https://docs.aws.amazon.com/apigateway/latest/developerguide/set-up-lambda-proxy-integrations.html
"""

event = redact_creds(event)
log.info('Lambda Event %s', event)
log.debug('Env name %s', ENVNAME)
log.debug('Engine %s', ENGINE.engine.url)
Expand Down Expand Up @@ -140,8 +142,8 @@ def handler(event, context):
dispose_context()
response = json.dumps(response)

log.info('Lambda Response %s', response)

log.info('Lambda Response Success: %s', success)
log.debug('Lambda Response %s', response)
return {
'statusCode': 200 if success else 400,
'headers': {
Expand Down
2 changes: 1 addition & 1 deletion backend/aws_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from dataall.base.loader import load_modules, ImportMode

logger = logging.getLogger()
logger.setLevel(os.environ.get('LOG_LEVEL'))
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))
log = logging.getLogger(__name__)

ENVNAME = os.getenv('envname', 'local')
Expand Down
3 changes: 1 addition & 2 deletions backend/dataall/base/aws/quicksight.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@

from .sts import SessionHelper

logger = logging.getLogger('QuicksightHandler')
logger.setLevel(logging.DEBUG)
logger = logging.getLogger(__name__)


class QuicksightClient:
Expand Down
12 changes: 11 additions & 1 deletion backend/dataall/base/utils/api_handler_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@
item.casefold() for item in ['getGroupsForUser', 'getMaintenanceWindowStatus']
]
ENGINE = get_engine(envname=ENVNAME)
AWS_REGION = os.getenv('AWS_REGION')


def redact_creds(event):
if event.get('headers', {}).get('Authorization'):
event['headers']['Authorization'] = 'XXXXXXXXXXXX'

if event.get('multiValueHeaders', {}).get('Authorization'):
event['multiValueHeaders']['Authorization'] = 'XXXXXXXXXXXX'
return event


def get_cognito_groups(claims):
Expand Down Expand Up @@ -106,7 +116,7 @@ def check_reauth(query, auth_time, username):
# Determine if there are any Operations that Require ReAuth From SSM Parameter
try:
reauth_apis = ParameterStoreManager.get_parameter_value(
region=os.getenv('AWS_REGION', 'eu-west-1'), parameter_path=f'/dataall/{ENVNAME}/reauth/apis'
region=AWS_REGION, parameter_path=f'/dataall/{ENVNAME}/reauth/apis'
).split(',')
except Exception:
log.info('No ReAuth APIs Found in SSM')
Expand Down
1 change: 1 addition & 0 deletions backend/dataall/core/environment/cdk/environment_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def __init__(self, scope, id, target_uri: str = None, **kwargs):
versioned=True,
enforce_ssl=True,
)
default_environment_bucket.policy.apply_removal_policy(RemovalPolicy.RETAIN)
self.default_environment_bucket = default_environment_bucket

default_environment_bucket.add_to_resource_policy(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,12 @@ def get_statements(self):
f'arn:aws:iam::{self.account}:role/{self.role_name}',
],
),
# DENY to prevent pivot role to grant itself permissions
iam.PolicyStatement(
sid='IAMDenyForPivotRole',
effect=iam.Effect.DENY,
actions=['iam:Put*', 'iam:Delete*', 'iam:Update*', 'iam:AttachRolePolicy', 'iam:DetachRolePolicy'],
resources=[f'arn:aws:iam::{self.account}:role/{self.role_name}'],
),
]
return statements
Original file line number Diff line number Diff line change
Expand Up @@ -846,6 +846,7 @@ def list_all_active_environments(session) -> List[Environment]:
return environments

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
@ResourcePolicyService.has_resource_permission(environment_permissions.DELETE_ENVIRONMENT)
def delete_environment(uri):
with get_context().db_engine.scoped_session() as session:
Expand Down Expand Up @@ -927,6 +928,7 @@ def resolve_user_role(environment: Environment):
return EnvironmentPermission.NotInvited.value

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def enable_subscriptions(environmentUri: str = None, input: dict = None):
context = get_context()
with context.db_engine.scoped_session() as session:
Expand Down Expand Up @@ -962,6 +964,7 @@ def enable_subscriptions(environmentUri: str = None, input: dict = None):
return True

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def disable_subscriptions(environment_uri: str = None):
context = get_context()
with context.db_engine.scoped_session() as session:
Expand Down Expand Up @@ -1023,6 +1026,7 @@ def _get_environment_group_aws_session(session, username, groups, environment, g
return aws_session

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def get_environment_assume_role_url(
environmentUri: str = None,
groupUri: str = None,
Expand Down Expand Up @@ -1050,6 +1054,7 @@ def get_environment_assume_role_url(
return url

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ENVIRONMENTS)
def generate_environment_access_token(environmentUri: str = None, groupUri: str = None):
context = get_context()
with context.db_engine.scoped_session() as session:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,10 @@
from dataall.base.utils import Parameter

root = logging.getLogger()
root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))

RETRIES = 30
SLEEP_TIME = 30
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ def create_organization(data):
return org

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_ORGANIZATIONS)
@ResourcePolicyService.has_resource_permission(UPDATE_ORGANIZATION)
def update_organization(uri, data):
context = get_context()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def check_group_environment_permission(uri, group, permission_name):
permission_name=permission_name,
)

@staticmethod
def has_group_permission(permission):
def decorator(f):
@wraps(f)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,7 @@ def save_permissions_with_tenant(engine, envname=None):
TenantPolicyService.save_tenant(session, name=TenantPolicyService.TENANT_NAME, description='Tenant dataall')
PermissionService.init_permissions(session)

@staticmethod
def has_tenant_permission(permission: str):
"""
Decorator to check if a user has a permission to do some action.
Expand Down
2 changes: 1 addition & 1 deletion backend/dataall/core/stacks/tasks/cdkproxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
from dataall.base.db import get_engine

root = logging.getLogger()
root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
logger = logging.getLogger(__name__)
logger.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))


if __name__ == '__main__':
Expand Down
27 changes: 24 additions & 3 deletions backend/dataall/modules/catalog/indexers/base_indexer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ class BaseIndexer(ABC):

_INDEX = 'dataall-index'
_es = None
_QUERY_SIZE = 1000

@classmethod
def es(cls):
Expand Down Expand Up @@ -53,11 +54,31 @@ def _index(cls, doc_id, doc):
return False

@classmethod
def search(cls, query):
def search_all(cls, query, sort):
all_results = []
search_after = None
while True:
if search_after:
query['search_after'] = search_after

response = BaseIndexer.search(query=query, sort=sort)
hits = response['hits']['hits']
if not hits:
break # No more results

all_results.extend(hits)

# Update search_after for the next iteration
search_after = hits[-1]['sort']

return all_results

@classmethod
def search(cls, query, sort=None):
es = cls.es()
if es:
res = es.search(index=cls._INDEX, body=query)
log.info(f'Search query {query} returned {res["hits"]["total"]["value"]} records')
res = es.search(index=cls._INDEX, body=query, sort=sort, size=cls._QUERY_SIZE)
log.info(f'Search query {query} found {res["hits"]["total"]["value"]} total records')
return res
else:
log.error(f'ES config is missing, search query {query} failed')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,15 @@ def delete_node(uri: str = None):
return GlossaryRepository.delete_node(session=session, uri=uri)

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES)
def approve_term_association(linkUri: str):
with _session() as session:
return GlossaryRepository.approve_term_association(
session=session, username=get_context().username, groups=get_context().groups, linkUri=linkUri
)

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_GLOSSARIES)
def dismiss_term_association(linkUri: str):
with _session() as session:
return GlossaryRepository.dismiss_term_association(
Expand Down
10 changes: 4 additions & 6 deletions backend/dataall/modules/catalog/tasks/catalog_indexer_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,10 @@
from dataall.base.utils.alarm_service import AlarmService

root = logging.getLogger()
root.setLevel(logging.INFO)
if not root.hasHandlers():
root.addHandler(logging.StreamHandler(sys.stdout))
log = logging.getLogger(__name__)
log.setLevel(os.environ.get('LOG_LEVEL', 'INFO'))


class CatalogIndexerTask:
Expand Down Expand Up @@ -43,12 +43,10 @@ def _delete_old_objects(cls, indexed_object_uris: List[str]) -> None:
# Search for documents in opensearch without an ID in the indexed_object_uris list
query = {'query': {'bool': {'must_not': {'terms': {'_id': indexed_object_uris}}}}}
# Delete All "Outdated" Objects from Index
docs = BaseIndexer.search(query)
for doc in docs.get('hits', {}).get('hits', []):
log.info(f'Deleting document {doc["_id"]}...')
docs = BaseIndexer.search_all(query, sort='_id')
for doc in docs:
BaseIndexer.delete_doc(doc_id=doc['_id'])

log.info(f'Deleted {len(docs.get("hits", {}).get("hits", []))} records')
log.info(f'Deleted {len(docs)} records')


if __name__ == '__main__':
Expand Down
8 changes: 2 additions & 6 deletions backend/dataall/modules/dashboards/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,13 @@
gql.Field('DashboardId', type=gql.String),
gql.Field('tags', type=gql.ArrayType(gql.String)),
gql.Field('created', type=gql.String),
gql.Field('AwsAccountId', type=gql.String),
gql.Field('updated', type=gql.String),
gql.Field('owner', type=gql.String),
gql.Field('SamlGroupName', type=gql.String),
gql.Field(
'organization',
type=gql.Ref('Organization'),
resolver=get_dashboard_organization,
),
gql.Field(
'environment',
type=gql.Ref('Environment'),
type=gql.Ref('EnvironmentSimplified'),
resolver=resolve_environment,
),
gql.Field(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from dataall.base.aws.secrets_manager import SecretsManager

log = logging.getLogger(__name__)
log.setLevel(logging.DEBUG)


class DashboardQuicksightClient:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ class DashboardService:
"""Service that serves request related to dashboard"""

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_DASHBOARDS)
@ResourcePolicyService.has_resource_permission(GET_DASHBOARD)
def get_dashboard(uri: str) -> Dashboard:
with get_context().db_engine.scoped_session() as session:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,6 @@ def list_pipelines(*, filter: dict) -> dict:
)

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(GET_PIPELINE)
def get_pipeline(
uri: str,
Expand All @@ -202,6 +201,7 @@ def get_clone_url_http(uri: str):
return f'codecommit::{env.region}://{pipeline.repo}'

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(DELETE_PIPELINE)
def delete_pipeline(uri: str, deleteFromAWS: bool):
with _session() as session:
Expand Down Expand Up @@ -254,12 +254,14 @@ def _delete_repository(target_uri, accountid, cdk_role_arn, region, repo_name):
return True

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
def delete_pipeline_environment(envPipelineUri: str):
with _session() as session:
DatapipelinesRepository.delete_pipeline_environment(session=session, envPipelineUri=envPipelineUri)
return True

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_PIPELINES)
@ResourcePolicyService.has_resource_permission(CREDENTIALS_PIPELINE)
def get_credentials(uri):
with _session() as session:
Expand Down
7 changes: 1 addition & 6 deletions backend/dataall/modules/datasets_base/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,9 @@
gql.Field(name='imported', type=gql.Boolean),
gql.Field(
name='environment',
type=gql.Ref('Environment'),
type=gql.Ref('EnvironmentSimplified'),
resolver=get_dataset_environment,
),
gql.Field(
name='organization',
type=gql.Ref('Organization'),
resolver=get_dataset_organization,
),
gql.Field(
name='owners',
type=gql.String,
Expand Down
4 changes: 4 additions & 0 deletions backend/dataall/modules/mlstudio/services/mlstudio_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def create_sagemaker_studio_user(*, uri: str, admin_group: str, request: Sagemak
return sagemaker_studio_user

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
def update_sagemaker_studio_domain(environment, domain, data):
SagemakerStudioService._update_sagemaker_studio_domain_vpc(environment.AwsAccountId, environment.region, data)
domain.vpcType = data.get('vpcType')
Expand Down Expand Up @@ -205,6 +206,7 @@ def _update_sagemaker_studio_domain_vpc(account_id, region, data={}):
data['vpcType'] = 'created'

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
def create_sagemaker_studio_domain(session, environment, data: dict = {}):
SagemakerStudioService._update_sagemaker_studio_domain_vpc(environment.AwsAccountId, environment.region, data)

Expand Down Expand Up @@ -246,6 +248,7 @@ def get_sagemaker_studio_user_status(*, uri: str):
return status

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
@ResourcePolicyService.has_resource_permission(SGMSTUDIO_USER_URL)
def get_sagemaker_studio_user_presigned_url(*, uri: str):
with _session() as session:
Expand All @@ -259,6 +262,7 @@ def get_sagemaker_studio_user_applications(*, uri: str):
return sagemaker_studio_client(user).get_sagemaker_studio_user_applications()

@staticmethod
@TenantPolicyService.has_tenant_permission(MANAGE_SGMSTUDIO_USERS)
@ResourcePolicyService.has_resource_permission(DELETE_SGMSTUDIO_USER)
def delete_sagemaker_studio_user(*, uri: str, delete_from_aws: bool):
"""Deletes SageMaker Studio user from the database and if delete_from_aws is True from AWS as well"""
Expand Down
Loading