Skip to content

Commit

Permalink
Refactor: uncouple datasets and dataset_sharing modules - part 2-2 (#…
Browse files Browse the repository at this point in the history
…1187)

### Feature or Bugfix
- Refactoring
⚠️ MERGE AFTER #1185

### Detail
This is needed as explained in full PR [AFTER 2.4] Refactor: uncouple
datasets and dataset_sharing modules #1179
- Split the getDatasetAssumeRole API into 2 APIs, one for dataset owners
role (in datasets module) and another one for share requester roles (in
datasets_sharing module)

### Relates
-  #1179

### Security
Please answer the questions below briefly where applicable, or write
`N/A`. Based on
[OWASP 10](https://owasp.org/Top10/en/).

- Does this PR introduce or modify any input fields or queries - this
includes
fetching data from storage outside the application (e.g. a database, an
S3 bucket)?
  - Is the input sanitized?
- What precautions are you taking before deserializing the data you
consume?
  - Is injection prevented by parametrizing queries?
  - Have you ensured no `eval` or similar functions are used?
- Does this PR introduce any functionality or component that requires
authorization?
- How have you ensured it respects the existing AuthN/AuthZ mechanisms?
  - Are you logging failed auth attempts?
- Are you using or adding any cryptographic features?
  - Do you use a standard proven implementations?
  - Are the used keys controlled by the customer? Where are they stored?
- Are you introducing any new policies/roles/users?
  - Have you used the least-privilege principle? How?


By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 license.
  • Loading branch information
dlpzx authored May 1, 2024
1 parent 5173419 commit 8204468
Show file tree
Hide file tree
Showing 7 changed files with 91 additions and 21 deletions.
9 changes: 9 additions & 0 deletions backend/dataall/modules/dataset_sharing/api/queries.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from dataall.base.api import gql
from dataall.modules.dataset_sharing.api.resolvers import (
get_dataset_shared_assume_role_url,
get_share_object,
list_shared_with_environment_data_items,
list_shares_in_my_inbox,
Expand Down Expand Up @@ -61,3 +62,11 @@
type=gql.ArrayType(gql.Ref('SharedDatasetTableItem')),
resolver=list_shared_tables_by_env_dataset,
)

getDatasetSharedAssumeRoleUrl = gql.QueryField(
name='getDatasetSharedAssumeRoleUrl',
args=[gql.Argument(name='datasetUri', type=gql.String)],
type=gql.String,
resolver=get_dataset_shared_assume_role_url,
test_scope='Dataset',
)
6 changes: 6 additions & 0 deletions backend/dataall/modules/dataset_sharing/api/resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from dataall.core.environment.services.environment_service import EnvironmentService
from dataall.core.organizations.db.organization_repositories import OrganizationRepository
from dataall.base.db.exceptions import RequiredParameter
from dataall.base.feature_toggle_checker import is_feature_enabled
from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareObjectPermission
from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService
Expand Down Expand Up @@ -332,3 +333,8 @@ def list_dataset_share_objects(context, source, filter: dict = None):

def list_shared_tables_by_env_dataset(context: Context, source, datasetUri: str, envUri: str):
return DatasetSharingService.list_shared_tables_by_env_dataset(datasetUri, envUri)


@is_feature_enabled('modules.datasets.features.aws_actions')
def get_dataset_shared_assume_role_url(context: Context, source, datasetUri: str = None):
return DatasetSharingService.get_dataset_shared_assume_role_url(uri=datasetUri)
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService
from dataall.core.environment.services.environment_service import EnvironmentService
from dataall.base.context import get_context
from dataall.base.aws.sts import SessionHelper
from dataall.modules.dataset_sharing.db.share_object_repositories import (
ShareObjectRepository,
ShareItemSM,
)
from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService
from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
from dataall.modules.datasets.services.dataset_permissions import (
MANAGE_DATASETS,
UPDATE_DATASET,
CREDENTIALS_DATASET,
)

from dataall.modules.datasets_base.db.dataset_models import Dataset
Expand Down Expand Up @@ -49,3 +53,37 @@ def list_shared_tables_by_env_dataset(dataset_uri: str, env_uri: str):
session, env_uri, dataset_uri, context.username, context.groups
)
]

@staticmethod
@ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET)
def get_dataset_shared_assume_role_url(uri):
context = get_context()
with context.db_engine.scoped_session() as session:
dataset = DatasetRepository.get_dataset_by_uri(session, uri)

if dataset.SamlAdminGroupName in context.groups:
role_arn = dataset.IAMDatasetAdminRoleArn
account_id = dataset.AwsAccountId
region = dataset.region
else:
share = ShareObjectRepository.get_share_by_dataset_attributes(
session=session, dataset_uri=uri, dataset_owner=context.username
)
shared_environment = EnvironmentService.get_environment_by_uri(
session=session, uri=share.environmentUri
)
env_group = EnvironmentService.get_environment_group(
session=session, group_uri=share.principalId, environment_uri=share.environmentUri
)
role_arn = env_group.environmentIAMRoleArn
account_id = shared_environment.AwsAccountId
region = shared_environment.region

pivot_session = SessionHelper.remote_session(account_id, region)
aws_session = SessionHelper.get_session(base_session=pivot_session, role_arn=role_arn)
url = SessionHelper.get_console_access_url(
aws_session,
region=dataset.region,
bucket=dataset.S3BucketName,
)
return url
20 changes: 6 additions & 14 deletions backend/dataall/modules/datasets/services/dataset_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,24 +283,16 @@ def get_dataset_assume_role_url(uri):
context = get_context()
with context.db_engine.scoped_session() as session:
dataset = DatasetRepository.get_dataset_by_uri(session, uri)
if dataset.SamlAdminGroupName not in context.groups:
share = ShareObjectRepository.get_share_by_dataset_attributes(
session=session, dataset_uri=uri, dataset_owner=context.username
)
shared_environment = EnvironmentService.get_environment_by_uri(
session=session, uri=share.environmentUri
)
env_group = EnvironmentService.get_environment_group(
session=session, group_uri=share.principalId, environment_uri=share.environmentUri
)
role_arn = env_group.environmentIAMRoleArn
account_id = shared_environment.AwsAccountId
region = shared_environment.region
else:
if dataset.SamlAdminGroupName in context.groups:
role_arn = dataset.IAMDatasetAdminRoleArn
account_id = dataset.AwsAccountId
region = dataset.region

else:
raise exceptions.UnauthorizedOperation(
action=CREDENTIALS_DATASET,
message=f'User: {context.username} is not a member of the group {dataset.SamlAdminGroupName}',
)
pivot_session = SessionHelper.remote_session(account_id, region)
aws_session = SessionHelper.get_session(base_session=pivot_session, role_arn=role_arn)
url = SessionHelper.get_console_access_url(
Expand Down
26 changes: 19 additions & 7 deletions frontend/src/modules/Folders/views/FolderView.js
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ import { SET_ERROR, useDispatch } from 'globalErrors';
import {
useClient,
deleteDatasetStorageLocation,
getDatasetAssumeRoleUrl
getDatasetAssumeRoleUrl,
getDatasetSharedAssumeRoleUrl
} from 'services';
import { getDatasetStorageLocation } from '../services';

Expand All @@ -51,13 +52,24 @@ function FolderPageHeader(props) {

const goToS3Console = async () => {
setIsLoadingUI(true);
const response = await client.query(
getDatasetAssumeRoleUrl(folder.dataset.datasetUri)
);
if (!response.errors) {
window.open(response.data.getDatasetAssumeRoleUrl, '_blank');
if (isAdmin) {
const response = await client.query(
getDatasetAssumeRoleUrl(folder.dataset.datasetUri)
);
if (!response.errors) {
window.open(response.data.getDatasetAssumeRoleUrl, '_blank');
} else {
dispatch({ type: SET_ERROR, error: response.errors[0].message });
}
} else {
dispatch({ type: SET_ERROR, error: response.errors[0].message });
const response = await client.query(
getDatasetSharedAssumeRoleUrl(folder.dataset.datasetUri)
);
if (!response.errors) {
window.open(response.data.getDatasetSharedAssumeRoleUrl, '_blank');
} else {
dispatch({ type: SET_ERROR, error: response.errors[0].message });
}
}
setIsLoadingUI(false);
};
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import { gql } from 'apollo-boost';

export const getDatasetSharedAssumeRoleUrl = (datasetUri) => ({
variables: {
datasetUri
},
query: gql`
query GetDatasetSharedAssumeRoleUrl($datasetUri: String!) {
getDatasetSharedAssumeRoleUrl(datasetUri: $datasetUri)
}
`
});
1 change: 1 addition & 0 deletions frontend/src/services/graphql/Datasets/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
export * from './addDatasetStorageLocation';
export * from './getDataset';
export * from './getDatasetAssumeRoleUrl';
export * from './getDatasetSharedAssumeRoleUrl';
export * from './listDatasetTables';
export * from './removeDatasetStorageLocation';

0 comments on commit 8204468

Please sign in to comment.