From 7993b1a2b6aeee812f0bc806ac2ac5b47781d2a9 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Mon, 15 Apr 2024 17:11:29 +0200
Subject: [PATCH 01/18] Move dataset models and services from datasets_base to
 datasets

---
 .../modules/dataset_sharing/api/resolvers.py | 4 ++--
 .../db/share_object_repositories.py | 4 ++--
 .../services/data_sharing_service.py | 2 +-
 .../services/dataset_alarm_service.py | 2 +-
 .../services/share_item_service.py | 4 ++--
 .../share_managers/lf_share_manager.py | 2 +-
 .../s3_access_point_share_manager.py | 2 +-
 .../share_managers/s3_bucket_share_manager.py | 2 +-
 .../services/share_notification_service.py | 2 +-
 .../services/share_object_service.py | 6 +++---
 .../lakeformation_process_share.py | 2 +-
 .../s3_access_point_process_share.py | 2 +-
 .../s3_bucket_process_share.py | 2 +-
 backend/dataall/modules/datasets/__init__.py | 4 ++--
 .../datasets/api/dataset/input_types.py | 2 +-
 .../modules/datasets/api/dataset/resolvers.py | 4 ++--
 .../modules/datasets/api/dataset/types.py | 2 +-
 .../datasets/api/profiling/resolvers.py | 2 +-
 .../datasets/api/storage_location/resolvers.py | 2 +-
 .../modules/datasets/api/table/input_types.py | 2 +-
 .../modules/datasets/api/table/resolvers.py | 2 +-
 .../datasets/api/table_column/resolvers.py | 2 +-
 .../datasets/aws/athena_table_client.py | 2 +-
 .../datasets/aws/glue_dataset_client.py | 2 +-
 .../datasets/aws/glue_profiler_client.py | 4 ++--
 .../modules/datasets/aws/glue_table_client.py | 2 +-
 .../modules/datasets/aws/lf_dataset_client.py | 2 +-
 .../modules/datasets/aws/lf_table_client.py | 2 +-
 .../aws/s3_dataset_bucket_policy_client.py | 2 +-
 .../modules/datasets/aws/s3_dataset_client.py | 2 +-
 .../modules/datasets/aws/s3_location_client.py | 2 +-
 .../modules/datasets/aws/sns_dataset_client.py | 2 +-
 .../modules/datasets/cdk/dataset_stack.py | 2 +-
 .../datasets/cdk/env_role_dataset_s3_policy.py | 4 ++--
 .../datasets/cdk/pivot_role_datasets_policy.py | 4 ++--
 .../datasets/db/dataset_bucket_repositories.py | 2 +-
 .../datasets/db/dataset_column_repositories.py | 2 +-
 .../db/dataset_location_repositories.py | 2 +-
 .../db/dataset_models.py | 2 +-
 .../db/dataset_profiling_repositories.py | 2 +-
 .../db/dataset_repositories.py | 4 ++--
 .../datasets/db/dataset_table_repositories.py | 2 +-
 .../datasets/handlers/glue_dataset_handler.py | 4 ++--
 .../handlers/glue_profiling_handler.py | 4 ++--
 .../handlers/glue_table_sync_handler.py | 2 +-
 .../indexers/dataset_catalog_indexer.py | 4 ++--
 .../datasets/indexers/dataset_indexer.py | 2 +-
 .../datasets/indexers/location_indexer.py | 2 +-
 .../modules/datasets/indexers/table_indexer.py | 2 +-
 .../services/dataset_column_service.py | 8 ++++----
 .../services/dataset_location_service.py | 6 +++---
 .../datasets/services/dataset_permissions.py | 18 +++++++++++++++++-
 .../services/dataset_profiling_service.py | 8 ++++----
 .../datasets/services/dataset_service.py | 8 ++++----
 .../datasets/services/dataset_table_service.py | 8 ++++----
 .../services/datasets_base_enums.py | 0
 .../datasets/tasks/dataset_stack_finder.py | 4 ++--
 .../tasks/dataset_subscription_task.py | 4 ++--
 .../modules/datasets/tasks/tables_syncer.py | 4 ++--
 .../modules/datasets_base/db/__init__.py | 0
 .../5e5c84138af7_backfill_confidentiality.py | 2 +-
 .../72b8a90b6ee8__share_request_purpose.py | 2 +-
 .../8c79fb896983_add_table_for_buckets.py | 2 +-
 .../versions/97050ec09354_release_3_7_8.py | 2 +-
 ...79d_add_backfill_read_folder_permissions.py | 4 ++--
 ...b215e_backfill_dataset_table_permissions.py | 4 ++--
 ...2b_rename_imported_dataset_aws_resources.py | 2 +-
 tests/modules/datasets/conftest.py | 6 +++---
 tests/modules/datasets/tasks/conftest.py | 2 +-
 .../tasks/test_dataset_catalog_indexer.py | 2 +-
 .../tasks/test_dataset_subscriptions.py | 2 +-
 .../datasets/tasks/test_dataset_tables_sync.py | 2 +-
 .../datasets/tasks/test_lf_share_manager.py | 2 +-
 .../test_s3_access_point_share_manager.py | 2 +-
 .../tasks/test_s3_bucket_share_manager.py | 2 +-
 .../tasks/test_stacks_updater_with_datasets.py | 2 +-
 tests/modules/datasets/test_dataset.py | 6 +++---
 tests/modules/datasets/test_dataset_feed.py | 2 +-
 .../modules/datasets/test_dataset_glossary.py | 2 +-
 .../modules/datasets/test_dataset_location.py | 2 +-
 .../datasets/test_dataset_permissions.py | 4 ++--
 .../modules/datasets/test_dataset_profiling.py | 2 +-
 .../datasets/test_dataset_resource_found.py | 2 +-
 tests/modules/datasets/test_dataset_stack.py | 2 +-
 tests/modules/datasets/test_dataset_table.py | 2 +-
 .../test_environment_stack_with_dataset.py | 2 +-
 .../datasets/test_import_dataset_check_unit.py | 2 +-
 tests/modules/datasets/test_share.py | 2 +-
 88 files changed, 140 insertions(+), 124 deletions(-)
 rename backend/dataall/modules/{datasets_base => datasets}/db/dataset_models.py (98%)
 rename backend/dataall/modules/{datasets_base => datasets}/db/dataset_repositories.py (98%)
 rename backend/dataall/modules/{datasets_base => datasets}/services/datasets_base_enums.py (100%)
 delete mode 100644 backend/dataall/modules/datasets_base/db/__init__.py

diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py
index 4b438e7bb..b073d3dff 100644
--- a/backend/dataall/modules/dataset_sharing/api/resolvers.py
+++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py
@@ -10,8 +10,8 @@
 from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService
 from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService
 from dataall.modules.dataset_sharing.aws.glue_client import GlueClient
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
index de6fcc4c8..18c66b87c 100644
--- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
+++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
@@ -20,8 +20,8 @@
     PrincipalType,
 )
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket
 
 logger = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py
index dd6896e51..eb8f1cd8b 100644
--- a/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/data_sharing_service.py
@@ -25,7 +25,7 @@
     ShareItemStatus,
     ShareableType,
 )
-from dataall.modules.datasets_base.db.dataset_models import DatasetLock
+from dataall.modules.datasets.db.dataset_models import DatasetLock
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py
index 6296fef1d..8d537f48c 100644
--- a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py
@@ -3,7 +3,7 @@
 
 from dataall.core.environment.db.environment_models import Environment
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation, DatasetBucket
+from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation, DatasetBucket
 from dataall.base.utils.alarm_service import AlarmService
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/dataset_sharing/services/share_item_service.py b/backend/dataall/modules/dataset_sharing/services/share_item_service.py
index 9a63fe956..67b70d636 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_item_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_item_service.py
@@ -31,8 +31,8 @@
     LIST_ENVIRONMENT_SHARED_WITH_OBJECTS,
     APPROVE_SHARE_OBJECT,
 )
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
index 35c0b9ee7..496f619ed 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
@@ -18,7 +18,7 @@
     ShareItemActions,
     ShareItemHealthStatus,
 )
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset
 from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
 from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareErrorFormatter

diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py
index e2b37bdcd..4a948b8d9 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py
@@ -31,7 +31,7 @@
     EMPTY_STATEMENT_SID,
 )
 from dataall.modules.dataset_sharing.services.dataset_sharing_enums import PrincipalType
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset
 
 logger = logging.getLogger(__name__)
 ACCESS_POINT_CREATION_TIME = 30

diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py
index d9923c207..e8326b71b 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py
@@ -23,7 +23,7 @@
     EMPTY_STATEMENT_SID,
 )
 from dataall.modules.dataset_sharing.services.dataset_sharing_enums import PrincipalType
-from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket
+from dataall.modules.datasets.db.dataset_models import Dataset, DatasetBucket
 from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository
 
 logger = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/dataset_sharing/services/share_notification_service.py b/backend/dataall/modules/dataset_sharing/services/share_notification_service.py
index 33a3e8e18..55c00a60b 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_notification_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_notification_service.py
@@ -6,7 +6,7 @@
 from dataall.core.tasks.db.task_models import Task
 from dataall.core.tasks.service_handlers import Worker
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_models import Dataset
 from dataall.base.context import get_context
 from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareObjectStatus
 from dataall.modules.notifications.db.notification_repositories import NotificationRepository

diff --git a/backend/dataall/modules/dataset_sharing/services/share_object_service.py b/backend/dataall/modules/dataset_sharing/services/share_object_service.py
index e2ff27ba3..683ea2be9 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_object_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_object_service.py
@@ -41,9 +41,9 @@
     GET_SHARE_OBJECT,
 )
 from dataall.modules.dataset_sharing.aws.glue_client import GlueClient
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation
-from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ, DATASET_FOLDER_READ
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation
+from dataall.modules.datasets.services.dataset_permissions import DATASET_TABLE_READ, DATASET_FOLDER_READ
 from dataall.base.aws.iam import IAM
 
 import logging

diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/lakeformation_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/lakeformation_process_share.py
index c91830020..335c22498 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_processors/lakeformation_process_share.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_processors/lakeformation_process_share.py
@@ -13,7 +13,7 @@
 from dataall.modules.dataset_sharing.services.share_managers import LFShareManager
 from dataall.modules.dataset_sharing.aws.ram_client import RamClient
 from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
 from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository, ShareItemSM
 from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareErrorFormatter

diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py
index 6152ea123..272684ec4 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_access_point_process_share.py
@@ -5,7 +5,7 @@
 from dataall.modules.dataset_sharing.services.share_exceptions import PrincipalRoleNotFound
 from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager
 from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset
 from dataall.modules.dataset_sharing.services.dataset_sharing_enums import (
     ShareItemHealthStatus,
     ShareItemStatus,

diff --git a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py
index 9b2819a14..036f2919e 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_processors/s3_bucket_process_share.py
@@ -5,7 +5,7 @@
 from dataall.modules.dataset_sharing.services.share_exceptions import PrincipalRoleNotFound
 from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager
 from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService
-from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket
+from dataall.modules.datasets.db.dataset_models import Dataset, DatasetBucket
 from dataall.modules.dataset_sharing.services.dataset_sharing_enums import (
     ShareItemHealthStatus,
     ShareItemStatus,

diff --git a/backend/dataall/modules/datasets/__init__.py b/backend/dataall/modules/datasets/__init__.py
index 7ed6acb89..2c17b981a 100644
--- a/backend/dataall/modules/datasets/__init__.py
+++ b/backend/dataall/modules/datasets/__init__.py
@@ -44,8 +44,8 @@ def __init__(self):
         import dataall.modules.datasets.api
         from dataall.modules.datasets.services.dataset_permissions import GET_DATASET, UPDATE_DATASET
-        from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-        from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
+        from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+        from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
 
         FeedRegistry.register(FeedDefinition('DatasetStorageLocation', DatasetStorageLocation))
         FeedRegistry.register(FeedDefinition('DatasetTable', DatasetTable))

diff --git a/backend/dataall/modules/datasets/api/dataset/input_types.py b/backend/dataall/modules/datasets/api/dataset/input_types.py
index ef9fcfc54..303f007b1 100644
--- a/backend/dataall/modules/datasets/api/dataset/input_types.py
+++ b/backend/dataall/modules/datasets/api/dataset/input_types.py
@@ -1,6 +1,6 @@
 from dataall.base.api import gql
 from dataall.base.api.constants import SortDirection
-from dataall.modules.datasets_base.services.datasets_base_enums import DatasetSortField
+from dataall.modules.datasets.services.datasets_base_enums import DatasetSortField
 
 
 NewDatasetInput = gql.InputType(

diff --git a/backend/dataall/modules/datasets/api/dataset/resolvers.py b/backend/dataall/modules/datasets/api/dataset/resolvers.py
index 71793bef2..8f303b6e6 100644
--- a/backend/dataall/modules/datasets/api/dataset/resolvers.py
+++ b/backend/dataall/modules/datasets/api/dataset/resolvers.py
@@ -8,8 +8,8 @@
 from dataall.core.organizations.db.organization_repositories import OrganizationRepository
 from dataall.base.db.exceptions import RequiredParameter, InvalidInput
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
-from dataall.modules.datasets_base.db.dataset_models import Dataset
-from dataall.modules.datasets_base.services.datasets_base_enums import DatasetRole, ConfidentialityClassification
+from dataall.modules.datasets.db.dataset_models import Dataset
+from dataall.modules.datasets.services.datasets_base_enums import DatasetRole, ConfidentialityClassification
 from dataall.modules.datasets.services.dataset_service import DatasetService
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/api/dataset/types.py b/backend/dataall/modules/datasets/api/dataset/types.py
index 97d4a6327..7426a1f5d 100644
--- a/backend/dataall/modules/datasets/api/dataset/types.py
+++ b/backend/dataall/modules/datasets/api/dataset/types.py
@@ -1,5 +1,5 @@
 from dataall.base.api import gql
-from dataall.modules.datasets_base.services.datasets_base_enums import DatasetRole
+from dataall.modules.datasets.services.datasets_base_enums import DatasetRole
 from dataall.modules.datasets.api.dataset.resolvers import (
     get_dataset_environment,
     get_dataset_organization,

diff --git a/backend/dataall/modules/datasets/api/profiling/resolvers.py b/backend/dataall/modules/datasets/api/profiling/resolvers.py
index e7e4c6bff..bcfd161b1 100644
--- a/backend/dataall/modules/datasets/api/profiling/resolvers.py
+++ b/backend/dataall/modules/datasets/api/profiling/resolvers.py
@@ -5,7 +5,7 @@
 from dataall.base.db.exceptions import RequiredParameter
 from dataall.modules.datasets.services.dataset_profiling_service import DatasetProfilingService
 from dataall.modules.datasets.services.dataset_service import DatasetService
-from dataall.modules.datasets_base.db.dataset_models import DatasetProfilingRun
+from dataall.modules.datasets.db.dataset_models import DatasetProfilingRun
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/api/storage_location/resolvers.py b/backend/dataall/modules/datasets/api/storage_location/resolvers.py
index 2e3ac203f..41187827e 100644
--- a/backend/dataall/modules/datasets/api/storage_location/resolvers.py
+++ b/backend/dataall/modules/datasets/api/storage_location/resolvers.py
@@ -3,7 +3,7 @@
 from dataall.base.db.exceptions import RequiredParameter
 from dataall.base.feature_toggle_checker import is_feature_enabled
 from dataall.modules.datasets.services.dataset_location_service import DatasetLocationService
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset
 
 
 @is_feature_enabled('modules.datasets.features.file_actions')

diff --git a/backend/dataall/modules/datasets/api/table/input_types.py b/backend/dataall/modules/datasets/api/table/input_types.py
index ef9761bbf..4cdc016f7 100644
--- a/backend/dataall/modules/datasets/api/table/input_types.py
+++ b/backend/dataall/modules/datasets/api/table/input_types.py
@@ -1,6 +1,6 @@
 from dataall.base.api import gql
 from dataall.base.api.constants import SortDirection
-from dataall.modules.datasets_base.services.datasets_base_enums import DatasetSortField
+from dataall.modules.datasets.services.datasets_base_enums import DatasetSortField
 
 
 ModifyDatasetTableInput = gql.InputType(

diff --git a/backend/dataall/modules/datasets/api/table/resolvers.py b/backend/dataall/modules/datasets/api/table/resolvers.py
index 899acf656..0b312b5ea 100644
--- a/backend/dataall/modules/datasets/api/table/resolvers.py
+++ b/backend/dataall/modules/datasets/api/table/resolvers.py
@@ -5,7 +5,7 @@
 from dataall.modules.datasets.api.dataset.resolvers import get_dataset
 from dataall.base.api.context import Context
 from dataall.modules.datasets.services.dataset_table_service import DatasetTableService
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/api/table_column/resolvers.py b/backend/dataall/modules/datasets/api/table_column/resolvers.py
index 70a29690f..5cb1a8342 100644
--- a/backend/dataall/modules/datasets/api/table_column/resolvers.py
+++ b/backend/dataall/modules/datasets/api/table_column/resolvers.py
@@ -2,7 +2,7 @@
 from dataall.modules.catalog.db.glossary_models import TermLink
 from dataall.base.db import paginate
 from dataall.modules.datasets.services.dataset_column_service import DatasetColumnService
-from dataall.modules.datasets_base.db.dataset_models import DatasetTableColumn, DatasetTable
+from dataall.modules.datasets.db.dataset_models import DatasetTableColumn, DatasetTable
 
 
 def list_table_columns(

diff --git a/backend/dataall/modules/datasets/aws/athena_table_client.py b/backend/dataall/modules/datasets/aws/athena_table_client.py
index 3c5bf01ec..40764ded9 100644
--- a/backend/dataall/modules/datasets/aws/athena_table_client.py
+++ b/backend/dataall/modules/datasets/aws/athena_table_client.py
@@ -6,7 +6,7 @@
 
 from dataall.base.aws.sts import SessionHelper
 from dataall.core.environment.db.environment_models import Environment
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable
+from dataall.modules.datasets.db.dataset_models import DatasetTable
 from dataall.base.utils import json_utils, sql_utils
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/aws/glue_dataset_client.py b/backend/dataall/modules/datasets/aws/glue_dataset_client.py
index 0cc589254..77dd5a3d3 100644
--- a/backend/dataall/modules/datasets/aws/glue_dataset_client.py
+++ b/backend/dataall/modules/datasets/aws/glue_dataset_client.py
@@ -2,7 +2,7 @@
 from botocore.exceptions import ClientError
 
 from dataall.base.aws.sts import SessionHelper
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/aws/glue_profiler_client.py b/backend/dataall/modules/datasets/aws/glue_profiler_client.py
index 0526a7137..3aab6daf3 100644
--- a/backend/dataall/modules/datasets/aws/glue_profiler_client.py
+++ b/backend/dataall/modules/datasets/aws/glue_profiler_client.py
@@ -2,8 +2,8 @@
 from botocore.exceptions import ClientError
 
 from dataall.base.aws.sts import SessionHelper
-from dataall.modules.datasets_base.db.dataset_models import Dataset
-from dataall.modules.datasets_base.db.dataset_models import DatasetProfilingRun
+from dataall.modules.datasets.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetProfilingRun
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/aws/glue_table_client.py b/backend/dataall/modules/datasets/aws/glue_table_client.py
index a20897de4..627d07d93 100644
--- a/backend/dataall/modules/datasets/aws/glue_table_client.py
+++ b/backend/dataall/modules/datasets/aws/glue_table_client.py
@@ -2,7 +2,7 @@
 
 from botocore.exceptions import ClientError
 
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable
+from dataall.modules.datasets.db.dataset_models import DatasetTable
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/aws/lf_dataset_client.py b/backend/dataall/modules/datasets/aws/lf_dataset_client.py
index 13cffc52d..aaffc7787 100644
--- a/backend/dataall/modules/datasets/aws/lf_dataset_client.py
+++ b/backend/dataall/modules/datasets/aws/lf_dataset_client.py
@@ -3,7 +3,7 @@
 
 from dataall.base.aws.sts import SessionHelper
 from dataall.core.environment.db.environment_models import Environment
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 log = logging.getLogger(__name__)
 PIVOT_ROLE_NAME_PREFIX = 'dataallPivotRole'

diff --git a/backend/dataall/modules/datasets/aws/lf_table_client.py b/backend/dataall/modules/datasets/aws/lf_table_client.py
index c0daade69..4a8d21337 100644
--- a/backend/dataall/modules/datasets/aws/lf_table_client.py
+++ b/backend/dataall/modules/datasets/aws/lf_table_client.py
@@ -2,7 +2,7 @@
 from botocore.exceptions import ClientError
 
 from dataall.base.aws.sts import SessionHelper
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable
+from dataall.modules.datasets.db.dataset_models import DatasetTable
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/aws/s3_dataset_bucket_policy_client.py b/backend/dataall/modules/datasets/aws/s3_dataset_bucket_policy_client.py
index bc449326e..57a9dde43 100644
--- a/backend/dataall/modules/datasets/aws/s3_dataset_bucket_policy_client.py
+++ b/backend/dataall/modules/datasets/aws/s3_dataset_bucket_policy_client.py
@@ -4,7 +4,7 @@
 from botocore.exceptions import ClientError
 
 from dataall.base.aws.sts import SessionHelper
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/aws/s3_dataset_client.py b/backend/dataall/modules/datasets/aws/s3_dataset_client.py
index dd199aec0..a96589a47 100644
--- a/backend/dataall/modules/datasets/aws/s3_dataset_client.py
+++ b/backend/dataall/modules/datasets/aws/s3_dataset_client.py
@@ -5,7 +5,7 @@
 from botocore.exceptions import ClientError
 
 from dataall.base.aws.sts import SessionHelper
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/aws/s3_location_client.py b/backend/dataall/modules/datasets/aws/s3_location_client.py
index 3d1041183..94b2d33a5 100644
--- a/backend/dataall/modules/datasets/aws/s3_location_client.py
+++ b/backend/dataall/modules/datasets/aws/s3_location_client.py
@@ -1,7 +1,7 @@
 import logging
 
 from dataall.base.aws.sts import SessionHelper
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/aws/sns_dataset_client.py b/backend/dataall/modules/datasets/aws/sns_dataset_client.py
index 9aaa9e63b..c2cce4e5a 100644
--- a/backend/dataall/modules/datasets/aws/sns_dataset_client.py
+++ b/backend/dataall/modules/datasets/aws/sns_dataset_client.py
@@ -5,7 +5,7 @@
 
 from dataall.base.aws.sts import SessionHelper
 from dataall.core.environment.db.environment_models import Environment
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/cdk/dataset_stack.py b/backend/dataall/modules/datasets/cdk/dataset_stack.py
index 67cb8809e..abd428ad1 100644
--- a/backend/dataall/modules/datasets/cdk/dataset_stack.py
+++ b/backend/dataall/modules/datasets/cdk/dataset_stack.py
@@ -23,7 +23,7 @@
 from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup
 from dataall.core.stacks.services.runtime_stacks_tagging import TagsUtil
 from dataall.modules.datasets.aws.lf_dataset_client import LakeFormationDatasetClient
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_models import Dataset
 from dataall.base.utils.cdk_nag_utils import CDKNagUtil
 from dataall.base.config import config

diff --git a/backend/dataall/modules/datasets/cdk/env_role_dataset_s3_policy.py b/backend/dataall/modules/datasets/cdk/env_role_dataset_s3_policy.py
index 1ec61b7f8..2fab433f5 100644
--- a/backend/dataall/modules/datasets/cdk/env_role_dataset_s3_policy.py
+++ b/backend/dataall/modules/datasets/cdk/env_role_dataset_s3_policy.py
@@ -2,8 +2,8 @@
 from aws_cdk import aws_iam as iam
 
 from dataall.core.environment.cdk.env_role_core_policies.data_policy import S3Policy
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 
 class DatasetS3Policy(S3Policy):

diff --git a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py
index 575fa8b6e..72f16de6e 100644
--- a/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py
+++ b/backend/dataall/modules/datasets/cdk/pivot_role_datasets_policy.py
@@ -5,8 +5,8 @@
     split_policy_with_mutiple_value_condition_in_statements,
 )
 from dataall.core.environment.cdk.pivot_role_stack import PivotRoleStatementSet
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import Dataset
 from aws_cdk import aws_iam as iam

diff --git a/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py
index 767bc4be6..ba1da41e3 100644
--- a/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py
+++ b/backend/dataall/modules/datasets/db/dataset_bucket_repositories.py
@@ -1,6 +1,6 @@
 import logging
 
-from dataall.modules.datasets_base.db.dataset_models import DatasetBucket, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetBucket, Dataset
 
 logger = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/db/dataset_column_repositories.py b/backend/dataall/modules/datasets/db/dataset_column_repositories.py
index 4ed1cda2c..c147d5f77 100644
--- a/backend/dataall/modules/datasets/db/dataset_column_repositories.py
+++ b/backend/dataall/modules/datasets/db/dataset_column_repositories.py
@@ -2,7 +2,7 @@
 
 from dataall.base.db import paginate
 from dataall.base.db.exceptions import ObjectNotFound
-from dataall.modules.datasets_base.db.dataset_models import DatasetTableColumn
+from dataall.modules.datasets.db.dataset_models import DatasetTableColumn
 
 
 class DatasetColumnRepository:

diff --git a/backend/dataall/modules/datasets/db/dataset_location_repositories.py b/backend/dataall/modules/datasets/db/dataset_location_repositories.py
index 87f170a9b..8cb8c1e45 100644
--- a/backend/dataall/modules/datasets/db/dataset_location_repositories.py
+++ b/backend/dataall/modules/datasets/db/dataset_location_repositories.py
@@ -3,7 +3,7 @@
 from sqlalchemy import and_, or_
 
 from dataall.base.db import paginate, exceptions
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset
 
 logger = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets_base/db/dataset_models.py b/backend/dataall/modules/datasets/db/dataset_models.py
similarity index 98%
rename from backend/dataall/modules/datasets_base/db/dataset_models.py
rename to backend/dataall/modules/datasets/db/dataset_models.py
index de2588f3d..7f2e8dd41 100644
--- a/backend/dataall/modules/datasets_base/db/dataset_models.py
+++ b/backend/dataall/modules/datasets/db/dataset_models.py
@@ -2,7 +2,7 @@
 from sqlalchemy.dialects.postgresql import JSON, ARRAY
 from sqlalchemy.orm import query_expression
 from dataall.base.db import Base, Resource, utils
-from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification, Language
+from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification, Language
 
 
 class DatasetTableColumn(Resource, Base):

diff --git a/backend/dataall/modules/datasets/db/dataset_profiling_repositories.py b/backend/dataall/modules/datasets/db/dataset_profiling_repositories.py
index 633e11a5f..000c364cb 100644
--- a/backend/dataall/modules/datasets/db/dataset_profiling_repositories.py
+++ b/backend/dataall/modules/datasets/db/dataset_profiling_repositories.py
@@ -1,7 +1,7 @@
 from sqlalchemy import and_
 
 from dataall.base.db import paginate
-from dataall.modules.datasets_base.db.dataset_models import DatasetProfilingRun, DatasetTable
+from dataall.modules.datasets.db.dataset_models import DatasetProfilingRun, DatasetTable
 
 
 class DatasetProfilingRepository:

diff --git a/backend/dataall/modules/datasets_base/db/dataset_repositories.py b/backend/dataall/modules/datasets/db/dataset_repositories.py
similarity index 98%
rename from backend/dataall/modules/datasets_base/db/dataset_repositories.py
rename to backend/dataall/modules/datasets/db/dataset_repositories.py
index d6d308a02..b4a28064b 100644
--- a/backend/dataall/modules/datasets_base/db/dataset_repositories.py
+++ b/backend/dataall/modules/datasets/db/dataset_repositories.py
@@ -7,9 +7,9 @@
 from dataall.core.organizations.db.organization_repositories import OrganizationRepository
 from dataall.base.db import paginate
 from dataall.base.db.exceptions import ObjectNotFound
-from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification, Language
+from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification, Language
 from dataall.core.environment.services.environment_resource_manager import EnvironmentResource
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset, DatasetLock
+from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset, DatasetLock
 from dataall.base.utils.naming_convention import (
     NamingConventionService,
     NamingConventionPattern,

diff --git a/backend/dataall/modules/datasets/db/dataset_table_repositories.py b/backend/dataall/modules/datasets/db/dataset_table_repositories.py
index 58bbc5a3b..13188fd9a 100644
--- a/backend/dataall/modules/datasets/db/dataset_table_repositories.py
+++ b/backend/dataall/modules/datasets/db/dataset_table_repositories.py
@@ -8,7 +8,7 @@
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
 from dataall.modules.dataset_sharing.db.share_object_repositories import ShareItemSM
 from dataall.modules.dataset_sharing.services.dataset_sharing_enums import PrincipalType
-from dataall.modules.datasets_base.db.dataset_models import DatasetTableColumn, DatasetTable, Dataset
+from dataall.modules.datasets.db.dataset_models import DatasetTableColumn, DatasetTable, Dataset
 from dataall.base.utils import json_utils
 
 logger = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/handlers/glue_dataset_handler.py b/backend/dataall/modules/datasets/handlers/glue_dataset_handler.py
index 81b4e5f2b..a255926af 100644
--- a/backend/dataall/modules/datasets/handlers/glue_dataset_handler.py
+++ b/backend/dataall/modules/datasets/handlers/glue_dataset_handler.py
@@ -3,8 +3,8 @@
 from dataall.core.tasks.service_handlers import Worker
 from dataall.core.tasks.db.task_models import Task
 from dataall.modules.datasets.aws.glue_dataset_client import DatasetCrawler
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/handlers/glue_profiling_handler.py b/backend/dataall/modules/datasets/handlers/glue_profiling_handler.py
index c782b0a60..a94221796 100644
--- a/backend/dataall/modules/datasets/handlers/glue_profiling_handler.py
+++ b/backend/dataall/modules/datasets/handlers/glue_profiling_handler.py
@@ -4,8 +4,8 @@
 from dataall.core.tasks.db.task_models import Task
 from dataall.modules.datasets.aws.glue_profiler_client import GlueDatasetProfilerClient
 from dataall.modules.datasets.db.dataset_profiling_repositories import DatasetProfilingRepository
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import DatasetProfilingRun, Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import DatasetProfilingRun, Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/handlers/glue_table_sync_handler.py b/backend/dataall/modules/datasets/handlers/glue_table_sync_handler.py
index 9d56df046..3588f3c02 100644
--- a/backend/dataall/modules/datasets/handlers/glue_table_sync_handler.py
+++ b/backend/dataall/modules/datasets/handlers/glue_table_sync_handler.py
@@ -5,7 +5,7 @@
 from dataall.core.tasks.db.task_models import Task
 from dataall.modules.datasets.aws.glue_table_client import GlueTableClient
 from dataall.modules.datasets.aws.lf_table_client import LakeFormationTableClient
-from dataall.modules.datasets_base.db.dataset_models import DatasetTableColumn, DatasetTable
+from dataall.modules.datasets.db.dataset_models import DatasetTableColumn, DatasetTable
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/indexers/dataset_catalog_indexer.py b/backend/dataall/modules/datasets/indexers/dataset_catalog_indexer.py
index 5d12cca1d..015476467 100644
--- a/backend/dataall/modules/datasets/indexers/dataset_catalog_indexer.py
+++ b/backend/dataall/modules/datasets/indexers/dataset_catalog_indexer.py
@@ -3,8 +3,8 @@
 from dataall.modules.datasets.indexers.dataset_indexer import DatasetIndexer
 from dataall.modules.datasets.indexers.location_indexer import DatasetLocationIndexer
 from dataall.modules.datasets.indexers.table_indexer import DatasetTableIndexer
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import Dataset
 from dataall.modules.catalog.indexers.catalog_indexer import CatalogIndexer
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/indexers/dataset_indexer.py b/backend/dataall/modules/datasets/indexers/dataset_indexer.py
index 0cbca6025..e2764a82b 100644
--- a/backend/dataall/modules/datasets/indexers/dataset_indexer.py
+++ b/backend/dataall/modules/datasets/indexers/dataset_indexer.py
@@ -5,7 +5,7 @@
 from dataall.core.environment.services.environment_service import EnvironmentService
 from dataall.core.organizations.db.organization_repositories import OrganizationRepository
 from dataall.modules.vote.db.vote_repositories import VoteRepository
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
 from dataall.modules.datasets.db.dataset_location_repositories import DatasetLocationRepository
 from dataall.modules.catalog.indexers.base_indexer import BaseIndexer

diff --git a/backend/dataall/modules/datasets/indexers/location_indexer.py b/backend/dataall/modules/datasets/indexers/location_indexer.py
index d339938e4..3ee140182 100644
--- a/backend/dataall/modules/datasets/indexers/location_indexer.py
+++ b/backend/dataall/modules/datasets/indexers/location_indexer.py
@@ -5,7 +5,7 @@
 from dataall.core.environment.services.environment_service import EnvironmentService
 from dataall.core.organizations.db.organization_repositories import OrganizationRepository
 from dataall.modules.datasets.db.dataset_location_repositories import DatasetLocationRepository
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
 from dataall.modules.datasets.indexers.dataset_indexer import DatasetIndexer
 from dataall.modules.catalog.indexers.base_indexer import BaseIndexer

diff --git a/backend/dataall/modules/datasets/indexers/table_indexer.py b/backend/dataall/modules/datasets/indexers/table_indexer.py
index 3f5ecec05..4f4f4c8c3 100644
--- a/backend/dataall/modules/datasets/indexers/table_indexer.py
+++ b/backend/dataall/modules/datasets/indexers/table_indexer.py
@@ -5,7 +5,7 @@
 from dataall.core.environment.services.environment_service import EnvironmentService
 from dataall.core.organizations.db.organization_repositories import OrganizationRepository
 from dataall.modules.datasets.db.dataset_table_repositories import DatasetTableRepository
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
 from dataall.modules.datasets.indexers.dataset_indexer import DatasetIndexer
 from dataall.modules.catalog.indexers.base_indexer import BaseIndexer

diff --git a/backend/dataall/modules/datasets/services/dataset_column_service.py b/backend/dataall/modules/datasets/services/dataset_column_service.py
index 8a95d2f32..f9991d91f 100644
--- a/backend/dataall/modules/datasets/services/dataset_column_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_column_service.py
@@ -7,10 +7,10 @@
 from dataall.modules.datasets.db.dataset_column_repositories import DatasetColumnRepository
 from dataall.modules.datasets.db.dataset_table_repositories import DatasetTableRepository
 from dataall.modules.datasets.services.dataset_permissions import UPDATE_DATASET_TABLE
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, DatasetTableColumn
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification
-from dataall.modules.datasets_base.services.permissions import PREVIEW_DATASET_TABLE
+from dataall.modules.datasets.db.dataset_models import DatasetTable, DatasetTableColumn
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification
+from dataall.modules.datasets.services.dataset_permissions import PREVIEW_DATASET_TABLE
 
 
 class DatasetColumnService:

diff --git a/backend/dataall/modules/datasets/services/dataset_location_service.py b/backend/dataall/modules/datasets/services/dataset_location_service.py
index 3d05b33ba..c07dcfae0 100644
--- a/backend/dataall/modules/datasets/services/dataset_location_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_location_service.py
@@ -14,9 +14,9 @@
     LIST_DATASET_FOLDERS,
     DELETE_DATASET_FOLDER,
 )
-from dataall.modules.datasets_base.services.permissions import DATASET_FOLDER_READ, GET_DATASET_FOLDER
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset
+from dataall.modules.datasets.services.dataset_permissions import DATASET_FOLDER_READ, GET_DATASET_FOLDER
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset
 
 
 class DatasetLocationService:

diff --git a/backend/dataall/modules/datasets/services/dataset_permissions.py b/backend/dataall/modules/datasets/services/dataset_permissions.py
index d89e00371..5d12baeb7 100644
--- a/backend/dataall/modules/datasets/services/dataset_permissions.py
+++ b/backend/dataall/modules/datasets/services/dataset_permissions.py
@@ -11,7 +11,6 @@
     RESOURCES_ALL,
     RESOURCES_ALL_WITH_DESC,
 )
-from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ, DATASET_FOLDER_READ
 
 MANAGE_DATASETS = 'MANAGE_DATASETS'
 
@@ -63,6 +62,23 @@
 DATASET_ALL = list(set(DATASET_WRITE + DATASET_READ))
 RESOURCES_ALL.extend(DATASET_ALL)
 
+"""
+DATASET TABLE PERMISSIONS
+"""
+
+GET_DATASET_TABLE = 'GET_DATASET_TABLE'
+PREVIEW_DATASET_TABLE = 'PREVIEW_DATASET_TABLE'
+
+DATASET_TABLE_READ = [GET_DATASET_TABLE, PREVIEW_DATASET_TABLE]
+
+"""
+DATASET FOLDER PERMISSIONS
+"""
+GET_DATASET_FOLDER = 'GET_DATASET_FOLDER'
+
+DATASET_FOLDER_READ = [GET_DATASET_FOLDER]
+
+
 RESOURCES_ALL.extend(DATASET_TABLE_READ)
 RESOURCES_ALL.extend(DATASET_FOLDER_READ)

diff --git a/backend/dataall/modules/datasets/services/dataset_profiling_service.py b/backend/dataall/modules/datasets/services/dataset_profiling_service.py
index 954117251..b21c9db3d 100644
--- a/backend/dataall/modules/datasets/services/dataset_profiling_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_profiling_service.py
@@ -12,10 +12,10 @@
 from dataall.modules.datasets.db.dataset_profiling_repositories import DatasetProfilingRepository
 from dataall.modules.datasets.db.dataset_table_repositories import DatasetTableRepository
 from dataall.modules.datasets.services.dataset_permissions import PROFILE_DATASET_TABLE, GET_DATASET
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification
-from dataall.modules.datasets_base.db.dataset_models import DatasetProfilingRun, DatasetTable
-from dataall.modules.datasets_base.services.permissions import PREVIEW_DATASET_TABLE
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification
+from dataall.modules.datasets.db.dataset_models import DatasetProfilingRun, DatasetTable
+from dataall.modules.datasets.services.dataset_permissions import PREVIEW_DATASET_TABLE
 
 
 class DatasetProfilingService:

diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py
index 44296fa0e..20982db08 100644
--- a/backend/dataall/modules/datasets/services/dataset_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_service.py
@@ -42,10 +42,10 @@
     DATASET_READ,
     IMPORT_DATASET,
 )
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.services.datasets_base_enums import DatasetRole
-from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetTable
-from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.services.datasets_base_enums import DatasetRole
+from dataall.modules.datasets.db.dataset_models import Dataset, DatasetTable
+from dataall.modules.datasets.services.dataset_permissions import DATASET_TABLE_READ
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/services/dataset_table_service.py b/backend/dataall/modules/datasets/services/dataset_table_service.py
index 54f8cd6e9..2772cad99 100644
--- a/backend/dataall/modules/datasets/services/dataset_table_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_table_service.py
@@ -17,10 +17,10 @@
     DELETE_DATASET_TABLE,
     SYNC_DATASET,
 )
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset
-from dataall.modules.datasets_base.services.permissions import (
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification
+from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset
+from dataall.modules.datasets.services.dataset_permissions import (
     PREVIEW_DATASET_TABLE,
     DATASET_TABLE_READ,
     GET_DATASET_TABLE,

diff --git a/backend/dataall/modules/datasets_base/services/datasets_base_enums.py b/backend/dataall/modules/datasets/services/datasets_base_enums.py
similarity index 100%
rename from backend/dataall/modules/datasets_base/services/datasets_base_enums.py
rename to backend/dataall/modules/datasets/services/datasets_base_enums.py

diff --git a/backend/dataall/modules/datasets/tasks/dataset_stack_finder.py b/backend/dataall/modules/datasets/tasks/dataset_stack_finder.py
index c55b0f7c5..684dbf245 100644
--- a/backend/dataall/modules/datasets/tasks/dataset_stack_finder.py
+++ b/backend/dataall/modules/datasets/tasks/dataset_stack_finder.py
@@ -2,8 +2,8 @@
 from typing import List
 
 from dataall.core.environment.tasks.env_stack_finder import StackFinder
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 log = logging.getLogger(__name__)

diff --git a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py b/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py
index 3a1916107..d4fe628b5 100644
--- a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py
+++ b/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py
@@ -16,8 +16,8 @@
 from dataall.modules.datasets.db.dataset_location_repositories import DatasetLocationRepository
 from dataall.modules.datasets.db.dataset_table_repositories import DatasetTableRepository
 from dataall.modules.datasets.tasks.subscriptions import poll_queues
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
 
 root = logging.getLogger()
 root.setLevel(logging.INFO)

diff --git a/backend/dataall/modules/datasets/tasks/tables_syncer.py b/backend/dataall/modules/datasets/tasks/tables_syncer.py
index 67032618a..7c27f3143 100644
--- a/backend/dataall/modules/datasets/tasks/tables_syncer.py
+++ b/backend/dataall/modules/datasets/tasks/tables_syncer.py
@@ -10,8 +10,8 @@
 from dataall.modules.datasets.aws.glue_dataset_client import DatasetCrawler
 from dataall.modules.datasets.aws.lf_table_client import LakeFormationTableClient
 from dataall.modules.datasets.services.dataset_table_service import DatasetTableService
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset
 from dataall.modules.datasets.indexers.table_indexer import DatasetTableIndexer
 from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService

diff --git a/backend/dataall/modules/datasets_base/db/__init__.py b/backend/dataall/modules/datasets_base/db/__init__.py
deleted file mode 100644
index e69de29bb..000000000

diff --git a/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py b/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py
index e5297161d..c7b843d6a 100644
--- a/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py
+++ b/backend/migrations/versions/5e5c84138af7_backfill_confidentiality.py
@@ -14,7 +14,7 @@
 from sqlalchemy.ext.declarative import declarative_base
 
 from dataall.base.db import utils, Resource
-from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification, Language
+from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification, Language
 
 
 revision = '5e5c84138af7'

diff --git a/backend/migrations/versions/72b8a90b6ee8__share_request_purpose.py b/backend/migrations/versions/72b8a90b6ee8__share_request_purpose.py
index ece8964fe..54f6de292 100644
--- a/backend/migrations/versions/72b8a90b6ee8__share_request_purpose.py
+++ b/backend/migrations/versions/72b8a90b6ee8__share_request_purpose.py
@@ -14,7 +14,7 @@
 from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
 from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_APPROVER, SHARE_OBJECT_REQUESTER
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
 
 # revision identifiers, used by Alembic.
 revision = '72b8a90b6ee8'

diff --git a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py
index f2a13c988..25a6fbe1e 100644
--- a/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py
+++ b/backend/migrations/versions/8c79fb896983_add_table_for_buckets.py
@@ -19,7 +19,7 @@
 from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareObjectStatus
 from datetime import datetime
 
-from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification, Language
+from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification, Language
 
 
 # revision identifiers, used by Alembic.

diff --git a/backend/migrations/versions/97050ec09354_release_3_7_8.py b/backend/migrations/versions/97050ec09354_release_3_7_8.py
index bb9775ab1..1bcb39fab 100644
--- a/backend/migrations/versions/97050ec09354_release_3_7_8.py
+++ b/backend/migrations/versions/97050ec09354_release_3_7_8.py
@@ -12,7 +12,7 @@
 from sqlalchemy.ext.declarative import declarative_base
 
 from dataall.base.db import utils, Resource
-from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification, Language
+from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification, Language
 
 
 # revision identifiers, used by Alembic.

diff --git a/backend/migrations/versions/c6d01930179d_add_backfill_read_folder_permissions.py b/backend/migrations/versions/c6d01930179d_add_backfill_read_folder_permissions.py
index 6708ad534..2d372c5bb 100644
--- a/backend/migrations/versions/c6d01930179d_add_backfill_read_folder_permissions.py
+++ b/backend/migrations/versions/c6d01930179d_add_backfill_read_folder_permissions.py
@@ -12,8 +12,8 @@
 from dataall.core.permissions.api.enums import PermissionType
 from dataall.core.permissions.services.permission_service import PermissionService
 from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService
-from dataall.modules.datasets_base.services.permissions import DATASET_FOLDER_READ, GET_DATASET_FOLDER
-from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset
+from dataall.modules.datasets.services.dataset_permissions import DATASET_FOLDER_READ, GET_DATASET_FOLDER
+from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem
 from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareItemStatus, ShareableType

diff --git a/backend/migrations/versions/d05f9a5b215e_backfill_dataset_table_permissions.py b/backend/migrations/versions/d05f9a5b215e_backfill_dataset_table_permissions.py
index f40aa2b10..17cbca3b0 100644
--- a/backend/migrations/versions/d05f9a5b215e_backfill_dataset_table_permissions.py
+++ b/backend/migrations/versions/d05f9a5b215e_backfill_dataset_table_permissions.py
@@ -25,8 +25,8 @@
     ShareItemStatus,
 )
 from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository
-from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository
-from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ
+from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
+from dataall.modules.datasets.services.dataset_permissions import DATASET_TABLE_READ
 
 # revision identifiers, used by Alembic.
revision = 'd05f9a5b215e' diff --git a/backend/migrations/versions/e1cd4927482b_rename_imported_dataset_aws_resources.py b/backend/migrations/versions/e1cd4927482b_rename_imported_dataset_aws_resources.py index 5998380bd..8eba7dce7 100644 --- a/backend/migrations/versions/e1cd4927482b_rename_imported_dataset_aws_resources.py +++ b/backend/migrations/versions/e1cd4927482b_rename_imported_dataset_aws_resources.py @@ -16,7 +16,7 @@ NamingConventionService, NamingConventionPattern, ) -from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification, Language +from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification, Language # revision identifiers, used by Alembic. revision = 'e1cd4927482b' diff --git a/tests/modules/datasets/conftest.py b/tests/modules/datasets/conftest.py index 7d578ac58..ecff12d7d 100644 --- a/tests/modules/datasets/conftest.py +++ b/tests/modules/datasets/conftest.py @@ -9,9 +9,9 @@ from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareableType, PrincipalType from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_REQUESTER, SHARE_OBJECT_APPROVER -from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification -from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ -from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation +from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification +from dataall.modules.datasets.services.dataset_permissions import DATASET_TABLE_READ +from dataall.modules.datasets.db.dataset_models import Dataset, DatasetTable, DatasetStorageLocation from dataall.modules.datasets.services.dataset_permissions import DATASET_ALL diff --git a/tests/modules/datasets/tasks/conftest.py b/tests/modules/datasets/tasks/conftest.py index 8d8c24f2c..373cbc450 100644 --- a/tests/modules/datasets/tasks/conftest.py +++ b/tests/modules/datasets/tasks/conftest.py @@ -9,7 +9,7 @@ PrincipalType, ) from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject -from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket +from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket @pytest.fixture(scope='module') diff --git a/tests/modules/datasets/tasks/test_dataset_catalog_indexer.py b/tests/modules/datasets/tasks/test_dataset_catalog_indexer.py index 91e1aabfd..f58bdc2ad 100644 --- a/tests/modules/datasets/tasks/test_dataset_catalog_indexer.py +++ b/tests/modules/datasets/tasks/test_dataset_catalog_indexer.py @@ -1,7 +1,7 @@ import pytest from dataall.modules.catalog.tasks.catalog_indexer_task import index_objects -from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset +from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset @pytest.fixture(scope='module', autouse=True) diff --git a/tests/modules/datasets/tasks/test_dataset_subscriptions.py b/tests/modules/datasets/tasks/test_dataset_subscriptions.py index 7ab667e7b..16ce16dcd 100644 --- a/tests/modules/datasets/tasks/test_dataset_subscriptions.py +++ b/tests/modules/datasets/tasks/test_dataset_subscriptions.py @@ -11,7 +11,7 @@ PrincipalType, ) from 
dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject -from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset +from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset from dataall.modules.datasets.tasks.dataset_subscription_task import DatasetSubscriptionService from dataall.core.environment.api.enums import EnvironmentPermission diff --git a/tests/modules/datasets/tasks/test_dataset_tables_sync.py b/tests/modules/datasets/tasks/test_dataset_tables_sync.py index e6e76647c..49755daa1 100644 --- a/tests/modules/datasets/tasks/test_dataset_tables_sync.py +++ b/tests/modules/datasets/tasks/test_dataset_tables_sync.py @@ -1,7 +1,7 @@ from unittest.mock import MagicMock import pytest -from dataall.modules.datasets_base.db.dataset_models import DatasetTable +from dataall.modules.datasets.db.dataset_models import DatasetTable from dataall.modules.datasets.tasks.tables_syncer import sync_tables diff --git a/tests/modules/datasets/tasks/test_lf_share_manager.py b/tests/modules/datasets/tasks/test_lf_share_manager.py index 46aa422e5..e7434fb7a 100644 --- a/tests/modules/datasets/tasks/test_lf_share_manager.py +++ b/tests/modules/datasets/tasks/test_lf_share_manager.py @@ -16,7 +16,7 @@ from dataall.core.environment.db.environment_models import Environment, EnvironmentGroup from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareItemStatus from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem -from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset +from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService from dataall.modules.dataset_sharing.services.share_processors.lakeformation_process_share import ( ProcessLakeFormationShare, diff --git a/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py b/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py index e4c02c5e3..fbfe2020e 100644 --- a/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py +++ b/tests/modules/datasets/tasks/test_s3_access_point_share_manager.py @@ -12,7 +12,7 @@ from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem from dataall.modules.dataset_sharing.services.managed_share_policy_service import SharePolicyService from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager -from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, Dataset +from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset SOURCE_ENV_ACCOUNT = '111111111111' SOURCE_ENV_ROLE_NAME = 'dataall-ProducerEnvironment-i6v1v1c2' diff --git a/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py b/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py index a81aa9355..48a2561e3 100644 --- a/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py +++ b/tests/modules/datasets/tasks/test_s3_bucket_share_manager.py @@ -10,7 +10,7 @@ from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager from dataall.modules.dataset_sharing.services.managed_share_policy_service import SharePolicyService -from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetBucket +from 
dataall.modules.datasets.db.dataset_models import Dataset, DatasetBucket SOURCE_ENV_ACCOUNT = '111111111111' SOURCE_ENV_ROLE_NAME = 'dataall-ProducerEnvironment-i6v1v1c2' diff --git a/tests/modules/datasets/tasks/test_stacks_updater_with_datasets.py b/tests/modules/datasets/tasks/test_stacks_updater_with_datasets.py index 3b479af3e..68380aa04 100644 --- a/tests/modules/datasets/tasks/test_stacks_updater_with_datasets.py +++ b/tests/modules/datasets/tasks/test_stacks_updater_with_datasets.py @@ -1,5 +1,5 @@ import pytest -from dataall.modules.datasets_base.db.dataset_models import Dataset +from dataall.modules.datasets.db.dataset_models import Dataset from dataall.core.environment.tasks.env_stacks_updater import update_stacks diff --git a/tests/modules/datasets/test_dataset.py b/tests/modules/datasets/test_dataset.py index ba30a7e21..a96760dca 100644 --- a/tests/modules/datasets/test_dataset.py +++ b/tests/modules/datasets/test_dataset.py @@ -6,11 +6,11 @@ from dataall.base.config import config from dataall.core.environment.db.environment_models import Environment from dataall.core.organizations.db.organization_models import Organization -from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository -from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetLock +from dataall.modules.datasets.db.dataset_repositories import DatasetRepository +from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetLock from tests.core.stacks.test_stack import update_stack_query -from dataall.modules.datasets_base.services.datasets_base_enums import ConfidentialityClassification +from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification mocked_key_id = 'some_key' diff --git a/tests/modules/datasets/test_dataset_feed.py b/tests/modules/datasets/test_dataset_feed.py index a2b98cc51..5f9b85b4b 100644 --- a/tests/modules/datasets/test_dataset_feed.py +++ b/tests/modules/datasets/test_dataset_feed.py @@ -1,5 +1,5 @@ from dataall.modules.feed.api.registry import FeedRegistry -from dataall.modules.datasets_base.db.dataset_models import DatasetTable +from dataall.modules.datasets.db.dataset_models import DatasetTable def test_dataset_registered(): diff --git a/tests/modules/datasets/test_dataset_glossary.py b/tests/modules/datasets/test_dataset_glossary.py index ca35a19f5..aaac934b5 100644 --- a/tests/modules/datasets/test_dataset_glossary.py +++ b/tests/modules/datasets/test_dataset_glossary.py @@ -1,7 +1,7 @@ from typing import List from dataall.modules.catalog.db.glossary_models import TermLink -from dataall.modules.datasets_base.db.dataset_models import DatasetTableColumn +from dataall.modules.datasets.db.dataset_models import DatasetTableColumn from tests.modules.catalog.test_glossary import * diff --git a/tests/modules/datasets/test_dataset_location.py b/tests/modules/datasets/test_dataset_location.py index 20473670c..8fc3af77e 100644 --- a/tests/modules/datasets/test_dataset_location.py +++ b/tests/modules/datasets/test_dataset_location.py @@ -2,7 +2,7 @@ import pytest from dataall.base.config import config -from dataall.modules.datasets_base.db.dataset_models import Dataset +from dataall.modules.datasets.db.dataset_models import Dataset @pytest.fixture(scope='module') diff --git a/tests/modules/datasets/test_dataset_permissions.py b/tests/modules/datasets/test_dataset_permissions.py index 363310401..6b835c1ad 100644 --- 
a/tests/modules/datasets/test_dataset_permissions.py +++ b/tests/modules/datasets/test_dataset_permissions.py @@ -9,8 +9,8 @@ DATASET_READ, ) from dataall.modules.datasets.services.dataset_service import DatasetService -from dataall.modules.datasets_base.db.dataset_models import Dataset -from dataall.modules.datasets_base.services.permissions import DATASET_TABLE_READ +from dataall.modules.datasets.db.dataset_models import Dataset +from dataall.modules.datasets.services.dataset_permissions import DATASET_TABLE_READ from tests.core.permissions.test_permission import * from dataall.core.organizations.services.organization_service import OrganizationService diff --git a/tests/modules/datasets/test_dataset_profiling.py b/tests/modules/datasets/test_dataset_profiling.py index 9015bdae3..e0e5a6430 100644 --- a/tests/modules/datasets/test_dataset_profiling.py +++ b/tests/modules/datasets/test_dataset_profiling.py @@ -2,7 +2,7 @@ import pytest -from dataall.modules.datasets_base.db.dataset_models import DatasetProfilingRun, Dataset, DatasetTable +from dataall.modules.datasets.db.dataset_models import DatasetProfilingRun, Dataset, DatasetTable @pytest.fixture(scope='module', autouse=True) diff --git a/tests/modules/datasets/test_dataset_resource_found.py b/tests/modules/datasets/test_dataset_resource_found.py index 77a5e0758..7f6959423 100644 --- a/tests/modules/datasets/test_dataset_resource_found.py +++ b/tests/modules/datasets/test_dataset_resource_found.py @@ -1,4 +1,4 @@ -from dataall.modules.datasets_base.db.dataset_models import Dataset, DatasetLock +from dataall.modules.datasets.db.dataset_models import Dataset, DatasetLock from dataall.modules.datasets.services.dataset_permissions import CREATE_DATASET diff --git a/tests/modules/datasets/test_dataset_stack.py b/tests/modules/datasets/test_dataset_stack.py index 7043c9171..22edbd396 100644 --- a/tests/modules/datasets/test_dataset_stack.py +++ b/tests/modules/datasets/test_dataset_stack.py @@ -6,7 +6,7 @@ from dataall.core.environment.db.environment_models import Environment from dataall.modules.datasets.cdk.dataset_stack import DatasetStack -from dataall.modules.datasets_base.db.dataset_models import Dataset +from dataall.modules.datasets.db.dataset_models import Dataset @pytest.fixture(scope='module', autouse=True) diff --git a/tests/modules/datasets/test_dataset_table.py b/tests/modules/datasets/test_dataset_table.py index bd8bb422c..117160ff4 100644 --- a/tests/modules/datasets/test_dataset_table.py +++ b/tests/modules/datasets/test_dataset_table.py @@ -1,5 +1,5 @@ from dataall.modules.datasets.services.dataset_table_service import DatasetTableService -from dataall.modules.datasets_base.db.dataset_models import DatasetTableColumn, DatasetTable, Dataset +from dataall.modules.datasets.db.dataset_models import DatasetTableColumn, DatasetTable, Dataset def test_add_tables(table, dataset_fixture, db): diff --git a/tests/modules/datasets/test_environment_stack_with_dataset.py b/tests/modules/datasets/test_environment_stack_with_dataset.py index de29dc3f0..f96f808f3 100644 --- a/tests/modules/datasets/test_environment_stack_with_dataset.py +++ b/tests/modules/datasets/test_environment_stack_with_dataset.py @@ -4,7 +4,7 @@ from dataall.core.environment.cdk.environment_stack import EnvironmentSetup from dataall.core.environment.db.environment_models import EnvironmentGroup -from dataall.modules.datasets_base.db.dataset_models import Dataset +from dataall.modules.datasets.db.dataset_models import Dataset @pytest.fixture(scope='function', 
autouse=True) diff --git a/tests/modules/datasets/test_import_dataset_check_unit.py b/tests/modules/datasets/test_import_dataset_check_unit.py index 9abee5bc0..402bdbd60 100644 --- a/tests/modules/datasets/test_import_dataset_check_unit.py +++ b/tests/modules/datasets/test_import_dataset_check_unit.py @@ -5,7 +5,7 @@ from dataall.base.db.exceptions import RequiredParameter, InvalidInput, UnauthorizedOperation, AWSResourceNotFound from dataall.modules.datasets.services.dataset_service import DatasetService -from dataall.modules.datasets_base.db.dataset_models import Dataset +from dataall.modules.datasets.db.dataset_models import Dataset def test_s3_managed_bucket_import(mock_aws_client): diff --git a/tests/modules/datasets/test_share.py b/tests/modules/datasets/test_share.py index 6aca1c976..f315ad88f 100644 --- a/tests/modules/datasets/test_share.py +++ b/tests/modules/datasets/test_share.py @@ -22,7 +22,7 @@ ShareObjectSM, ) from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService -from dataall.modules.datasets_base.db.dataset_models import DatasetTable, Dataset +from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset @pytest.fixture(scope='function') From 5a98c5fd8a77bf2db55d46d2ffb22c11eaab35ca Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 15 Apr 2024 17:19:02 +0200 Subject: [PATCH 02/18] Update __init__ files dependencies for datasets and dataset_sharing --- .../modules/dataset_sharing/__init__.py | 7 +++--- backend/dataall/modules/datasets/__init__.py | 25 +------------------ 2 files changed, 5 insertions(+), 27 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/__init__.py b/backend/dataall/modules/dataset_sharing/__init__.py index 31c9d93c5..fb87cc4b5 100644 --- a/backend/dataall/modules/dataset_sharing/__init__.py +++ b/backend/dataall/modules/dataset_sharing/__init__.py @@ -3,7 +3,6 @@ from dataall.core.environment.services.environment_resource_manager import EnvironmentResourceManager from dataall.modules.dataset_sharing.db.share_object_repositories import ShareEnvironmentResource -from dataall.modules.datasets_base import DatasetBaseModuleInterface from dataall.base.loader import ModuleInterface, ImportMode @@ -18,8 +17,9 @@ def is_supported(modes: Set[ImportMode]) -> bool: @staticmethod def depends_on() -> List[Type['ModuleInterface']]: from dataall.modules.notifications import NotificationsModuleInterface + from dataall.modules.datasets import DatasetApiModuleInterface - return [DatasetBaseModuleInterface, NotificationsModuleInterface] + return [DatasetApiModuleInterface, NotificationsModuleInterface] def __init__(self): from dataall.modules.dataset_sharing import api @@ -39,8 +39,9 @@ def is_supported(modes: List[ImportMode]): @staticmethod def depends_on() -> List[Type['ModuleInterface']]: from dataall.modules.notifications import NotificationsModuleInterface + from dataall.modules.datasets import DatasetAsyncHandlersModuleInterface - return [DatasetBaseModuleInterface, NotificationsModuleInterface] + return [DatasetAsyncHandlersModuleInterface, NotificationsModuleInterface] def __init__(self): import dataall.modules.dataset_sharing.handlers diff --git a/backend/dataall/modules/datasets/__init__.py b/backend/dataall/modules/datasets/__init__.py index 2c17b981a..5f11885ff 100644 --- a/backend/dataall/modules/datasets/__init__.py +++ b/backend/dataall/modules/datasets/__init__.py @@ -17,15 +17,11 @@ def is_supported(modes): @staticmethod def depends_on() -> List[Type['ModuleInterface']]: - from 
dataall.modules.datasets_base import DatasetBaseModuleInterface - from dataall.modules.dataset_sharing import SharingApiModuleInterface from dataall.modules.catalog import CatalogApiModuleInterface from dataall.modules.feed import FeedApiModuleInterface from dataall.modules.vote import VoteApiModuleInterface return [ - SharingApiModuleInterface, - DatasetBaseModuleInterface, CatalogApiModuleInterface, FeedApiModuleInterface, VoteApiModuleInterface, @@ -94,12 +90,6 @@ def __init__(self): log.info('Dataset handlers have been imported') - @staticmethod - def depends_on() -> List[Type['ModuleInterface']]: - from dataall.modules.datasets_base import DatasetBaseModuleInterface - from dataall.modules.dataset_sharing import SharingAsyncHandlersModuleInterface - - return [SharingAsyncHandlersModuleInterface, DatasetBaseModuleInterface] class DatasetCdkModuleInterface(ModuleInterface): @@ -109,12 +99,6 @@ class DatasetCdkModuleInterface(ModuleInterface): def is_supported(modes: Set[ImportMode]): return ImportMode.CDK in modes - @staticmethod - def depends_on() -> List[Type['ModuleInterface']]: - from dataall.modules.datasets_base import DatasetBaseModuleInterface - from dataall.modules.dataset_sharing import DataSharingCdkModuleInterface - - return [DatasetBaseModuleInterface, DataSharingCdkModuleInterface] def __init__(self): import dataall.modules.datasets.cdk @@ -133,12 +117,6 @@ class DatasetStackUpdaterModuleInterface(ModuleInterface): def is_supported(modes: Set[ImportMode]) -> bool: return ImportMode.STACK_UPDATER_TASK in modes - @staticmethod - def depends_on() -> List[Type['ModuleInterface']]: - from dataall.modules.datasets_base import DatasetBaseModuleInterface - - return [DatasetBaseModuleInterface] - def __init__(self): from dataall.modules.datasets.tasks.dataset_stack_finder import DatasetStackFinder @@ -153,10 +131,9 @@ def is_supported(modes: Set[ImportMode]) -> bool: @staticmethod def depends_on() -> List[Type['ModuleInterface']]: - from dataall.modules.datasets_base import DatasetBaseModuleInterface from dataall.modules.catalog import CatalogIndexerModuleInterface - return [DatasetBaseModuleInterface, CatalogIndexerModuleInterface] + return [CatalogIndexerModuleInterface] def __init__(self): from dataall.modules.datasets.indexers.dataset_catalog_indexer import DatasetCatalogIndexer From 3604615467b69006b27078d31f44452fe3bce450 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 15 Apr 2024 17:24:45 +0200 Subject: [PATCH 03/18] Delete datasets_base --- backend/dataall/modules/datasets_base/__init__.py | 15 --------------- .../modules/datasets_base/services/__init__.py | 0 .../modules/datasets_base/services/permissions.py | 15 --------------- 3 files changed, 30 deletions(-) delete mode 100644 backend/dataall/modules/datasets_base/__init__.py delete mode 100644 backend/dataall/modules/datasets_base/services/__init__.py delete mode 100644 backend/dataall/modules/datasets_base/services/permissions.py diff --git a/backend/dataall/modules/datasets_base/__init__.py b/backend/dataall/modules/datasets_base/__init__.py deleted file mode 100644 index de7543a1b..000000000 --- a/backend/dataall/modules/datasets_base/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -from typing import Set -from dataall.base.loader import ModuleInterface, ImportMode - - -class DatasetBaseModuleInterface(ModuleInterface): - @staticmethod - def is_supported(modes: Set[ImportMode]) -> bool: - supported_modes = { - ImportMode.API, - ImportMode.CDK, - ImportMode.HANDLERS, - ImportMode.STACK_UPDATER_TASK, - 
ImportMode.CATALOG_INDEXER_TASK, - } - return modes & supported_modes diff --git a/backend/dataall/modules/datasets_base/services/__init__.py b/backend/dataall/modules/datasets_base/services/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/backend/dataall/modules/datasets_base/services/permissions.py b/backend/dataall/modules/datasets_base/services/permissions.py deleted file mode 100644 index d0e7daf3b..000000000 --- a/backend/dataall/modules/datasets_base/services/permissions.py +++ /dev/null @@ -1,15 +0,0 @@ -""" -DATASET TABLE PERMISSIONS -""" - -GET_DATASET_TABLE = 'GET_DATASET_TABLE' -PREVIEW_DATASET_TABLE = 'PREVIEW_DATASET_TABLE' - -DATASET_TABLE_READ = [GET_DATASET_TABLE, PREVIEW_DATASET_TABLE] - -""" -DATASET FOLDER PERMISSIONS -""" -GET_DATASET_FOLDER = 'GET_DATASET_FOLDER' - -DATASET_FOLDER_READ = [GET_DATASET_FOLDER] From 32a0597bde638ddeb47687f3591398a71ad0d2bb Mon Sep 17 00:00:00 2001 From: dlpzx Date: Mon, 15 Apr 2024 17:25:20 +0200 Subject: [PATCH 04/18] Split datasetAlarm class into datasets and dataset_sharing --- ...ce.py => dataset_sharing_alarm_service.py} | 20 +------------ .../share_managers/lf_share_manager.py | 6 ++-- .../s3_access_point_share_manager.py | 6 ++-- .../share_managers/s3_bucket_share_manager.py | 6 ++-- .../services/dataset_alarm_service.py | 29 +++++++++++++++++++ .../modules/datasets/tasks/tables_syncer.py | 2 +- .../datasets/tasks/test_lf_share_manager.py | 2 +- 7 files changed, 41 insertions(+), 30 deletions(-) rename backend/dataall/modules/dataset_sharing/services/{dataset_alarm_service.py => dataset_sharing_alarm_service.py} (87%) create mode 100644 backend/dataall/modules/datasets/services/dataset_alarm_service.py diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_alarm_service.py similarity index 87% rename from backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py rename to backend/dataall/modules/dataset_sharing/services/dataset_sharing_alarm_service.py index 8d537f48c..b9a0fd858 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_alarm_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_alarm_service.py @@ -9,7 +9,7 @@ log = logging.getLogger(__name__) -class DatasetAlarmService(AlarmService): +class DatasetSharingAlarmService(AlarmService): """Contains set of alarms for datasets""" def trigger_table_sharing_failure_alarm( @@ -72,24 +72,6 @@ def trigger_revoke_table_sharing_failure_alarm( """ return self.publish_message_to_alarms_topic(subject, message) - def trigger_dataset_sync_failure_alarm(self, dataset: Dataset, error: str): - log.info(f'Triggering dataset {dataset.name} tables sync failure alarm...') - subject = f'Data.all Dataset Tables Sync Failure for {dataset.name}'[:100] - message = f""" -You are receiving this email because your Data.all {self.envname} environment in the {self.region} region has entered the ALARM state, because it failed to synchronize Dataset {dataset.name} tables from AWS Glue to the Search Catalog. 
- -Alarm Details: - - State Change: OK -> ALARM - - Reason for State Change: {error} - - Timestamp: {datetime.now()} - Dataset - - Dataset URI: {dataset.datasetUri} - - AWS Account: {dataset.AwsAccountId} - - Region: {dataset.region} - - Glue Database: {dataset.GlueDatabaseName} - """ - return self.publish_message_to_alarms_topic(subject, message) - def trigger_folder_sharing_failure_alarm( self, folder: DatasetStorageLocation, diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py index 496f619ed..b4f4a54ad 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py @@ -19,7 +19,7 @@ ShareItemHealthStatus, ) from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset -from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService +from dataall.modules.dataset_sharing.services.dataset_sharing_alarm_service import DatasetSharingAlarmService from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareErrorFormatter @@ -584,7 +584,7 @@ def handle_share_failure( f'due to: {error}' ) - DatasetAlarmService().trigger_table_sharing_failure_alarm(table, self.share, self.target_environment) + DatasetSharingAlarmService().trigger_table_sharing_failure_alarm(table, self.share, self.target_environment) return True def handle_revoke_failure( @@ -604,7 +604,7 @@ def handle_revoke_failure( f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' f'due to: {error}' ) - DatasetAlarmService().trigger_revoke_table_sharing_failure_alarm(table, self.share, self.target_environment) + DatasetSharingAlarmService().trigger_revoke_table_sharing_failure_alarm(table, self.share, self.target_environment) return True def handle_share_failure_for_all_tables(self, tables, error, share_item_status, reapply=False): diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py index 4a948b8d9..2274a4aa3 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_access_point_share_manager.py @@ -21,7 +21,7 @@ ) from dataall.base.aws.iam import IAM from dataall.modules.dataset_sharing.db.share_object_models import ShareObject -from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService +from dataall.modules.dataset_sharing.services.dataset_sharing_alarm_service import DatasetSharingAlarmService from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository from dataall.modules.dataset_sharing.services.share_exceptions import PrincipalRoleNotFound from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareErrorFormatter @@ -736,7 +736,7 @@ def handle_share_failure(self, error: Exception) -> None: f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' f'due to: {error}' ) - DatasetAlarmService().trigger_folder_sharing_failure_alarm( + 
DatasetSharingAlarmService().trigger_folder_sharing_failure_alarm( self.target_folder, self.share, self.target_environment ) @@ -753,7 +753,7 @@ def handle_revoke_failure(self, error: Exception) -> bool: f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' f'due to: {error}' ) - DatasetAlarmService().trigger_revoke_folder_sharing_failure_alarm( + DatasetSharingAlarmService().trigger_revoke_folder_sharing_failure_alarm( self.target_folder, self.share, self.target_environment ) return True diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py index e8326b71b..7d41cf6b3 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py +++ b/backend/dataall/modules/dataset_sharing/services/share_managers/s3_bucket_share_manager.py @@ -16,7 +16,7 @@ from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.services.share_exceptions import PrincipalRoleNotFound from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareErrorFormatter -from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService +from dataall.modules.dataset_sharing.services.dataset_sharing_alarm_service import DatasetSharingAlarmService from dataall.modules.dataset_sharing.services.managed_share_policy_service import ( SharePolicyService, IAM_S3_BUCKETS_STATEMENT_SID, @@ -591,7 +591,7 @@ def handle_share_failure(self, error: Exception) -> bool: f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' f'due to: {error}' ) - DatasetAlarmService().trigger_s3_bucket_sharing_failure_alarm( + DatasetSharingAlarmService().trigger_s3_bucket_sharing_failure_alarm( self.target_bucket, self.share, self.target_environment ) return True @@ -609,7 +609,7 @@ def handle_revoke_failure(self, error: Exception) -> bool: f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} ' f'due to: {error}' ) - DatasetAlarmService().trigger_revoke_s3_bucket_sharing_failure_alarm( + DatasetSharingAlarmService().trigger_revoke_s3_bucket_sharing_failure_alarm( self.target_bucket, self.share, self.target_environment ) return True diff --git a/backend/dataall/modules/datasets/services/dataset_alarm_service.py b/backend/dataall/modules/datasets/services/dataset_alarm_service.py new file mode 100644 index 000000000..758c5288f --- /dev/null +++ b/backend/dataall/modules/datasets/services/dataset_alarm_service.py @@ -0,0 +1,29 @@ +import logging +from datetime import datetime + +from dataall.modules.datasets.db.dataset_models import Dataset +from dataall.base.utils.alarm_service import AlarmService + +log = logging.getLogger(__name__) + + +class DatasetAlarmService(AlarmService): + """Contains set of alarms for datasets""" + + def trigger_dataset_sync_failure_alarm(self, dataset: Dataset, error: str): + log.info(f'Triggering dataset {dataset.name} tables sync failure alarm...') + subject = f'Data.all Dataset Tables Sync Failure for {dataset.name}'[:100] + message = f""" +You are receiving this email because your Data.all {self.envname} environment in the {self.region} region has entered the ALARM state, because it failed to synchronize Dataset {dataset.name} tables from AWS Glue to the Search Catalog. 
+ +Alarm Details: + - State Change: OK -> ALARM + - Reason for State Change: {error} + - Timestamp: {datetime.now()} + Dataset + - Dataset URI: {dataset.datasetUri} + - AWS Account: {dataset.AwsAccountId} + - Region: {dataset.region} + - Glue Database: {dataset.GlueDatabaseName} + """ + return self.publish_message_to_alarms_topic(subject, message) diff --git a/backend/dataall/modules/datasets/tasks/tables_syncer.py b/backend/dataall/modules/datasets/tasks/tables_syncer.py index 7c27f3143..22c9b5df5 100644 --- a/backend/dataall/modules/datasets/tasks/tables_syncer.py +++ b/backend/dataall/modules/datasets/tasks/tables_syncer.py @@ -13,7 +13,7 @@ from dataall.modules.datasets.db.dataset_repositories import DatasetRepository from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset from dataall.modules.datasets.indexers.table_indexer import DatasetTableIndexer -from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService +from dataall.modules.datasets.services.dataset_alarm_service import DatasetAlarmService root = logging.getLogger() root.setLevel(logging.INFO) diff --git a/tests/modules/datasets/tasks/test_lf_share_manager.py b/tests/modules/datasets/tasks/test_lf_share_manager.py index e7434fb7a..6602dce55 100644 --- a/tests/modules/datasets/tasks/test_lf_share_manager.py +++ b/tests/modules/datasets/tasks/test_lf_share_manager.py @@ -17,7 +17,7 @@ from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareItemStatus from dataall.modules.dataset_sharing.db.share_object_models import ShareObject, ShareObjectItem from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset -from dataall.modules.dataset_sharing.services.dataset_alarm_service import DatasetAlarmService +from dataall.modules.dataset_sharing.services.dataset_sharing_alarm_service import DatasetAlarmService from dataall.modules.dataset_sharing.services.share_processors.lakeformation_process_share import ( ProcessLakeFormationShare, ) From f7ebb65f497db6e792a72827ee330475677e964b Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 09:11:46 +0200 Subject: [PATCH 05/18] Invert the dependency in delete check and clean-up: sharing depends on dataset --- .../services/environment_resource_manager.py | 1 + .../modules/dataset_sharing/__init__.py | 3 ++ .../db/share_object_repositories.py | 44 ++++++++++++++++++- .../services/share_object_service.py | 1 + .../datasets/db/dataset_repositories.py | 31 ++++++++++++- .../services/dataset_location_service.py | 13 ++---- .../datasets/services/dataset_service.py | 12 +---- .../services/dataset_table_service.py | 14 +----- 8 files changed, 84 insertions(+), 35 deletions(-) diff --git a/backend/dataall/core/environment/services/environment_resource_manager.py b/backend/dataall/core/environment/services/environment_resource_manager.py index 125e113f0..7060a7341 100644 --- a/backend/dataall/core/environment/services/environment_resource_manager.py +++ b/backend/dataall/core/environment/services/environment_resource_manager.py @@ -24,6 +24,7 @@ def count_role_resources(session, role_uri): return 0 + class EnvironmentResourceManager: """ API for managing environment and environment group lifecycle. 
diff --git a/backend/dataall/modules/dataset_sharing/__init__.py b/backend/dataall/modules/dataset_sharing/__init__.py index fb87cc4b5..4aa401c40 100644 --- a/backend/dataall/modules/dataset_sharing/__init__.py +++ b/backend/dataall/modules/dataset_sharing/__init__.py @@ -24,8 +24,11 @@ def depends_on() -> List[Type['ModuleInterface']]: def __init__(self): from dataall.modules.dataset_sharing import api from dataall.modules.dataset_sharing.services.managed_share_policy_service import SharePolicyService + from dataall.modules.datasets.db.dataset_repositories import DatasetRepository + from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectDatasetExtensionRepository EnvironmentResourceManager.register(ShareEnvironmentResource()) + DatasetRepository.register(ShareObjectDatasetExtensionRepository()) log.info('API of dataset sharing has been imported') diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py index 18c66b87c..2fc972c77 100644 --- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py +++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py @@ -20,8 +20,9 @@ PrincipalType, ) from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject -from dataall.modules.datasets.db.dataset_repositories import DatasetRepository +from dataall.modules.datasets.db.dataset_repositories import DatasetRepository, DatasetRepositoryInterface from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket +from dataall.modules.datasets.services.dataset_permissions import DELETE_DATASET, DELETE_DATASET_TABLE, DELETE_DATASET_FOLDER logger = logging.getLogger(__name__) @@ -325,8 +326,47 @@ def count_role_resources(session, role_uri): def delete_env(session, environment): ShareObjectRepository.delete_all_share_items(session, environment.environmentUri) +class ShareObjectDatasetExtensionRepository(DatasetRepositoryInterface): + + @staticmethod + def check_before_delete(session, uri, **kwargs): + """Implemented as part of the DatasetRepositoryInterface""" + action = kwargs.get('action') + if action in [DELETE_DATASET_FOLDER, DELETE_DATASET_TABLE]: + has_share = ShareObjectRepository.has_shared_items(session, uri) + if has_share: + raise exceptions.ResourceShared( + action=action, + message='Revoke all shares for this item before deletion', + ) + elif action in [DELETE_DATASET]: + shares = ShareObjectRepository.list_dataset_shares_with_existing_shared_items( + session=session, dataset_uri=uri + ) + if shares: + raise exceptions.ResourceShared( + action=DELETE_DATASET, + message='Revoke all dataset shares before deletion.', + ) + else: + raise exceptions.RequiredParameter('Delete action') + return True + + @staticmethod + def execute_on_delete(self, session, uri, **kwargs): + """Implemented as part of the DatasetRepositoryInterface""" + action = kwargs.get('action') + if action in [DELETE_DATASET_FOLDER, DELETE_DATASET_TABLE]: + ShareObjectRepository.delete_shares(session, uri) + elif action in [DELETE_DATASET]: + ShareObjectRepository.delete_shares_with_no_shared_items(session, uri) + else: + raise exceptions.RequiredParameter('Delete action') + return True + + +class ShareObjectRepository(DatasetRepositoryInterface): -class ShareObjectRepository: @staticmethod def save_and_commit(session, share): session.add(share) diff --git 
a/backend/dataall/modules/dataset_sharing/services/share_object_service.py b/backend/dataall/modules/dataset_sharing/services/share_object_service.py index 683ea2be9..c2548f6bd 100644 --- a/backend/dataall/modules/dataset_sharing/services/share_object_service.py +++ b/backend/dataall/modules/dataset_sharing/services/share_object_service.py @@ -52,6 +52,7 @@ class ShareObjectService: + @staticmethod def verify_principal_role(session, share: ShareObject) -> bool: role_name = share.principalIAMRoleName diff --git a/backend/dataall/modules/datasets/db/dataset_repositories.py b/backend/dataall/modules/datasets/db/dataset_repositories.py index b4a28064b..2482f6578 100644 --- a/backend/dataall/modules/datasets/db/dataset_repositories.py +++ b/backend/dataall/modules/datasets/db/dataset_repositories.py @@ -1,5 +1,6 @@ import logging - +from typing import List +from abc import ABC, abstractmethod from sqlalchemy import and_, or_ from sqlalchemy.orm import Query from dataall.core.activity.db.activity_models import Activity @@ -17,10 +18,38 @@ logger = logging.getLogger(__name__) +class DatasetRepositoryInterface(ABC): + @staticmethod + def check_before_delete(self, session, uri, **kwargs): + """Abstract method to be implemented by dependent modules that want to add checks before deletion for dataset objects""" + return True + + @staticmethod + def execute_on_delete(self, session, uri, **kwargs): + """Abstract method to be implemented by dependent modules that want to add clean-up actions when a dataset object is deleted""" + return True + class DatasetRepository(EnvironmentResource): """DAO layer for Datasets""" + _interfaces: List[DatasetRepositoryInterface] = [] + + @classmethod + def register(cls, interface: DatasetRepositoryInterface): + cls._interfaces.append(interface) + + @classmethod + def check_before_delete(cls, session, uri, action) -> bool: + can_be_deleted = [interface.check_before_delete(session, uri, action) for interface in cls._interfaces] + return False not in set(can_be_deleted) + + @classmethod + def execute_on_delete(cls, session, uri) -> bool: + for interface in cls._interfaces: + interface.execute_on_delete(session, uri) + return True + @classmethod def build_dataset(cls, username: str, env: Environment, data: dict) -> Dataset: """Builds a datasets based on the request data, but doesn't save it in the database""" diff --git a/backend/dataall/modules/datasets/services/dataset_location_service.py b/backend/dataall/modules/datasets/services/dataset_location_service.py index c07dcfae0..50f078380 100644 --- a/backend/dataall/modules/datasets/services/dataset_location_service.py +++ b/backend/dataall/modules/datasets/services/dataset_location_service.py @@ -2,8 +2,7 @@ from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository -from dataall.base.db.exceptions import ResourceShared, ResourceAlreadyExists -from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository +from dataall.base.db.exceptions import ResourceAlreadyExists from dataall.modules.datasets.aws.s3_location_client import S3LocationClient from dataall.modules.datasets.db.dataset_location_repositories import DatasetLocationRepository from dataall.modules.datasets.indexers.location_indexer import DatasetLocationIndexer @@ -87,14 +86,8 @@ def remove_storage_location(uri: str = None): with 
get_context().db_engine.scoped_session() as session: location = DatasetLocationRepository.get_location_by_uri(session, uri) dataset = DatasetRepository.get_dataset_by_uri(session, location.datasetUri) - has_shares = ShareObjectRepository.has_shared_items(session, location.locationUri) - if has_shares: - raise ResourceShared( - action=DELETE_DATASET_FOLDER, - message='Revoke all folder shares before deletion', - ) - - ShareObjectRepository.delete_shares(session, location.locationUri) + DatasetRepository.check_before_delete(session, location.locationUri, action=DELETE_DATASET_FOLDER) + DatasetRepository.execute_on_delete(session, location.locationUri, action=DELETE_DATASET_FOLDER) DatasetLocationService._delete_dataset_folder_read_permission(session, dataset, location.locationUri) DatasetLocationRepository.delete(session, location) GlossaryRepository.delete_glossary_terms_links( diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 20982db08..92f51caea 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -377,15 +377,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False): with context.db_engine.scoped_session() as session: dataset: Dataset = DatasetRepository.get_dataset_by_uri(session, uri) env = EnvironmentService.get_environment_by_uri(session, dataset.environmentUri) - shares = ShareObjectRepository.list_dataset_shares_with_existing_shared_items( - session=session, dataset_uri=uri - ) - if shares: - raise exceptions.UnauthorizedOperation( - action=DELETE_DATASET, - message=f'Dataset {dataset.name} is shared with other teams. ' - 'Revoke all dataset shares before deletion.', - ) + DatasetRepository.check_before_delete(session, uri, action=DELETE_DATASET) tables = [t.tableUri for t in DatasetRepository.get_dataset_tables(session, uri)] for tableUri in tables: @@ -397,7 +389,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False): DatasetIndexer.delete_doc(doc_id=uri) - ShareObjectRepository.delete_shares_with_no_shared_items(session, uri) + DatasetRepository.execute_on_delete(session, uri, action=DELETE_DATASET) DatasetService.delete_dataset_term_links(session, uri) DatasetTableRepository.delete_dataset_tables(session, dataset.datasetUri) DatasetLocationRepository.delete_dataset_locations(session, dataset.datasetUri) diff --git a/backend/dataall/modules/datasets/services/dataset_table_service.py b/backend/dataall/modules/datasets/services/dataset_table_service.py index 2772cad99..1f182b184 100644 --- a/backend/dataall/modules/datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/datasets/services/dataset_table_service.py @@ -5,8 +5,6 @@ from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.core.environment.services.environment_service import EnvironmentService -from dataall.base.db.exceptions import ResourceShared -from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository from dataall.modules.datasets.aws.athena_table_client import AthenaTableClient from dataall.modules.datasets.aws.glue_dataset_client import DatasetCrawler from dataall.modules.datasets.db.dataset_table_repositories import DatasetTableRepository @@ -67,16 +65,8 @@ def update_table(uri: str, table_data: dict = None): def delete_table(uri: str): with 
get_context().db_engine.scoped_session() as session: table = DatasetTableRepository.get_dataset_table_by_uri(session, uri) - has_share = ShareObjectRepository.has_shared_items(session, table.tableUri) - if has_share: - raise ResourceShared( - action=DELETE_DATASET_TABLE, - message='Revoke all table shares before deletion', - ) - - ShareObjectRepository.delete_shares(session, table.tableUri) - DatasetTableRepository.delete(session, table) - + DatasetRepository.check_before_delete(session, table.tableUri, action=DELETE_DATASET_TABLE) + DatasetRepository.execute_on_delete(session, table.tableUri, action=DELETE_DATASET_TABLE) GlossaryRepository.delete_glossary_terms_links( session, target_uri=table.tableUri, target_type='DatasetTable' ) From d55b865cc02e3ffb918ad0d2b240bb13bda9a5b8 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 09:23:39 +0200 Subject: [PATCH 06/18] Move verify_dataset_shares api to dataset_sharing --- .../modules/dataset_sharing/api/mutations.py | 8 +++++++ .../modules/dataset_sharing/api/resolvers.py | 15 +++++++++++++ .../services/share_object_service.py | 21 +++++++++++++++++++ .../modules/datasets/api/dataset/mutations.py | 8 ------- .../modules/datasets/api/dataset/resolvers.py | 16 -------------- .../datasets/services/dataset_service.py | 18 +--------------- 6 files changed, 45 insertions(+), 41 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/api/mutations.py b/backend/dataall/modules/dataset_sharing/api/mutations.py index 9a41dddd2..c878234a0 100644 --- a/backend/dataall/modules/dataset_sharing/api/mutations.py +++ b/backend/dataall/modules/dataset_sharing/api/mutations.py @@ -12,6 +12,7 @@ update_share_reject_purpose, update_share_request_purpose, verify_items_share_object, + verify_dataset_share_objects, ) createShareObject = gql.MutationField( @@ -117,3 +118,10 @@ type=gql.Boolean, resolver=update_share_request_purpose, ) + +verifyDatasetShareObjects = gql.MutationField( + name='verifyDatasetShareObjects', + args=[gql.Argument(name='input', type=gql.Ref('ShareObjectSelectorInput'))], + type=gql.Boolean, + resolver=verify_dataset_share_objects, +) diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py index b073d3dff..9229580dd 100644 --- a/backend/dataall/modules/dataset_sharing/api/resolvers.py +++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py @@ -37,6 +37,15 @@ def validate_item_selector_input(data): if not data.get('itemUris'): raise RequiredParameter('itemUris') + @staticmethod + def validate_dataset_share_selector_input(data): + if not data: + raise RequiredParameter(data) + if not data.get('datasetUri'): + raise RequiredParameter('datasetUri') + if not data.get('shareUris'): + raise RequiredParameter('shareUris') + def create_share_object( context: Context, @@ -303,3 +312,9 @@ def update_share_reject_purpose(context: Context, source, shareUri: str = None, uri=shareUri, reject_purpose=rejectPurpose, ) + +def verify_dataset_share_objects(context: Context, source, input): + RequestValidator.validate_dataset_share_selector_input(input) + dataset_uri = input.get('datasetUri') + verify_share_uris = input.get('shareUris') + return ShareObjectService.verify_dataset_share_objects(uri=dataset_uri, share_uris=verify_share_uris) diff --git a/backend/dataall/modules/dataset_sharing/services/share_object_service.py b/backend/dataall/modules/dataset_sharing/services/share_object_service.py index c2548f6bd..d5c672b69 100644 --- 
a/backend/dataall/modules/dataset_sharing/services/share_object_service.py +++ b/backend/dataall/modules/dataset_sharing/services/share_object_service.py @@ -2,6 +2,7 @@ from warnings import warn from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.tasks.service_handlers import Worker from dataall.base.context import get_context from dataall.core.activity.db.activity_models import Activity @@ -40,6 +41,10 @@ DELETE_SHARE_OBJECT, GET_SHARE_OBJECT, ) +from dataall.modules.datasets.services.dataset_permissions import ( + MANAGE_DATASETS, + UPDATE_DATASET, +) from dataall.modules.dataset_sharing.aws.glue_client import GlueClient from dataall.modules.datasets.db.dataset_repositories import DatasetRepository from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation @@ -564,3 +569,19 @@ def _attach_dataset_folder_read_permission(session, share): resource_uri=location.itemUri, resource_type=DatasetStorageLocation.__name__, ) + + @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) + @ResourcePolicyService.has_resource_permission(UPDATE_DATASET) + def verify_dataset_share_objects(uri: str, share_uris: list): + #TODO: when we abstract dataset_sharing_base from s3_dataset_sharing this function won't pollute the ShareObject service + with get_context().db_engine.scoped_session() as session: + for share_uri in share_uris: + share = ShareObjectRepository.get_share_by_uri(session, share_uri) + states = ShareItemSM.get_share_item_revokable_states() + items = ShareObjectRepository.list_shareable_items( + session, share, states, {'pageSize': 1000, 'isShared': True} + ) + item_uris = [item.shareItemUri for item in items.get('nodes', [])] + ShareItemService.verify_items_share_object(uri=share_uri, item_uris=item_uris) + return True diff --git a/backend/dataall/modules/datasets/api/dataset/mutations.py b/backend/dataall/modules/datasets/api/dataset/mutations.py index 063125e4e..ed8f85b0a 100644 --- a/backend/dataall/modules/datasets/api/dataset/mutations.py +++ b/backend/dataall/modules/datasets/api/dataset/mutations.py @@ -11,7 +11,6 @@ delete_dataset, import_dataset, start_crawler, - verify_dataset_share_objects, ) createDataset = gql.MutationField( @@ -69,10 +68,3 @@ resolver=start_crawler, type=gql.Ref('GlueCrawler'), ) - -verifyDatasetShareObjects = gql.MutationField( - name='verifyDatasetShareObjects', - args=[gql.Argument(name='input', type=gql.Ref('ShareObjectSelectorInput'))], - type=gql.Boolean, - resolver=verify_dataset_share_objects, -) diff --git a/backend/dataall/modules/datasets/api/dataset/resolvers.py b/backend/dataall/modules/datasets/api/dataset/resolvers.py index 8f303b6e6..98a1f9fc2 100644 --- a/backend/dataall/modules/datasets/api/dataset/resolvers.py +++ b/backend/dataall/modules/datasets/api/dataset/resolvers.py @@ -178,13 +178,6 @@ def list_datasets_owned_by_env_group( return DatasetService.list_datasets_owned_by_env_group(environmentUri, groupUri, filter) -def verify_dataset_share_objects(context: Context, source, input): - RequestValidator.validate_dataset_share_selector_input(input) - dataset_uri = input.get('datasetUri') - verify_share_uris = input.get('shareUris') - return DatasetService.verify_dataset_share_objects(uri=dataset_uri, share_uris=verify_share_uris) - - class RequestValidator: @staticmethod def validate_creation_request(data): @@ -205,12 +198,3 @@ def 
validate_import_request(data): RequestValidator.validate_creation_request(data) if not data.get('bucketName'): raise RequiredParameter('bucketName') - - @staticmethod - def validate_dataset_share_selector_input(data): - if not data: - raise RequiredParameter(data) - if not data.get('datasetUri'): - raise RequiredParameter('datasetUri') - if not data.get('shareUris'): - raise RequiredParameter('shareUris') diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 92f51caea..e83926e10 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -22,8 +22,7 @@ from dataall.modules.datasets.db.dataset_bucket_repositories import DatasetBucketRepository from dataall.modules.vote.db.vote_repositories import VoteRepository from dataall.modules.dataset_sharing.db.share_object_models import ShareObject -from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository, ShareItemSM -from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService +from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_APPROVER from dataall.modules.datasets.aws.glue_dataset_client import DatasetCrawler from dataall.modules.datasets.aws.s3_dataset_client import S3DatasetClient @@ -551,18 +550,3 @@ def delete_dataset_term_links(session, dataset_uri): for table_uri in tables: GlossaryRepository.delete_glossary_terms_links(session, table_uri, 'DatasetTable') GlossaryRepository.delete_glossary_terms_links(session, dataset_uri, 'Dataset') - - @staticmethod - @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) - @ResourcePolicyService.has_resource_permission(UPDATE_DATASET) - def verify_dataset_share_objects(uri: str, share_uris: list): - with get_context().db_engine.scoped_session() as session: - for share_uri in share_uris: - share = ShareObjectRepository.get_share_by_uri(session, share_uri) - states = ShareItemSM.get_share_item_revokable_states() - items = ShareObjectRepository.list_shareable_items( - session, share, states, {'pageSize': 1000, 'isShared': True} - ) - item_uris = [item.shareItemUri for item in items.get('nodes', [])] - ShareItemService.verify_items_share_object(uri=share_uri, item_uris=item_uris) - return True From 06a38201b6c841a5a0bf2a028eadeb71ade97bb8 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 09:33:08 +0200 Subject: [PATCH 07/18] Separate KMS client for datasets --- .../modules/dataset_sharing/aws/kms_client.py | 19 ------- .../datasets/aws/kms_dataset_client.py | 54 +++++++++++++++++++ .../datasets/services/dataset_service.py | 2 +- 3 files changed, 55 insertions(+), 20 deletions(-) create mode 100644 backend/dataall/modules/datasets/aws/kms_dataset_client.py diff --git a/backend/dataall/modules/dataset_sharing/aws/kms_client.py b/backend/dataall/modules/dataset_sharing/aws/kms_client.py index 61e44927f..890f21d39 100644 --- a/backend/dataall/modules/dataset_sharing/aws/kms_client.py +++ b/backend/dataall/modules/dataset_sharing/aws/kms_client.py @@ -80,22 +80,3 @@ def get_key_id(self, key_alias: str): return None else: return response['KeyMetadata']['KeyId'] - - def check_key_exists(self, key_alias: str): - try: - key_exist = False - paginator = self._client.get_paginator('list_aliases') - for page in paginator.paginate(): - key_aliases 
= [alias['AliasName'] for alias in page['Aliases']] - if key_alias in key_aliases: - key_exist = True - break - except ClientError as e: - if e.response['Error']['Code'] == 'AccessDenied': - raise Exception( - f'Data.all Environment Pivot Role does not have kms:ListAliases Permission in account {self._account_id}: {e}' - ) - log.error(f'Failed to list KMS key aliases in account {self._account_id}: {e}') - return None - else: - return key_exist diff --git a/backend/dataall/modules/datasets/aws/kms_dataset_client.py b/backend/dataall/modules/datasets/aws/kms_dataset_client.py new file mode 100644 index 000000000..0ff5e0cda --- /dev/null +++ b/backend/dataall/modules/datasets/aws/kms_dataset_client.py @@ -0,0 +1,54 @@ +import logging + +from dataall.base.aws.sts import SessionHelper +from botocore.exceptions import ClientError +from dataall.modules.dataset_sharing.aws.share_policy_verifier import SharePolicyVerifier + + +log = logging.getLogger(__name__) + + +class KmsClient: + _DEFAULT_POLICY_NAME = 'default' + + def __init__(self, account_id: str, region: str): + session = SessionHelper.remote_session(accountid=account_id, region=region) + self._client = session.client('kms', region_name=region) + self._account_id = account_id + self.region = region + + + def get_key_id(self, key_alias: str): + # The same client function is defined in the data_sharing module. Duplication is allowed to avoid coupling. + try: + response = self._client.describe_key( + KeyId=key_alias, + ) + except ClientError as e: + if e.response['Error']['Code'] == 'AccessDenied': + raise Exception( + f'Data.all Environment Pivot Role does not have kms:DescribeKey Permission for key {key_alias}: {e}' + ) + log.error(f'Failed to get kms key id of {key_alias}: {e}') + return None + else: + return response['KeyMetadata']['KeyId'] + + def check_key_exists(self, key_alias: str): + try: + key_exist = False + paginator = self._client.get_paginator('list_aliases') + for page in paginator.paginate(): + key_aliases = [alias['AliasName'] for alias in page['Aliases']] + if key_alias in key_aliases: + key_exist = True + break + except ClientError as e: + if e.response['Error']['Code'] == 'AccessDenied': + raise Exception( + f'Data.all Environment Pivot Role does not have kms:ListAliases Permission in account {self._account_id}: {e}' + ) + log.error(f'Failed to list KMS key aliases in account {self._account_id}: {e}') + return None + else: + return key_exist diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index e83926e10..606818440 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -9,7 +9,7 @@ from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.core.tasks.service_handlers import Worker from dataall.base.aws.sts import SessionHelper -from dataall.modules.dataset_sharing.aws.kms_client import KmsClient +from dataall.modules.datasets.aws.kms_dataset_client import KmsClient from dataall.base.context import get_context from dataall.core.permissions.services.group_policy_service import GroupPolicyService from dataall.core.environment.services.environment_service import EnvironmentService From 9f88e79c3bcb666f6ae77ee3c7b5f8d301795f79 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 10:37:55 +0200 Subject: [PATCH 08/18] Move dataset_subscription_task to dataset sharing --- 
.../tasks/dataset_subscription_task.py                     | 1 +
 .../tasks/subscriptions/__init__.py                        | 0
 .../tasks/subscriptions/sqs_poller.py                      | 0
 deploy/stacks/container.py                                 | 2 +-
 tests/modules/datasets/tasks/test_dataset_subscriptions.py | 4 ++--
 5 files changed, 4 insertions(+), 3 deletions(-)
 rename backend/dataall/modules/{datasets => dataset_sharing}/tasks/dataset_subscription_task.py (99%)
 rename backend/dataall/modules/{datasets => dataset_sharing}/tasks/subscriptions/__init__.py (100%)
 rename backend/dataall/modules/{datasets => dataset_sharing}/tasks/subscriptions/sqs_poller.py (100%)

diff --git a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py b/backend/dataall/modules/dataset_sharing/tasks/dataset_subscription_task.py
similarity index 99%
rename from backend/dataall/modules/datasets/tasks/dataset_subscription_task.py
rename to backend/dataall/modules/dataset_sharing/tasks/dataset_subscription_task.py
index d4fe628b5..de79f637c 100644
--- a/backend/dataall/modules/datasets/tasks/dataset_subscription_task.py
+++ b/backend/dataall/modules/dataset_sharing/tasks/dataset_subscription_task.py
@@ -26,6 +26,7 @@
 log = logging.getLogger(__name__)

 # TODO: review this task usage and remove if not needed
+# It provides information about the shares of a dataset's items
 class DatasetSubscriptionService:
diff --git a/backend/dataall/modules/datasets/tasks/subscriptions/__init__.py b/backend/dataall/modules/dataset_sharing/tasks/subscriptions/__init__.py
similarity index 100%
rename from backend/dataall/modules/datasets/tasks/subscriptions/__init__.py
rename to backend/dataall/modules/dataset_sharing/tasks/subscriptions/__init__.py
diff --git a/backend/dataall/modules/datasets/tasks/subscriptions/sqs_poller.py b/backend/dataall/modules/dataset_sharing/tasks/subscriptions/sqs_poller.py
similarity index 100%
rename from backend/dataall/modules/datasets/tasks/subscriptions/sqs_poller.py
rename to backend/dataall/modules/dataset_sharing/tasks/subscriptions/sqs_poller.py
diff --git a/deploy/stacks/container.py b/deploy/stacks/container.py
index 288ae714d..a8dd7e27c 100644
--- a/deploy/stacks/container.py
+++ b/deploy/stacks/container.py
@@ -292,7 +292,7 @@ def add_subscription_task(self):
             command=[
                 'python3.9',
                 '-m',
-                'dataall.modules.datasets.tasks.dataset_subscription_task',
+                'dataall.modules.dataset_sharing.tasks.dataset_subscription_task',
             ],
             container_id='container',
             ecr_repository=self._ecr_repository,
diff --git a/tests/modules/datasets/tasks/test_dataset_subscriptions.py b/tests/modules/datasets/tasks/test_dataset_subscriptions.py
index 16ce16dcd..2e5cdecee 100644
--- a/tests/modules/datasets/tasks/test_dataset_subscriptions.py
+++ b/tests/modules/datasets/tasks/test_dataset_subscriptions.py
@@ -12,7 +12,7 @@
 )
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
 from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset
-from dataall.modules.datasets.tasks.dataset_subscription_task import DatasetSubscriptionService
+from dataall.modules.dataset_sharing.tasks.dataset_subscription_task import DatasetSubscriptionService
 from dataall.core.environment.api.enums import EnvironmentPermission


@@ -90,7 +90,7 @@ def share(
 def test_subscriptions(org, env, otherenv, db, dataset, share, mocker):
     sns_client = MagicMock()
-    mocker.patch('dataall.modules.datasets.tasks.dataset_subscription_task.SnsDatasetClient', sns_client)
+    mocker.patch('dataall.modules.dataset_sharing.tasks.dataset_subscription_task.SnsDatasetClient', sns_client)
sns_client.publish_dataset_message.return_value = True subscriber = DatasetSubscriptionService(db) messages = [ From b3f27f39d7a5fb498206d99398821df4b440052d Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 10:58:17 +0200 Subject: [PATCH 09/18] Create DatasetSharingService and import on KMS --- .../modules/dataset_sharing/api/resolvers.py | 3 +- .../services/dataset_sharing_service.py | 35 +++++++++++++++++++ .../services/share_object_service.py | 22 +----------- .../datasets/aws/kms_dataset_client.py | 1 - 4 files changed, 38 insertions(+), 23 deletions(-) create mode 100644 backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py index 9229580dd..4d38e142e 100644 --- a/backend/dataall/modules/dataset_sharing/api/resolvers.py +++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py @@ -9,6 +9,7 @@ from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService +from dataall.modules.dataset_sharing.services.dataset_sharing_service import DatasetSharingService from dataall.modules.dataset_sharing.aws.glue_client import GlueClient from dataall.modules.datasets.db.dataset_repositories import DatasetRepository from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset @@ -317,4 +318,4 @@ def verify_dataset_share_objects(context: Context, source, input): RequestValidator.validate_dataset_share_selector_input(input) dataset_uri = input.get('datasetUri') verify_share_uris = input.get('shareUris') - return ShareObjectService.verify_dataset_share_objects(uri=dataset_uri, share_uris=verify_share_uris) + return DatasetSharingService.verify_dataset_share_objects(uri=dataset_uri, share_uris=verify_share_uris) diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py new file mode 100644 index 000000000..ff8360fd9 --- /dev/null +++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py @@ -0,0 +1,35 @@ +from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService +from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService +from dataall.base.context import get_context +from dataall.modules.dataset_sharing.db.share_object_repositories import ( + ShareObjectRepository, + ShareItemSM, +) +from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService +from dataall.modules.datasets.services.dataset_permissions import ( + MANAGE_DATASETS, + UPDATE_DATASET, +) + +import logging + +log = logging.getLogger(__name__) + + +class DatasetSharingService: + + @staticmethod + @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) + @ResourcePolicyService.has_resource_permission(UPDATE_DATASET) + def verify_dataset_share_objects(uri: str, share_uris: list): + #TODO: when we abstract dataset_sharing_base from s3_dataset_sharing this function won't pollute the ShareObject service + with get_context().db_engine.scoped_session() as session: + for share_uri in share_uris: + share = ShareObjectRepository.get_share_by_uri(session, share_uri) + states = 
ShareItemSM.get_share_item_revokable_states()
+                items = ShareObjectRepository.list_shareable_items(
+                    session, share, states, {'pageSize': 1000, 'isShared': True}
+                )
+                item_uris = [item.shareItemUri for item in items.get('nodes', [])]
+                ShareItemService.verify_items_share_object(uri=share_uri, item_uris=item_uris)
+        return True
diff --git a/backend/dataall/modules/dataset_sharing/services/share_object_service.py b/backend/dataall/modules/dataset_sharing/services/share_object_service.py
index d5c672b69..70759f213 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_object_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_object_service.py
@@ -19,7 +19,6 @@
     ShareItemStatus,
     ShareObjectStatus,
     PrincipalType,
-    ShareItemHealthStatus,
 )
 from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
 from dataall.modules.dataset_sharing.db.share_object_repositories import (
@@ -41,10 +40,7 @@
     DELETE_SHARE_OBJECT,
     GET_SHARE_OBJECT,
 )
-from dataall.modules.datasets.services.dataset_permissions import (
-    MANAGE_DATASETS,
-    UPDATE_DATASET,
-)
+
 from dataall.modules.dataset_sharing.aws.glue_client import GlueClient
 from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
 from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation
@@ -569,19 +565,3 @@ def _attach_dataset_folder_read_permission(session, share):
                 resource_uri=location.itemUri,
                 resource_type=DatasetStorageLocation.__name__,
             )
-
-    @staticmethod
-    @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
-    @ResourcePolicyService.has_resource_permission(UPDATE_DATASET)
-    def verify_dataset_share_objects(uri: str, share_uris: list):
-        #TODO: when we abstract dataset_sharing_base from s3_dataset_sharing this function won't pollute the ShareObject service
-        with get_context().db_engine.scoped_session() as session:
-            for share_uri in share_uris:
-                share = ShareObjectRepository.get_share_by_uri(session, share_uri)
-                states = ShareItemSM.get_share_item_revokable_states()
-                items = ShareObjectRepository.list_shareable_items(
-                    session, share, states, {'pageSize': 1000, 'isShared': True}
-                )
-                item_uris = [item.shareItemUri for item in items.get('nodes', [])]
-                ShareItemService.verify_items_share_object(uri=share_uri, item_uris=item_uris)
-        return True
diff --git a/backend/dataall/modules/datasets/aws/kms_dataset_client.py b/backend/dataall/modules/datasets/aws/kms_dataset_client.py
index 0ff5e0cda..36e545d62 100644
--- a/backend/dataall/modules/datasets/aws/kms_dataset_client.py
+++ b/backend/dataall/modules/datasets/aws/kms_dataset_client.py
@@ -2,7 +2,6 @@

 from dataall.base.aws.sts import SessionHelper
 from botocore.exceptions import ClientError
-from dataall.modules.dataset_sharing.aws.share_policy_verifier import SharePolicyVerifier


 log = logging.getLogger(__name__)

From b3cd4989964380cdd0efa9d2d2b7b6a776e491c6 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Tue, 16 Apr 2024 11:10:07 +0200
Subject: [PATCH 10/18] Add interface to the new service and remove from db repository

---
 .../modules/dataset_sharing/__init__.py      |  6 +--
 .../db/share_object_repositories.py          | 43 +-----------
 .../services/dataset_sharing_service.py      | 44 ++++++++++++++++++-
 .../datasets/db/dataset_repositories.py      | 30 -------
 .../datasets/services/dataset_service.py     | 36 +++++++++++++--
 5 files changed, 81 insertions(+), 78 deletions(-)

diff --git a/backend/dataall/modules/dataset_sharing/__init__.py 
b/backend/dataall/modules/dataset_sharing/__init__.py index 4aa401c40..aa78fa117 100644 --- a/backend/dataall/modules/dataset_sharing/__init__.py +++ b/backend/dataall/modules/dataset_sharing/__init__.py @@ -24,11 +24,11 @@ def depends_on() -> List[Type['ModuleInterface']]: def __init__(self): from dataall.modules.dataset_sharing import api from dataall.modules.dataset_sharing.services.managed_share_policy_service import SharePolicyService - from dataall.modules.datasets.db.dataset_repositories import DatasetRepository - from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectDatasetExtensionRepository + from dataall.modules.datasets.services.dataset_service import DatasetService + from dataall.modules.dataset_sharing.services.dataset_sharing_service import DatasetSharingService EnvironmentResourceManager.register(ShareEnvironmentResource()) - DatasetRepository.register(ShareObjectDatasetExtensionRepository()) + DatasetService.register(DatasetSharingService()) log.info('API of dataset sharing has been imported') diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py index 2fc972c77..25a10fa7e 100644 --- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py +++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py @@ -20,9 +20,8 @@ PrincipalType, ) from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject -from dataall.modules.datasets.db.dataset_repositories import DatasetRepository, DatasetRepositoryInterface +from dataall.modules.datasets.db.dataset_repositories import DatasetRepository from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket -from dataall.modules.datasets.services.dataset_permissions import DELETE_DATASET, DELETE_DATASET_TABLE, DELETE_DATASET_FOLDER logger = logging.getLogger(__name__) @@ -326,46 +325,8 @@ def count_role_resources(session, role_uri): def delete_env(session, environment): ShareObjectRepository.delete_all_share_items(session, environment.environmentUri) -class ShareObjectDatasetExtensionRepository(DatasetRepositoryInterface): - @staticmethod - def check_before_delete(session, uri, **kwargs): - """Implemented as part of the DatasetRepositoryInterface""" - action = kwargs.get('action') - if action in [DELETE_DATASET_FOLDER, DELETE_DATASET_TABLE]: - has_share = ShareObjectRepository.has_shared_items(session, uri) - if has_share: - raise exceptions.ResourceShared( - action=action, - message='Revoke all shares for this item before deletion', - ) - elif action in [DELETE_DATASET]: - shares = ShareObjectRepository.list_dataset_shares_with_existing_shared_items( - session=session, dataset_uri=uri - ) - if shares: - raise exceptions.ResourceShared( - action=DELETE_DATASET, - message='Revoke all dataset shares before deletion.', - ) - else: - raise exceptions.RequiredParameter('Delete action') - return True - - @staticmethod - def execute_on_delete(self, session, uri, **kwargs): - """Implemented as part of the DatasetRepositoryInterface""" - action = kwargs.get('action') - if action in [DELETE_DATASET_FOLDER, DELETE_DATASET_TABLE]: - ShareObjectRepository.delete_shares(session, uri) - elif action in [DELETE_DATASET]: - ShareObjectRepository.delete_shares_with_no_shared_items(session, uri) - else: - raise exceptions.RequiredParameter('Delete action') - return True - - -class 
ShareObjectRepository(DatasetRepositoryInterface): +class ShareObjectRepository: @staticmethod def save_and_commit(session, share): diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py index ff8360fd9..87d0572d7 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py @@ -1,6 +1,7 @@ from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService from dataall.base.context import get_context +from dataall.base.db import exceptions from dataall.modules.dataset_sharing.db.share_object_repositories import ( ShareObjectRepository, ShareItemSM, @@ -9,14 +10,55 @@ from dataall.modules.datasets.services.dataset_permissions import ( MANAGE_DATASETS, UPDATE_DATASET, + DELETE_DATASET, + DELETE_DATASET_TABLE, + DELETE_DATASET_FOLDER, ) +from dataall.modules.datasets.services.dataset_service import DatasetServiceInterface + import logging log = logging.getLogger(__name__) -class DatasetSharingService: +class DatasetSharingService(DatasetServiceInterface): + + @staticmethod + def check_before_delete(session, uri, **kwargs): + """Implemented as part of the DatasetServiceInterface""" + action = kwargs.get('action') + if action in [DELETE_DATASET_FOLDER, DELETE_DATASET_TABLE]: + has_share = ShareObjectRepository.has_shared_items(session, uri) + if has_share: + raise exceptions.ResourceShared( + action=action, + message='Revoke all shares for this item before deletion', + ) + elif action in [DELETE_DATASET]: + shares = ShareObjectRepository.list_dataset_shares_with_existing_shared_items( + session=session, dataset_uri=uri + ) + if shares: + raise exceptions.ResourceShared( + action=DELETE_DATASET, + message='Revoke all dataset shares before deletion.', + ) + else: + raise exceptions.RequiredParameter('Delete action') + return True + + @staticmethod + def execute_on_delete(self, session, uri, **kwargs): + """Implemented as part of the DatasetServiceInterface""" + action = kwargs.get('action') + if action in [DELETE_DATASET_FOLDER, DELETE_DATASET_TABLE]: + ShareObjectRepository.delete_shares(session, uri) + elif action in [DELETE_DATASET]: + ShareObjectRepository.delete_shares_with_no_shared_items(session, uri) + else: + raise exceptions.RequiredParameter('Delete action') + return True @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) diff --git a/backend/dataall/modules/datasets/db/dataset_repositories.py b/backend/dataall/modules/datasets/db/dataset_repositories.py index 2482f6578..8bd260731 100644 --- a/backend/dataall/modules/datasets/db/dataset_repositories.py +++ b/backend/dataall/modules/datasets/db/dataset_repositories.py @@ -1,6 +1,4 @@ import logging -from typing import List -from abc import ABC, abstractmethod from sqlalchemy import and_, or_ from sqlalchemy.orm import Query from dataall.core.activity.db.activity_models import Activity @@ -18,38 +16,10 @@ logger = logging.getLogger(__name__) -class DatasetRepositoryInterface(ABC): - @staticmethod - def check_before_delete(self, session, uri, **kwargs): - """Abstract method to be implemented by dependent modules that want to add checks before deletion for dataset objects""" - return True - - @staticmethod - def execute_on_delete(self, session, uri, **kwargs): - """Abstract method to be 
implemented by dependent modules that want to add clean-up actions when a dataset object is deleted""" - return True - class DatasetRepository(EnvironmentResource): """DAO layer for Datasets""" - _interfaces: List[DatasetRepositoryInterface] = [] - - @classmethod - def register(cls, interface: DatasetRepositoryInterface): - cls._interfaces.append(interface) - - @classmethod - def check_before_delete(cls, session, uri, action) -> bool: - can_be_deleted = [interface.check_before_delete(session, uri, action) for interface in cls._interfaces] - return False not in set(can_be_deleted) - - @classmethod - def execute_on_delete(cls, session, uri) -> bool: - for interface in cls._interfaces: - interface.execute_on_delete(session, uri) - return True - @classmethod def build_dataset(cls, username: str, env: Environment, data: dict) -> Dataset: """Builds a datasets based on the request data, but doesn't save it in the database""" diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 606818440..03afd9e26 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -1,7 +1,8 @@ import os import json import logging - +from typing import List +from abc import ABC, abstractmethod from dataall.base.aws.quicksight import QuicksightClient from dataall.base.db import exceptions from dataall.base.utils.naming_convention import NamingConventionPattern @@ -48,8 +49,37 @@ log = logging.getLogger(__name__) +class DatasetServiceInterface(ABC): + @staticmethod + def check_before_delete(self, session, uri, **kwargs): + """Abstract method to be implemented by dependent modules that want to add checks before deletion for dataset objects""" + return True + + @staticmethod + def execute_on_delete(self, session, uri, **kwargs): + """Abstract method to be implemented by dependent modules that want to add clean-up actions when a dataset object is deleted""" + return True class DatasetService: + _interfaces: List[DatasetServiceInterface] = [] + + @classmethod + def register(cls, interface: DatasetServiceInterface): + cls._interfaces.append(interface) + + @classmethod + def check_before_delete(cls, session, uri, **kwargs) -> bool: + """All actions form other modules that need to be executed before deletion""" + can_be_deleted = [interface.check_before_delete(session, uri, **kwargs) for interface in cls._interfaces] + return False not in set(can_be_deleted) + + @classmethod + def execute_on_delete(cls, session, uri, **kwargs) -> bool: + """All actions form other modules that need to be executed during deletion""" + for interface in cls._interfaces: + interface.execute_on_delete(session, uri, **kwargs) + return True + @staticmethod def check_dataset_account(session, environment): dashboards_enabled = EnvironmentService.get_boolean_env_param(session, environment, 'dashboardsEnabled') @@ -376,7 +406,7 @@ def delete_dataset(uri: str, delete_from_aws: bool = False): with context.db_engine.scoped_session() as session: dataset: Dataset = DatasetRepository.get_dataset_by_uri(session, uri) env = EnvironmentService.get_environment_by_uri(session, dataset.environmentUri) - DatasetRepository.check_before_delete(session, uri, action=DELETE_DATASET) + DatasetService.check_before_delete(session, uri, action=DELETE_DATASET) tables = [t.tableUri for t in DatasetRepository.get_dataset_tables(session, uri)] for tableUri in tables: @@ -388,7 +418,7 @@ def delete_dataset(uri: 
str, delete_from_aws: bool = False): DatasetIndexer.delete_doc(doc_id=uri) - DatasetRepository.execute_on_delete(session, uri, action=DELETE_DATASET) + DatasetService.execute_on_delete(session, uri, action=DELETE_DATASET) DatasetService.delete_dataset_term_links(session, uri) DatasetTableRepository.delete_dataset_tables(session, dataset.datasetUri) DatasetLocationRepository.delete_dataset_locations(session, dataset.datasetUri) From a783dfd91560c736c0f752470ad483d5686c358b Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 11:44:40 +0200 Subject: [PATCH 11/18] Split list dataset logic into list owned and list shared. Inject list shared through interface --- .../db/share_object_repositories.py | 19 +----------- .../services/dataset_sharing_service.py | 5 +++ .../modules/datasets/api/dataset/queries.py | 4 +-- .../modules/datasets/api/dataset/resolvers.py | 4 +-- .../datasets/db/dataset_repositories.py | 31 +++++++++++++++++++ .../datasets/services/dataset_service.py | 28 +++++++++++++---- 6 files changed, 63 insertions(+), 28 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py index 25a10fa7e..b2b208119 100644 --- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py +++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py @@ -972,7 +972,7 @@ def delete_shares_with_no_shared_items(session, dataset_uri): session.delete(share_obj) @staticmethod - def _query_user_datasets(session, username, groups, filter) -> Query: + def query_user_shared_datasets(session, username, groups) -> Query: share_item_shared_states = ShareItemSM.get_share_item_shared_states() query = ( session.query(Dataset) @@ -983,9 +983,6 @@ def _query_user_datasets(session, username, groups, filter) -> Query: .outerjoin(ShareObjectItem, ShareObjectItem.shareUri == ShareObject.shareUri) .filter( or_( - Dataset.owner == username, - Dataset.SamlAdminGroupName.in_(groups), - Dataset.stewards.in_(groups), and_( ShareObject.principalId.in_(groups), ShareObjectItem.status.in_(share_item_shared_states), @@ -997,22 +994,8 @@ def _query_user_datasets(session, username, groups, filter) -> Query: ) ) ) - if filter and filter.get('term'): - query = query.filter( - or_( - Dataset.description.ilike(filter.get('term') + '%%'), - Dataset.label.ilike(filter.get('term') + '%%'), - ) - ) return query.distinct(Dataset.datasetUri) - @staticmethod - def paginated_user_datasets(session, username, groups, data=None) -> dict: - return paginate( - query=ShareObjectRepository._query_user_datasets(session, username, groups, data), - page=data.get('page', 1), - page_size=data.get('pageSize', 10), - ).to_dict() @staticmethod def find_dataset_shares(session, dataset_uri): diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py index 87d0572d7..cf307a93c 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py @@ -24,6 +24,11 @@ class DatasetSharingService(DatasetServiceInterface): + @staticmethod + def append_to_list_user_datasets(session, username, groups): + """Implemented as part of the DatasetServiceInterface""" + return ShareObjectRepository.query_user_shared_datasets(session, username, groups) + @staticmethod def check_before_delete(session, uri, **kwargs): 
"""Implemented as part of the DatasetServiceInterface""" diff --git a/backend/dataall/modules/datasets/api/dataset/queries.py b/backend/dataall/modules/datasets/api/dataset/queries.py index f452c8da8..a26bb0db0 100644 --- a/backend/dataall/modules/datasets/api/dataset/queries.py +++ b/backend/dataall/modules/datasets/api/dataset/queries.py @@ -2,7 +2,7 @@ from dataall.modules.datasets.api.dataset.input_types import DatasetFilter from dataall.modules.datasets.api.dataset.resolvers import ( get_dataset, - list_owned_shared_datasets, + list_all_user_datasets, list_owned_datasets, get_dataset_assume_role_url, get_file_upload_presigned_url, @@ -25,7 +25,7 @@ name='listDatasets', args=[gql.Argument('filter', DatasetFilter)], type=DatasetSearchResult, - resolver=list_owned_shared_datasets, + resolver=list_all_user_datasets, test_scope='Dataset', ) diff --git a/backend/dataall/modules/datasets/api/dataset/resolvers.py b/backend/dataall/modules/datasets/api/dataset/resolvers.py index 98a1f9fc2..2cab20762 100644 --- a/backend/dataall/modules/datasets/api/dataset/resolvers.py +++ b/backend/dataall/modules/datasets/api/dataset/resolvers.py @@ -57,10 +57,10 @@ def get_file_upload_presigned_url(context, source, datasetUri: str = None, input return DatasetService.get_file_upload_presigned_url(uri=datasetUri, data=input) -def list_owned_shared_datasets(context: Context, source, filter: dict = None): +def list_all_user_datasets(context: Context, source, filter: dict = None): if not filter: filter = {'page': 1, 'pageSize': 5} - return DatasetService.list_owned_shared_datasets(filter) + return DatasetService.list_all_user_datasets(filter) def list_owned_datasets(context: Context, source, filter: dict = None): diff --git a/backend/dataall/modules/datasets/db/dataset_repositories.py b/backend/dataall/modules/datasets/db/dataset_repositories.py index 8bd260731..fd8eda4b0 100644 --- a/backend/dataall/modules/datasets/db/dataset_repositories.py +++ b/backend/dataall/modules/datasets/db/dataset_repositories.py @@ -343,6 +343,37 @@ def _query_user_datasets(session, username, groups, filter) -> Query: ) return query.distinct(Dataset.datasetUri) + @staticmethod + def paginated_all_user_datasets(session, username, groups, all_subqueries, data=None) -> dict: + return paginate( + query=DatasetRepository._query_all_user_datasets(session, username, groups, all_subqueries, data), + page=data.get('page', 1), + page_size=data.get('pageSize', 10), + ).to_dict() + + @staticmethod + def _query_all_user_datasets(session, username, groups, all_subqueries, filter) -> Query: + query = session.query(Dataset).filter( + or_( + Dataset.owner == username, + Dataset.SamlAdminGroupName.in_(groups), + Dataset.stewards.in_(groups), + ) + ) + if query.first() is not None: + all_subqueries.append(query) + + union_query = all_subqueries[0].union(*all_subqueries[1:]) + + if filter and filter.get('term'): + union_query = union_query.filter( + or_( + Dataset.description.ilike(filter.get('term') + '%%'), + Dataset.label.ilike(filter.get('term') + '%%'), + ) + ) + return union_query.distinct(Dataset.datasetUri) + @staticmethod def _set_import_data(dataset, data): dataset.imported = True if data.get('imported') else False diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 03afd9e26..861995c01 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -51,15 +51,20 @@ class 
DatasetServiceInterface(ABC): @staticmethod - def check_before_delete(self, session, uri, **kwargs): + def check_before_delete(session, uri, **kwargs): """Abstract method to be implemented by dependent modules that want to add checks before deletion for dataset objects""" return True @staticmethod - def execute_on_delete(self, session, uri, **kwargs): + def execute_on_delete(session, uri, **kwargs): """Abstract method to be implemented by dependent modules that want to add clean-up actions when a dataset object is deleted""" return True + @staticmethod + def append_to_list_user_datasets(session, username, groups): + """Abstract method to be implemented by dependent modules that want to add datasets to the list_datasets that list all datasets that the user has access to""" + return [] + class DatasetService: _interfaces: List[DatasetServiceInterface] = [] @@ -69,17 +74,27 @@ def register(cls, interface: DatasetServiceInterface): @classmethod def check_before_delete(cls, session, uri, **kwargs) -> bool: - """All actions form other modules that need to be executed before deletion""" + """All actions from other modules that need to be executed before deletion""" can_be_deleted = [interface.check_before_delete(session, uri, **kwargs) for interface in cls._interfaces] return False not in set(can_be_deleted) @classmethod def execute_on_delete(cls, session, uri, **kwargs) -> bool: - """All actions form other modules that need to be executed during deletion""" + """All actions from other modules that need to be executed during deletion""" for interface in cls._interfaces: interface.execute_on_delete(session, uri, **kwargs) return True + @classmethod + def _list_all_user_interface_datasets(cls, session, username, groups) -> List: + """All list_datasets from other modules that need to be appended to the list of datasets""" + all_subqueries = [] + for interface in cls._interfaces: + interface_subquery = interface.append_to_list_user_datasets(session, username, groups) + if interface_subquery.first() is not None: + all_subqueries.append(interface_subquery) + return all_subqueries + @staticmethod def check_dataset_account(session, environment): dashboards_enabled = EnvironmentService.get_boolean_env_param(session, environment, 'dashboardsEnabled') @@ -216,10 +231,11 @@ def get_file_upload_presigned_url(uri: str, data: dict): return S3DatasetClient(dataset).get_file_upload_presigned_url(data) @staticmethod - def list_owned_shared_datasets(data: dict): + def list_all_user_datasets(data: dict): context = get_context() with context.db_engine.scoped_session() as session: - return ShareObjectRepository.paginated_user_datasets(session, context.username, context.groups, data=data) + all_subqueries = DatasetService._list_all_user_interface_datasets(session, context.username, context.groups) + return DatasetRepository.paginated_all_user_datasets(session, context.username, context.groups, all_subqueries, data=data) @staticmethod def list_owned_datasets(data: dict): From 677b3e689d023f462286592471fd21533ae1d06c Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 13:17:23 +0200 Subject: [PATCH 12/18] Implement interface to resolve user roles based on other modules --- .../db/share_object_repositories.py | 9 ++++++-- .../services/dataset_sharing_service.py | 12 ++++++++++- .../modules/datasets/api/dataset/resolvers.py | 7 +++---- .../datasets/services/dataset_service.py | 21 ++++++++++++++++--- 4 files changed, 39 insertions(+), 10 deletions(-) diff --git 
a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py index b2b208119..400b607b4 100644 --- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py +++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py @@ -369,11 +369,16 @@ def get_share_by_uri(session, uri): return share @staticmethod - def get_share_by_dataset_attributes(session, dataset_uri, dataset_owner): + def get_share_by_dataset_attributes(session, dataset_uri, dataset_owner, groups=[]): share: ShareObject = ( session.query(ShareObject) .filter(ShareObject.datasetUri == dataset_uri) - .filter(ShareObject.owner == dataset_owner) + .filter( + or_( + ShareObject.owner == dataset_owner, + ShareObject.principalId.in_(groups) + ) + ) .first() ) return share diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py index cf307a93c..d67003c30 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py @@ -16,6 +16,7 @@ ) from dataall.modules.datasets.services.dataset_service import DatasetServiceInterface +from dataall.modules.datasets.services.datasets_base_enums import DatasetRole import logging @@ -54,7 +55,7 @@ def check_before_delete(session, uri, **kwargs): return True @staticmethod - def execute_on_delete(self, session, uri, **kwargs): + def execute_on_delete(session, uri, **kwargs): """Implemented as part of the DatasetServiceInterface""" action = kwargs.get('action') if action in [DELETE_DATASET_FOLDER, DELETE_DATASET_TABLE]: @@ -65,6 +66,15 @@ def execute_on_delete(self, session, uri, **kwargs): raise exceptions.RequiredParameter('Delete action') return True + @staticmethod + def resolve_additional_dataset_user_role(session, uri, username, groups): + """Implemented as part of the DatasetServiceInterface""" + share = ShareObjectRepository.get_share_by_dataset_attributes(session, uri, username, groups) + if share is not None: + return DatasetRole.Shared.value + return None + + @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) @ResourcePolicyService.has_resource_permission(UPDATE_DATASET) diff --git a/backend/dataall/modules/datasets/api/dataset/resolvers.py b/backend/dataall/modules/datasets/api/dataset/resolvers.py index 2cab20762..942203e49 100644 --- a/backend/dataall/modules/datasets/api/dataset/resolvers.py +++ b/backend/dataall/modules/datasets/api/dataset/resolvers.py @@ -7,7 +7,6 @@ from dataall.core.environment.services.environment_service import EnvironmentService from dataall.core.organizations.db.organization_repositories import OrganizationRepository from dataall.base.db.exceptions import RequiredParameter, InvalidInput -from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.datasets.db.dataset_models import Dataset from dataall.modules.datasets.services.datasets_base_enums import DatasetRole, ConfidentialityClassification from dataall.modules.datasets.services.dataset_service import DatasetService @@ -46,9 +45,9 @@ def resolve_user_role(context: Context, source: Dataset, **kwargs): return DatasetRole.DataSteward.value else: with context.engine.scoped_session() as session: - share = session.query(ShareObject).filter(ShareObject.datasetUri == source.datasetUri).first() - if share and (share.owner == 
context.username or share.principalId in context.groups): - return DatasetRole.Shared.value + other_modules_user_role = DatasetService.get_other_modules_dataset_user_role(session, source.datasetUri, context.username, context.groups) + if other_modules_user_role is not None: + return other_modules_user_role return DatasetRole.NoPermission.value diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 861995c01..c9b345d56 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -51,20 +51,26 @@ class DatasetServiceInterface(ABC): @staticmethod - def check_before_delete(session, uri, **kwargs): + def check_before_delete(session, uri, **kwargs) -> bool: """Abstract method to be implemented by dependent modules that want to add checks before deletion for dataset objects""" return True @staticmethod - def execute_on_delete(session, uri, **kwargs): + def execute_on_delete(session, uri, **kwargs) -> bool: """Abstract method to be implemented by dependent modules that want to add clean-up actions when a dataset object is deleted""" return True @staticmethod - def append_to_list_user_datasets(session, username, groups): + def append_to_list_user_datasets(session, username, groups) -> List: """Abstract method to be implemented by dependent modules that want to add datasets to the list_datasets that list all datasets that the user has access to""" return [] + @staticmethod + def resolve_additional_dataset_user_role(session, uri, username, groups): + """Abstract method to be implemented by dependent modules that want to add new types of user role in relation to a Dataset """ + return None + + class DatasetService: _interfaces: List[DatasetServiceInterface] = [] @@ -85,6 +91,15 @@ def execute_on_delete(cls, session, uri, **kwargs) -> bool: interface.execute_on_delete(session, uri, **kwargs) return True + @classmethod + def get_other_modules_dataset_user_role(cls, session, uri, username, groups) -> str: + """All other user role types that might come from other modules""" + for interface in cls._interfaces: + role = interface.resolve_additional_dataset_user_role(session, uri, username, groups) + if role is not None: + return role + return None + @classmethod def _list_all_user_interface_datasets(cls, session, username, groups) -> List: """All list_datasets from other modules that need to be appended to the list of datasets""" From 83b0f7189dd4c98c2d53528b1b84c1d8694e180e Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 15:19:13 +0200 Subject: [PATCH 13/18] Split the getDatasetAssumeRole for shared role or dataset owner role --- .../modules/dataset_sharing/api/queries.py | 9 ++++ .../modules/dataset_sharing/api/resolvers.py | 5 +++ .../services/dataset_sharing_service.py | 41 +++++++++++++++++++ .../datasets/services/dataset_service.py | 26 ++++-------- .../src/modules/Folders/views/FolderView.js | 26 ++++++++---- .../Datasets/getDatasetSharedAssumeRoleUrl.js | 12 ++++++ .../src/services/graphql/Datasets/index.js | 1 + 7 files changed, 95 insertions(+), 25 deletions(-) create mode 100644 frontend/src/services/graphql/Datasets/getDatasetSharedAssumeRoleUrl.js diff --git a/backend/dataall/modules/dataset_sharing/api/queries.py b/backend/dataall/modules/dataset_sharing/api/queries.py index 9882f6fd8..4f25adae7 100644 --- a/backend/dataall/modules/dataset_sharing/api/queries.py +++ 
b/backend/dataall/modules/dataset_sharing/api/queries.py @@ -1,5 +1,6 @@ from dataall.base.api import gql from dataall.modules.dataset_sharing.api.resolvers import ( + get_dataset_shared_assume_role_url, get_share_object, list_shared_with_environment_data_items, list_shares_in_my_inbox, @@ -38,3 +39,11 @@ type=gql.Ref('EnvironmentPublishedItemSearchResults'), test_scope='Dataset', ) + +getDatasetSharedAssumeRoleUrl = gql.QueryField( + name='getDatasetSharedAssumeRoleUrl', + args=[gql.Argument(name='datasetUri', type=gql.String)], + type=gql.String, + resolver=get_dataset_shared_assume_role_url, + test_scope='Dataset', +) diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py index 4d38e142e..eae2978e3 100644 --- a/backend/dataall/modules/dataset_sharing/api/resolvers.py +++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py @@ -5,6 +5,7 @@ from dataall.core.environment.services.environment_service import EnvironmentService from dataall.core.organizations.db.organization_repositories import OrganizationRepository from dataall.base.db.exceptions import RequiredParameter +from dataall.base.feature_toggle_checker import is_feature_enabled from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareObjectPermission from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService @@ -319,3 +320,7 @@ def verify_dataset_share_objects(context: Context, source, input): dataset_uri = input.get('datasetUri') verify_share_uris = input.get('shareUris') return DatasetSharingService.verify_dataset_share_objects(uri=dataset_uri, share_uris=verify_share_uris) + +@is_feature_enabled('modules.datasets.features.aws_actions') +def get_dataset_shared_assume_role_url(context: Context, source, datasetUri: str = None): + return DatasetSharingService.get_dataset_shared_assume_role_url(uri=datasetUri) \ No newline at end of file diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py index d67003c30..c2c65ac0a 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py @@ -1,18 +1,24 @@ from dataall.core.permissions.services.resource_policy_service import ResourcePolicyService from dataall.core.permissions.services.tenant_policy_service import TenantPolicyService +from dataall.core.environment.services.environment_service import EnvironmentService from dataall.base.context import get_context from dataall.base.db import exceptions +from dataall.base.aws.sts import SessionHelper from dataall.modules.dataset_sharing.db.share_object_repositories import ( ShareObjectRepository, ShareItemSM, ) +from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.services.share_item_service import ShareItemService +from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_APPROVER +from dataall.modules.datasets.db.dataset_repositories import DatasetRepository from dataall.modules.datasets.services.dataset_permissions import ( MANAGE_DATASETS, UPDATE_DATASET, DELETE_DATASET, DELETE_DATASET_TABLE, DELETE_DATASET_FOLDER, + CREDENTIALS_DATASET ) from dataall.modules.datasets.services.dataset_service import 
DatasetServiceInterface @@ -90,3 +96,38 @@ def verify_dataset_share_objects(uri: str, share_uris: list): item_uris = [item.shareItemUri for item in items.get('nodes', [])] ShareItemService.verify_items_share_object(uri=share_uri, item_uris=item_uris) return True + + @staticmethod + @ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET) + def get_dataset_shared_assume_role_url(uri): + context = get_context() + with context.db_engine.scoped_session() as session: + dataset = DatasetRepository.get_dataset_by_uri(session, uri) + + if dataset.SamlAdminGroupName in context.groups: + role_arn = dataset.IAMDatasetAdminRoleArn + account_id = dataset.AwsAccountId + region = dataset.region + else: + share = ShareObjectRepository.get_share_by_dataset_attributes( + session=session, dataset_uri=uri, dataset_owner=context.username + ) + shared_environment = EnvironmentService.get_environment_by_uri( + session=session, uri=share.environmentUri + ) + env_group = EnvironmentService.get_environment_group( + session=session, group_uri=share.principalId, environment_uri=share.environmentUri + ) + role_arn = env_group.environmentIAMRoleArn + account_id = shared_environment.AwsAccountId + region = shared_environment.region + + + pivot_session = SessionHelper.remote_session(account_id, region) + aws_session = SessionHelper.get_session(base_session=pivot_session, role_arn=role_arn) + url = SessionHelper.get_console_access_url( + aws_session, + region=dataset.region, + bucket=dataset.S3BucketName, + ) + return url diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index c9b345d56..76791777d 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -22,9 +22,7 @@ from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.modules.datasets.db.dataset_bucket_repositories import DatasetBucketRepository from dataall.modules.vote.db.vote_repositories import VoteRepository -from dataall.modules.dataset_sharing.db.share_object_models import ShareObject from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository -from dataall.modules.dataset_sharing.services.share_permissions import SHARE_OBJECT_APPROVER from dataall.modules.datasets.aws.glue_dataset_client import DatasetCrawler from dataall.modules.datasets.aws.s3_dataset_client import S3DatasetClient from dataall.modules.datasets.db.dataset_location_repositories import DatasetLocationRepository @@ -61,9 +59,9 @@ def execute_on_delete(session, uri, **kwargs) -> bool: return True @staticmethod - def append_to_list_user_datasets(session, username, groups) -> List: + def append_to_list_user_datasets(session, username, groups): """Abstract method to be implemented by dependent modules that want to add datasets to the list_datasets that list all datasets that the user has access to""" - return [] + return True @staticmethod def resolve_additional_dataset_user_role(session, uri, username, groups): @@ -343,24 +341,16 @@ def get_dataset_assume_role_url(uri): context = get_context() with context.db_engine.scoped_session() as session: dataset = DatasetRepository.get_dataset_by_uri(session, uri) - if dataset.SamlAdminGroupName not in context.groups: - share = ShareObjectRepository.get_share_by_dataset_attributes( - session=session, dataset_uri=uri, dataset_owner=context.username - ) - shared_environment = 
EnvironmentService.get_environment_by_uri( - session=session, uri=share.environmentUri - ) - env_group = EnvironmentService.get_environment_group( - session=session, group_uri=share.principalId, environment_uri=share.environmentUri - ) - role_arn = env_group.environmentIAMRoleArn - account_id = shared_environment.AwsAccountId - region = shared_environment.region - else: + if dataset.SamlAdminGroupName in context.groups: role_arn = dataset.IAMDatasetAdminRoleArn account_id = dataset.AwsAccountId region = dataset.region + else: + raise exceptions.UnauthorizedOperation( + action=CREDENTIALS_DATASET, + message=f'User: {context.username} is not a member of the group {dataset.SamlAdminGroupName}', + ) pivot_session = SessionHelper.remote_session(account_id, region) aws_session = SessionHelper.get_session(base_session=pivot_session, role_arn=role_arn) url = SessionHelper.get_console_access_url( diff --git a/frontend/src/modules/Folders/views/FolderView.js b/frontend/src/modules/Folders/views/FolderView.js index f717afb8d..775145196 100644 --- a/frontend/src/modules/Folders/views/FolderView.js +++ b/frontend/src/modules/Folders/views/FolderView.js @@ -32,7 +32,8 @@ import { SET_ERROR, useDispatch } from 'globalErrors'; import { useClient, deleteDatasetStorageLocation, - getDatasetAssumeRoleUrl + getDatasetAssumeRoleUrl, + getDatasetSharedAssumeRoleUrl } from 'services'; import { getDatasetStorageLocation } from '../services'; @@ -51,13 +52,24 @@ function FolderPageHeader(props) { const goToS3Console = async () => { setIsLoadingUI(true); - const response = await client.query( - getDatasetAssumeRoleUrl(folder.dataset.datasetUri) - ); - if (!response.errors) { - window.open(response.data.getDatasetAssumeRoleUrl, '_blank'); + if (isAdmin) { + const response = await client.query( + getDatasetAssumeRoleUrl(folder.dataset.datasetUri) + ); + if (!response.errors) { + window.open(response.data.getDatasetAssumeRoleUrl, '_blank'); + } else { + dispatch({ type: SET_ERROR, error: response.errors[0].message }); + } } else { - dispatch({ type: SET_ERROR, error: response.errors[0].message }); + const response = await client.query( + getDatasetSharedAssumeRoleUrl(folder.dataset.datasetUri) + ); + if (!response.errors) { + window.open(response.data.getDatasetSharedAssumeRoleUrl, '_blank'); + } else { + dispatch({ type: SET_ERROR, error: response.errors[0].message }); + } } setIsLoadingUI(false); }; diff --git a/frontend/src/services/graphql/Datasets/getDatasetSharedAssumeRoleUrl.js b/frontend/src/services/graphql/Datasets/getDatasetSharedAssumeRoleUrl.js new file mode 100644 index 000000000..411480045 --- /dev/null +++ b/frontend/src/services/graphql/Datasets/getDatasetSharedAssumeRoleUrl.js @@ -0,0 +1,12 @@ +import { gql } from 'apollo-boost'; + +export const getDatasetSharedAssumeRoleUrl = (datasetUri) => ({ + variables: { + datasetUri + }, + query: gql` + query GetDatasetSharedAssumeRoleUrl($datasetUri: String!) 
{ + getDatasetSharedAssumeRoleUrl(datasetUri: $datasetUri) + } + ` +}); diff --git a/frontend/src/services/graphql/Datasets/index.js b/frontend/src/services/graphql/Datasets/index.js index c9f35114e..eb94b7292 100644 --- a/frontend/src/services/graphql/Datasets/index.js +++ b/frontend/src/services/graphql/Datasets/index.js @@ -1,6 +1,7 @@ export * from './addDatasetStorageLocation'; export * from './getDataset'; export * from './getDatasetAssumeRoleUrl'; +export * from './getDatasetSharedAssumeRoleUrl'; export * from './listDatasetTables'; export * from './listShareObjects'; export * from './removeDatasetStorageLocation'; From 9c6dba66e4bac0f74cbed9f4e25dff86415f2062 Mon Sep 17 00:00:00 2001 From: dlpzx Date: Tue, 16 Apr 2024 15:19:44 +0200 Subject: [PATCH 14/18] Add interface to extend stewards permissions --- .../services/dataset_sharing_service.py | 33 ++++++++++++ .../datasets/services/dataset_service.py | 50 +++++++++---------- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py index c2c65ac0a..daaa42f14 100644 --- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py +++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py @@ -80,6 +80,39 @@ def resolve_additional_dataset_user_role(session, uri, username, groups): return DatasetRole.Shared.value return None + @staticmethod + def extend_attach_steward_permissions(session, dataset, new_stewards, **kwargs): + """Implemented as part of the DatasetServiceInterface""" + dataset_shares = ShareObjectRepository.find_dataset_shares(session, dataset.datasetUri) + if dataset_shares: + for share in dataset_shares: + ResourcePolicyService.attach_resource_policy( + session=session, + group=new_stewards, + permissions=SHARE_OBJECT_APPROVER, + resource_uri=share.shareUri, + resource_type=ShareObject.__name__, + ) + if dataset.stewards != dataset.SamlAdminGroupName: + ResourcePolicyService.delete_resource_policy( + session=session, + group=dataset.stewards, + resource_uri=share.shareUri, + ) + + @staticmethod + def extend_delete_steward_permissions(session, dataset, **kwargs): + """Implemented as part of the DatasetServiceInterface""" + dataset_shares = ShareObjectRepository.find_dataset_shares(session, dataset.datasetUri) + if dataset_shares: + for share in dataset_shares: + if dataset.stewards != dataset.SamlAdminGroupName: + ResourcePolicyService.delete_resource_policy( + session=session, + group=dataset.stewards, + resource_uri=share.shareUri, + ) + @staticmethod @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS) diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index 76791777d..b2a231c66 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -68,6 +68,15 @@ def resolve_additional_dataset_user_role(session, uri, username, groups): """Abstract method to be implemented by dependent modules that want to add new types of user role in relation to a Dataset """ return None + @staticmethod + def extend_attach_steward_permissions(session, dataset, new_stewards) -> bool: + """Abstract method to be implemented by dependent modules that want to attach additional permissions to Dataset stewards """ + return True + + def extend_delete_steward_permissions(session, 
dataset, new_stewards) -> bool:
+        """Abstract method to be implemented by dependent modules that want to remove additional permissions from Dataset stewards"""
+        return True
+


 class DatasetService:
     _interfaces: List[DatasetServiceInterface] = []
@@ -108,6 +117,18 @@ def _list_all_user_interface_datasets(cls, session, username, groups) -> List:
             all_subqueries.append(interface_subquery)
         return all_subqueries

+    @classmethod
+    def _attach_additional_steward_permissions(cls, session, dataset, new_stewards):
+        """All permissions from other modules that need to be granted to stewards"""
+        for interface in cls._interfaces:
+            interface.extend_attach_steward_permissions(session, dataset, new_stewards)
+
+    @classmethod
+    def _delete_additional_steward__permissions(cls, session, dataset):
+        """All permissions from other modules that need to be removed from stewards"""
+        for interface in cls._interfaces:
+            interface.extend_delete_steward_permissions(session, dataset)
+
     @staticmethod
     def check_dataset_account(session, environment):
         dashboards_enabled = EnvironmentService.get_boolean_env_param(session, environment, 'dashboardsEnabled')
@@ -533,15 +554,8 @@ def _transfer_stewardship_to_owners(session, dataset):
                     resource_uri=tableUri,
                 )

-        # Remove Steward Resource Policy on Dataset Share Objects
-        dataset_shares = ShareObjectRepository.find_dataset_shares(session, dataset.datasetUri)
-        if dataset_shares:
-            for share in dataset_shares:
-                ResourcePolicyService.delete_resource_policy(
-                    session=session,
-                    group=dataset.stewards,
-                    resource_uri=share.shareUri,
-                )
+        DatasetService._delete_additional_steward__permissions(session, dataset)
+
         return dataset

     @staticmethod
@@ -577,22 +591,8 @@ def _transfer_stewardship_to_new_stewards(session, dataset, new_stewards):
                 resource_type=DatasetTable.__name__,
             )

-        dataset_shares = ShareObjectRepository.find_dataset_shares(session, dataset.datasetUri)
-        if dataset_shares:
-            for share in dataset_shares:
-                ResourcePolicyService.attach_resource_policy(
-                    session=session,
-                    group=new_stewards,
-                    permissions=SHARE_OBJECT_APPROVER,
-                    resource_uri=share.shareUri,
-                    resource_type=ShareObject.__name__,
-                )
-                if dataset.stewards != dataset.SamlAdminGroupName:
-                    ResourcePolicyService.delete_resource_policy(
-                        session=session,
-                        group=dataset.stewards,
-                        resource_uri=share.shareUri,
-                    )
+        DatasetService._attach_additional_steward_permissions(session, dataset, new_stewards)
+
         return dataset

     @staticmethod

From 007b5d2a705d7ae74104e6833e883e9848a1b9f2 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Tue, 16 Apr 2024 15:25:53 +0200
Subject: [PATCH 15/18] Move listDatasetShares query to dataset_sharing

---
 .../dataall/modules/dataset_sharing/api/queries.py   | 12 ++++++++++++
 .../dataall/modules/dataset_sharing/api/resolvers.py | 10 +++++++++-
 .../services/dataset_sharing_service.py              |  5 +++++
 .../dataall/modules/datasets/api/dataset/queries.py  | 10 ----------
 4 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/backend/dataall/modules/dataset_sharing/api/queries.py b/backend/dataall/modules/dataset_sharing/api/queries.py
index 4f25adae7..83798210c 100644
--- a/backend/dataall/modules/dataset_sharing/api/queries.py
+++
From 007b5d2a705d7ae74104e6833e883e9848a1b9f2 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Tue, 16 Apr 2024 15:25:53 +0200
Subject: [PATCH 15/18] Move listDatasetShares query to datasets_sharing

---
 .../dataall/modules/dataset_sharing/api/queries.py   | 12 ++++++++++++
 .../dataall/modules/dataset_sharing/api/resolvers.py | 10 +++++++++-
 .../services/dataset_sharing_service.py              |  5 +++++
 .../dataall/modules/datasets/api/dataset/queries.py  | 10 ----------
 4 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/backend/dataall/modules/dataset_sharing/api/queries.py b/backend/dataall/modules/dataset_sharing/api/queries.py
index 4f25adae7..83798210c 100644
--- a/backend/dataall/modules/dataset_sharing/api/queries.py
+++ b/backend/dataall/modules/dataset_sharing/api/queries.py
@@ -2,6 +2,7 @@
 from dataall.modules.dataset_sharing.api.resolvers import (
     get_dataset_shared_assume_role_url,
     get_share_object,
+    list_dataset_share_objects,
     list_shared_with_environment_data_items,
     list_shares_in_my_inbox,
     list_shares_in_my_outbox,
@@ -47,3 +48,14 @@
     resolver=get_dataset_shared_assume_role_url,
     test_scope='Dataset',
 )
+
+listShareObjects = gql.QueryField(
+    name='listDatasetShareObjects',
+    resolver=list_dataset_share_objects,
+    args=[
+        gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String)),
+        gql.Argument(name='environmentUri', type=gql.String),
+        gql.Argument(name='page', type=gql.Integer),
+    ],
+    type=gql.Ref('ShareSearchResult'),
+)
diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py
index eae2978e3..75823656c 100644
--- a/backend/dataall/modules/dataset_sharing/api/resolvers.py
+++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py
@@ -323,4 +323,12 @@ def verify_dataset_share_objects(context: Context, source, input):
 
 @is_feature_enabled('modules.datasets.features.aws_actions')
 def get_dataset_shared_assume_role_url(context: Context, source, datasetUri: str = None):
-    return DatasetSharingService.get_dataset_shared_assume_role_url(uri=datasetUri)
\ No newline at end of file
+    return DatasetSharingService.get_dataset_shared_assume_role_url(uri=datasetUri)
+
+
+def list_dataset_share_objects(context, source, filter: dict = None):
+    if not source:
+        return None
+    if not filter:
+        filter = {'page': 1, 'pageSize': 5}
+    return DatasetSharingService.list_dataset_share_objects(source, filter)
\ No newline at end of file
diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
index daaa42f14..15613ba38 100644
--- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
@@ -23,6 +23,7 @@
 from dataall.modules.datasets.services.dataset_service import DatasetServiceInterface
 from dataall.modules.datasets.services.datasets_base_enums import DatasetRole
+from dataall.modules.datasets.db.dataset_models import Dataset
 
 import logging
 
@@ -113,6 +114,10 @@ def extend_delete_steward_permissions(session, dataset, **kwargs):
                     resource_uri=share.shareUri,
                 )
 
+    @staticmethod
+    def list_dataset_share_objects(dataset: Dataset, data: dict = None):
+        with get_context().db_engine.scoped_session() as session:
+            return ShareObjectRepository.paginated_dataset_shares(session=session, uri=dataset.datasetUri, data=data)
 
     @staticmethod
     @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
diff --git a/backend/dataall/modules/datasets/api/dataset/queries.py b/backend/dataall/modules/datasets/api/dataset/queries.py
index a26bb0db0..c34852263 100644
--- a/backend/dataall/modules/datasets/api/dataset/queries.py
+++ b/backend/dataall/modules/datasets/api/dataset/queries.py
@@ -57,16 +57,6 @@
     resolver=get_file_upload_presigned_url,
 )
 
-listShareObjects = gql.QueryField(
-    name='listDatasetShareObjects',
-    resolver=list_dataset_share_objects,
-    args=[
-        gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String)),
-        gql.Argument(name='environmentUri', type=gql.String),
-        gql.Argument(name='page', type=gql.Integer),
-    ],
-    type=gql.Ref('ShareSearchResult'),
-)
 
 listDatasetsOwnedByEnvGroup = gql.QueryField(
     name='listDatasetsOwnedByEnvGroup',

From ae47b6af108d869f57159a9530f6cc78496e9679 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Tue, 16 Apr 2024 17:44:39 +0200
Subject: [PATCH 16/18] Removing unused field from datasets (verified
 listDatasets, listOwnedDatasets, listDatasetsOwnedByEnvGroup,
 listDatasetsCreatedInEnvironment and getDataset)

---
 .../modules/datasets/api/dataset/queries.py        |  1 -
 .../modules/datasets/api/dataset/resolvers.py      |  8 --------
 .../dataall/modules/datasets/api/dataset/types.py  | 15 ---------------
 .../modules/datasets/services/dataset_service.py   |  6 ------
 4 files changed, 30 deletions(-)

diff --git a/backend/dataall/modules/datasets/api/dataset/queries.py b/backend/dataall/modules/datasets/api/dataset/queries.py
index c34852263..d1c0d2855 100644
--- a/backend/dataall/modules/datasets/api/dataset/queries.py
+++ b/backend/dataall/modules/datasets/api/dataset/queries.py
@@ -6,7 +6,6 @@
     list_owned_datasets,
     get_dataset_assume_role_url,
     get_file_upload_presigned_url,
-    list_dataset_share_objects,
     list_datasets_owned_by_env_group,
     list_datasets_created_in_environment,
 )
diff --git a/backend/dataall/modules/datasets/api/dataset/resolvers.py b/backend/dataall/modules/datasets/api/dataset/resolvers.py
index 942203e49..c7e8a5f4b 100644
--- a/backend/dataall/modules/datasets/api/dataset/resolvers.py
+++ b/backend/dataall/modules/datasets/api/dataset/resolvers.py
@@ -130,14 +130,6 @@ def start_crawler(context: Context, source, datasetUri: str, input: dict = None)
     return DatasetService.start_crawler(uri=datasetUri, data=input)
 
 
-def list_dataset_share_objects(context, source, filter: dict = None):
-    if not source:
-        return None
-    if not filter:
-        filter = {'page': 1, 'pageSize': 5}
-    return DatasetService.list_dataset_share_objects(source, filter)
-
-
 @is_feature_enabled('modules.datasets.features.aws_actions')
 def generate_dataset_access_token(context, source, datasetUri: str = None):
     return DatasetService.generate_dataset_access_token(uri=datasetUri)
diff --git a/backend/dataall/modules/datasets/api/dataset/types.py b/backend/dataall/modules/datasets/api/dataset/types.py
index 7426a1f5d..b4e3d7f8a 100644
--- a/backend/dataall/modules/datasets/api/dataset/types.py
+++ b/backend/dataall/modules/datasets/api/dataset/types.py
@@ -9,7 +9,6 @@
     list_locations,
     resolve_user_role,
     get_dataset_statistics,
-    list_dataset_share_objects,
     get_dataset_glossary_terms,
     get_dataset_stack,
 )
@@ -100,20 +99,6 @@
         ),
         gql.Field(name='userRoleInEnvironment', type=EnvironmentPermission.toGraphQLEnum()),
         gql.Field(name='statistics', type=DatasetStatistics, resolver=get_dataset_statistics),
-        gql.Field(
-            name='shares',
-            args=[gql.Argument(name='filter', type=gql.Ref('ShareObjectFilter'))],
-            type=gql.Ref('ShareSearchResult'),
-            resolver=list_dataset_share_objects,
-            test_scope='ShareObject',
-            test_cases=[
-                'anonymous',
-                'businessowner',
-                'admins',
-                'stewards',
-                'unauthorized',
-            ],
-        ),
         gql.Field(
             name='terms',
             resolver=get_dataset_glossary_terms,
diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py
index b2a231c66..382d48eea 100644
--- a/backend/dataall/modules/datasets/services/dataset_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_service.py
@@ -22,7 +22,6 @@
 from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository
 from dataall.modules.datasets.db.dataset_bucket_repositories import DatasetBucketRepository
 from dataall.modules.vote.db.vote_repositories import VoteRepository
-from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository
 from dataall.modules.datasets.aws.glue_dataset_client import DatasetCrawler
 from dataall.modules.datasets.aws.s3_dataset_client import S3DatasetClient
 from dataall.modules.datasets.db.dataset_location_repositories import DatasetLocationRepository
@@ -412,11 +411,6 @@ def start_crawler(uri: str, data: dict = None):
             'status': crawler.get('LastCrawl', {}).get('Status', 'N/A'),
         }
 
-    @staticmethod
-    def list_dataset_share_objects(dataset: Dataset, data: dict = None):
-        with get_context().db_engine.scoped_session() as session:
-            return ShareObjectRepository.paginated_dataset_shares(session=session, uri=dataset.datasetUri, data=data)
-
     @staticmethod
     @ResourcePolicyService.has_resource_permission(CREDENTIALS_DATASET)
     def generate_dataset_access_token(uri):

From 61555622318768797631fca0a9282042832a260f Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Tue, 16 Apr 2024 17:54:16 +0200
Subject: [PATCH 17/18] Move get_shared_tables to data_sharing module and fix
 reference to DatasetService

---
 .../modules/dataset_sharing/api/queries.py         | 11 +++++
 .../modules/dataset_sharing/api/resolvers.py       |  5 ++-
 .../db/share_object_repositories.py                | 38 ++++++++++++++++++
 .../services/dataset_sharing_service.py            | 13 +++++-
 .../modules/datasets/api/table/queries.py          | 12 +-----
 .../modules/datasets/api/table/resolvers.py        |  4 --
 .../datasets/db/dataset_table_repositories.py      | 40 -------------------
 .../services/dataset_location_service.py           |  5 ++-
 .../services/dataset_table_service.py              | 16 ++------
 9 files changed, 72 insertions(+), 72 deletions(-)

diff --git a/backend/dataall/modules/dataset_sharing/api/queries.py b/backend/dataall/modules/dataset_sharing/api/queries.py
index 83798210c..a0f08ca58 100644
--- a/backend/dataall/modules/dataset_sharing/api/queries.py
+++ b/backend/dataall/modules/dataset_sharing/api/queries.py
@@ -6,6 +6,7 @@
     list_shared_with_environment_data_items,
     list_shares_in_my_inbox,
     list_shares_in_my_outbox,
+    list_shared_tables_by_env_dataset,
 )
 
 getShareObject = gql.QueryField(
@@ -59,3 +60,13 @@
     ],
     type=gql.Ref('ShareSearchResult'),
 )
+
+getSharedDatasetTables = gql.QueryField(
+    name='getSharedDatasetTables',
+    args=[
+        gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String)),
+        gql.Argument(name='envUri', type=gql.NonNullableType(gql.String)),
+    ],
+    type=gql.ArrayType(gql.Ref('SharedDatasetTableItem')),
+    resolver=list_shared_tables_by_env_dataset,
+)
diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py
index 75823656c..6d943c701 100644
--- a/backend/dataall/modules/dataset_sharing/api/resolvers.py
+++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py
@@ -331,4 +331,7 @@ def list_dataset_share_objects(context, source, filter: dict = None):
         return None
     if not filter:
         filter = {'page': 1, 'pageSize': 5}
-    return DatasetSharingService.list_dataset_share_objects(source, filter)
\ No newline at end of file
+    return DatasetSharingService.list_dataset_share_objects(source, filter)
+
+def list_shared_tables_by_env_dataset(context: Context, source, datasetUri: str, envUri: str):
+    return DatasetSharingService.list_shared_tables_by_env_dataset(datasetUri, envUri)
\ No newline at end of file
diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
index 400b607b4..f5a7351cf 100644
--- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
+++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
@@ -1280,3 +1280,41 @@ def count_role_principal_shares(session, principal_id: str, principal_type: PrincipalType) -> int:
             )
             .count()
         )
+
+    @staticmethod
+    def query_dataset_tables_shared_with_env(
+        session, environment_uri: str, dataset_uri: str, username: str, groups: [str]
+    ):
+        """For a given dataset, returns the list of Tables shared with the environment
+        This means looking at approved ShareObject items
+        for the share object associating the dataset and environment
+        """
+        share_item_shared_states = ShareItemSM.get_share_item_shared_states()
+        env_tables_shared = (
+            session.query(DatasetTable)  # all tables
+            .join(
+                ShareObjectItem,  # found in ShareObjectItem
+                ShareObjectItem.itemUri == DatasetTable.tableUri,
+            )
+            .join(
+                ShareObject,  # jump to share object
+                ShareObject.shareUri == ShareObjectItem.shareUri,
+            )
+            .filter(
+                and_(
+                    ShareObject.datasetUri == dataset_uri,  # for this dataset
+                    ShareObject.environmentUri == environment_uri,  # for this environment
+                    ShareObjectItem.status.in_(share_item_shared_states),
+                    ShareObject.principalType
+                    != PrincipalType.ConsumptionRole.value,  # Exclude Consumption roles shares
+                    or_(
+                        ShareObject.owner == username,
+                        ShareObject.principalId.in_(groups),
+                    ),
+                )
+            )
+            .all()
+        )
+
+        return env_tables_shared
+
diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
index 15613ba38..7dd1d2dba 100644
--- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
@@ -31,6 +31,7 @@ class DatasetSharingService(DatasetServiceInterface):
+    # TODO: when we abstract dataset_sharing_base from s3_dataset_sharing this class won't pollute the shares module
     @staticmethod
     def append_to_list_user_datasets(session, username, groups):
@@ -119,11 +120,21 @@ def list_dataset_share_objects(dataset: Dataset, data: dict = None):
         with get_context().db_engine.scoped_session() as session:
             return ShareObjectRepository.paginated_dataset_shares(session=session, uri=dataset.datasetUri, data=data)
 
+    @staticmethod
+    def list_shared_tables_by_env_dataset(dataset_uri: str, env_uri: str):
+        context = get_context()
+        with context.db_engine.scoped_session() as session:
+            return [
+                {'tableUri': t.tableUri, 'GlueTableName': t.GlueTableName}
+                for t in ShareObjectRepository.query_dataset_tables_shared_with_env(
+                    session, env_uri, dataset_uri, context.username, context.groups
+                )
+            ]
+
     @staticmethod
     @TenantPolicyService.has_tenant_permission(MANAGE_DATASETS)
     @ResourcePolicyService.has_resource_permission(UPDATE_DATASET)
     def verify_dataset_share_objects(uri: str, share_uris: list):
-        #TODO: when we abstract dataset_sharing_base from s3_dataset_sharing this function won't pollute the ShareObject service
         with get_context().db_engine.scoped_session() as session:
             for share_uri in share_uris:
                 share = ShareObjectRepository.get_share_by_uri(session, share_uri)
diff --git a/backend/dataall/modules/datasets/api/table/queries.py b/backend/dataall/modules/datasets/api/table/queries.py
index 6fd33ba5e..04e3e64ad 100644
--- a/backend/dataall/modules/datasets/api/table/queries.py
+++ b/backend/dataall/modules/datasets/api/table/queries.py
@@ -1,6 +1,6 @@
 from dataall.base.api import gql
 from dataall.modules.datasets.api.table.input_types import DatasetTableFilter
-from dataall.modules.datasets.api.table.resolvers import get_table, list_shared_tables_by_env_dataset, preview
+from dataall.modules.datasets.api.table.resolvers import get_table, preview
 from dataall.modules.datasets.api.table.types import (
     DatasetTable,
     DatasetTableSearchResult,
@@ -36,13 +36,3 @@
     resolver=preview,
     type=gql.Ref('QueryPreviewResult'),
 )
-
-getSharedDatasetTables = gql.QueryField(
-    name='getSharedDatasetTables',
-    args=[
-        gql.Argument(name='datasetUri', type=gql.NonNullableType(gql.String)),
-        gql.Argument(name='envUri', type=gql.NonNullableType(gql.String)),
-    ],
-    type=gql.ArrayType(gql.Ref('SharedDatasetTableItem')),
-    resolver=list_shared_tables_by_env_dataset,
-)
diff --git a/backend/dataall/modules/datasets/api/table/resolvers.py b/backend/dataall/modules/datasets/api/table/resolvers.py
index 0b312b5ea..19f8991bc 100644
--- a/backend/dataall/modules/datasets/api/table/resolvers.py
+++ b/backend/dataall/modules/datasets/api/table/resolvers.py
@@ -56,7 +56,3 @@ def resolve_glossary_terms(context: Context, source: DatasetTable, **kwargs):
         return None
     with context.engine.scoped_session() as session:
         return GlossaryRepository.get_glossary_terms_links(session, source.tableUri, 'DatasetTable')
-
-
-def list_shared_tables_by_env_dataset(context: Context, source, datasetUri: str, envUri: str):
-    return DatasetTableService.list_shared_tables_by_env_dataset(datasetUri, envUri)
diff --git a/backend/dataall/modules/datasets/db/dataset_table_repositories.py b/backend/dataall/modules/datasets/db/dataset_table_repositories.py
index 13188fd9a..4ecbf5251 100644
--- a/backend/dataall/modules/datasets/db/dataset_table_repositories.py
+++ b/backend/dataall/modules/datasets/db/dataset_table_repositories.py
@@ -1,13 +1,9 @@
 import logging
 from datetime import datetime
 
-from sqlalchemy import or_
 from sqlalchemy.sql import and_
 
 from dataall.base.db import exceptions
-from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
-from dataall.modules.dataset_sharing.db.share_object_repositories import ShareItemSM
-from dataall.modules.dataset_sharing.services.dataset_sharing_enums import PrincipalType
 from dataall.modules.datasets.db.dataset_models import DatasetTableColumn, DatasetTable, Dataset
 from dataall.base.utils import json_utils
 
@@ -43,42 +39,6 @@ def create_synced_table(session, dataset: Dataset, table: dict):
     def delete(session, table: DatasetTable):
         session.delete(table)
 
-    @staticmethod
-    def query_dataset_tables_shared_with_env(
-        session, environment_uri: str, dataset_uri: str, username: str, groups: [str]
-    ):
-        """For a given dataset, returns the list of Tables shared with the environment
-        This means looking at approved ShareObject items
-        for the share object associating the dataset and environment
-        """
-        share_item_shared_states = ShareItemSM.get_share_item_shared_states()
-        env_tables_shared = (
-            session.query(DatasetTable)  # all tables
-            .join(
-                ShareObjectItem,  # found in ShareObjectItem
-                ShareObjectItem.itemUri == DatasetTable.tableUri,
-            )
-            .join(
-                ShareObject,  # jump to share object
-                ShareObject.shareUri == ShareObjectItem.shareUri,
-            )
-            .filter(
-                and_(
-                    ShareObject.datasetUri == dataset_uri,  # for this dataset
-                    ShareObject.environmentUri == environment_uri,  # for this environment
-                    ShareObjectItem.status.in_(share_item_shared_states),
-                    ShareObject.principalType
-                    != PrincipalType.ConsumptionRole.value,  # Exclude Consumption roles shares
-                    or_(
-                        ShareObject.owner == username,
-                        ShareObject.principalId.in_(groups),
-                    ),
-                )
-            )
-            .all()
-        )
-
-        return env_tables_shared
 
     @staticmethod
     def get_dataset_table_by_uri(session, table_uri):
diff --git a/backend/dataall/modules/datasets/services/dataset_location_service.py b/backend/dataall/modules/datasets/services/dataset_location_service.py
index 50f078380..8ea17c28a 100644
--- a/backend/dataall/modules/datasets/services/dataset_location_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_location_service.py
@@ -14,6 +14,7 @@
     DELETE_DATASET_FOLDER,
 )
 from dataall.modules.datasets.services.dataset_permissions import DATASET_FOLDER_READ, GET_DATASET_FOLDER
+from dataall.modules.datasets.services.dataset_service import DatasetService
 from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
 from dataall.modules.datasets.db.dataset_models import DatasetStorageLocation, Dataset
 
@@ -86,8 +87,8 @@ def remove_storage_location(uri: str = None):
         with get_context().db_engine.scoped_session() as session:
             location = DatasetLocationRepository.get_location_by_uri(session, uri)
             dataset = DatasetRepository.get_dataset_by_uri(session, location.datasetUri)
-            DatasetRepository.check_before_delete(session, location.locationUri, action=DELETE_DATASET_FOLDER)
-            DatasetRepository.execute_on_delete(session, location.locationUri, action=DELETE_DATASET_FOLDER)
+            DatasetService.check_before_delete(session, location.locationUri, action=DELETE_DATASET_FOLDER)
+            DatasetService.execute_on_delete(session, location.locationUri, action=DELETE_DATASET_FOLDER)
             DatasetLocationService._delete_dataset_folder_read_permission(session, dataset, location.locationUri)
             DatasetLocationRepository.delete(session, location)
             GlossaryRepository.delete_glossary_terms_links(
diff --git a/backend/dataall/modules/datasets/services/dataset_table_service.py b/backend/dataall/modules/datasets/services/dataset_table_service.py
index 1f182b184..652ae6e4b 100644
--- a/backend/dataall/modules/datasets/services/dataset_table_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_table_service.py
@@ -17,6 +17,7 @@
 )
 from dataall.modules.datasets.db.dataset_repositories import DatasetRepository
 from dataall.modules.datasets.services.datasets_base_enums import ConfidentialityClassification
+from dataall.modules.datasets.services.dataset_service import DatasetService
 from dataall.modules.datasets.db.dataset_models import DatasetTable, Dataset
 from dataall.modules.datasets.services.dataset_permissions import (
     PREVIEW_DATASET_TABLE,
@@ -65,8 +66,8 @@ def update_table(uri: str, table_data: dict = None):
     def delete_table(uri: str):
         with get_context().db_engine.scoped_session() as session:
             table = DatasetTableRepository.get_dataset_table_by_uri(session, uri)
-            DatasetRepository.check_before_delete(session, table.tableUri, action=DELETE_DATASET_TABLE)
-            DatasetRepository.execute_on_delete(session, table.tableUri, action=DELETE_DATASET_TABLE)
+            DatasetService.check_before_delete(session, table.tableUri, action=DELETE_DATASET_TABLE)
+            DatasetService.execute_on_delete(session, table.tableUri, action=DELETE_DATASET_TABLE)
             GlossaryRepository.delete_glossary_terms_links(
                 session, target_uri=table.tableUri, target_type='DatasetTable'
             )
@@ -100,17 +101,6 @@ def get_glue_table_properties(uri: str):
             table: DatasetTable = DatasetTableRepository.get_dataset_table_by_uri(session, uri)
             return json_utils.to_string(table.GlueTableProperties).replace('\\', ' ')
 
-    @staticmethod
-    def list_shared_tables_by_env_dataset(dataset_uri: str, env_uri: str):
-        context = get_context()
-        with context.db_engine.scoped_session() as session:
-            return [
-                {'tableUri': t.tableUri, 'GlueTableName': t.GlueTableName}
-                for t in DatasetTableRepository.query_dataset_tables_shared_with_env(
-                    session, env_uri, dataset_uri, context.username, context.groups
-                )
-            ]
-
     @classmethod
     @ResourcePolicyService.has_resource_permission(SYNC_DATASET)
     def sync_tables_for_dataset(cls, uri):
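
[Editor's note — illustrative sketch, not part of the patch series.] Patch 17 repoints folder and table deletion from DatasetRepository to DatasetService, so that pre-delete checks and clean-up fan out over the registered module interfaces. The aggregator methods themselves are not shown in this excerpt; the sketch below is an assumed shape, inferred from the `DatasetServiceInterface.check_before_delete` contract visible in patch 18's context and the `execute_on_delete` call sites in patch 17. `DatasetServiceSketch` is a stand-in name, not the real class.

    from typing import List

    # Minimal self-contained sketch of the fan-out; the real DatasetService
    # lives in dataall.modules.datasets.services.dataset_service.
    class DatasetServiceSketch:
        _interfaces: List = []  # populated by dependent modules at import time

        @classmethod
        def check_before_delete(cls, session, uri, **kwargs) -> bool:
            # Any registered module interface may veto the deletion,
            # e.g. dataset_sharing while share items are still active.
            return all(i.check_before_delete(session, uri, **kwargs) for i in cls._interfaces)

        @classmethod
        def execute_on_delete(cls, session, uri, **kwargs) -> bool:
            # Each registered module runs its own clean-up side effects.
            for i in cls._interfaces:
                i.execute_on_delete(session, uri, **kwargs)
            return True

With that indirection in place, dataset_location_service.py and dataset_table_service.py below no longer need any knowledge of the sharing module.
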
From 3a00066549c0aed77ec32ee66b8ba65b7f4f9844 Mon Sep 17 00:00:00 2001
From: dlpzx
Date: Tue, 16 Apr 2024 18:03:06 +0200
Subject: [PATCH 18/18] Linting

---
 .../services/environment_resource_manager.py          |  1 -
 .../dataall/modules/dataset_sharing/api/resolvers.py  |  5 ++++-
 .../dataset_sharing/db/share_object_repositories.py   | 10 +---------
 .../services/dataset_sharing_service.py               |  3 +--
 .../services/share_managers/lf_share_manager.py       |  4 +++-
 .../dataset_sharing/services/share_object_service.py  |  1 -
 backend/dataall/modules/datasets/__init__.py          |  2 --
 .../dataall/modules/datasets/api/dataset/resolvers.py |  4 +++-
 .../modules/datasets/aws/kms_dataset_client.py        |  1 -
 .../modules/datasets/db/dataset_table_repositories.py |  1 -
 .../modules/datasets/services/dataset_service.py      | 11 +++++++----
 11 files changed, 19 insertions(+), 24 deletions(-)

diff --git a/backend/dataall/core/environment/services/environment_resource_manager.py b/backend/dataall/core/environment/services/environment_resource_manager.py
index 7060a7341..125e113f0 100644
--- a/backend/dataall/core/environment/services/environment_resource_manager.py
+++ b/backend/dataall/core/environment/services/environment_resource_manager.py
@@ -24,7 +24,6 @@ def count_role_resources(session, role_uri):
         return 0
 
 
-
 class EnvironmentResourceManager:
     """
     API for managing environment and environment group lifecycle.
diff --git a/backend/dataall/modules/dataset_sharing/api/resolvers.py b/backend/dataall/modules/dataset_sharing/api/resolvers.py
index 6d943c701..922b2a6c9 100644
--- a/backend/dataall/modules/dataset_sharing/api/resolvers.py
+++ b/backend/dataall/modules/dataset_sharing/api/resolvers.py
@@ -315,12 +315,14 @@ def update_share_reject_purpose(context: Context, source, shareUri: str = None, rejectPurpose: str = None):
         reject_purpose=rejectPurpose,
     )
 
+
 def verify_dataset_share_objects(context: Context, source, input):
     RequestValidator.validate_dataset_share_selector_input(input)
     dataset_uri = input.get('datasetUri')
     verify_share_uris = input.get('shareUris')
     return DatasetSharingService.verify_dataset_share_objects(uri=dataset_uri, share_uris=verify_share_uris)
 
+
 @is_feature_enabled('modules.datasets.features.aws_actions')
 def get_dataset_shared_assume_role_url(context: Context, source, datasetUri: str = None):
     return DatasetSharingService.get_dataset_shared_assume_role_url(uri=datasetUri)
@@ -333,5 +335,6 @@ def list_dataset_share_objects(context, source, filter: dict = None):
         filter = {'page': 1, 'pageSize': 5}
     return DatasetSharingService.list_dataset_share_objects(source, filter)
 
+
 def list_shared_tables_by_env_dataset(context: Context, source, datasetUri: str, envUri: str):
-    return DatasetSharingService.list_shared_tables_by_env_dataset(datasetUri, envUri)
\ No newline at end of file
+    return DatasetSharingService.list_shared_tables_by_env_dataset(datasetUri, envUri)
diff --git a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
index f5a7351cf..716dcb152 100644
--- a/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
+++ b/backend/dataall/modules/dataset_sharing/db/share_object_repositories.py
@@ -327,7 +327,6 @@ def delete_env(session, environment):
 
 
 class ShareObjectRepository:
-
     @staticmethod
     def save_and_commit(session, share):
         session.add(share)
@@ -373,12 +372,7 @@ def get_share_by_dataset_attributes(session, dataset_uri, dataset_owner, groups=[]):
         share: ShareObject = (
             session.query(ShareObject)
             .filter(ShareObject.datasetUri == dataset_uri)
-            .filter(
-                or_(
-                    ShareObject.owner == dataset_owner,
-                    ShareObject.principalId.in_(groups)
-                )
-            )
+            .filter(or_(ShareObject.owner == dataset_owner, ShareObject.principalId.in_(groups)))
             .first()
         )
         return share
@@ -1001,7 +995,6 @@ def query_user_shared_datasets(session, username, groups) -> Query:
         )
         return query.distinct(Dataset.datasetUri)
 
-
     @staticmethod
     def find_dataset_shares(session, dataset_uri):
         return session.query(ShareObject).filter(ShareObject.datasetUri == dataset_uri).all()
@@ -1317,4 +1310,3 @@ def query_dataset_tables_shared_with_env(
         )
 
         return env_tables_shared
-
diff --git a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
index 7dd1d2dba..7f8b06037 100644
--- a/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/dataset_sharing_service.py
@@ -18,7 +18,7 @@
     DELETE_DATASET,
     DELETE_DATASET_TABLE,
     DELETE_DATASET_FOLDER,
-    CREDENTIALS_DATASET
+    CREDENTIALS_DATASET,
 )
 
 from dataall.modules.datasets.services.dataset_service import DatasetServiceInterface
@@ -171,7 +171,6 @@ def get_dataset_shared_assume_role_url(uri):
             account_id = shared_environment.AwsAccountId
             region = shared_environment.region
 
-
         pivot_session = SessionHelper.remote_session(account_id, region)
         aws_session = SessionHelper.get_session(base_session=pivot_session, role_arn=role_arn)
         url = SessionHelper.get_console_access_url(
diff --git a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
index b4f4a54ad..51198d06e 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_managers/lf_share_manager.py
@@ -604,7 +604,9 @@ def handle_revoke_failure(
             f'with target account {self.target_environment.AwsAccountId}/{self.target_environment.region} '
             f'due to: {error}'
         )
-        DatasetSharingAlarmService().trigger_revoke_table_sharing_failure_alarm(table, self.share, self.target_environment)
+        DatasetSharingAlarmService().trigger_revoke_table_sharing_failure_alarm(
+            table, self.share, self.target_environment
+        )
         return True
 
     def handle_share_failure_for_all_tables(self, tables, error, share_item_status, reapply=False):
diff --git a/backend/dataall/modules/dataset_sharing/services/share_object_service.py b/backend/dataall/modules/dataset_sharing/services/share_object_service.py
index 70759f213..5ae6c1fce 100644
--- a/backend/dataall/modules/dataset_sharing/services/share_object_service.py
+++ b/backend/dataall/modules/dataset_sharing/services/share_object_service.py
@@ -53,7 +53,6 @@
 
 
 class ShareObjectService:
-
     @staticmethod
     def verify_principal_role(session, share: ShareObject) -> bool:
         role_name = share.principalIAMRoleName
diff --git a/backend/dataall/modules/datasets/__init__.py b/backend/dataall/modules/datasets/__init__.py
index 5f11885ff..dd847a1b3 100644
--- a/backend/dataall/modules/datasets/__init__.py
+++ b/backend/dataall/modules/datasets/__init__.py
@@ -91,7 +91,6 @@ def __init__(self):
         log.info('Dataset handlers have been imported')
 
 
-
 class DatasetCdkModuleInterface(ModuleInterface):
     """Loads dataset cdk stacks"""
 
@@ -99,7 +98,6 @@ class DatasetCdkModuleInterface(ModuleInterface):
     def is_supported(modes: Set[ImportMode]):
         return ImportMode.CDK in modes
 
-
     def __init__(self):
         import dataall.modules.datasets.cdk
         from dataall.core.environment.cdk.environment_stack import EnvironmentSetup
diff --git a/backend/dataall/modules/datasets/api/dataset/resolvers.py b/backend/dataall/modules/datasets/api/dataset/resolvers.py
index c7e8a5f4b..9aacb48e0 100644
--- a/backend/dataall/modules/datasets/api/dataset/resolvers.py
+++ b/backend/dataall/modules/datasets/api/dataset/resolvers.py
@@ -45,7 +45,9 @@ def resolve_user_role(context: Context, source: Dataset, **kwargs):
         return DatasetRole.DataSteward.value
     else:
         with context.engine.scoped_session() as session:
-            other_modules_user_role = DatasetService.get_other_modules_dataset_user_role(session, source.datasetUri, context.username, context.groups)
+            other_modules_user_role = DatasetService.get_other_modules_dataset_user_role(
+                session, source.datasetUri, context.username, context.groups
+            )
             if other_modules_user_role is not None:
                 return other_modules_user_role
     return DatasetRole.NoPermission.value
diff --git a/backend/dataall/modules/datasets/aws/kms_dataset_client.py b/backend/dataall/modules/datasets/aws/kms_dataset_client.py
index 36e545d62..3b3a5d4a5 100644
--- a/backend/dataall/modules/datasets/aws/kms_dataset_client.py
+++ b/backend/dataall/modules/datasets/aws/kms_dataset_client.py
@@ -16,7 +16,6 @@ def __init__(self, account_id: str, region: str):
         self._account_id = account_id
         self.region = region
 
-
     def get_key_id(self, key_alias: str):
         # The same client function is defined in the data_sharing module. Duplication is allowed to avoid coupling.
         try:
diff --git a/backend/dataall/modules/datasets/db/dataset_table_repositories.py b/backend/dataall/modules/datasets/db/dataset_table_repositories.py
index 4ecbf5251..6dd3f1325 100644
--- a/backend/dataall/modules/datasets/db/dataset_table_repositories.py
+++ b/backend/dataall/modules/datasets/db/dataset_table_repositories.py
@@ -39,7 +39,6 @@ def create_synced_table(session, dataset: Dataset, table: dict):
     def delete(session, table: DatasetTable):
         session.delete(table)
 
-
     @staticmethod
     def get_dataset_table_by_uri(session, table_uri):
         table: DatasetTable = session.query(DatasetTable).get(table_uri)
diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py
index 382d48eea..75e713b7e 100644
--- a/backend/dataall/modules/datasets/services/dataset_service.py
+++ b/backend/dataall/modules/datasets/services/dataset_service.py
@@ -46,6 +46,7 @@
 
 log = logging.getLogger(__name__)
 
+
 class DatasetServiceInterface(ABC):
     @staticmethod
     def check_before_delete(session, uri, **kwargs) -> bool:
@@ -64,16 +65,16 @@ def append_to_list_user_datasets(session, username, groups):
 
     @staticmethod
     def resolve_additional_dataset_user_role(session, uri, username, groups):
-        """Abstract method to be implemented by dependent modules that want to add new types of user role in relation to a Dataset """
+        """Abstract method to be implemented by dependent modules that want to add new types of user role in relation to a Dataset"""
         return None
 
     @staticmethod
     def extend_attach_steward_permissions(session, dataset, new_stewards) -> bool:
-        """Abstract method to be implemented by dependent modules that want to attach additional permissions to Dataset stewards """
+        """Abstract method to be implemented by dependent modules that want to attach additional permissions to Dataset stewards"""
         return True
 
     def extend_delete_steward_permissions(session, dataset, new_stewards) -> bool:
-        """Abstract method to be implemented by dependent modules that want to attach additional permissions to Dataset stewards """
+        """Abstract method to be implemented by dependent modules that want to attach additional permissions to Dataset stewards"""
        return True
 
 
@@ -268,7 +269,9 @@ def list_all_user_datasets(data: dict):
         context = get_context()
         with context.db_engine.scoped_session() as session:
             all_subqueries = DatasetService._list_all_user_interface_datasets(session, context.username, context.groups)
-            return DatasetRepository.paginated_all_user_datasets(session, context.username, context.groups, all_subqueries, data=data)
+            return DatasetRepository.paginated_all_user_datasets(
+                session, context.username, context.groups, all_subqueries, data=data
+            )
 
     @staticmethod
     def list_owned_datasets(data: dict):