Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generic dataset module and specific s3_datasets module - part 3 (Create DatasetBase db model and S3Dataset model) #1258

Merged
merged 28 commits into from
May 17, 2024
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
035a637
Rename datasets to s3_datasets
dlpzx May 6, 2024
aa99257
Rename datasets to s3_datasets
dlpzx May 6, 2024
8ea5ca2
Rename datasets to s3_datasets
dlpzx May 6, 2024
2ad4a2e
Fix references to config in frontend
dlpzx May 6, 2024
59d4c93
Merge branch 'refs/heads/main' into feat/generic-dataset-model-refact…
dlpzx May 6, 2024
17fe992
Fix s3_dataset references frontend
dlpzx May 6, 2024
2c333a2
Added datasets_base module and dependencies
dlpzx May 7, 2024
7d66809
Moved dataset_enums to datasets_base
dlpzx May 7, 2024
38a2275
Use S3Dataset instead of Dataset in s3_dataset module
dlpzx May 7, 2024
fe71766
Use S3Dataset instead of Dataset in dataset_sharing module
dlpzx May 7, 2024
3985167
Use S3Dataset instead of Dataset in tests+some missing in modules
dlpzx May 7, 2024
f05a4f6
Use S3Dataset instead of Dataset in migration scripts and init
dlpzx May 7, 2024
aacd1f0
Fix foreign key between datasetBase and s3dataset
dlpzx May 7, 2024
41db8ed
Fix migration references to Dataset and add new migration script with…
dlpzx May 7, 2024
b3849cd
Added first draft of migration scripts
dlpzx May 7, 2024
313dd89
Merge remote-tracking branch 'refs/remotes/origin/main' into feat/gen…
dlpzx May 8, 2024
7e287d1
Fix details of init files
dlpzx May 8, 2024
98660df
Finis migration scripts
dlpzx May 10, 2024
c4ec66d
Add datasets_base in config.json
dlpzx May 13, 2024
58ea763
Merge remote-tracking branch 'refs/remotes/origin/feat/generic-datase…
dlpzx May 14, 2024
449d689
Merge remote-tracking branch 'refs/remotes/origin/main' into feat/gen…
dlpzx May 15, 2024
5d472e8
Adapt permission resourceType to DatasetBase
dlpzx May 15, 2024
8a573da
Adapt permission resourceType to DatasetBase
dlpzx May 15, 2024
b806100
linting
dlpzx May 15, 2024
b31922f
Fix issues in foreign keys migration scripts
dlpzx May 15, 2024
ffe372e
PR review comments - fix downgrade and add enums to dataset tables
dlpzx May 16, 2024
3a6765e
Fixes from PR review: stewards and polymorphic definition with enum
dlpzx May 17, 2024
efcd7c9
Fix foreign key of dataset_bucket in dataset_models
dlpzx May 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions backend/dataall/modules/dataset_sharing/api/resolvers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from dataall.modules.dataset_sharing.services.dataset_sharing_service import DatasetSharingService
from dataall.modules.dataset_sharing.aws.glue_client import GlueClient
from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository
from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset
from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, S3Dataset

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -136,7 +136,7 @@ def resolve_user_role(context: Context, source: ShareObject, **kwargs):
if not source:
return None
with context.engine.scoped_session() as session:
dataset: Dataset = DatasetRepository.get_dataset_by_uri(session, source.datasetUri)
dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, source.datasetUri)

can_approve = (
True
Expand Down Expand Up @@ -168,7 +168,7 @@ def resolve_dataset(context: Context, source: ShareObject, **kwargs):
if not source:
return None
with context.engine.scoped_session() as session:
ds: Dataset = DatasetRepository.get_dataset_by_uri(session, source.datasetUri)
ds: S3Dataset = DatasetRepository.get_dataset_by_uri(session, source.datasetUri)
if ds:
env: Environment = EnvironmentService.get_environment_by_uri(session, ds.environmentUri)
return {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
)
from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository
from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset, DatasetBucket
from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, DatasetTable, S3Dataset, DatasetBucket

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -337,7 +337,7 @@ def list_all_active_share_objects(session) -> [ShareObject]:
return session.query(ShareObject).filter(ShareObject.deleted.is_(None)).all()

@staticmethod
def find_share(session, dataset: Dataset, env, principal_id, group_uri) -> ShareObject:
def find_share(session, dataset: S3Dataset, env, principal_id, group_uri) -> ShareObject:
return (
session.query(ShareObject)
.filter(
Expand Down Expand Up @@ -603,15 +603,15 @@ def list_user_received_share_requests(session, username, groups, data=None):
query = (
session.query(ShareObject)
.join(
Dataset,
Dataset.datasetUri == ShareObject.datasetUri,
S3Dataset,
S3Dataset.datasetUri == ShareObject.datasetUri,
)
.filter(
or_(
Dataset.businessOwnerEmail == username,
Dataset.businessOwnerDelegationEmails.contains(f'{{{username}}}'),
Dataset.stewards.in_(groups),
Dataset.SamlAdminGroupName.in_(groups),
S3Dataset.businessOwnerEmail == username,
S3Dataset.businessOwnerDelegationEmails.contains(f'{{{username}}}'),
S3Dataset.stewards.in_(groups),
S3Dataset.SamlAdminGroupName.in_(groups),
)
)
)
Expand All @@ -621,7 +621,7 @@ def list_user_received_share_requests(session, username, groups, data=None):
query = query.filter(ShareObject.status.in_(data.get('status')))
if data and data.get('dataset_owners'):
if len(data.get('dataset_owners')) > 0:
query = query.filter(Dataset.SamlAdminGroupName.in_(data.get('dataset_owners')))
query = query.filter(S3Dataset.SamlAdminGroupName.in_(data.get('dataset_owners')))
if data and data.get('datasets_uris'):
if len(data.get('datasets_uris')) > 0:
query = query.filter(ShareObject.datasetUri.in_(data.get('datasets_uris')))
Expand All @@ -642,8 +642,8 @@ def list_user_sent_share_requests(session, username, groups, data=None):
Environment.environmentUri == ShareObject.environmentUri,
)
.join(
Dataset,
Dataset.datasetUri == ShareObject.datasetUri,
S3Dataset,
S3Dataset.datasetUri == ShareObject.datasetUri,
)
.filter(
or_(
Expand All @@ -660,7 +660,7 @@ def list_user_sent_share_requests(session, username, groups, data=None):
query = query.filter(ShareObject.status.in_(data.get('status')))
if data and data.get('dataset_owners'):
if len(data.get('dataset_owners')) > 0:
query = query.filter(Dataset.SamlAdminGroupName.in_(data.get('dataset_owners')))
query = query.filter(S3Dataset.SamlAdminGroupName.in_(data.get('dataset_owners')))
if data and data.get('datasets_uris'):
if len(data.get('datasets_uris')) > 0:
query = query.filter(ShareObject.datasetUri.in_(data.get('datasets_uris')))
Expand Down Expand Up @@ -771,7 +771,7 @@ def update_share_item_status_batch(
def get_share_data(session, share_uri):
share: ShareObject = ShareObjectRepository.get_share_by_uri(session, share_uri)

dataset: Dataset = DatasetRepository.get_dataset_by_uri(session, share.datasetUri)
dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, share.datasetUri)

source_environment: Environment = session.query(Environment).get(dataset.environmentUri)
if not source_environment:
Expand Down Expand Up @@ -1002,10 +1002,10 @@ def delete_shares_with_no_shared_items(session, dataset_uri):
def query_user_shared_datasets(session, username, groups) -> Query:
share_item_shared_states = ShareItemSM.get_share_item_shared_states()
query = (
session.query(Dataset)
session.query(S3Dataset)
.outerjoin(
ShareObject,
ShareObject.datasetUri == Dataset.datasetUri,
ShareObject.datasetUri == S3Dataset.datasetUri,
)
.outerjoin(ShareObjectItem, ShareObjectItem.shareUri == ShareObject.shareUri)
.filter(
Expand All @@ -1021,7 +1021,7 @@ def query_user_shared_datasets(session, username, groups) -> Query:
)
)
)
return query.distinct(Dataset.datasetUri)
return query.distinct(S3Dataset.datasetUri)

@staticmethod
def find_dataset_shares(session, dataset_uri):
Expand Down Expand Up @@ -1114,9 +1114,9 @@ def paginate_shared_datasets(session, env_uri, data):
q = (
session.query(
ShareObjectItem.shareUri.label('shareUri'),
Dataset.datasetUri.label('datasetUri'),
Dataset.name.label('datasetName'),
Dataset.description.label('datasetDescription'),
S3Dataset.datasetUri.label('datasetUri'),
S3Dataset.name.label('datasetName'),
S3Dataset.description.label('datasetDescription'),
Environment.environmentUri.label('environmentUri'),
Environment.name.label('environmentName'),
ShareObject.created.label('created'),
Expand Down Expand Up @@ -1152,12 +1152,12 @@ def paginate_shared_datasets(session, env_uri, data):
ShareObject.shareUri == ShareObjectItem.shareUri,
)
.join(
Dataset,
ShareObject.datasetUri == Dataset.datasetUri,
S3Dataset,
ShareObject.datasetUri == S3Dataset.datasetUri,
)
.join(
Environment,
Environment.environmentUri == Dataset.environmentUri,
Environment.environmentUri == S3Dataset.environmentUri,
)
.join(
Organization,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
CREDENTIALS_DATASET,
)

from dataall.modules.s3_datasets.db.dataset_models import Dataset
from dataall.modules.s3_datasets.db.dataset_models import S3Dataset
from dataall.modules.datasets_base.services.datasets_enums import DatasetRole
from dataall.modules.s3_datasets.services.dataset_service import DatasetServiceInterface

Expand Down Expand Up @@ -130,7 +130,7 @@ def verify_dataset_share_objects(uri: str, share_uris: list):
return True

@staticmethod
def list_dataset_share_objects(dataset: Dataset, data: dict = None):
def list_dataset_share_objects(dataset: S3Dataset, data: dict = None):
with get_context().db_engine.scoped_session() as session:
return ShareObjectRepository.paginated_dataset_shares(session=session, uri=dataset.datasetUri, data=data)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
APPROVE_SHARE_OBJECT,
)
from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository
from dataall.modules.s3_datasets.db.dataset_models import Dataset
from dataall.modules.s3_datasets.db.dataset_models import S3Dataset

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -124,7 +124,7 @@ def add_shared_item(uri: str, data: dict = None):
item_type = data.get('itemType')
item_uri = data.get('itemUri')
share = ShareObjectRepository.get_share_by_uri(session, uri)
dataset: Dataset = DatasetRepository.get_dataset_by_uri(session, share.datasetUri)
dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, share.datasetUri)
target_environment = EnvironmentService.get_environment_by_uri(session, share.environmentUri)

share_sm = ShareObjectSM(share.status)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
ShareItemActions,
ShareItemHealthStatus,
)
from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, Dataset
from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, S3Dataset
from dataall.modules.dataset_sharing.services.dataset_sharing_alarm_service import DatasetSharingAlarmService
from dataall.modules.dataset_sharing.db.share_object_models import ShareObjectItem, ShareObject
from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareErrorFormatter
Expand All @@ -30,7 +30,7 @@ class LFShareManager:
def __init__(
self,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
tables: [DatasetTable],
source_environment: Environment,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
EMPTY_STATEMENT_SID,
)
from dataall.modules.dataset_sharing.services.dataset_sharing_enums import PrincipalType
from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, Dataset
from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, S3Dataset

logger = logging.getLogger(__name__)
ACCESS_POINT_CREATION_TIME = 30
Expand All @@ -42,7 +42,7 @@ class S3AccessPointShareManager:
def __init__(
self,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
target_folder: DatasetStorageLocation,
source_environment: Environment,
Expand Down Expand Up @@ -700,7 +700,7 @@ def revoke_target_role_access_policy(self):

def delete_dataset_bucket_key_policy(
self,
dataset: Dataset,
dataset: S3Dataset,
):
logger.info('Deleting dataset bucket KMS key policy...')
key_alias = f'alias/{dataset.KmsAlias}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
EMPTY_STATEMENT_SID,
)
from dataall.modules.dataset_sharing.services.dataset_sharing_enums import PrincipalType
from dataall.modules.s3_datasets.db.dataset_models import Dataset, DatasetBucket
from dataall.modules.s3_datasets.db.dataset_models import S3Dataset, DatasetBucket
from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository

logger = logging.getLogger(__name__)
Expand All @@ -33,7 +33,7 @@ class S3BucketShareManager:
def __init__(
self,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
target_bucket: DatasetBucket,
source_environment: Environment,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from dataall.core.tasks.db.task_models import Task
from dataall.core.tasks.service_handlers import Worker
from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
from dataall.modules.s3_datasets.db.dataset_models import Dataset
from dataall.modules.s3_datasets.db.dataset_models import S3Dataset
from dataall.base.context import get_context
from dataall.modules.dataset_sharing.services.dataset_sharing_enums import ShareObjectStatus
from dataall.modules.notifications.db.notification_repositories import NotificationRepository
Expand Down Expand Up @@ -35,7 +35,7 @@ class ShareNotificationService:
- share.owner (person that opened the request) OR share.groupUri (if group_notifications=true)
"""

def __init__(self, session, dataset: Dataset, share: ShareObject):
def __init__(self, session, dataset: S3Dataset, share: ShareObject):
self.dataset = dataset
self.share = share
self.session = session
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
)
from dataall.modules.dataset_sharing.aws.glue_client import GlueClient
from dataall.modules.s3_datasets.db.dataset_repositories import DatasetRepository
from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, Dataset, DatasetStorageLocation
from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, S3Dataset, DatasetStorageLocation
from dataall.modules.s3_datasets.services.dataset_permissions import DATASET_TABLE_READ, DATASET_FOLDER_READ
from dataall.base.aws.iam import IAM

Expand Down Expand Up @@ -101,7 +101,7 @@ def create_share_object(
):
context = get_context()
with context.db_engine.scoped_session() as session:
dataset: Dataset = DatasetRepository.get_dataset_by_uri(session, dataset_uri)
dataset: S3Dataset = DatasetRepository.get_dataset_by_uri(session, dataset_uri)
environment = EnvironmentService.get_environment_by_uri(session, uri)

if environment.region != dataset.region:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from dataall.modules.dataset_sharing.services.share_managers import LFShareManager
from dataall.modules.dataset_sharing.aws.ram_client import RamClient
from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService
from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, Dataset
from dataall.modules.s3_datasets.db.dataset_models import DatasetTable, S3Dataset
from dataall.modules.dataset_sharing.db.share_object_models import ShareObject
from dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository, ShareItemSM
from dataall.modules.dataset_sharing.services.share_managers.share_manager_utils import ShareErrorFormatter
Expand All @@ -25,7 +25,7 @@ class ProcessLakeFormationShare(LFShareManager):
def __init__(
self,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
tables: [DatasetTable],
source_environment: Environment,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dataall.modules.dataset_sharing.services.share_exceptions import PrincipalRoleNotFound
from dataall.modules.dataset_sharing.services.share_managers import S3AccessPointShareManager
from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService
from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, Dataset
from dataall.modules.s3_datasets.db.dataset_models import DatasetStorageLocation, S3Dataset
from dataall.modules.dataset_sharing.services.dataset_sharing_enums import (
ShareItemHealthStatus,
ShareItemStatus,
Expand All @@ -23,7 +23,7 @@ class ProcessS3AccessPointShare(S3AccessPointShareManager):
def __init__(
self,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
share_folder: DatasetStorageLocation,
source_environment: Environment,
Expand All @@ -47,7 +47,7 @@ def __init__(
def process_approved_shares(
cls,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
share_folders: [DatasetStorageLocation],
source_environment: Environment,
Expand Down Expand Up @@ -130,7 +130,7 @@ def process_approved_shares(
def process_revoked_shares(
cls,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
revoke_folders: [DatasetStorageLocation],
source_environment: Environment,
Expand Down Expand Up @@ -205,7 +205,7 @@ def process_revoked_shares(
def verify_shares(
cls,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
share_folders: [DatasetStorageLocation],
source_environment: Environment,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from dataall.modules.dataset_sharing.services.share_exceptions import PrincipalRoleNotFound
from dataall.modules.dataset_sharing.services.share_managers import S3BucketShareManager
from dataall.modules.dataset_sharing.services.share_object_service import ShareObjectService
from dataall.modules.s3_datasets.db.dataset_models import Dataset, DatasetBucket
from dataall.modules.s3_datasets.db.dataset_models import S3Dataset, DatasetBucket
from dataall.modules.dataset_sharing.services.dataset_sharing_enums import (
ShareItemHealthStatus,
ShareItemStatus,
Expand All @@ -23,7 +23,7 @@ class ProcessS3BucketShare(S3BucketShareManager):
def __init__(
self,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
s3_bucket: DatasetBucket,
source_environment: Environment,
Expand All @@ -46,7 +46,7 @@ def __init__(
def process_approved_shares(
cls,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
shared_buckets: [DatasetBucket],
source_environment: Environment,
Expand Down Expand Up @@ -123,7 +123,7 @@ def process_approved_shares(
def process_revoked_shares(
cls,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
revoked_buckets: [DatasetBucket],
source_environment: Environment,
Expand Down Expand Up @@ -196,7 +196,7 @@ def process_revoked_shares(
def verify_shares(
cls,
session,
dataset: Dataset,
dataset: S3Dataset,
share: ShareObject,
buckets_to_verify: [DatasetBucket],
source_environment: Environment,
Expand Down
Loading
Loading