From 9158691bb215bba854754b3f3a2827e191c2409b Mon Sep 17 00:00:00 2001 From: dlpzx <71252798+dlpzx@users.noreply.github.com> Date: Mon, 11 Sep 2023 07:34:04 +0200 Subject: [PATCH] DA v2: fix glossaries permissions and refactor catalog module (#731) ### Feature or Bugfix - Bugfix - Refactoring ### Detail - moved glossaries permissions from core to module.catalog - refactored catalog module to follow services, resolvers, db layer design - fix list_term_associations - remove Columns from glossaries registry --> they cannot be tagged - clean up unused code in glossaries ### Relates V2.0.0 release ### Security Please answer the questions below briefly where applicable, or write `N/A`. Based on [OWASP 10](https://owasp.org/Top10/en/). `N/A` - Does this PR introduce or modify any input fields or queries - this includes fetching data from storage outside the application (e.g. a database, an S3 bucket)? - Is the input sanitized? - What precautions are you taking before deserializing the data you consume? - Is injection prevented by parametrizing queries? - Have you ensured no `eval` or similar functions are used? - Does this PR introduce any functionality or component that requires authorization? - How have you ensured it respects the existing AuthN/AuthZ mechanisms? - Are you logging failed auth attempts? - Are you using or adding any cryptographic features? - Do you use standard proven implementations? - Are the used keys controlled by the customer? Where are they stored? - Are you introducing any new policies/roles/users? - Have you used the least-privilege principle? How? By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 license. 
--- .../dataall/core/permissions/permissions.py | 19 - .../dataall/modules/catalog/api/mutations.py | 29 +- .../dataall/modules/catalog/api/queries.py | 59 +-- .../dataall/modules/catalog/api/resolvers.py | 449 ++++-------------- backend/dataall/modules/catalog/api/types.py | 10 +- .../catalog/db/glossary_repositories.py | 445 ++++++++--------- .../modules/catalog/services/__init__.py | 7 + .../services/glossaries_permissions.py | 28 ++ .../catalog/services/glossaries_service.py | 136 ++++++ .../modules/dashboards/api/resolvers.py | 4 +- .../dashboards/services/dashboard_service.py | 6 +- backend/dataall/modules/datasets/__init__.py | 4 +- .../modules/datasets/api/dataset/resolvers.py | 4 +- .../api/storage_location/resolvers.py | 4 +- .../modules/datasets/api/table/resolvers.py | 4 +- .../services/dataset_location_service.py | 6 +- .../datasets/services/dataset_service.py | 8 +- .../services/dataset_table_service.py | 6 +- backend/local_graphql_server.py | 1 + frontend/docker/dev/Dockerfile | 2 +- .../Glossaries/services/getGlossary.js | 3 + tests/modules/catalog/test_glossary.py | 92 +--- tests/modules/datasets/test_dataset_feed.py | 10 +- .../modules/datasets/test_dataset_glossary.py | 90 +--- 24 files changed, 563 insertions(+), 863 deletions(-) create mode 100644 backend/dataall/modules/catalog/services/__init__.py create mode 100644 backend/dataall/modules/catalog/services/glossaries_permissions.py create mode 100644 backend/dataall/modules/catalog/services/glossaries_service.py diff --git a/backend/dataall/core/permissions/permissions.py b/backend/dataall/core/permissions/permissions.py index 702a98622..23c410843 100644 --- a/backend/dataall/core/permissions/permissions.py +++ b/backend/dataall/core/permissions/permissions.py @@ -24,7 +24,6 @@ """ MANAGE_GROUPS = 'MANAGE_GROUPS' MANAGE_ENVIRONMENT = 'MANAGE_ENVIRONMENT' -MANAGE_GLOSSARIES = 'MANAGE_GLOSSARIES' MANAGE_ENVIRONMENTS = 'MANAGE_ENVIRONMENTS' MANAGE_ORGANIZATIONS = 'MANAGE_ORGANIZATIONS' @@ -94,34 
+93,17 @@ REMOVE_ENVIRONMENT_CONSUMPTION_ROLE ] -""" -GLOSSARIES -""" -CREATE_CATEGORY = 'CREATE_CATEGORY' -CREATE_TERM = 'CREATE_TERM' -UPDATE_NODE = 'UPDATE_NODE' -DELETE_GLOSSARY = 'DELETE_GLOSSARY' -APPROVE_ASSOCIATION = 'APPROVE_ASSOCIATION' -GLOSSARY_ALL = [ - CREATE_CATEGORY, - CREATE_TERM, - UPDATE_NODE, - DELETE_GLOSSARY, - APPROVE_ASSOCIATION, -] """ TENANT ALL """ TENANT_ALL = [ - MANAGE_GLOSSARIES, MANAGE_GROUPS, MANAGE_ENVIRONMENTS, MANAGE_ORGANIZATIONS, ] TENANT_ALL_WITH_DESC = {k: k for k in TENANT_ALL} -TENANT_ALL_WITH_DESC[MANAGE_GLOSSARIES] = 'Manage glossaries' TENANT_ALL_WITH_DESC[MANAGE_ENVIRONMENTS] = 'Manage environments' TENANT_ALL_WITH_DESC[MANAGE_GROUPS] = 'Manage teams' TENANT_ALL_WITH_DESC[MANAGE_ORGANIZATIONS] = 'Manage organizations' @@ -141,7 +123,6 @@ ORGANIZATION_ALL + ENVIRONMENT_ALL + CONSUMPTION_ROLE_ALL - + GLOSSARY_ALL + NETWORK_ALL ) diff --git a/backend/dataall/modules/catalog/api/mutations.py b/backend/dataall/modules/catalog/api/mutations.py index d46639325..4aa91b198 100644 --- a/backend/dataall/modules/catalog/api/mutations.py +++ b/backend/dataall/modules/catalog/api/mutations.py @@ -1,7 +1,7 @@ from dataall.base.api import gql from dataall.modules.catalog.api.resolvers import ( - create_glossary, update_node, delete_node, create_category, create_term, link_term, - request_link, approve_term_association, dismiss_term_association + create_glossary, update_node, delete_node, create_category, create_term, + approve_term_association, dismiss_term_association ) @@ -62,30 +62,6 @@ type=gql.Integer, ) - -linkTerm = gql.MutationField( - name='linkTerm', - resolver=link_term, - args=[ - gql.Argument(name='nodeUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='targetUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='targetType', type=gql.NonNullableType(gql.String)), - ], - type=gql.Ref('GlossaryTermLink'), -) - -requestLink = gql.MutationField( - name='requestLink', - resolver=request_link, - args=[ 
- gql.Argument(name='nodeUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='targetUri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='targetType', type=gql.NonNullableType(gql.String)), - ], - type=gql.Ref('GlossaryTermLink'), -) - - createTerm = gql.MutationField( name='createTerm', type=gql.Ref('Term'), @@ -113,7 +89,6 @@ args=[gql.Argument(name='nodeUri', type=gql.NonNullableType(gql.String))], ) - approveTermAssociation = gql.MutationField( name='approveTermAssociation', type=gql.Boolean, diff --git a/backend/dataall/modules/catalog/api/queries.py b/backend/dataall/modules/catalog/api/queries.py index 5d6a9eba5..e3b24d21b 100644 --- a/backend/dataall/modules/catalog/api/queries.py +++ b/backend/dataall/modules/catalog/api/queries.py @@ -1,6 +1,6 @@ from dataall.base.api import gql from dataall.modules.catalog.api.resolvers import ( - get_node, list_glossaries, search_terms, hierarchical_search, get_link, list_asset_linked_terms + get_node, list_glossaries, search_glossary, ) getGlossary = gql.QueryField( @@ -10,22 +10,6 @@ type=gql.Ref('Glossary'), ) - -getCategory = gql.QueryField( - name='getCategory', - resolver=get_node, - args=[gql.Argument(name='nodeUri', type=gql.NonNullableType(gql.String))], - type=gql.Ref('Category'), -) - - -getTerm = gql.QueryField( - name='getTerm', - resolver=get_node, - args=[gql.Argument(name='nodeUri', type=gql.NonNullableType(gql.String))], - type=gql.Ref('Term'), -) - listGlossaries = gql.QueryField( name='listGlossaries', type=gql.Ref('GlossarySearchResult'), @@ -33,49 +17,10 @@ resolver=list_glossaries, ) - -SearchTerms = gql.QueryField( - name='searchTerms', - doc='Search glossary terms', - type=gql.Ref('TermSearchResult'), - args=[gql.Argument(name='filter', type=gql.Ref('TermFilter'))], - resolver=search_terms, -) - - -searchGlossaryHierarchy = gql.QueryField( - name='searchGlossaryHierarchy', - doc='Search glossary terms in the hierarchy', - type=gql.Ref('GlossaryChildrenSearchResult'), - 
args=[gql.Argument(name='filter', type=gql.Ref('TermFilter'))], - resolver=hierarchical_search, -) - - SearchGlossary = gql.QueryField( name='searchGlossary', doc='Search glossary ', type=gql.Ref('GlossaryChildrenSearchResult'), args=[gql.Argument(name='filter', type=gql.Ref('GlossaryNodeSearchFilter'))], - resolver=search_terms, -) - - -getGlossaryTermLink = gql.QueryField( - name='getGlossaryTermLink', - doc='Returns a TermLink from its linkUri', - type=gql.Ref('GlossaryTermLink'), - resolver=get_link, - args=[gql.Argument(name='linkUri', type=gql.NonNullableType(gql.String))], -) - -listAssetLinkedTerms = gql.QueryField( - name='listAssetLinkedTerms', - doc='return all terms associated with a data asset', - args=[ - gql.Argument(name='uri', type=gql.NonNullableType(gql.String)), - gql.Argument(name='filter', type=gql.Ref('GlossaryTermTargetFilter')), - ], - resolver=list_asset_linked_terms, - type=gql.Ref('TermLinkSearchResults'), + resolver=search_glossary, ) diff --git a/backend/dataall/modules/catalog/api/resolvers.py b/backend/dataall/modules/catalog/api/resolvers.py index 061a9abf4..dab32f7b4 100644 --- a/backend/dataall/modules/catalog/api/resolvers.py +++ b/backend/dataall/modules/catalog/api/resolvers.py @@ -1,159 +1,78 @@ -from datetime import datetime - -from sqlalchemy import and_, or_, asc - from dataall.modules.catalog.api.enums import GlossaryRole from dataall.modules.catalog.api.registry import GlossaryRegistry +from dataall.modules.catalog.services.glossaries_service import GlossariesService from dataall.base.api.context import Context -from dataall.modules.catalog.db.glossary_repositories import Glossary from dataall.modules.catalog.db.glossary_models import TermLink, GlossaryNode -from dataall.base.db import paginate, exceptions +from dataall.base.db import exceptions -def resolve_glossary_node(obj: GlossaryNode, *_): - if obj.nodeType == 'G': - return 'Glossary' - elif obj.nodeType == 'C': - return 'Category' - elif obj.nodeType == 'T': - 
return 'Term' - else: - return None +def _validate_creation_request(data): + if not data: + raise exceptions.RequiredParameter(data) + if not data.get('admin'): + raise exceptions.RequiredParameter('admin') + if not data.get('label'): + raise exceptions.RequiredParameter('label') -def create_glossary( - context: Context, source, input: dict = None -) -> GlossaryNode: - with context.engine.scoped_session() as session: - return Glossary.create_glossary(session, input) +def _required_uri(uri): + if not uri: + raise exceptions.RequiredParameter('URI') -def tree(context: Context, source: GlossaryNode): - if not source: - return None - adjency_list = {} - with context.engine.scoped_session() as session: - q = session.query(GlossaryNode).filter( - GlossaryNode.path.startswith(f'{source.path}/') - ) - for node in q: - if not adjency_list.get(node.parentUri): - adjency_list[node.parentUri] = [] +def _required_path(path): + if not path: + raise exceptions.RequiredParameter('PATH') -def node_tree(context: Context, source: GlossaryNode, filter: dict = None): - if not source: - return None - if not filter: - filter = {} - with context.engine.scoped_session() as session: - q = ( - session.query(GlossaryNode) - .filter(GlossaryNode.path.startswith(source.path)) - .filter(GlossaryNode.deleted.is_(None)) - .order_by(asc(GlossaryNode.path)) - ) - term = filter.get('term') - nodeType = filter.get('nodeType') - if term: - q = q.filter( - or_( - GlossaryNode.label.ilike(term), - GlossaryNode.readme.ilike(term), - ) - ) - if nodeType: - q = q.filter(GlossaryNode.nodeType == nodeType) - - return paginate( - q, page_size=filter.get('pageSize', 10), page=filter.get('page', 1) - ).to_dict() - - -def list_node_children( - context: Context, source: GlossaryNode, filter: dict = None -): - if not filter: - filter = {} - with context.engine.scoped_session() as session: - return Glossary.list_node_children(session, source, filter) +def create_glossary(context: Context, source, input: dict = 
None): + _validate_creation_request(input) + return GlossariesService.create_glossary(data=input) def create_category( context: Context, source, parentUri: str = None, input: dict = None ): - with context.engine.scoped_session() as session: - return Glossary.create_category( - session=session, - uri=parentUri, - data=input, - ) + _required_uri(parentUri) + return GlossariesService.create_category(uri=parentUri, data=input) def create_term(context: Context, source, parentUri: str = None, input: dict = None): - with context.engine.scoped_session() as session: - return Glossary.create_term( - session=session, - uri=parentUri, - data=input, - ) + _required_uri(parentUri) + return GlossariesService.create_term(uri=parentUri, data=input) -def list_glossaries(context: Context, source, filter: dict = None): - if filter is None: - filter = {} - with context.engine.scoped_session() as session: - return Glossary.list_glossaries( - session=session, - data=filter, - ) +def update_node(context: Context, source, nodeUri: str = None, input: dict = None): + _required_uri(nodeUri) + return GlossariesService.update_node(uri=nodeUri, data=input) -def resolve_categories( - context: Context, source: GlossaryNode, filter: dict = None -): - if not source: - return None - if not filter: - filter = {} - with context.engine.scoped_session() as session: - return Glossary.list_categories( - session=session, - uri=source.nodeUri, - data=filter, - ) +def delete_node(context: Context, source, nodeUri: str = None) -> bool: + _required_uri(nodeUri) + return GlossariesService.delete_node(uri=nodeUri) -def resolve_terms(context: Context, source: GlossaryNode, filter: dict = None): - if not source: - return None - if not filter: +def list_glossaries(context: Context, source, filter: dict = None): + if filter is None: filter = {} - with context.engine.scoped_session() as session: - return Glossary.list_terms( - session=session, - uri=source.nodeUri, - data=filter, - ) + return 
GlossariesService.list_glossaries(data=filter) -def update_node( - context: Context, source, nodeUri: str = None, input: dict = None -) -> GlossaryNode: - with context.engine.scoped_session() as session: - return Glossary.update_node( - session, - uri=nodeUri, - data=input, - ) +def get_node(context: Context, source, nodeUri: str = None): + """Get a node which can be either a glossary, a category, or a term""" + _required_uri(nodeUri) + return GlossariesService.get_node(uri=nodeUri) -def get_node(context: Context, source, nodeUri: str = None): - with context.engine.scoped_session() as session: - node: GlossaryNode = session.query(GlossaryNode).get(nodeUri) - if not node: - raise exceptions.ObjectNotFound('Node', nodeUri) - return node +def resolve_glossary_node(obj: GlossaryNode, *_): + if obj.nodeType == 'G': + return 'Glossary' + elif obj.nodeType == 'C': + return 'Category' + elif obj.nodeType == 'T': + return 'Term' + else: + return None def resolve_user_role(context: Context, source: GlossaryNode, **kwargs): @@ -164,113 +83,63 @@ def resolve_user_role(context: Context, source: GlossaryNode, **kwargs): return GlossaryRole.NoPermission.value -def delete_node(context: Context, source, nodeUri: str = None) -> bool: - with context.engine.scoped_session() as session: - return Glossary.delete_node(session, nodeUri) +def resolve_link(context, source, targetUri: str = None): + _required_uri(source.nodeUri) + _required_uri(targetUri) + return GlossariesService.get_node_link_to_target(uri=source.nodeUri, targetUri=targetUri) + + +def resolve_stats(context, source: GlossaryNode, **kwargs): + _required_path(source.path) + return GlossariesService.get_glossary_categories_terms_and_associations(path=source.path) -def hierarchical_search(context: Context, source, filter: dict = None): +def resolve_node_tree(context: Context, source: GlossaryNode, filter: dict = None): + _required_path(source.path) if not filter: filter = {} - - with context.engine.scoped_session() as session: 
- return Glossary.hierarchical_search( - session=session, - data=filter, - ) + return GlossariesService.get_node_tree(path=source.path, filter=filter) -def resolve_link(context, source, targetUri: str = None): - if not source: - return None - with context.engine.scoped_session() as session: - link = ( - session.query(TermLink) - .filter( - and_( - TermLink.nodeUri == source.nodeUri, - TermLink.targetUri == targetUri, - ) - ) - .first() - ) - if not link: - link = { - 'nodeUri': source.nodeUri, - 'targetUri': targetUri, - 'created': datetime.now().isoformat(), - 'owner': context.username, - 'approvedByOwner': False, - 'approvedBySteward': False, - } - - return link - - -def search_terms(context: Context, source, filter: dict = None): +def resolve_node_children(context: Context, source: GlossaryNode, filter: dict = None): + _required_path(source.path) if not filter: filter = {} - with context.engine.scoped_session() as session: - return Glossary.search_terms( - session=session, - data=filter, - ) - - -def request_link( - context: Context, - source, - nodeUri: str = None, - targetUri: str = None, - targetType: str = None, + return GlossariesService.list_node_children(path=source.path, filter=filter) + + +def resolve_categories( + context: Context, source: GlossaryNode, filter: dict = None ): - with context.engine.scoped_session() as session: - return Glossary.link_term( - session=session, - uri=nodeUri, - data={ - 'targetUri': targetUri, - 'targetType': targetType, - 'approvedByOwner': True, - 'approvedBySteward': False, - }, - target_model=_target_model(targetType), - ) - - -def link_term( - context: Context, - source, - nodeUri: str = None, - targetUri: str = None, - targetType: str = None, + _required_uri(source.nodeUri) + if not filter: + filter = {} + return GlossariesService.list_categories(uri=source.nodeUri, data=filter) + + +def resolve_term_associations( + context, source: GlossaryNode, filter: dict = None ): - with context.engine.scoped_session() as 
session: - return Glossary.link_term( - session=session, - uri=nodeUri, - data={ - 'targetUri': targetUri, - 'targetType': targetType, - 'approvedByOwner': True, - 'approvedBySteward': True, - }, - target_model=_target_model(targetType), - ) + if not filter: + filter = {} + return GlossariesService.list_term_associations( + node=source, + filter=filter + ) -def resolve_term_glossary(context, source: GlossaryNode, **kwargs): - with context.engine.scoped_session() as session: - parentUri = source.path.split('/')[1] - return session.query(GlossaryNode).get(parentUri) +def resolve_terms(context: Context, source: GlossaryNode, filter: dict = None): + _required_uri(source.nodeUri) + if not filter: + filter = {} + return GlossariesService.list_terms(uri=source.nodeUri, data=filter) -def get_link(context: Context, source, linkUri: str = None): - with context.engine.scoped_session() as session: - link = session.query(TermLink).get(linkUri) - if not link: - raise exceptions.ObjectNotFound('Link', linkUri) - return link +def resolve_term_glossary(context, source: GlossaryNode, **kwargs): + _required_path(source.path) + parentUri = source.path.split('/')[1] + _required_uri(parentUri) + return GlossariesService.get_node(uri=parentUri) def target_union_resolver(obj, *_): @@ -278,89 +147,8 @@ def target_union_resolver(obj, *_): def resolve_link_target(context, source, **kwargs): - with context.engine.scoped_session() as session: - model = GlossaryRegistry.find_model(source.targetType) - target = session.query(model).get(source.targetUri) - return target - - -def resolve_term_associations( - context, source: GlossaryNode, filter: dict = None -): - if not filter: - filter = {} - with context.engine.scoped_session() as session: - return Glossary.list_term_associations( - session=session, - data={'source': source, 'filter': filter}, - target_model_definitions=GlossaryRegistry.definitions() - ) - - -def resolve_stats(context, source: GlossaryNode, **kwargs): - - with 
context.engine.scoped_session() as session: - categories = ( - session.query(GlossaryNode) - .filter( - and_( - GlossaryNode.path.startswith(source.path), - GlossaryNode.nodeType == 'C', - GlossaryNode.deleted.is_(None), - ) - ) - .count() - ) - terms = ( - session.query(GlossaryNode) - .filter( - and_( - GlossaryNode.path.startswith(source.path), - GlossaryNode.nodeType == 'T', - GlossaryNode.deleted.is_(None), - ) - ) - .count() - ) - - associations = ( - session.query(TermLink) - .join( - GlossaryNode, - GlossaryNode.nodeType == TermLink.nodeUri, - ) - .filter(GlossaryNode.path.startswith(source.path)) - .count() - ) - - return {'categories': categories, 'terms': terms, 'associations': associations} - - -def list_asset_linked_terms( - context: Context, source, uri: str = None, filter: dict = None -): - if not filter: - filter = {} - with context.engine.scoped_session() as session: - q = ( - session.query(TermLink) - .join( - GlossaryNode, - GlossaryNode.nodeUri == TermLink.nodeUri, - ) - .filter(TermLink.targetUri == uri) - ) - term = filter.get('term') - if term: - q = q.filter( - or_( - GlossaryNode.label.ilike(term), - GlossaryNode.readme.ilike(term), - ) - ) - return paginate( - q, page=filter.get('page', 1), page_size=filter.get('pageSize', 10) - ).to_dict() + _required_uri(source.targetUri) + return GlossariesService.get_link_target(targetUri=source.targetUri, targetType=source.targetType) def resolve_link_node(context: Context, source: TermLink, **kwargs): @@ -370,59 +158,16 @@ def resolve_link_node(context: Context, source: TermLink, **kwargs): def approve_term_association(context: Context, source, linkUri: str = None): - updated = False - with context.engine.scoped_session() as session: - link: TermLink = session.query(TermLink).get(linkUri) - if not link: - raise exceptions.ObjectNotFound('Link', linkUri) - verify_term_association_approver_role( - session, context.username, context.groups, link - ) - if not link.approvedBySteward: - 
link.approvedBySteward = True - updated = True - reindex(context, linkUri=linkUri) - return updated + _required_uri(linkUri) + return GlossariesService.approve_term_association(linkUri=linkUri) def dismiss_term_association(context: Context, source, linkUri: str = None): - updated = False - with context.engine.scoped_session() as session: - link: TermLink = session.query(TermLink).get(linkUri) - if not link: - raise exceptions.ObjectNotFound('Link', linkUri) - verify_term_association_approver_role( - session, context.username, context.groups, link - ) - if link.approvedBySteward: - link.approvedBySteward = False - updated = True - reindex(context, linkUri=linkUri) - return updated - - -def verify_term_association_approver_role(session, username, groups, link): - glossary_node = session.query(GlossaryNode).get(link.nodeUri) - if glossary_node.owner != username and glossary_node.admin not in groups: - raise exceptions.UnauthorizedOperation( - 'ASSOCIATE_GLOSSARY_TERM', - f'User: {username} is not allowed to manage glossary term associations', - ) - - -def reindex(context, linkUri): - with context.engine.scoped_session() as session: - link: TermLink = session.query(TermLink).get(linkUri) - if not link: - return + _required_uri(linkUri) + return GlossariesService.dismiss_term_association(linkUri=linkUri) - GlossaryRegistry.reindex(session, link.targetType, link.targetUri) - -def _target_model(target_type: str): - target_model = GlossaryRegistry.find_model(target_type) - if not target_model: - raise exceptions.InvalidInput( - 'NodeType', 'term.nodeType', 'association target type is invalid' - ) - return target_model +def search_glossary(context: Context, source, filter: dict = None): + if not filter: + filter = {} + return GlossariesService.search_glossary_terms(data=filter) diff --git a/backend/dataall/modules/catalog/api/types.py b/backend/dataall/modules/catalog/api/types.py index 7179ceba9..4e0112eed 100644 --- a/backend/dataall/modules/catalog/api/types.py +++ 
b/backend/dataall/modules/catalog/api/types.py @@ -2,7 +2,7 @@ from dataall.modules.catalog.api.enums import GlossaryRole from dataall.modules.catalog.api.resolvers import ( resolve_glossary_node, resolve_user_role, resolve_link, resolve_term_glossary, resolve_stats, - node_tree, list_node_children, resolve_categories, resolve_term_associations, resolve_terms, target_union_resolver, + resolve_node_tree, resolve_node_children, resolve_categories, resolve_term_associations, resolve_terms, target_union_resolver, resolve_link_node, resolve_link_target, ) @@ -61,7 +61,7 @@ name='stats', resolver=resolve_stats, type=gql.Ref('GlossaryNodeStatistics') ), gql.Field( - resolver=node_tree, + resolver=resolve_node_tree, args=[ gql.Argument(name='filter', type=gql.Ref('GlossaryNodeSearchFilter')) ], @@ -69,7 +69,7 @@ type=gql.Ref('GlossaryChildrenSearchResult'), ), gql.Field( - resolver=list_node_children, + resolver=resolve_node_children, args=[ gql.Argument(name='filter', type=gql.Ref('GlossaryNodeSearchFilter')) ], @@ -119,7 +119,7 @@ name='stats', resolver=resolve_stats, type=gql.Ref('GlossaryNodeStatistics') ), gql.Field( - resolver=list_node_children, + resolver=resolve_node_children, args=[ gql.Argument(name='filter', type=gql.Ref('GlossaryNodeSearchFilter')) ], @@ -175,7 +175,7 @@ type=gql.Ref('GlossaryTermLink'), ), gql.Field( - resolver=list_node_children, + resolver=resolve_node_children, args=[ gql.Argument(name='filter', type=gql.Ref('GlossaryNodeSearchFilter')) ], diff --git a/backend/dataall/modules/catalog/db/glossary_repositories.py b/backend/dataall/modules/catalog/db/glossary_repositories.py index 48c8a4b02..b4c97551d 100644 --- a/backend/dataall/modules/catalog/db/glossary_repositories.py +++ b/backend/dataall/modules/catalog/db/glossary_repositories.py @@ -1,24 +1,21 @@ import logging from datetime import datetime -from sqlalchemy import asc, or_, and_, literal, case -from sqlalchemy.orm import with_expression, aliased +from sqlalchemy import asc, or_, 
and_, literal +from sqlalchemy.orm import with_expression -from dataall.base.db import exceptions, paginate, Resource -from dataall.core.permissions import permissions +from dataall.base.db import exceptions, paginate from dataall.modules.catalog.db.glossary_models import GlossaryNodeStatus, TermLink, GlossaryNode +from dataall.modules.catalog.api.registry import GlossaryRegistry from dataall.base.db.paginator import Page from dataall.base.context import get_context -from dataall.core.permissions.permission_checker import has_tenant_permission logger = logging.getLogger(__name__) -class Glossary: +class GlossaryRepository: @staticmethod - @has_tenant_permission(permissions.MANAGE_GLOSSARIES) def create_glossary(session, data=None): - Glossary.validate_params(data) g: GlossaryNode = GlossaryNode( label=data.get('label'), nodeType='G', @@ -35,9 +32,7 @@ def create_glossary(session, data=None): return g @staticmethod - @has_tenant_permission(permissions.MANAGE_GLOSSARIES) def create_category(session, uri, data=None): - Glossary.validate_params(data) parent: GlossaryNode = session.query(GlossaryNode).get(uri) if not parent: raise exceptions.ObjectNotFound('Glossary', uri) @@ -56,9 +51,7 @@ def create_category(session, uri, data=None): return cat @staticmethod - @has_tenant_permission(permissions.MANAGE_GLOSSARIES) def create_term(session, uri, data=None): - Glossary.validate_params(data) parent: GlossaryNode = session.query(GlossaryNode).get(uri) if not parent: raise exceptions.ObjectNotFound('Glossary or Category', uri) @@ -80,68 +73,6 @@ def create_term(session, uri, data=None): term.path = parent.path + '/' + term.nodeUri return term - @staticmethod - @has_tenant_permission(permissions.MANAGE_GLOSSARIES) - def delete_node(session, uri): - count = 0 - node: GlossaryNode = session.query(GlossaryNode).get(uri) - if not node: - raise exceptions.ObjectNotFound('Node', uri) - node.deleted = datetime.now() - if node.nodeType in ['G', 'C']: - children = 
session.query(GlossaryNode).filter( - and_( - GlossaryNode.path.startswith(node.path), - GlossaryNode.deleted.is_(None), - ) - ) - count = children.count() + 1 - children.update({'deleted': datetime.now()}, synchronize_session=False) - else: - count = 1 - return count - - @staticmethod - @has_tenant_permission(permissions.MANAGE_GLOSSARIES) - def update_node(session, uri, data=None): - node: GlossaryNode = session.query(GlossaryNode).get(uri) - if not node: - raise exceptions.ObjectNotFound('Node', uri) - for k in data.keys(): - setattr(node, k, data.get(k)) - return node - - @staticmethod - @has_tenant_permission(permissions.MANAGE_GLOSSARIES) - def link_term(session, uri, target_model: Resource, data): - term: GlossaryNode = session.query(GlossaryNode).get(uri) - if not term: - raise exceptions.ObjectNotFound('Node', uri) - if term.nodeType != 'T': - raise exceptions.InvalidInput( - 'NodeType', - 'term.nodeType', - 'associations are allowed for Glossary terms only', - ) - - target_uri: str = data['targetUri'] - target_type: str = data['targetType'] - - target = session.query(target_model).get(target_uri) - if not target: - raise exceptions.ObjectNotFound('Association target', uri) - - link = TermLink( - owner=get_context().username, - approvedByOwner=data.get('approvedByOwner', True), - approvedBySteward=data.get('approvedBySteward', True), - nodeUri=uri, - targetUri=target_uri, - targetType=target_type, - ) - session.add(link) - return link - @staticmethod def list_glossaries(session, data=None): q = session.query(GlossaryNode).filter( @@ -160,16 +91,14 @@ def list_glossaries(session, data=None): ).to_dict() @staticmethod - def list_categories(session, uri, data=None): - q = session.query(GlossaryNode).filter( - and_( - GlossaryNode.parentUri == uri, - GlossaryNode.nodeType == 'C', - GlossaryNode.deleted.is_(None), - ) + def list_node_children(session, path, filter): + q = ( + session.query(GlossaryNode) + .filter(GlossaryNode.path.startswith(path + '/')) + 
.order_by(asc(GlossaryNode.path)) ) - - term = data.get('term') + term = filter.get('term') + nodeType = filter.get('nodeType') if term: q = q.filter( or_( @@ -177,20 +106,22 @@ def list_categories(session, uri, data=None): GlossaryNode.readme.ilike(term), ) ) + if nodeType: + q = q.filter(GlossaryNode.nodeType == nodeType) return paginate( - q, page=data.get('page', 1), page_size=data.get('pageSize', 10) + q, page_size=filter.get('pageSize', 10), page=filter.get('page', 1) ).to_dict() @staticmethod - def list_terms(session, uri, data=None): - q = session.query(GlossaryNode).filter( - and_( - GlossaryNode.parentUri == uri, - GlossaryNode.nodeType == 'T', - GlossaryNode.deleted.is_(None), - ) + def get_node_tree(session, path, filter): + q = ( + session.query(GlossaryNode) + .filter(GlossaryNode.path.startswith(path)) + .filter(GlossaryNode.deleted.is_(None)) + .order_by(asc(GlossaryNode.path)) ) - term = data.get('term') + term = filter.get('term') + nodeType = filter.get('nodeType') if term: q = q.filter( or_( @@ -198,141 +129,78 @@ def list_terms(session, uri, data=None): GlossaryNode.readme.ilike(term), ) ) + if nodeType: + q = q.filter(GlossaryNode.nodeType == nodeType) + return paginate( - q, page=data.get('page', 1), page_size=data.get('pageSize', 10) + q, page_size=filter.get('pageSize', 10), page=filter.get('page', 1) ).to_dict() @staticmethod - def hierarchical_search(session, data=None): - q = session.query(GlossaryNode).options( - with_expression(GlossaryNode.isMatch, literal(True)) - ) - q = q.filter(GlossaryNode.deleted.is_(None)) - term = data.get('term', None) - if term: - q = q.filter( - or_( - GlossaryNode.label.ilike('%' + term.upper() + '%'), - GlossaryNode.readme.ilike('%' + term.upper() + '%'), + def get_node_link_to_target(session, username, uri, targetUri): + link = ( + session.query(TermLink) + .filter( + and_( + TermLink.nodeUri == uri, + TermLink.targetUri == targetUri, ) ) - matches = q.subquery('matches') - parents = 
aliased(GlossaryNode, name='parents') - children = aliased(GlossaryNode, name='children') + .first() + ) + if not link: + link = { + 'nodeUri': uri, + 'targetUri': targetUri, + 'created': datetime.now().isoformat(), + 'owner': username, + 'approvedByOwner': False, + 'approvedBySteward': False, + } - if term: - parent_expr = case( - [ - ( - or_( - parents.label.ilike(f'%{term}%'), - parents.readme.ilike(f'%{term}%'), - ) - ) - ], - else_=literal(False), - ) - else: - parent_expr = literal(False) + return link - ascendants = ( - session.query(parents) - .options(with_expression(parents.isMatch, parent_expr)) - .join( + @staticmethod + def get_glossary_categories_terms_and_associations(session, path): + categories = ( + session.query(GlossaryNode) + .filter( and_( - matches, - matches.c.path.startswith(parents.path), - matches, - matches.c.deleted.is_(None), + GlossaryNode.path.startswith(path), + GlossaryNode.nodeType == 'C', + GlossaryNode.deleted.is_(None), ) ) + .count() ) - - if term: - child_expr = case( - [ - ( - or_( - children.label.ilike(f'%{term}%'), - children.readme.ilike(f'%{term}%'), - ), - and_(children.deleted.is_(None)), - ) - ], - else_=literal(False), + terms = ( + session.query(GlossaryNode) + .filter( + and_( + GlossaryNode.path.startswith(path), + GlossaryNode.nodeType == 'T', + GlossaryNode.deleted.is_(None), + ) ) - else: - child_expr = literal(False) + .count() + ) - descendants = ( - session.query(children) - .options(with_expression(children.isMatch, child_expr)) + associations = ( + session.query(TermLink) .join( - matches, - children.path.startswith(matches.c.path), + GlossaryNode, + GlossaryNode.nodeType == TermLink.nodeUri, ) + .filter(GlossaryNode.path.startswith(path)) + .count() ) - all = ascendants.union(descendants) - q = all.order_by(GlossaryNode.path) - - return paginate( - q, page=data.get('page', 1), page_size=data.get('pageSize', 100) - ).to_dict() - - @staticmethod - def search_terms(session, data=None): - q = 
session.query(GlossaryNode).filter( - GlossaryNode.deleted.is_(None) - ) - term = data.get('term') - if term: - q = q.filter( - or_( - GlossaryNode.label.ilike(term), - GlossaryNode.readme.ilike(term), - ) - ) - q = q.order_by(asc(GlossaryNode.path)) - return paginate( - q, page=data.get('page', 1), page_size=data.get('pageSize', 10) - ).to_dict() - - @staticmethod - def validate_params(data): - if not data: - exceptions.RequiredParameter('data') - if not data.get('label'): - exceptions.RequiredParameter('name') - - @staticmethod - def list_node_children(session, source, filter): - q = ( - session.query(GlossaryNode) - .filter(GlossaryNode.path.startswith(source.path + '/')) - .order_by(asc(GlossaryNode.path)) - ) - term = filter.get('term') - nodeType = filter.get('nodeType') - if term: - q = q.filter( - or_( - GlossaryNode.label.ilike(term), - GlossaryNode.readme.ilike(term), - ) - ) - if nodeType: - q = q.filter(GlossaryNode.nodeType == nodeType) - return paginate( - q, page_size=filter.get('pageSize', 10), page=filter.get('page', 1) - ).to_dict() + return {'categories': categories, 'terms': terms, 'associations': associations} @staticmethod - def list_term_associations(session, target_model_definitions, data=None): - source = data['source'] - filter = data['filter'] - + def list_term_associations(session, target_model_definitions, node, filter=None): query = None + subqueries = [] for definition in target_model_definitions: model = definition.model subquery = session.query( @@ -342,10 +210,10 @@ def list_term_associations(session, target_model_definitions, data=None): model.name.label('name'), model.description.label('description'), ) - if query: - query.union(subquery) - else: - query = subquery + if subquery.first() is not None: + subqueries.append(subquery) + + query = subqueries[0].union(*subqueries[1:]) if query is None: return Page([], 1, 1, 0) # empty page. 
All modules are turned off @@ -364,12 +232,13 @@ def list_term_associations(session, target_model_definitions, data=None): linked_objects, TermLink.targetUri == linked_objects.c.targetUri ) ) - if source.nodeType == 'T': - q = q.filter(TermLink.nodeUri == source.nodeUri) - elif source.nodeType in ['C', 'G']: - q = q.filter(GlossaryNode.path.startswith(source.path)) + + if node.nodeType == 'T': + q = q.filter(TermLink.nodeUri == node.nodeUri) + elif node.nodeType in ['C', 'G']: + q = q.filter(GlossaryNode.path.startswith(node.path)) else: - raise Exception(f'InvalidNodeType ({source.nodeUri}/{source.nodeType})') + raise Exception(f'InvalidNodeType ({node.nodeUri}/{node.nodeType})') term = filter.get('term') if term: @@ -386,10 +255,136 @@ def list_term_associations(session, target_model_definitions, data=None): q, page=filter.get('page', 1), page_size=filter.get('pageSize', 25) ).to_dict() + @staticmethod + def list_categories(session, uri, data=None): + q = session.query(GlossaryNode).filter( + and_( + GlossaryNode.parentUri == uri, + GlossaryNode.nodeType == 'C', + GlossaryNode.deleted.is_(None), + ) + ) + + term = data.get('term') + if term: + q = q.filter( + or_( + GlossaryNode.label.ilike(term), + GlossaryNode.readme.ilike(term), + ) + ) + return paginate( + q, page=data.get('page', 1), page_size=data.get('pageSize', 10) + ).to_dict() + + @staticmethod + def list_terms(session, uri, data=None): + q = session.query(GlossaryNode).filter( + and_( + GlossaryNode.parentUri == uri, + GlossaryNode.nodeType == 'T', + GlossaryNode.deleted.is_(None), + ) + ) + term = data.get('term') + if term: + q = q.filter( + or_( + GlossaryNode.label.ilike(term), + GlossaryNode.readme.ilike(term), + ) + ) + return paginate( + q, page=data.get('page', 1), page_size=data.get('pageSize', 10) + ).to_dict() + + @staticmethod + def get_node(session, uri) -> GlossaryNode: + node: GlossaryNode = session.query(GlossaryNode).get(uri) + if not node: + raise exceptions.ObjectNotFound('Node', 
uri) + return node + + @staticmethod + def update_node(session, uri, data=None) -> GlossaryNode: + node: GlossaryNode = session.query(GlossaryNode).get(uri) + if not node: + raise exceptions.ObjectNotFound('Node', uri) + for k in data.keys(): + setattr(node, k, data.get(k)) + return node + + @staticmethod + def delete_node(session, uri) -> bool: + count = 0 + node: GlossaryNode = session.query(GlossaryNode).get(uri) + if not node: + raise exceptions.ObjectNotFound('Node', uri) + node.deleted = datetime.now() + if node.nodeType in ['G', 'C']: + children = session.query(GlossaryNode).filter( + and_( + GlossaryNode.path.startswith(node.path), + GlossaryNode.deleted.is_(None), + ) + ) + count = children.count() + 1 + children.update({'deleted': datetime.now()}, synchronize_session=False) + else: + count = 1 + return count + + @staticmethod + def approve_term_association(session, username, groups, linkUri: str = None): + updated = False + link: TermLink = session.query(TermLink).get(linkUri) + if not link: + raise exceptions.ObjectNotFound('Link', linkUri) + GlossaryRepository._verify_term_association_approver_role( + session, username, groups, link + ) + if not link.approvedBySteward: + link.approvedBySteward = True + updated = True + GlossaryRepository._reindex(session=session, linkUri=linkUri) + return updated + + @staticmethod + def dismiss_term_association(session, username, groups, linkUri: str = None): + updated = False + link: TermLink = session.query(TermLink).get(linkUri) + if not link: + raise exceptions.ObjectNotFound('Link', linkUri) + GlossaryRepository._verify_term_association_approver_role( + session, username, groups, link + ) + if link.approvedBySteward: + link.approvedBySteward = False + updated = True + GlossaryRepository._reindex(session, linkUri=linkUri) + return updated + + @staticmethod + def _verify_term_association_approver_role(session, username, groups, link): + glossary_node = session.query(GlossaryNode).get(link.nodeUri) + if 
glossary_node.owner != username and glossary_node.admin not in groups: + raise exceptions.UnauthorizedOperation( + 'ASSOCIATE_GLOSSARY_TERM', + f'User: {username} is not allowed to manage glossary term associations', + ) + + @staticmethod + def _reindex(session, linkUri): + link: TermLink = session.query(TermLink).get(linkUri) + if not link: + return + GlossaryRegistry.reindex(session, link.targetType, link.targetUri) + @staticmethod def set_glossary_terms_links( session, username, target_uri, target_type, glossary_terms ): + """Used in dependent modules to assign glossary terms to resources""" current_links = session.query(TermLink).filter( TermLink.targetUri == target_uri ) @@ -421,6 +416,7 @@ def set_glossary_terms_links( @staticmethod def get_glossary_terms_links(session, target_uri, target_type): + """Used in dependent modules get assigned glossary terms to resources""" terms = ( session.query(GlossaryNode) .join( @@ -438,6 +434,7 @@ def get_glossary_terms_links(session, target_uri, target_type): @staticmethod def delete_glossary_terms_links(session, target_uri, target_type): + """Used in dependent modules remove assigned glossary terms to resources""" term_links = ( session.query(TermLink) .filter( @@ -450,3 +447,21 @@ def delete_glossary_terms_links(session, target_uri, target_type): ) for link in term_links: session.delete(link) + + @staticmethod + def search_glossary_terms(session, data=None): + q = session.query(GlossaryNode).filter( + GlossaryNode.deleted.is_(None) + ) + term = data.get('term') + if term: + q = q.filter( + or_( + GlossaryNode.label.ilike(term), + GlossaryNode.readme.ilike(term), + ) + ) + q = q.order_by(asc(GlossaryNode.path)) + return paginate( + q, page=data.get('page', 1), page_size=data.get('pageSize', 10) + ).to_dict() diff --git a/backend/dataall/modules/catalog/services/__init__.py b/backend/dataall/modules/catalog/services/__init__.py new file mode 100644 index 000000000..ce4b25893 --- /dev/null +++ 
b/backend/dataall/modules/catalog/services/__init__.py @@ -0,0 +1,7 @@ +""" +Contains the code needed for service layer. +The service layer is a layer where all business logic is aggregated +""" +from dataall.modules.catalog.services import glossaries_permissions + +__all__ = ["glossaries_permissions"] diff --git a/backend/dataall/modules/catalog/services/glossaries_permissions.py b/backend/dataall/modules/catalog/services/glossaries_permissions.py new file mode 100644 index 000000000..84e2306b4 --- /dev/null +++ b/backend/dataall/modules/catalog/services/glossaries_permissions.py @@ -0,0 +1,28 @@ +from dataall.core.permissions.permissions import TENANT_ALL, TENANT_ALL_WITH_DESC, RESOURCES_ALL, \ + RESOURCES_ALL_WITH_DESC + +MANAGE_GLOSSARIES = 'MANAGE_GLOSSARIES' + +TENANT_ALL.append(MANAGE_GLOSSARIES) +TENANT_ALL_WITH_DESC[MANAGE_GLOSSARIES] = 'Manage glossaries' + +""" +GLOSSARIES +""" +CREATE_CATEGORY = 'CREATE_CATEGORY' +CREATE_TERM = 'CREATE_TERM' +UPDATE_NODE = 'UPDATE_NODE' +DELETE_GLOSSARY = 'DELETE_GLOSSARY' +APPROVE_ASSOCIATION = 'APPROVE_ASSOCIATION' +GLOSSARY_ALL = [ + CREATE_CATEGORY, + CREATE_TERM, + UPDATE_NODE, + DELETE_GLOSSARY, + APPROVE_ASSOCIATION, +] + +RESOURCES_ALL.extend(GLOSSARY_ALL) + +for perm in GLOSSARY_ALL: + RESOURCES_ALL_WITH_DESC[perm] = perm diff --git a/backend/dataall/modules/catalog/services/glossaries_service.py b/backend/dataall/modules/catalog/services/glossaries_service.py new file mode 100644 index 000000000..8f48e28c2 --- /dev/null +++ b/backend/dataall/modules/catalog/services/glossaries_service.py @@ -0,0 +1,136 @@ +import logging + +from dataall.base.context import get_context +from dataall.core.permissions.permission_checker import has_tenant_permission + +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository +from dataall.modules.catalog.db.glossary_models import TermLink, GlossaryNode +from dataall.modules.catalog.services.glossaries_permissions import ( + MANAGE_GLOSSARIES +) +from 
dataall.modules.catalog.api.registry import GlossaryRegistry +logger = logging.getLogger(__name__) + + +def _session(): + return get_context().db_engine.scoped_session() + + +class GlossariesService: + + @staticmethod + @has_tenant_permission(MANAGE_GLOSSARIES) + def create_glossary(data: dict = None) -> GlossaryNode: + with _session() as session: + return GlossaryRepository.create_glossary(session=session, data=data) + + @staticmethod + @has_tenant_permission(MANAGE_GLOSSARIES) + def create_category(uri: str, data: dict = None): + with _session() as session: + return GlossaryRepository.create_category(session=session, uri=uri, data=data) + + @staticmethod + @has_tenant_permission(MANAGE_GLOSSARIES) + def create_term(uri: str, data: dict = None): + with _session() as session: + return GlossaryRepository.create_term(session=session, uri=uri, data=data) + + @staticmethod + def list_glossaries(data: dict = None): + with _session() as session: + return GlossaryRepository.list_glossaries(session=session, data=data) + + @staticmethod + def list_categories(uri: str, data: dict = None): + with _session() as session: + return GlossaryRepository.list_categories(session=session, uri=uri, data=data) + + @staticmethod + def list_terms(uri: str, data: dict = None): + with _session() as session: + return GlossaryRepository.list_terms(session=session, uri=uri, data=data) + + @staticmethod + def list_node_children(path: str, filter: dict = None): + with _session() as session: + return GlossaryRepository.list_node_children(session=session, path=path, filter=filter) + + @staticmethod + def get_node_tree(path: str, filter: dict = None): + with _session() as session: + return GlossaryRepository.get_node_tree(session=session, path=path, filter=filter) + + @staticmethod + def get_node_link_to_target(uri: str, targetUri: str,): + with _session() as session: + return GlossaryRepository.get_node_link_to_target( + session=session, + username=get_context().username, + uri=uri, + 
targetUri=targetUri + ) + + @staticmethod + def get_glossary_categories_terms_and_associations(path: str): + with _session() as session: + return GlossaryRepository.get_glossary_categories_terms_and_associations(session=session, path=path) + + @staticmethod + def list_term_associations(node: GlossaryNode, filter: dict = None): + with _session() as session: + return GlossaryRepository.list_term_associations( + session=session, + node=node, + filter=filter, + target_model_definitions=GlossaryRegistry.definitions() + ) + + @staticmethod + def get_node(uri: str): + with _session() as session: + return GlossaryRepository.get_node(session=session, uri=uri) + + @staticmethod + def get_link_target(targetUri: str, targetType: str): + with _session() as session: + model = GlossaryRegistry.find_model(targetType) + target = session.query(model).get(targetUri) + return target + + @staticmethod + @has_tenant_permission(MANAGE_GLOSSARIES) + def update_node(uri: str = None, data: dict = None): + with _session() as session: + return GlossaryRepository.update_node(session=session, uri=uri, data=data) + + @staticmethod + @has_tenant_permission(MANAGE_GLOSSARIES) + def delete_node(uri: str = None): + with _session() as session: + return GlossaryRepository.delete_node(session=session, uri=uri) + + @staticmethod + def approve_term_association(linkUri: str): + with _session() as session: + return GlossaryRepository.approve_term_association( + session=session, + username=get_context().username, + groups=get_context().groups, + linkUri=linkUri + ) + + @staticmethod + def dismiss_term_association(linkUri: str): + with _session() as session: + return GlossaryRepository.dismiss_term_association( + session=session, + username=get_context().username, + groups=get_context().groups, + linkUri=linkUri + ) + + @staticmethod + def search_glossary_terms(data: dict = None): + with _session() as session: + return GlossaryRepository.search_glossary_terms(session=session, data=data) diff --git 
a/backend/dataall/modules/dashboards/api/resolvers.py b/backend/dataall/modules/dashboards/api/resolvers.py index 761545342..eefae4eca 100644 --- a/backend/dataall/modules/dashboards/api/resolvers.py +++ b/backend/dataall/modules/dashboards/api/resolvers.py @@ -1,5 +1,5 @@ from dataall.base.api.context import Context -from dataall.modules.catalog.db.glossary_repositories import Glossary +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.core.organizations.db.organization_repositories import Organization from dataall.modules.vote.db.vote_repositories import VoteRepository from dataall.base.db.exceptions import RequiredParameter @@ -106,7 +106,7 @@ def delete_dashboard(context: Context, source, dashboardUri: str = None): def resolve_glossary_terms(context: Context, source: Dashboard, **kwargs): with context.engine.scoped_session() as session: - return Glossary.get_glossary_terms_links( + return GlossaryRepository.get_glossary_terms_links( session, source.dashboardUri, 'Dashboard' ) diff --git a/backend/dataall/modules/dashboards/services/dashboard_service.py b/backend/dataall/modules/dashboards/services/dashboard_service.py index a1e5f33e3..86ddc46f8 100644 --- a/backend/dataall/modules/dashboards/services/dashboard_service.py +++ b/backend/dataall/modules/dashboards/services/dashboard_service.py @@ -2,7 +2,7 @@ from dataall.core.activity.db.activity_models import Activity from dataall.core.environment.env_permission_checker import has_group_permission from dataall.core.environment.services.environment_service import EnvironmentService -from dataall.modules.catalog.db.glossary_repositories import Glossary +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.core.permissions.db.resource_policy_repositories import ResourcePolicy from dataall.core.permissions.permission_checker import has_tenant_permission, has_resource_permission from dataall.modules.vote.db.vote_repositories import 
VoteRepository @@ -101,7 +101,7 @@ def delete_dashboard(uri) -> bool: ResourcePolicy.delete_resource_policy( session=session, resource_uri=uri, group=dashboard.SamlGroupName ) - Glossary.delete_glossary_terms_links( + GlossaryRepository.delete_glossary_terms_links( session, target_uri=dashboard.dashboardUri, target_type='Dashboard' ) VoteRepository.delete_votes(session, dashboard.dashboardUri, 'dashboard') @@ -129,7 +129,7 @@ def _attach_dashboard_policy(session, group: str, dashboard: Dashboard): def _update_glossary(session, dashboard, data): context = get_context() if 'terms' in data: - Glossary.set_glossary_terms_links( + GlossaryRepository.set_glossary_terms_links( session, context.username, dashboard.dashboardUri, diff --git a/backend/dataall/modules/datasets/__init__.py b/backend/dataall/modules/datasets/__init__.py index f4471c4a6..2a4603848 100644 --- a/backend/dataall/modules/datasets/__init__.py +++ b/backend/dataall/modules/datasets/__init__.py @@ -41,14 +41,12 @@ def __init__(self): import dataall.modules.datasets.api from dataall.modules.datasets.services.dataset_permissions import GET_DATASET, UPDATE_DATASET from dataall.modules.datasets_base.db.dataset_repositories import DatasetRepository - from dataall.modules.datasets_base.db.dataset_models import DatasetTableColumn, DatasetStorageLocation, DatasetTable, Dataset + from dataall.modules.datasets_base.db.dataset_models import DatasetStorageLocation, DatasetTable, Dataset - FeedRegistry.register(FeedDefinition("DatasetTableColumn", DatasetTableColumn)) FeedRegistry.register(FeedDefinition("DatasetStorageLocation", DatasetStorageLocation)) FeedRegistry.register(FeedDefinition("DatasetTable", DatasetTable)) FeedRegistry.register(FeedDefinition("Dataset", Dataset)) - GlossaryRegistry.register(GlossaryDefinition("Column", "DatasetTableColumn", DatasetTableColumn)) GlossaryRegistry.register(GlossaryDefinition( target_type="Folder", object_type="DatasetStorageLocation", diff --git 
a/backend/dataall/modules/datasets/api/dataset/resolvers.py b/backend/dataall/modules/datasets/api/dataset/resolvers.py index 2e61fddae..d1bc301e5 100644 --- a/backend/dataall/modules/datasets/api/dataset/resolvers.py +++ b/backend/dataall/modules/datasets/api/dataset/resolvers.py @@ -3,7 +3,7 @@ from dataall.core.stacks.api import stack_helper from dataall.base.api.context import Context from dataall.core.feature_toggle_checker import is_feature_enabled -from dataall.modules.catalog.db.glossary_repositories import Glossary +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.core.environment.services.environment_service import EnvironmentService from dataall.core.organizations.db.organization_repositories import Organization from dataall.base.db.exceptions import RequiredParameter, InvalidInput @@ -164,7 +164,7 @@ def get_dataset_glossary_terms(context: Context, source: Dataset, **kwargs): if not source: return None with context.engine.scoped_session() as session: - return Glossary.get_glossary_terms_links(session, source.datasetUri, 'Dataset') + return GlossaryRepository.get_glossary_terms_links(session, source.datasetUri, 'Dataset') def list_datasets_created_in_environment( diff --git a/backend/dataall/modules/datasets/api/storage_location/resolvers.py b/backend/dataall/modules/datasets/api/storage_location/resolvers.py index cf7b0a661..dcaf33bfd 100644 --- a/backend/dataall/modules/datasets/api/storage_location/resolvers.py +++ b/backend/dataall/modules/datasets/api/storage_location/resolvers.py @@ -1,5 +1,5 @@ from dataall.base.api.context import Context -from dataall.modules.catalog.db.glossary_repositories import Glossary +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.base.db.exceptions import RequiredParameter from dataall.core.feature_toggle_checker import is_feature_enabled from dataall.modules.datasets.services.dataset_location_service import DatasetLocationService @@ 
-58,6 +58,6 @@ def resolve_glossary_terms( if not source: return None with context.engine.scoped_session() as session: - return Glossary.get_glossary_terms_links( + return GlossaryRepository.get_glossary_terms_links( session, source.locationUri, 'DatasetStorageLocation' ) diff --git a/backend/dataall/modules/datasets/api/table/resolvers.py b/backend/dataall/modules/datasets/api/table/resolvers.py index f1638c161..2952d8250 100644 --- a/backend/dataall/modules/datasets/api/table/resolvers.py +++ b/backend/dataall/modules/datasets/api/table/resolvers.py @@ -1,6 +1,6 @@ import logging -from dataall.modules.catalog.db.glossary_repositories import Glossary +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.modules.datasets.api.dataset.resolvers import get_dataset from dataall.base.api.context import Context from dataall.modules.datasets.services.dataset_table_service import DatasetTableService @@ -53,7 +53,7 @@ def resolve_glossary_terms(context: Context, source: DatasetTable, **kwargs): if not source: return None with context.engine.scoped_session() as session: - return Glossary.get_glossary_terms_links( + return GlossaryRepository.get_glossary_terms_links( session, source.tableUri, 'DatasetTable' ) diff --git a/backend/dataall/modules/datasets/services/dataset_location_service.py b/backend/dataall/modules/datasets/services/dataset_location_service.py index e0f3c5aa3..41c0bbe22 100644 --- a/backend/dataall/modules/datasets/services/dataset_location_service.py +++ b/backend/dataall/modules/datasets/services/dataset_location_service.py @@ -1,5 +1,5 @@ from dataall.base.context import get_context -from dataall.modules.catalog.db.glossary_repositories import Glossary +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.core.permissions.permission_checker import has_resource_permission, has_tenant_permission from dataall.base.db.exceptions import ResourceShared, ResourceAlreadyExists from 
dataall.modules.dataset_sharing.db.share_object_repositories import ShareObjectRepository @@ -88,7 +88,7 @@ def remove_storage_location(uri: str = None): ShareObjectRepository.delete_shares(session, location.locationUri) DatasetLocationRepository.delete(session, location) - Glossary.delete_glossary_terms_links( + GlossaryRepository.delete_glossary_terms_links( session, target_uri=location.locationUri, target_type='DatasetStorageLocation', @@ -98,7 +98,7 @@ def remove_storage_location(uri: str = None): @staticmethod def _create_glossary_links(session, location, terms): - Glossary.set_glossary_terms_links( + GlossaryRepository.set_glossary_terms_links( session, get_context().username, location.locationUri, diff --git a/backend/dataall/modules/datasets/services/dataset_service.py b/backend/dataall/modules/datasets/services/dataset_service.py index f1dd53e47..4bf756b68 100644 --- a/backend/dataall/modules/datasets/services/dataset_service.py +++ b/backend/dataall/modules/datasets/services/dataset_service.py @@ -14,7 +14,7 @@ from dataall.core.stacks.db.keyvaluetag_repositories import KeyValueTag from dataall.core.stacks.db.stack_repositories import Stack from dataall.core.tasks.db.task_models import Task -from dataall.modules.catalog.db.glossary_repositories import Glossary +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.modules.vote.db.vote_repositories import VoteRepository from dataall.base.db.exceptions import AWSResourceNotFound, UnauthorizedOperation from dataall.modules.dataset_sharing.aws.kms_client import KmsClient @@ -205,7 +205,7 @@ def update_dataset(uri: str, data: dict): resource_type=Dataset.__name__, ) if data.get('terms'): - Glossary.set_glossary_terms_links(session, username, uri, 'Dataset', data.get('terms')) + GlossaryRepository.set_glossary_terms_links(session, username, uri, 'Dataset', data.get('terms')) DatasetRepository.update_dataset_activity(session, dataset, username) 
DatasetIndexer.upsert(session, dataset_uri=uri) @@ -530,5 +530,5 @@ def _transfer_stewardship_to_new_stewards(session, dataset, new_stewards): def delete_dataset_term_links(session, dataset_uri): tables = [t.tableUri for t in DatasetRepository.get_dataset_tables(session, dataset_uri)] for table_uri in tables: - Glossary.delete_glossary_terms_links(session, table_uri, 'DatasetTable') - Glossary.delete_glossary_terms_links(session, dataset_uri, 'Dataset') + GlossaryRepository.delete_glossary_terms_links(session, table_uri, 'DatasetTable') + GlossaryRepository.delete_glossary_terms_links(session, dataset_uri, 'Dataset') diff --git a/backend/dataall/modules/datasets/services/dataset_table_service.py b/backend/dataall/modules/datasets/services/dataset_table_service.py index fdfdca51c..c9be4ed27 100644 --- a/backend/dataall/modules/datasets/services/dataset_table_service.py +++ b/backend/dataall/modules/datasets/services/dataset_table_service.py @@ -1,7 +1,7 @@ import logging from dataall.base.context import get_context -from dataall.modules.catalog.db.glossary_repositories import Glossary +from dataall.modules.catalog.db.glossary_repositories import GlossaryRepository from dataall.core.environment.services.environment_service import EnvironmentService from dataall.core.permissions.db.resource_policy_repositories import ResourcePolicy from dataall.core.permissions.permission_checker import has_resource_permission, has_tenant_permission @@ -47,7 +47,7 @@ def update_table(uri: str, table_data: dict = None): DatasetTableRepository.save(session, table) if 'terms' in table_data: - Glossary.set_glossary_terms_links( + GlossaryRepository.set_glossary_terms_links( session, get_context().username, table.tableUri, 'DatasetTable', table_data['terms'] ) @@ -70,7 +70,7 @@ def delete_table(uri: str): ShareObjectRepository.delete_shares(session, table.tableUri) DatasetTableRepository.delete(session, table) - Glossary.delete_glossary_terms_links( + 
GlossaryRepository.delete_glossary_terms_links( session, target_uri=table.tableUri, target_type='DatasetTable' ) DatasetTableIndexer.delete_doc(doc_id=uri) diff --git a/backend/local_graphql_server.py b/backend/local_graphql_server.py index f150b4edb..9ae5ff0f4 100644 --- a/backend/local_graphql_server.py +++ b/backend/local_graphql_server.py @@ -140,6 +140,7 @@ def graphql_server(): # GraphQL queries are always sent as POST logger.debug(request.data) data = request.get_json() + print('*** Request ***', request.data) context = request_context(request.headers, mock=True) logger.debug(context) diff --git a/frontend/docker/dev/Dockerfile b/frontend/docker/dev/Dockerfile index c44983ed1..e3a39fd5b 100644 --- a/frontend/docker/dev/Dockerfile +++ b/frontend/docker/dev/Dockerfile @@ -7,7 +7,7 @@ COPY ./frontend/yarn.lock ./ # Install packages, use --ignore-scripts to not call postinstall, as it causes this step to fail because config.json # is not copied yet, and copying it here will trigger new install if config changes, which is inconvenient for development. -RUN yarn install --ignore-scripts +RUN yarn install --ignore-scripts --verbose COPY ./frontend/docker/dev/.env . COPY ./frontend . diff --git a/frontend/src/modules/Glossaries/services/getGlossary.js b/frontend/src/modules/Glossaries/services/getGlossary.js index f96905462..470deac62 100644 --- a/frontend/src/modules/Glossaries/services/getGlossary.js +++ b/frontend/src/modules/Glossaries/services/getGlossary.js @@ -36,6 +36,9 @@ export const getGlossary = (nodeUri) => ({ ... on DatasetTable { label } + ... 
on DatasetStorageLocation { + label + } } } } diff --git a/tests/modules/catalog/test_glossary.py b/tests/modules/catalog/test_glossary.py index e7373a28d..3eee36e9f 100644 --- a/tests/modules/catalog/test_glossary.py +++ b/tests/modules/catalog/test_glossary.py @@ -18,6 +18,7 @@ def g1(client, group): """, input={ 'label': 'Customer Glossary', + 'admin': group.name, 'readme': 'Glossary of customer related data', }, username='alice', @@ -133,10 +134,11 @@ def test_list_glossaries(client): """ ) assert response.data.listGlossaries.count == 1 + assert response.data.listGlossaries.nodes[0].stats.terms == 1 assert response.data.listGlossaries.nodes[0].stats.categories == 2 -def test_hierarchical_search(client): +def test_search_glossary(client): response = client.query( """ query SearchGlossary($filter:GlossaryNodeSearchFilter){ @@ -201,43 +203,6 @@ def test_get_glossary(client, g1): assert r.data.getGlossary.readme == g1.readme -def test_get_category(client, c1): - r = client.query( - """ - query GetCategory($nodeUri:String!){ - getCategory(nodeUri:$nodeUri){ - nodeUri - label - readme - } - } - """, - nodeUri=c1.nodeUri, - ) - print(r) - assert r.data.getCategory.nodeUri == c1.nodeUri - assert r.data.getCategory.label == c1.label - assert r.data.getCategory.readme == c1.readme - - -def test_get_term(client, t1): - r = client.query( - """ - query GetTerm($nodeUri:String!){ - getTerm(nodeUri:$nodeUri){ - nodeUri - label - readme - } - } - """, - nodeUri=t1.nodeUri, - ) - assert r.data.getTerm.nodeUri == t1.nodeUri - assert r.data.getTerm.label == t1.label - assert r.data.getTerm.readme == t1.readme - - def test_glossary_categories(client, g1, c1): r = client.query( """ @@ -267,55 +232,6 @@ def test_glossary_categories(client, g1, c1): assert r.data.getGlossary.categories.nodes[0].nodeUri == c1.nodeUri -def test_list_subcategory(client, c1): - r = client.query( - """ - query GetCategory($nodeUri:String!){ - getCategory(nodeUri:$nodeUri){ - nodeUri - label - readme - 
categories{ - count - nodes{ - nodeUri - label - readme - } - } - } - } - """, - nodeUri=c1.nodeUri, - ) - - assert r.data.getCategory.categories.count == 1 - - -def test_list_category_terms(client, c1): - r = client.query( - """ - query GetCategory($nodeUri:String!){ - getCategory(nodeUri:$nodeUri){ - nodeUri - label - readme - terms{ - count - nodes{ - nodeUri - label - readme - } - } - } - } - """, - nodeUri=c1.nodeUri, - ) - assert r.data.getCategory.terms.count == 1 - - def test_update_glossary(client, g1, group): r = client.query( """ @@ -443,7 +359,7 @@ def test_list_glossaries_after_delete(client): assert response.data.listGlossaries.nodes[0].stats.categories == 0 -def test_hierarchical_search_after_delete(client): +def test_search_glossary_after_delete(client): response = client.query( """ query SearchGlossary($filter:GlossaryNodeSearchFilter){ diff --git a/tests/modules/datasets/test_dataset_feed.py b/tests/modules/datasets/test_dataset_feed.py index 64a0c5ba1..dccc1d061 100644 --- a/tests/modules/datasets/test_dataset_feed.py +++ b/tests/modules/datasets/test_dataset_feed.py @@ -1,11 +1,11 @@ from dataall.modules.feed.api.registry import FeedRegistry -from dataall.modules.datasets_base.db.dataset_models import DatasetTableColumn +from dataall.modules.datasets_base.db.dataset_models import DatasetTable def test_dataset_registered(): - model = FeedRegistry.find_model("DatasetTableColumn") - assert model == DatasetTableColumn + model = FeedRegistry.find_model("DatasetTable") + assert model == DatasetTable - model = DatasetTableColumn() - assert "DatasetTableColumn" == FeedRegistry.find_target(model) + model = DatasetTable() + assert "DatasetTable" == FeedRegistry.find_target(model) diff --git a/tests/modules/datasets/test_dataset_glossary.py b/tests/modules/datasets/test_dataset_glossary.py index 393c317e8..ffba163d2 100644 --- a/tests/modules/datasets/test_dataset_glossary.py +++ b/tests/modules/datasets/test_dataset_glossary.py @@ -26,54 +26,6 @@ def 
_columns(db, dataset_fixture, table_fixture) -> List[DatasetTableColumn]: yield cols -def test_dataset_link_term(client, t1, _columns, group): - col = _columns[0] - r = client.query( - """ - mutation LinkTerm( - $nodeUri:String!, - $targetUri:String!, - $targetType:String!, - ){ - linkTerm( - nodeUri:$nodeUri, - targetUri:$targetUri, - targetType:$targetType - ){ - linkUri - } - } - """, - nodeUri=t1.nodeUri, - targetUri=col.columnUri, - targetType='Column', - username='alice', - groups=[group.name], - ) - link_uri = r.data.linkTerm.linkUri - - r = client.query( - """ - query GetGlossaryTermLink($linkUri:String!){ - getGlossaryTermLink(linkUri:$linkUri){ - linkUri - created - target{ - __typename - ... on DatasetTableColumn{ - label - columnUri - } - } - } - } - """, - linkUri=link_uri, - username='alice', - ) - print(r) - - def test_dataset_term_link_approval(db, client, t1, dataset_fixture, user, group): response = client.query( """ @@ -128,33 +80,31 @@ def test_dataset_term_link_approval(db, client, t1, dataset_fixture, user, group assert not link.approvedBySteward -def test_get_column_term_associations(t1, db, client): +def test_get_column_term_associations(t1, dataset_fixture, group, db, client): r = client.query( """ - query GetTerm($nodeUri:String!){ - getTerm(nodeUri:$nodeUri){ - nodeUri - label - readme - associations{ - count - nodes{ - linkUri - target{ - ... on DatasetTableColumn{ - label - columnUri - } - } - } + query GetDataset($datasetUri: String!) { + getDataset(datasetUri: $datasetUri) { + datasetUri + owner + description + terms { + count + nodes { + __typename + ... 
on Term { + nodeUri + path + label } + } } - + } } """, - nodeUri=t1.nodeUri, + datasetUri=dataset_fixture.datasetUri, username='alice', + groups=[group.name], ) - assert r.data.getTerm.nodeUri == t1.nodeUri - assert r.data.getTerm.label == t1.label - assert r.data.getTerm.readme == t1.readme + assert r.data.getDataset.terms.nodes[0].nodeUri == t1.nodeUri + assert r.data.getDataset.terms.nodes[0].label == t1.label