Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/mx 1332 organigram check for duplicates #324

Merged
merged 9 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- organigram extraction checks for duplicate emails/labels in different organigram units

### Changes

Expand Down
2 changes: 1 addition & 1 deletion mex/common/backend_api/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def get_merged_item(
identifier: The merged item's identifier

Raises:
MExError: If no merged item was found
HTTPError: If no merged item was found

Returns:
A single merged item
Expand Down
46 changes: 36 additions & 10 deletions mex/common/organigram/extract.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from collections.abc import Generator, Iterable

from mex.common.exceptions import MExError
from mex.common.logging import watch
from mex.common.models import ExtractedOrganizationalUnit
from mex.common.organigram.models import OrganigramUnit
Expand Down Expand Up @@ -55,14 +56,26 @@ def get_unit_merged_ids_by_synonyms(
Args:
extracted_units: Iterable of extracted units

Raises:
MExError: If the same entry maps to different merged IDs

Returns:
Mapping from unit synonyms to stableTargetIds
"""
return {
synonym: MergedOrganizationalUnitIdentifier(extracted_unit.stableTargetId)
for extracted_unit in extracted_units
for synonym in _get_synonyms(extracted_unit)
}
synonym_dict: dict[str, MergedOrganizationalUnitIdentifier] = {}
for extracted_unit in extracted_units:
for synonym in _get_synonyms(extracted_unit):
if (
synonym in synonym_dict
and synonym_dict[synonym] != extracted_unit.stableTargetId
):
msg = (
f"Conflict: label '{synonym}' is associated with merged unit IDs "
f"{synonym_dict[synonym]} and {extracted_unit.stableTargetId}."
)
raise MExError(msg)
synonym_dict[synonym] = extracted_unit.stableTargetId
return synonym_dict


def get_unit_merged_ids_by_emails(
Expand All @@ -75,11 +88,24 @@ def get_unit_merged_ids_by_emails(
Args:
extracted_units: Iterable of extracted units

Raises:
MExError: If the same entry maps to different merged IDs

Returns:
Mapping from lowercased `email` to stableTargetIds
"""
return {
email.lower(): MergedOrganizationalUnitIdentifier(extracted_unit.stableTargetId)
for extracted_unit in extracted_units
for email in extracted_unit.email
}
email_dict: dict[str, MergedOrganizationalUnitIdentifier] = {}
for extracted_unit in extracted_units:
for email in extracted_unit.email:
lower_email = email.lower()
if (
lower_email in email_dict
and email_dict[lower_email] != extracted_unit.stableTargetId
):
msg = (
f"Conflict: email '{email}' is associated with merged unit IDs "
f"{email_dict[lower_email]} and {extracted_unit.stableTargetId}."
)
raise MExError(msg)
email_dict[lower_email] = extracted_unit.stableTargetId
return email_dict
40 changes: 40 additions & 0 deletions tests/organigram/test_extract.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import pytest

from mex.common.exceptions import MExError
from mex.common.models import ExtractedOrganizationalUnit
from mex.common.organigram.extract import (
extract_organigram_units,
get_unit_merged_ids_by_emails,
get_unit_merged_ids_by_synonyms,
)
from mex.common.organigram.models import OrganigramUnit
from mex.common.types import Text


def test_extract_organigram_units(
Expand Down Expand Up @@ -40,6 +44,24 @@ def test_get_unit_merged_ids_by_synonyms(
}


def test_get_unit_merged_ids_by_synonyms_error(
extracted_child_unit: ExtractedOrganizationalUnit,
extracted_parent_unit: ExtractedOrganizationalUnit,
) -> None:
erroneus_extracted_child_unit = extracted_child_unit
erroneus_extracted_child_unit.name.append(Text(value="PARENT Dept."))

msg = (
f"MExError: Conflict: label 'PARENT Dept.' is associated with "
f"merged unit IDs {erroneus_extracted_child_unit.stableTargetId} and "
f"{extracted_parent_unit.stableTargetId}."
)
with pytest.raises(MExError, match=msg):
get_unit_merged_ids_by_synonyms(
[erroneus_extracted_child_unit, extracted_parent_unit]
)


def test_get_unit_merged_ids_by_emails(
extracted_child_unit: ExtractedOrganizationalUnit,
extracted_parent_unit: ExtractedOrganizationalUnit,
Expand All @@ -52,3 +74,21 @@ def test_get_unit_merged_ids_by_emails(
"pu@example.com": extracted_parent_unit.stableTargetId,
# child unit has no emails
}


def test_get_unit_merged_ids_by_emails_error(
esinsj marked this conversation as resolved.
Show resolved Hide resolved
extracted_child_unit: ExtractedOrganizationalUnit,
extracted_parent_unit: ExtractedOrganizationalUnit,
) -> None:
erroneus_extracted_child_unit = extracted_child_unit
erroneus_extracted_child_unit.email.append("PARENT@example.com")

msg = (
f"MExError: Conflict: email 'PARENT@example.com' is associated with "
f"merged unit IDs {erroneus_extracted_child_unit.stableTargetId} and "
f"{extracted_parent_unit.stableTargetId}."
)
with pytest.raises(MExError, match=msg):
get_unit_merged_ids_by_emails(
[erroneus_extracted_child_unit, extracted_parent_unit]
)