Skip to content

Commit

Permalink
Feature/mx 1332 organigram check for duplicates (#324)
Browse files Browse the repository at this point in the history
# Added
- organigram extraction checks for duplicate emails/labels in different
organigram units
  • Loading branch information
esinsj authored Nov 12, 2024
1 parent 53b2e76 commit 1e62cc3
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 11 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## [Unreleased]

### Added
- organigram extraction checks for duplicate emails/labels in different organigram units

### Changes

Expand Down
2 changes: 1 addition & 1 deletion mex/common/backend_api/connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def get_merged_item(
identifier: The merged item's identifier
Raises:
MExError: If no merged item was found
HTTPError: If no merged item was found
Returns:
A single merged item
Expand Down
46 changes: 36 additions & 10 deletions mex/common/organigram/extract.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import json
from collections.abc import Generator, Iterable

from mex.common.exceptions import MExError
from mex.common.logging import watch
from mex.common.models import ExtractedOrganizationalUnit
from mex.common.organigram.models import OrganigramUnit
Expand Down Expand Up @@ -55,14 +56,26 @@ def get_unit_merged_ids_by_synonyms(
Args:
extracted_units: Iterable of extracted units
Raises:
MExError: If the same entry maps to different merged IDs
Returns:
Mapping from unit synonyms to stableTargetIds
"""
return {
synonym: MergedOrganizationalUnitIdentifier(extracted_unit.stableTargetId)
for extracted_unit in extracted_units
for synonym in _get_synonyms(extracted_unit)
}
synonym_dict: dict[str, MergedOrganizationalUnitIdentifier] = {}
for extracted_unit in extracted_units:
for synonym in _get_synonyms(extracted_unit):
if (
synonym in synonym_dict
and synonym_dict[synonym] != extracted_unit.stableTargetId
):
msg = (
f"Conflict: label '{synonym}' is associated with merged unit IDs "
f"{synonym_dict[synonym]} and {extracted_unit.stableTargetId}."
)
raise MExError(msg)
synonym_dict[synonym] = extracted_unit.stableTargetId
return synonym_dict


def get_unit_merged_ids_by_emails(
Expand All @@ -75,11 +88,24 @@ def get_unit_merged_ids_by_emails(
Args:
extracted_units: Iterable of extracted units
Raises:
MExError: If the same entry maps to different merged IDs
Returns:
Mapping from lowercased `email` to stableTargetIds
"""
return {
email.lower(): MergedOrganizationalUnitIdentifier(extracted_unit.stableTargetId)
for extracted_unit in extracted_units
for email in extracted_unit.email
}
email_dict: dict[str, MergedOrganizationalUnitIdentifier] = {}
for extracted_unit in extracted_units:
for email in extracted_unit.email:
lower_email = email.lower()
if (
lower_email in email_dict
and email_dict[lower_email] != extracted_unit.stableTargetId
):
msg = (
f"Conflict: email '{email}' is associated with merged unit IDs "
f"{email_dict[lower_email]} and {extracted_unit.stableTargetId}."
)
raise MExError(msg)
email_dict[lower_email] = extracted_unit.stableTargetId
return email_dict
40 changes: 40 additions & 0 deletions tests/organigram/test_extract.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import pytest

from mex.common.exceptions import MExError
from mex.common.models import ExtractedOrganizationalUnit
from mex.common.organigram.extract import (
extract_organigram_units,
get_unit_merged_ids_by_emails,
get_unit_merged_ids_by_synonyms,
)
from mex.common.organigram.models import OrganigramUnit
from mex.common.types import Text


def test_extract_organigram_units(
Expand Down Expand Up @@ -40,6 +44,24 @@ def test_get_unit_merged_ids_by_synonyms(
}


def test_get_unit_merged_ids_by_synonyms_error(
extracted_child_unit: ExtractedOrganizationalUnit,
extracted_parent_unit: ExtractedOrganizationalUnit,
) -> None:
erroneus_extracted_child_unit = extracted_child_unit
erroneus_extracted_child_unit.name.append(Text(value="PARENT Dept."))

msg = (
f"MExError: Conflict: label 'PARENT Dept.' is associated with "
f"merged unit IDs {erroneus_extracted_child_unit.stableTargetId} and "
f"{extracted_parent_unit.stableTargetId}."
)
with pytest.raises(MExError, match=msg):
get_unit_merged_ids_by_synonyms(
[erroneus_extracted_child_unit, extracted_parent_unit]
)


def test_get_unit_merged_ids_by_emails(
extracted_child_unit: ExtractedOrganizationalUnit,
extracted_parent_unit: ExtractedOrganizationalUnit,
Expand All @@ -52,3 +74,21 @@ def test_get_unit_merged_ids_by_emails(
"pu@example.com": extracted_parent_unit.stableTargetId,
# child unit has no emails
}


def test_get_unit_merged_ids_by_emails_error(
extracted_child_unit: ExtractedOrganizationalUnit,
extracted_parent_unit: ExtractedOrganizationalUnit,
) -> None:
erroneus_extracted_child_unit = extracted_child_unit
erroneus_extracted_child_unit.email.append("PARENT@example.com")

msg = (
f"MExError: Conflict: email 'PARENT@example.com' is associated with "
f"merged unit IDs {erroneus_extracted_child_unit.stableTargetId} and "
f"{extracted_parent_unit.stableTargetId}."
)
with pytest.raises(MExError, match=msg):
get_unit_merged_ids_by_emails(
[erroneus_extracted_child_unit, extracted_parent_unit]
)

0 comments on commit 1e62cc3

Please sign in to comment.