Skip to content

Commit

Permalink
CU-8693v3tt6 SNOMED opcs refset selection (#402)
Browse files Browse the repository at this point in the history
* CU-8693v3tt6: Update refset ID for OPCS4 mappings in newer SNOMED releases

* CU-8693v3tt6: Add method to get direct refset mappings

* CU-8693v3tt6: Add tests to direct refset mappings method

* CU-8693v3tt6: Fix OPCS4 refset ID selection logic

* CU-8693v3tt6: Add test for OPCS4 refset ID selection
  • Loading branch information
mart-r authored Feb 16, 2024
1 parent d01084c commit a3138a6
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 1 deletion.
37 changes: 36 additions & 1 deletion medcat/utils/preprocess_snomed.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,32 @@ def get_all_children(sctid, pt2ch):
return result


def get_direct_refset_mapping(in_dict: dict) -> dict:
    """Map each SNOMED CUI to its prioritised list of target ontology CUIs.

    This method uses the output from Snomed.map_snomed2icd10 or
    Snomed.map_snomed2opcs4, removes the metadata and maps each
    SNOMED CUI to the prioritised list of the target ontology CUIs.

    The input dict is expected to be in the following format:
        - Keys are SnomedCT CUIs
        - The values are lists of dictionaries, each list item (at least)
            - Has a key 'code' that specifies the target ontology CUI
            - Has a key 'mapPriority' that specifies the priority
              (an integer, or a string holding an integer)

    Args:
        in_dict (dict): The input dict.

    Returns:
        dict: The map from Snomed CUI to the list of target ontology CUIs,
            ordered from the largest 'mapPriority' value to the smallest.
            Items with equal priority keep their input order.

    Raises:
        KeyError: If a list item lacks the 'code' or 'mapPriority' key.
        ValueError: If a 'mapPriority' value cannot be read as an integer.
    """
    def _priority(item: dict) -> int:
        # Compare priorities numerically. When they are given as strings,
        # a plain comparison is lexicographic and would rank '10' below '2'.
        return int(item['mapPriority'])

    return {
        cui: [item['code']
              for item in sorted(items, key=_priority, reverse=True)]
        for cui, items in in_dict.items()
    }


class Snomed:
"""
Pre-process SNOMED CT release files.
Expand All @@ -53,6 +79,15 @@ def __init__(self, data_path, uk_ext=False, uk_drug_ext=False):
self.release = data_path[-16:-8]
self.uk_ext = uk_ext
self.uk_drug_ext = uk_drug_ext
self.opcs_refset_id = "1126441000000105"
if ((self.uk_ext or self.uk_drug_ext) and
# using lexicographical comparison below
# e.g "20240101" > "20231122" results in True
# yet "20231121" > "20231122" results in False
len(self.release) == len("20231122") and self.release >= "20231122"):
# NOTE for UK extensions starting from 20231122 the
# OPCS4 refset ID seems to be different
self.opcs_refset_id = '1382401000000109'

def to_concept_df(self):
"""
Expand Down Expand Up @@ -398,7 +433,7 @@ def _map_snomed2refset(self):
mapping_df = pd.concat(dfs2merge)
del dfs2merge
if self.uk_ext or self.uk_drug_ext:
opcs_df = mapping_df[mapping_df['refsetId'] == '1126441000000105']
opcs_df = mapping_df[mapping_df['refsetId'] == self.opcs_refset_id]
icd10_df = mapping_df[mapping_df['refsetId']
== '999002271000000101']
return icd10_df, opcs_df
Expand Down
64 changes: 64 additions & 0 deletions tests/utils/test_preprocess_snomed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
from typing import Dict
from medcat.utils import preprocess_snomed

import unittest


# Minimal refset-style input: one SNOMED CUI mapped to three target entries,
# each carrying the 'code' and 'mapPriority' keys the mapping helper reads.
EXAMPLE_REFSET_DICT: Dict = {
    'SCUI1': [
        {'code': 'TCUI1', 'mapPriority': '1'},
        {'code': 'TCUI2', 'mapPriority': '2'},
        {'code': 'TCUI3', 'mapPriority': '3'},
    ]
}

# in order from highest priority to lowest
EXPECTED_DIRECT_MAPPINGS = {"SCUI1": ['TCUI3', 'TCUI2', 'TCUI1']}


def _without_key(refset: Dict, dropped: str) -> Dict:
    """Return a copy of ``refset`` whose entries lack the ``dropped`` key."""
    return {
        scui: [
            {field: value for field, value in entry.items() if field != dropped}
            for entry in entries
        ]
        for scui, entries in refset.items()
    }


# Same entries, each with an additional key the mapping helper should ignore.
EXAMPLE_REFSET_DICT_WITH_EXTRAS = {
    scui: [{**entry, 'otherKey': f"val-{scui}"} for entry in entries]
    for scui, entries in EXAMPLE_REFSET_DICT.items()
}

# Same entries with the priority removed.
EXAMPLE_REFSET_DICT_NO_PRIORITY = _without_key(EXAMPLE_REFSET_DICT, 'mapPriority')

# Same entries with the target code removed.
EXAMPLE_REFSET_DICT_NO_CODE = _without_key(EXAMPLE_REFSET_DICT, 'code')


class DirectMappingTest(unittest.TestCase):
    """Checks for preprocess_snomed.get_direct_refset_mapping."""

    def test_example_gets_direct_mappings(self):
        # plain input: codes come back ordered by descending mapPriority
        mapping = preprocess_snomed.get_direct_refset_mapping(
            EXAMPLE_REFSET_DICT)
        self.assertEqual(mapping, EXPECTED_DIRECT_MAPPINGS)

    def test_example_w_extras_gets_direct_mappings(self):
        # additional keys on the entries must not affect the result
        mapping = preprocess_snomed.get_direct_refset_mapping(
            EXAMPLE_REFSET_DICT_WITH_EXTRAS)
        self.assertEqual(mapping, EXPECTED_DIRECT_MAPPINGS)

    def test_example_no_priority_fails(self):
        # entries without 'mapPriority' are rejected with a KeyError
        self.assertRaises(
            KeyError,
            preprocess_snomed.get_direct_refset_mapping,
            EXAMPLE_REFSET_DICT_NO_PRIORITY,
        )

    def test_example_no_codfe_fails(self):
        # entries without 'code' are rejected with a KeyError
        self.assertRaises(
            KeyError,
            preprocess_snomed.get_direct_refset_mapping,
            EXAMPLE_REFSET_DICT_NO_CODE,
        )

# Example release folder names used to build Snomed instances in the tests.
# Snomed.__init__ reads the release date from data_path[-16:-8], which is
# "20220831" for the old path and "20231122" for the new one.
EXAMPLE_SNOMED_PATH_OLD = "SnomedCT_InternationalRF2_PRODUCTION_20220831T120000Z"
EXAMPLE_SNOMED_PATH_NEW = "SnomedCT_UKClinicalRF2_PRODUCTION_20231122T000001Z"


class TestSnomedVersionsOPCS4(unittest.TestCase):
    """Checks which OPCS4 refset ID a Snomed instance selects."""

    def test_old_gets_old_OPCS4_mapping_nonuk_ext(self):
        # old release, no UK extension -> original refset ID
        refset_id = preprocess_snomed.Snomed(
            EXAMPLE_SNOMED_PATH_OLD, uk_ext=False).opcs_refset_id
        self.assertEqual(refset_id, "1126441000000105")

    def test_old_gets_old_OPCS4_mapping_uk_ext(self):
        # old release with UK extension -> still the original refset ID
        refset_id = preprocess_snomed.Snomed(
            EXAMPLE_SNOMED_PATH_OLD, uk_ext=True).opcs_refset_id
        self.assertEqual(refset_id, "1126441000000105")

    def test_new_gets_new_OCPS4_mapping_uk_ext(self):
        # new release with UK extension -> updated refset ID
        refset_id = preprocess_snomed.Snomed(
            EXAMPLE_SNOMED_PATH_NEW, uk_ext=True).opcs_refset_id
        self.assertEqual(refset_id, "1382401000000109")

0 comments on commit a3138a6

Please sign in to comment.