1- # Copyright 2021-2023 MONAI Consortium
1+ # Copyright 2021-2025 MONAI Consortium
22# Licensed under the Apache License, Version 2.0 (the "License");
33# you may not use this file except in compliance with the License.
44# You may obtain a copy of the License at
1212import logging
1313import os
1414from pathlib import Path
15- from typing import List
15+ from typing import Dict , List , Optional , Tuple , cast
16+
17+ from pydicom .dataset import Dataset
1618
1719from monai .deploy .core import ConditionType , Fragment , Operator , OperatorSpec
1820from monai .deploy .core .domain .dicom_series import DICOMSeries
@@ -163,6 +165,7 @@ def _load_data(self, files: List[str]):
163165 study_dict = {}
164166 series_dict = {}
165167 sop_instances = []
168+ sop_map : Dict [Tuple [str , str ], Dict [Optional [Tuple [float , float , float ]], List [Dataset ]]] = {}
166169
167170 for file in files :
168171 try :
@@ -171,34 +174,94 @@ def _load_data(self, files: List[str]):
171174 self ._logger .warn (f"Ignored { file } , reason being: { ex } " )
172175
173176 for sop_instance in sop_instances :
174- study_instance_uid = sop_instance [0x0020 , 0x000D ].value .name # name is the UID as str
175-
176- # First need to eliminate the SOP instances whose SOP Class is to be ignored.
177- if "SOPInstanceUID" not in sop_instance :
178- self ._logger .warn ("Instance ignored due to missing SOP instance UID tag" )
179- continue
180- sop_instance_uid = sop_instance ["SOPInstanceUID" ].value
181- if "SOPClassUID" not in sop_instance :
182- self ._logger .warn (f"Instance ignored due to missing SOP Class UID tag, { sop_instance_uid } " )
183- continue
184- if sop_instance ["SOPClassUID" ].value in DICOMDataLoaderOperator .SOP_CLASSES_TO_IGNORE :
185- self ._logger .warn (f"Instance ignored for being in the ignored class, { sop_instance_uid } " )
186- continue
187-
188- if study_instance_uid not in study_dict :
189- study = DICOMStudy (study_instance_uid )
190- self .populate_study_attributes (study , sop_instance )
191- study_dict [study_instance_uid ] = study
192-
193- series_instance_uid = sop_instance [0x0020 , 0x000E ].value .name # name is the UID as str
194-
195- if series_instance_uid not in series_dict :
196- series = DICOMSeries (series_instance_uid )
197- series_dict [series_instance_uid ] = series
198- self .populate_series_attributes (series , sop_instance )
199- study_dict [study_instance_uid ].add_series (series )
200-
201- series_dict [series_instance_uid ].add_sop_instance (sop_instance )
177+ try :
178+ study_instance_uid = sop_instance [0x0020 , 0x000D ].value .name # name is the UID as str
179+
180+ # First need to eliminate the SOP instances whose SOP Class is to be ignored.
181+ if "SOPInstanceUID" not in sop_instance :
182+ self ._logger .warn ("Instance ignored due to missing SOP instance UID tag" )
183+ continue
184+ sop_instance_uid = sop_instance ["SOPInstanceUID" ].value
185+ if "SOPClassUID" not in sop_instance :
186+ self ._logger .warn (f"Instance ignored due to missing SOP Class UID tag, { sop_instance_uid } " )
187+ continue
188+ if sop_instance ["SOPClassUID" ].value in DICOMDataLoaderOperator .SOP_CLASSES_TO_IGNORE :
189+ self ._logger .warn (f"Instance ignored for being in the ignored class, { sop_instance_uid } " )
190+ continue
191+
192+ if study_instance_uid not in study_dict :
193+ study = DICOMStudy (study_instance_uid )
194+ self .populate_study_attributes (study , sop_instance )
195+ study_dict [study_instance_uid ] = study
196+
197+ series_instance_uid = sop_instance [0x0020 , 0x000E ].value .name # name is the UID as str
198+
199+ if series_instance_uid not in series_dict :
200+ series = DICOMSeries (series_instance_uid )
201+ series_dict [series_instance_uid ] = series
202+ self .populate_series_attributes (series , sop_instance )
203+ study_dict [study_instance_uid ].add_series (series )
204+
205+ # Prepare sop_map entry
206+ series_key = (study_instance_uid , series_instance_uid )
207+ sop_map .setdefault (series_key , {})
208+ ipp = sop_instance .get ("ImagePositionPatient" , None )
209+ if ipp is not None :
210+ # Convert IPP to tuple
211+ ipp_tuple = cast (Tuple [float , float , float ], tuple (float (v ) for v in ipp ))
212+ else :
213+ # Non-image files will be missing IPP; store SOP instance under "None" key, move on to next SOP instance
214+ sop_map [series_key ].setdefault (ipp , []).append (sop_instance )
215+ continue
216+
217+ sop_list = sop_map [series_key ].setdefault (ipp_tuple , [])
218+
219+ if not sop_list :
220+ # First occurrence of this spatial position — store the SOP instance
221+ sop_list .append (sop_instance )
222+ else :
223+ # Duplicate spatial location found — compare AcquisitionNumbers (if absent, set to -1)
224+ exist = sop_list [0 ]
225+ exist_acq_num = int (exist .get ("AcquisitionNumber" , - 1 ))
226+ curr_acq_num = int (sop_instance .get ("AcquisitionNumber" , - 1 ))
227+ if curr_acq_num > exist_acq_num :
228+ # Current SOP instance AcquisitionNumber is greater - replace existing SOP instance
229+ self ._logger .info (
230+ f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP { ipp_tuple } "
231+ f"in Series { series_instance_uid } ; removed SOP instance with lower AcquisitionNumber "
232+ f"({ exist_acq_num } < { curr_acq_num } )"
233+ )
234+ sop_list [0 ] = sop_instance
235+ elif curr_acq_num < exist_acq_num :
236+ # Existing SOP instance AcquisitionNumber is greater - don't store current SOP instance
237+ self ._logger .info (
238+ f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP { ipp_tuple } "
239+ f"in Series { series_instance_uid } ; kept SOP instance with higher AcquisitionNumber "
240+ f"({ exist_acq_num } > { curr_acq_num } )"
241+ )
242+ elif curr_acq_num == - 1 :
243+ # AcquisitionNumber tag is absent for compared SOP instances - don't store current SOP instance
244+ self ._logger .info (
245+ f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP { ipp_tuple } "
246+ f"in Series { series_instance_uid } ; AcquisitionNumber tags are absent"
247+ )
248+ else :
249+ # AcquisitionNumber tag values are equal for compared SOP instances - don't store current SOP instance
250+ self ._logger .info (
251+ f"Duplicate spatial coordinates detected; removing duplicate SOP at IPP { ipp_tuple } "
252+ f"in Series { series_instance_uid } ; AcquisitionNumber tag values are equal "
253+ f"({ exist_acq_num } = { curr_acq_num } )"
254+ )
255+
256+ except Exception as ex :
257+ self ._logger .warn (f"Error parsing SOP Instance: { ex } " )
258+
259+ # Add unique SOPs to series_dict following potential duplication removal
260+ for (_ , series_uid ), ipp_dict in sop_map .items ():
261+ for _ , sop_list in ipp_dict .items ():
262+ for sop_instance in sop_list :
263+ series_dict [series_uid ].add_sop_instance (sop_instance )
264+
202265 return list (study_dict .values ())
203266
204267 def populate_study_attributes (self , study , sop_instance ):
0 commit comments