diff --git a/THIRD_PARTY_NOTICES/PyPDF2_BSD-3_LICENSE.txt b/THIRD_PARTY_NOTICES/PyPDF2_BSD-3_LICENSE.txt new file mode 100644 index 00000000..6b83096f --- /dev/null +++ b/THIRD_PARTY_NOTICES/PyPDF2_BSD-3_LICENSE.txt @@ -0,0 +1,29 @@ +Copyright (c) 2006-2008, Mathieu Fenniak +Some contributions copyright (c) 2007, Ashish Kulkarni +Some contributions copyright (c) 2014, Steve Witham + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. +* The name of the author may not be used to endorse or promote products +derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. 
\ No newline at end of file diff --git a/examples/apps/mednist_classifier_monaideploy/mednist_classifier_monaideploy.py b/examples/apps/mednist_classifier_monaideploy/mednist_classifier_monaideploy.py index 2759737c..3705109b 100644 --- a/examples/apps/mednist_classifier_monaideploy/mednist_classifier_monaideploy.py +++ b/examples/apps/mednist_classifier_monaideploy/mednist_classifier_monaideploy.py @@ -22,7 +22,7 @@ Operator, OutputContext, ) -from monai.deploy.operators.dicom_text_sr_writer_operator import DICOMTextSRWriterOperator, EquipmentInfo, ModelInfo +from monai.deploy.operators import DICOMTextSRWriterOperator, EquipmentInfo, ModelInfo from monai.transforms import AddChannel, Compose, EnsureType, ScaleIntensity MEDNIST_CLASSES = ["AbdomenCT", "BreastMRI", "CXR", "ChestCT", "Hand", "HeadCT"] diff --git a/monai/deploy/operators/__init__.py b/monai/deploy/operators/__init__.py index b0ac5ed9..c9dce3a4 100644 --- a/monai/deploy/operators/__init__.py +++ b/monai/deploy/operators/__init__.py @@ -19,8 +19,10 @@ DICOMSeriesSelectorOperator DICOMSeriesToVolumeOperator DICOMTextSRWriterOperator + EquipmentInfo InferenceOperator IOMapping + ModelInfo MonaiBundleInferenceOperator MonaiSegInferenceOperator PNGConverterOperator @@ -34,7 +36,8 @@ from .dicom_seg_writer_operator import DICOMSegmentationWriterOperator from .dicom_series_selector_operator import DICOMSeriesSelectorOperator from .dicom_series_to_volume_operator import DICOMSeriesToVolumeOperator -from .dicom_text_sr_writer_operator import DICOMTextSRWriterOperator, EquipmentInfo, ModelInfo +from .dicom_text_sr_writer_operator import DICOMTextSRWriterOperator +from .dicom_utils import EquipmentInfo, ModelInfo, random_with_n_digits, save_dcm_file, write_common_modules from .inference_operator import InferenceOperator from .monai_bundle_inference_operator import BundleConfigNames, IOMapping, MonaiBundleInferenceOperator from .monai_seg_inference_operator import MonaiSegInferenceOperator diff --git 
a/monai/deploy/operators/dicom_encapsulated_pdf_writer_operator.py b/monai/deploy/operators/dicom_encapsulated_pdf_writer_operator.py new file mode 100644 index 00000000..4e4fafd6 --- /dev/null +++ b/monai/deploy/operators/dicom_encapsulated_pdf_writer_operator.py @@ -0,0 +1,271 @@ +# Copyright 2022 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from ast import Bytes +from io import BytesIO +from pathlib import Path +from typing import Dict, List, Optional + +from PyPDF2 import PdfReader + +from monai.deploy.utils.importutil import optional_import + +dcmread, _ = optional_import("pydicom", name="dcmread") +dcmwrite, _ = optional_import("pydicom.filewriter", name="dcmwrite") +generate_uid, _ = optional_import("pydicom.uid", name="generate_uid") +ImplicitVRLittleEndian, _ = optional_import("pydicom.uid", name="ImplicitVRLittleEndian") +Dataset, _ = optional_import("pydicom.dataset", name="Dataset") +FileDataset, _ = optional_import("pydicom.dataset", name="FileDataset") +Sequence, _ = optional_import("pydicom.sequence", name="Sequence") + +import monai.deploy.core as md +from monai.deploy.core import DataPath, ExecutionContext, InputContext, IOType, Operator, OutputContext +from monai.deploy.core.domain.dicom_series import DICOMSeries +from monai.deploy.core.domain.dicom_series_selection import StudySelectedSeries +from monai.deploy.exceptions import ItemNotExistsError +from monai.deploy.operators.dicom_utils import EquipmentInfo, ModelInfo, 
save_dcm_file, write_common_modules +from monai.deploy.utils.version import get_sdk_semver + + +# The SR writer operator class +@md.input("pdf_bytes", Bytes, IOType.IN_MEMORY) +@md.input("pdf_file", DataPath, IOType.DISK) +@md.input("study_selected_series_list", List[StudySelectedSeries], IOType.IN_MEMORY) +@md.output("dicom_instance", DataPath, IOType.DISK) +@md.env(pip_packages=["pydicom >= 1.4.2"]) +class DICOMEncapsulatedPDFWriterOperator(Operator): + + DCM_EXTENSION = ".dcm" + + def __init__( + self, + copy_tags: bool, + model_info: ModelInfo, + equipment_info: Optional[EquipmentInfo] = None, + custom_tags: Optional[Dict[str, str]] = None, + *args, + **kwargs, + ): + """Class to write DICOM Encapsulated PDF Instance with PDF bytes in memory or in a file. + + Args: + copy_tags (bool): True for copying DICOM attributes from a provided DICOMSeries. + model_info (ModelInfo): Object encapsulating model creator, name, version and UID. + equipment_info (EquipmentInfo, optional): Object encapsulating info for DICOM Equipment Module. + Defaults to None. + custom_tags (Dict[str, str], optional): Dictionary for setting custom DICOM tags using Keywords + and str values only. Defaults to None. + + Raises: + ValueError: If copy_tags is true and no DICOMSeries object provided, or + if PDF bytes cannot be found in memory or loaded from the file. + """ + super().__init__(*args, **kwargs) + self._logger = logging.getLogger("{}.{}".format(__name__, type(self).__name__)) + self.copy_tags = copy_tags + self.model_info = model_info if model_info else ModelInfo() + self.equipment_info = equipment_info if equipment_info else EquipmentInfo() + self.custom_tags = custom_tags + + # Set own Modality and SOP Class UID + # Modality, e.g., + # "OT" for PDF, "DOC" would do too. + # "SR" for Structured Report. 
+ # Media Storage SOP Class UID, e.g., + # "1.2.840.10008.5.1.4.1.1.88.11" for Basic Text SR Storage + # "1.2.840.10008.5.1.4.1.1.104.1" for Encapsulated PDF Storage, + # "1.2.840.10008.5.1.4.1.1.88.34" for Comprehensive 3D SR IOD + # "1.2.840.10008.5.1.4.1.1.66.4" for Segmentation Storage + # '1.2.840.10008.5.1.4.1.1.104.1' for Encapsulated PDF Storage + self.modality_type = "OT" + self.sop_class_uid = "1.2.840.10008.5.1.4.1.1.104.1" + + # Equipment version may be different from contributing equipment version + try: + self.software_version_number = get_sdk_semver() # SDK Version + except Exception: + self.software_version_number = "" + self.operators_name = f"AI Algorithm {self.model_info.name}" + + def compute(self, op_input: InputContext, op_output: OutputContext, context: ExecutionContext): + """Performs computation for this operator and handles I/O. + + For now, only a single result content is supported, which could be in bytes or a path + to the PDF file. The DICOM series used during inference is optional, but is required if the + `copy_tags` is true indicating the generated DICOM object needs to copy study level metadata. + + When there are multiple selected series in the input, the first series' containing study will + be used for retrieving DICOM Study module attributes, e.g. StudyInstanceUID. + + Raises: + FileNotFoundError: When bytes are not in the input, and the file is not given or found. + ValueError: Input bytes and PDF file not in the input, or no DICOM series when required. + IOError: If fails to get the bytes of the PDF + """ + + # Gets the input, prepares the output folder, and then delegates the processing. 
+ pdf_bytes: bytes = b"" + try: + pdf_bytes = op_input.get("pdf_bytes") + except ItemNotExistsError: + try: + file_path = op_input.get("pdf_file") + except ItemNotExistsError: + raise ValueError("None of the named inputs can be found.") from None + # Read file, and if exception, let it bubble up + with open(file_path.path, "rb") as f: + pdf_bytes = f.read().strip() + + if not pdf_bytes or not len(pdf_bytes.strip()): + raise IOError("Input is read but blank.") + + try: + study_selected_series_list = op_input.get("study_selected_series_list") + except ItemNotExistsError: + study_selected_series_list = None + + dicom_series = None # It can be None if not to copy_tags. + if self.copy_tags: + # Get the first DICOM Series for retrieving study level tags. + if not study_selected_series_list or len(study_selected_series_list) < 1: + raise ValueError("Missing input, list of 'StudySelectedSeries'.") + for study_selected_series in study_selected_series_list: + if not isinstance(study_selected_series, StudySelectedSeries): + raise ValueError("Element in input is not expected type, 'StudySelectedSeries'.") + for selected_series in study_selected_series.selected_series: + dicom_series = selected_series.series + break + + output_dir = op_output.get().path + output_dir.mkdir(parents=True, exist_ok=True) + + # Now ready to starting writing the DICOM instance + self.write(pdf_bytes, dicom_series, output_dir) + + def write(self, content_bytes, dicom_series: Optional[DICOMSeries], output_dir: Path): + """Writes DICOM object + + Args: + content_bytes (bytes): Content bytes of PDF + dicom_series (DicomSeries): DicomSeries object encapsulating the original series. + output_dir (Path): Folder path for saving the generated file. 
+ """ + self._logger.debug("Writing DICOM object...\n") + + if not isinstance(content_bytes, bytes): + raise ValueError("Input must be bytes.") + elif not len(content_bytes.strip()): + raise ValueError("Content is empty.") + elif not self._is_pdf_bytes(content_bytes): + raise ValueError("Not PDF bytes.") + + if not isinstance(output_dir, Path): + raise ValueError("output_dir is not a valid Path.") + + output_dir.mkdir(parents=True, exist_ok=True) # Just in case + + ds = write_common_modules( + dicom_series, self.copy_tags, self.modality_type, self.sop_class_uid, self.model_info, self.equipment_info + ) + + # Encapsulated PDF specific + # SC Equipment Module + ds.Modality = self.modality_type + ds.ConversionType = "SD" # Describes the kind of image conversion, Scanned Doc + + # Encapsulated Document Module + ds.VerificationFlag = "UNVERIFIED" # Not attested by a legally accountable person. + + ds.BurnedInAnnotation = "YES" + ds.DocumentTitle = "Generated Observations" + ds.MIMETypeOfEncapsulatedDocument = "application/pdf" + ds.EncapsulatedDocument = content_bytes + + # ConceptNameCode Sequence + seq_concept_name_code = Sequence() + ds_concept_name_code = Dataset() + ds_concept_name_code.CodeValue = "18748-4" + ds_concept_name_code.CodingSchemeDesignator = "LN" + ds_concept_name_code.CodeMeaning = "Diagnostic Imaging Report" + seq_concept_name_code.append(ds_concept_name_code) + ds.ConceptNameCodeSequence = seq_concept_name_code + + # For now, only allow str Keywords and str value + if self.custom_tags: + for k, v in self.custom_tags.items(): + if isinstance(k, str) and isinstance(v, str): + try: + ds.update({k: v}) + except Exception as ex: + # Best effort for now. 
+ logging.warning(f"Tag {k} was not written, due to {ex}") + + # Instance file name is the same as the new SOP instance UID + file_path = output_dir.joinpath(f"{ds.SOPInstanceUID}{DICOMEncapsulatedPDFWriterOperator.DCM_EXTENSION}") + save_dcm_file(ds, file_path) + self._logger.info(f"DICOM SOP instance saved in {file_path}") + + def _is_pdf_bytes(self, content: bytes): + try: + bytes_stream = BytesIO(content) + reader = PdfReader(bytes_stream) + self._logger.debug(f"The PDF has {reader.pages} page(s).") + except Exception as ex: + self._logger.exception(f"Cannot read as PDF: {ex}") + return False + return True + + +def test(): + from monai.deploy.operators.dicom_data_loader_operator import DICOMDataLoaderOperator + from monai.deploy.operators.dicom_series_selector_operator import DICOMSeriesSelectorOperator + + current_file_dir = Path(__file__).parent.resolve() + dcm_folder = current_file_dir.joinpath("../../../inputs/livertumor_ct/dcm/1-CT_series_liver_tumor_from_nii014") + pdf_file = current_file_dir.joinpath("../../../inputs/pdf/TestPDF.pdf") + out_path = "output_pdf_op" + pdf_bytes = b"Not PDF bytes." + test_copy_tags = False + + loader = DICOMDataLoaderOperator() + series_selector = DICOMSeriesSelectorOperator() + sr_writer = DICOMEncapsulatedPDFWriterOperator( + copy_tags=test_copy_tags, + model_info=None, + equipment_info=EquipmentInfo(), + custom_tags={"SeriesDescription": "Report from AI algorithm. Not for clinical use."}, + ) + + # Testing with the main entry functions + dicom_series = None + if test_copy_tags: + study_list = loader.load_data_to_studies(Path(dcm_folder).absolute()) + study_selected_series_list = series_selector.filter(None, study_list) + # Get the first DICOM Series, as for now, only expecting this. 
+ if not study_selected_series_list or len(study_selected_series_list) < 1: + raise ValueError("Missing input, list of 'StudySelectedSeries'.") + for study_selected_series in study_selected_series_list: + if not isinstance(study_selected_series, StudySelectedSeries): + raise ValueError("Element in input is not expected type, 'StudySelectedSeries'.") + for selected_series in study_selected_series.selected_series: + print(type(selected_series)) + dicom_series = selected_series.series + print(type(dicom_series)) + + with open(pdf_file, "rb") as f: + pdf_bytes = f.read() + + sr_writer.write(pdf_bytes, dicom_series, Path(out_path).absolute()) + + +if __name__ == "__main__": + test() diff --git a/monai/deploy/operators/dicom_text_sr_writer_operator.py b/monai/deploy/operators/dicom_text_sr_writer_operator.py index 5e34c5b2..43d115da 100644 --- a/monai/deploy/operators/dicom_text_sr_writer_operator.py +++ b/monai/deploy/operators/dicom_text_sr_writer_operator.py @@ -1,4 +1,4 @@ -# Copyright 2021 MONAI Consortium +# Copyright 2021-2022 MONAI Consortium # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -9,11 +9,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import datetime import logging from pathlib import Path -from random import randint -from typing import Dict, List, Optional, Text, Union +from typing import Dict, List, Optional, Text from monai.deploy.utils.importutil import optional_import @@ -30,61 +28,10 @@ from monai.deploy.core.domain.dicom_series import DICOMSeries from monai.deploy.core.domain.dicom_series_selection import StudySelectedSeries from monai.deploy.exceptions import ItemNotExistsError +from monai.deploy.operators.dicom_utils import EquipmentInfo, ModelInfo, save_dcm_file, write_common_modules from monai.deploy.utils.version import get_sdk_semver -# Utility classes considered to be moved into Domain module -class ModelInfo(object): - """Class encapsulating AI model information, according to IHE AI Results (AIR) Rev 1.1. - - The attributes of the class will be used to populate the Contributing Equipment Sequence in the DICOM IOD - per IHE AIR Rev 1.1, Section 6.5.3.1 General Result Encoding Requirements, as the following, - - The Creator shall describe each algorithm that was used to generate the results in the - Contributing Equipment Sequence (0018,A001). Multiple items may be included. The Creator - shall encode the following details in the Contributing Equipment Sequence, - - Purpose of Reference Code Sequence (0040,A170) shall be (Newcode1, 99IHE, 1630 "Processing Algorithm") - - Manufacturer (0008,0070) - - Manufacturer's Model Name (0008,1090) - - Software Versions (0018,1020) - - Device UID (0018,1002) - - Each time an AI Model is modified, for example by training, it would be appropriate to update - the Device UID. 
- """ - - def __init__(self, creator: str = "", name: str = "", version: str = "", uid: str = ""): - - self.creator = creator if isinstance(creator, str) else "" - self.name = name if isinstance(name, str) else "" - self.version = version if isinstance(version, str) else "" - self.uid = uid if isinstance(uid, str) else "" - - -class EquipmentInfo(object): - """Class encapsulating attributes required for DICOM Equipment Module.""" - - def __init__( - self, - manufacturer: str = "MONAI Deploy", - manufacturer_model: str = "MONAI Deploy App SDK", - series_number: str = "0000", - software_version_number: str = "", - ): - - self.manufacturer = manufacturer if isinstance(manufacturer, str) else "" - self.manufacturer_model = manufacturer_model if isinstance(manufacturer_model, str) else "" - self.series_number = series_number if isinstance(series_number, str) else "" - if software_version_number: - self.software_version_number = software_version_number - else: - try: - version_str = get_sdk_semver() # SDK Version - except Exception: - version_str = "" # Fall back to the initial version - self.software_version_number = version_str - - # The SR writer operator class @md.input("classification_result", Text, IOType.IN_MEMORY) @md.input("classification_result_file", DataPath, IOType.DISK) @@ -93,14 +40,15 @@ def __init__( @md.env(pip_packages=["pydicom >= 1.4.2"]) class DICOMTextSRWriterOperator(Operator): + # File extension for the generated DICOM Part 10 file. DCM_EXTENSION = ".dcm" def __init__( self, copy_tags: bool, model_info: ModelInfo, - equipment_info: Union[EquipmentInfo, None] = None, - custom_tags: Union[Dict[str, str], None] = None, + equipment_info: Optional[EquipmentInfo] = None, + custom_tags: Optional[Dict[str, str]] = None, *args, **kwargs, ): @@ -146,14 +94,17 @@ def __init__( def compute(self, op_input: InputContext, op_output: OutputContext, context: ExecutionContext): """Performs computation for this operator and handles I/O. 
- For now, only a single image object or result content is supported and the selected DICOM - series for inference is required, because the generated IOD needs to refer to original instance. + For now, only a single result content is supported, which could be in memory or an accessible file. + The DICOM series used during inference is optional, but is required if the + `copy_tags` is true indicating the generated DICOM object needs to copy study level metadata. + When there are multiple selected series in the input, the first series' containing study will be used for retrieving DICOM Study module attributes, e.g. StudyInstanceUID. Raises: FileNotFoundError: When result object not in the input, and result file not found either. - ValueError: Neither image object nor image file's folder is in the input, or no selected series. + ValueError: Content object and file path not in the inputs, or no DICOM series when required. + IOError: If the input content is blank. """ # Gets the input, prepares the output folder, and then delegates the processing. @@ -177,9 +128,9 @@ def compute(self, op_input: InputContext, op_output: OutputContext, context: Exe except ItemNotExistsError: study_selected_series_list = None - dicom_series = None # It can be None of copy_tags is false. + dicom_series = None # It can be None if not to copy_tags. if self.copy_tags: - # Get the first DICOM Series, as for now, only expecting this. + # Get the first DICOM Series to retrieve study level tags. 
if not study_selected_series_list or len(study_selected_series_list) < 1: raise ValueError("Missing input, list of 'StudySelectedSeries'.") for study_selected_series in study_selected_series_list: @@ -187,6 +138,7 @@ def compute(self, op_input: InputContext, op_output: OutputContext, context: Exe raise ValueError("Element in input is not expected type, 'StudySelectedSeries'.") for selected_series in study_selected_series.selected_series: dicom_series = selected_series.series + break output_dir = op_output.get().path output_dir.mkdir(parents=True, exist_ok=True) @@ -205,10 +157,16 @@ def write(self, content_text, dicom_series: Optional[DICOMSeries], output_dir: P Returns: PyDicom Dataset """ - self._logger.debug("Writing DICOM object...\n{}") + self._logger.debug("Writing DICOM object...\n") + + if not content_text or not len(content_text.strip()): + raise ValueError("Content is empty.") + if not isinstance(output_dir, Path): + raise ValueError("output_dir is not a valid Path.") + output_dir.mkdir(parents=True, exist_ok=True) # Just in case - ds = DICOMTextSRWriterOperator.write_common_modules( + ds = write_common_modules( dicom_series, self.copy_tags, self.modality_type, self.sop_class_uid, self.model_info, self.equipment_info ) @@ -274,194 +232,10 @@ def write(self, content_text, dicom_series: Optional[DICOMSeries], output_dir: P # Best effort for now. logging.warning(f"Tag {k} was not written, due to {ex}") - # Create the dcm file name, based on series instance UID, then save it. 
- file_name = f"{ds.SeriesInstanceUID}_{ds.Modality}{DICOMTextSRWriterOperator.DCM_EXTENSION}" - file_path = output_dir.joinpath(file_name) - self.save_dcm_file(ds, file_path) - - @staticmethod - def save_dcm_file(data_set, file_path: Path, validate_readable: bool = True): - logging.debug(f"DICOM dataset to be written:{data_set}") - - # Write out the DCM file - if file_path: - dcmwrite(str(file_path), data_set, write_like_original=False) - logging.info(f"Finished writing DICOM instance to file {file_path}") - if validate_readable: - # Test reading back - _ = dcmread(str(file_path)) - - # TODO: The following function can be moved into Domain module as it's common. - @staticmethod - def write_common_modules( - dicom_series: Optional[DICOMSeries], - copy_tags: bool, - modality_type: str, - sop_class_uid: str, - model_info: Optional[ModelInfo] = None, - equipment_info: Optional[EquipmentInfo] = None, - ): - """Writes DICOM object common modules with or without a reference DCIOM Series - - Common modules include Study, Patient, Equipment, Series, and SOP common. - - Args: - dicom_series (DicomSeries): DicomSeries object encapsulating the original series. - copy_tags (bool): If true, dicom_series must be provided for copying tags. - modality_type (str): DICOM Modality Type, e.g. SR. - sop_class_uid (str): Media Storage SOP Class UID, e.g. "1.2.840.10008.5.1.4.1.1.88.34" for Comprehensive 3D SR IOD. - model_info (MoelInfo): Object encapsulating model creator, name, version and UID. - equipment_info(EquipmentInfo): Object encapsulating attributes for DICOM Equipment Module - - Returns: - pydicom Dataset - - Raises: - ValueError: When dicom_series is not a DICOMSeries object, and new_study is not True. 
- """ - - if copy_tags: - if not isinstance(dicom_series, DICOMSeries): - raise ValueError("A DICOMSeries object is required for coping tags.") - - if len(dicom_series.get_sop_instances()) < 1: - raise ValueError("DICOMSeries must have at least one SOP instance.") - - # Get one of the SOP instance's native sop instance dataset - orig_ds = dicom_series.get_sop_instances()[0].get_native_sop_instance() - - logging.debug("Writing DICOM common modules...") - - # Get and format date and time per DICOM standards. - dt_now = datetime.datetime.now() - date_now_dcm = dt_now.strftime("%Y%m%d") - time_now_dcm = dt_now.strftime("%H%M%S") - offset_from_utc = dt_now.astimezone().isoformat()[-6:].replace(":", "") # '2022-09-27T22:36:20.143857-07:00' - - # Generate UIDs and descriptions - my_sop_instance_uid = generate_uid() - my_series_instance_uid = generate_uid() - my_series_description = "CAUTION: Not for Diagnostic Use, for research use only." - my_series_number = str(DICOMTextSRWriterOperator.random_with_n_digits(4)) # 4 digit number to avoid conflict - my_study_instance_uid = orig_ds.StudyInstanceUID if copy_tags else generate_uid() - - # File meta info data set - file_meta = Dataset() - file_meta.FileMetaInformationGroupLength = 198 - file_meta.FileMetaInformationVersion = bytes("01", "utf-8") # '\x00\x01' - - file_meta.MediaStorageSOPClassUID = sop_class_uid - file_meta.MediaStorageSOPInstanceUID = my_sop_instance_uid - file_meta.TransferSyntaxUID = ImplicitVRLittleEndian # 1.2.840.10008.1.2, Little Endian Implicit VR - file_meta.ImplementationClassUID = "1.2.40.0.13.1.1.1" # Made up. Not registered. 
- file_meta.ImplementationVersionName = equipment_info.software_version_number if equipment_info else "" - - # Write modules to data set - ds = Dataset() - ds.file_meta = file_meta - ds.is_implicit_VR = True - ds.is_little_endian = True - - # Content Date (0008,0023) and Content Time (0008,0033) are defined to be the date and time that - # the document content creation started. In the context of analysis results, these may be considered - # to be the date and time that the analysis that generated the result(s) started executing. - # Use current time for now, but could potentially use the actual inference start time. - ds.ContentDate = date_now_dcm - ds.ContentTime = time_now_dcm - ds.TimezoneOffsetFromUTC = offset_from_utc - - # The date and time that the original generation of the data in the document started. - ds.AcquisitionDateTime = date_now_dcm + time_now_dcm # Result has just been created. - - # Patient Module, mandatory. - # Copy over from the original DICOM metadata. - ds.PatientName = orig_ds.get("PatientName", "") if copy_tags else "" - ds.PatientID = orig_ds.get("PatientID", "") if copy_tags else "" - ds.IssuerOfPatientID = orig_ds.get("IssuerOfPatientID", "") if copy_tags else "" - ds.PatientBirthDate = orig_ds.get("PatientBirthDate", "") if copy_tags else "" - ds.PatientSex = orig_ds.get("PatientSex", "") if copy_tags else "" - - # Study Module, mandatory - # Copy over from the original DICOM metadata. - ds.StudyDate = orig_ds.get("StudyDate", "") if copy_tags else date_now_dcm - ds.StudyTime = orig_ds.get("StudyTime", "") if copy_tags else time_now_dcm - ds.AccessionNumber = orig_ds.get("AccessionNumber", "") if copy_tags else "" - ds.StudyDescription = orig_ds.get("StudyDescription", "") if copy_tags else "AI results." 
- ds.StudyInstanceUID = my_study_instance_uid - ds.StudyID = orig_ds.get("StudyID", "") if copy_tags else "1" - ds.ReferringPhysicianName = orig_ds.get("ReferringPhysicianName", "") if copy_tags else "" - - # Equipment Module, mandatory - if equipment_info: - ds.Manufacturer = equipment_info.manufacturer - ds.ManufacturerModelName = equipment_info.manufacturer_model - ds.SeriesNumber = equipment_info.series_number - ds.SoftwareVersions = equipment_info.software_version_number - - # SOP Common Module, mandatory - ds.InstanceCreationDate = date_now_dcm - ds.InstanceCreationTime = time_now_dcm - ds.SOPClassUID = sop_class_uid - ds.SOPInstanceUID = my_sop_instance_uid - ds.InstanceNumber = "1" - ds.SpecificCharacterSet = "ISO_IR 100" - - # Series Module, mandatory - ds.Modality = modality_type - ds.SeriesInstanceUID = my_series_instance_uid - ds.SeriesNumber = my_series_number - ds.SeriesDescription = my_series_description - ds.SeriesDate = date_now_dcm - ds.SeriesTime = time_now_dcm - # Body part copied over, although not mandatory depending on modality - ds.BodyPartExamined = orig_ds.get("BodyPartExamined", "") if copy_tags else "" - ds.RequestedProcedureID = orig_ds.get("RequestedProcedureID", "") if copy_tags else "" - - # Contributing Equipment Sequence - # The Creator shall describe each algorithm that was used to generate the results in the - # Contributing Equipment Sequence (0018,A001). Multiple items may be included. 
The Creator - # shall encode the following details in the Contributing Equipment Sequence: - # • Purpose of Reference Code Sequence (0040,A170) shall be (Newcode1, 99IHE, 1630 "Processing Algorithm") - # • Manufacturer (0008,0070) - # • Manufacturer’s Model Name (0008,1090) - # • Software Versions (0018,1020) - # • Device UID (0018,1002) - - if model_info: - # First create the Purpose of Reference Code Sequence - seq_purpose_of_reference_code = Sequence() - ds_purpose_of_reference_code = Dataset() - ds_purpose_of_reference_code.CodeValue = "Newcode1" - ds_purpose_of_reference_code.CodingSchemeDesignator = "99IHE" - ds_purpose_of_reference_code.CodeMeaning = '"Processing Algorithm' - seq_purpose_of_reference_code.append(ds_purpose_of_reference_code) - - seq_contributing_equipment = Sequence() - ds_contributing_equipment = Dataset() - ds_contributing_equipment.PurposeOfReferenceCodeSequence = seq_purpose_of_reference_code - # '(121014, DCM, “Device Observer Manufacturer")' - ds_contributing_equipment.Manufacturer = model_info.creator - # u'(121015, DCM, “Device Observer Model Name")' - ds_contributing_equipment.ManufacturerModelName = model_info.name - # u'(111003, DCM, “Algorithm Version")' - ds_contributing_equipment.SoftwareVersions = model_info.version - ds_contributing_equipment.DeviceUID = model_info.uid # u'(121012, DCM, “Device Observer UID")' - seq_contributing_equipment.append(ds_contributing_equipment) - ds.ContributingEquipmentSequence = seq_contributing_equipment - - logging.debug("DICOM common modules written:\n{}".format(ds)) - - return ds - - @staticmethod - def random_with_n_digits(n): - """Random number generator to generate n digit int, where 1 <= n <= 32.""" - - assert isinstance(n, int) and n <= 32, "Argument n must be an int, n <= 32." 
- n = n if n >= 1 else 1 - range_start = 10 ** (n - 1) - range_end = (10**n) - 1 - return randint(range_start, range_end) + # Instance file name is the same as the new SOP instance UID + file_path = output_dir.joinpath(f"{ds.SOPInstanceUID}{DICOMTextSRWriterOperator.DCM_EXTENSION}") + save_dcm_file(ds, file_path) + self._logger.info(f"DICOM SOP instance saved in {file_path}") def test(): @@ -470,16 +244,16 @@ def test(): current_file_dir = Path(__file__).parent.resolve() data_path = current_file_dir.joinpath("../../../inputs/livertumor_ct/dcm/1-CT_series_liver_tumor_from_nii014") - out_path = current_file_dir.joinpath("../../../examples/output_sr_op") + out_path = "output_sr_op" test_report_text = "Tumors detected in Liver using MONAI Liver Tumor Seg model." - test_copy_tags = False + test_copy_tags = True loader = DICOMDataLoaderOperator() series_selector = DICOMSeriesSelectorOperator() sr_writer = DICOMTextSRWriterOperator( copy_tags=test_copy_tags, model_info=None, - equipment_info=EquipmentInfo(software_version_number="0.4"), + equipment_info=EquipmentInfo(), custom_tags={"SeriesDescription": "Textual report from AI algorithm. Not for clinical use."}, ) diff --git a/monai/deploy/operators/dicom_utils.py b/monai/deploy/operators/dicom_utils.py new file mode 100644 index 00000000..6d434953 --- /dev/null +++ b/monai/deploy/operators/dicom_utils.py @@ -0,0 +1,284 @@ +# Copyright 2022 MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import datetime
import logging
from pathlib import Path
from random import randint
from typing import Any, Optional

from monai.deploy.utils.importutil import optional_import

dcmread, _ = optional_import("pydicom", name="dcmread")
dcmwrite, _ = optional_import("pydicom.filewriter", name="dcmwrite")
generate_uid, _ = optional_import("pydicom.uid", name="generate_uid")
ImplicitVRLittleEndian, _ = optional_import("pydicom.uid", name="ImplicitVRLittleEndian")
Dataset_, _ = optional_import("pydicom.dataset", name="Dataset")
FileDataset, _ = optional_import("pydicom.dataset", name="FileDataset")
Sequence, _ = optional_import("pydicom.sequence", name="Sequence")

from monai.deploy.core.domain.dicom_series import DICOMSeries
from monai.deploy.utils.version import get_sdk_semver

# To address mypy complaint
Dataset: Any = Dataset_

__all__ = [
    "EquipmentInfo",
    "ModelInfo",
    "random_with_n_digits",
    "save_dcm_file",
    "write_common_modules",
]


class ModelInfo:
    """Class encapsulating AI model information, according to IHE AI Results (AIR) Rev 1.1.

    The attributes of the class will be used to populate the Contributing Equipment Sequence in the DICOM IOD
    per IHE AIR Rev 1.1, Section 6.5.3.1 General Result Encoding Requirements, as the following,

    The Creator shall describe each algorithm that was used to generate the results in the
    Contributing Equipment Sequence (0018,A001). Multiple items may be included. The Creator
    shall encode the following details in the Contributing Equipment Sequence,
        - Purpose of Reference Code Sequence (0040,A170) shall be (Newcode1, 99IHE, 1630 "Processing Algorithm")
        - Manufacturer (0008,0070)
        - Manufacturer's Model Name (0008,1090)
        - Software Versions (0018,1020)
        - Device UID (0018,1002)

    Each time an AI Model is modified, for example by training, it would be appropriate to update
    the Device UID.

    Args:
        creator (str): Model creator, written as the contributing equipment Manufacturer.
        name (str): Model name, written as the contributing equipment Manufacturer's Model Name.
        version (str): Model version, written as the contributing equipment Software Versions.
        uid (str): Model UID, written as the contributing equipment Device UID.

    Any argument that is not a str is silently replaced with an empty string.
    """

    def __init__(self, creator: str = "", name: str = "", version: str = "", uid: str = ""):

        self.creator = creator if isinstance(creator, str) else ""
        self.name = name if isinstance(name, str) else ""
        self.version = version if isinstance(version, str) else ""
        self.uid = uid if isinstance(uid, str) else ""


class EquipmentInfo:
    """Class encapsulating attributes required for DICOM Equipment Module.

    Args:
        manufacturer (str): Equipment manufacturer.
        manufacturer_model (str): Equipment manufacturer's model name.
        series_number (str): Series number associated with the equipment.
        software_version_number (str): Software version. When empty, the App SDK version is used.
            Only the first 15 characters are kept.

    A non-str value for any of the first three arguments is silently replaced with an empty string.
    """

    def __init__(
        self,
        manufacturer: str = "MONAI Deploy",
        manufacturer_model: str = "MONAI Deploy App SDK",
        series_number: str = "0000",
        software_version_number: str = "",
    ):

        self.manufacturer = manufacturer if isinstance(manufacturer, str) else ""
        self.manufacturer_model = manufacturer_model if isinstance(manufacturer_model, str) else ""
        self.series_number = series_number if isinstance(series_number, str) else ""
        if software_version_number:
            self.software_version_number = str(software_version_number)[0:15]
        else:
            try:
                version_str = get_sdk_semver()  # SDK Version
            except Exception:
                # Best effort only: fall back to an empty version string if the SDK version is unavailable.
                version_str = ""
            self.software_version_number = version_str[0:15]


# Utility functions


def random_with_n_digits(n: int) -> int:
    """Random number generator to generate n digit int, where 1 <= n <= 32.

    Args:
        n (int): Number of decimal digits. Values below 1 are treated as 1.

    Returns:
        A random int in the inclusive range [10**(n-1), 10**n - 1].

    Raises:
        AssertionError: When n is not an int or is greater than 32.
    """

    assert isinstance(n, int) and n <= 32, "Argument n must be an int, n <= 32."
    n = max(n, 1)
    range_start = 10 ** (n - 1)
    range_end = (10**n) - 1
    return randint(range_start, range_end)


def save_dcm_file(data_set: Dataset, file_path: Path, validate_readable: bool = True):
    """Save a DICOM data set, in pydicom Dataset, to the provided file path.

    Args:
        data_set (Dataset): The pydicom Dataset to write.
        file_path (Path): Destination file path. Surrounding whitespace is stripped.
        validate_readable (bool): If true, the file is read back with dcmread after writing.

    Raises:
        ValueError: When data_set is not a Dataset, or file_path is None or blank.
    """

    # Lazy %-style arguments avoid rendering the whole data set when debug logging is disabled.
    logging.debug("DICOM dataset to be written:%s", data_set)

    if not isinstance(data_set, Dataset):
        raise ValueError("data_set is not the expected Dataset type.")

    # str(None) is "None", which would otherwise pass the blank check and be used as a file name.
    if file_path is None or not str(file_path).strip():
        raise ValueError("file_path to save dcm file not provided.")

    target_path = str(file_path).strip()
    dcmwrite(target_path, data_set, write_like_original=False)
    logging.info("Finished writing DICOM instance to file %s", file_path)

    if validate_readable:
        # Test reading back
        _ = dcmread(str(file_path))


def write_common_modules(
    dicom_series: Optional[DICOMSeries],
    copy_tags: bool,
    modality_type: str,
    sop_class_uid: str,
    model_info: Optional[ModelInfo] = None,
    equipment_info: Optional[EquipmentInfo] = None,
) -> Dataset:
    """Writes DICOM object common modules with or without a reference DICOM Series

    Common modules include Study, Patient, Equipment, Series, and SOP common.

    Args:
        dicom_series (DicomSeries): DicomSeries object encapsulating the original series.
        copy_tags (bool): If true, dicom_series must be provided for copying tags.
        modality_type (str): DICOM Modality Type, e.g. SR.
        sop_class_uid (str): Media Storage SOP Class UID, e.g. "1.2.840.10008.5.1.4.1.1.88.34" for Comprehensive 3D SR IOD.
        model_info (ModelInfo): Object encapsulating model creator, name, version and UID.
        equipment_info(EquipmentInfo): Object encapsulating attributes for DICOM Equipment Module

    Returns:
        pydicom Dataset

    Raises:
        ValueError: When copy_tags is true and dicom_series is not a DICOMSeries object,
            or it has no SOP instance.
    """

    # Only populated, and only read, when copy_tags is true.
    orig_ds: Any = None
    if copy_tags:
        if not isinstance(dicom_series, DICOMSeries):
            raise ValueError("A DICOMSeries object is required for coping tags.")

        sop_instances = dicom_series.get_sop_instances()
        if len(sop_instances) < 1:
            raise ValueError("DICOMSeries must have at least one SOP instance.")

        # Get one of the SOP instance's native sop instance dataset
        orig_ds = sop_instances[0].get_native_sop_instance()

    logging.debug("Writing DICOM common modules...")

    # Get and format date and time per DICOM standards.
    dt_now = datetime.datetime.now()
    date_now_dcm = dt_now.strftime("%Y%m%d")
    time_now_dcm = dt_now.strftime("%H%M%S")
    # Local UTC offset as sign + HHMM, e.g. "-0700"; the slice drops any sub-minute part.
    offset_from_utc = dt_now.astimezone().strftime("%z")[:5]

    # Generate UIDs and descriptions
    my_sop_instance_uid = generate_uid()
    my_series_instance_uid = generate_uid()
    my_series_description = "CAUTION: Not for Diagnostic Use, for research use only."
    my_series_number = str(random_with_n_digits(4))  # 4 digit number to avoid conflict
    my_study_instance_uid = orig_ds.StudyInstanceUID if copy_tags else generate_uid()

    # File meta info data set
    file_meta = Dataset()
    file_meta.FileMetaInformationGroupLength = 198
    # Version 1 of the File Meta Information header is the two bytes 00H 01H, not the ASCII text "01".
    file_meta.FileMetaInformationVersion = b"\x00\x01"

    file_meta.MediaStorageSOPClassUID = sop_class_uid
    file_meta.MediaStorageSOPInstanceUID = my_sop_instance_uid
    file_meta.TransferSyntaxUID = ImplicitVRLittleEndian  # 1.2.840.10008.1.2, Little Endian Implicit VR
    file_meta.ImplementationClassUID = "1.2.40.0.13.1.1.1"  # Made up. Not registered.
    file_meta.ImplementationVersionName = equipment_info.software_version_number if equipment_info else ""

    # Write modules to data set
    ds = Dataset()
    ds.file_meta = file_meta
    ds.is_implicit_VR = True
    ds.is_little_endian = True

    # Content Date (0008,0023) and Content Time (0008,0033) are defined to be the date and time that
    # the document content creation started. In the context of analysis results, these may be considered
    # to be the date and time that the analysis that generated the result(s) started executing.
    # Use current time for now, but could potentially use the actual inference start time.
    ds.ContentDate = date_now_dcm
    ds.ContentTime = time_now_dcm
    ds.TimezoneOffsetFromUTC = offset_from_utc

    # The date and time that the original generation of the data in the document started.
    ds.AcquisitionDateTime = date_now_dcm + time_now_dcm  # Result has just been created.

    # Patient Module, mandatory.
    # Copy over from the original DICOM metadata.
    ds.PatientName = orig_ds.get("PatientName", "") if copy_tags else ""
    ds.PatientID = orig_ds.get("PatientID", "") if copy_tags else ""
    ds.IssuerOfPatientID = orig_ds.get("IssuerOfPatientID", "") if copy_tags else ""
    ds.PatientBirthDate = orig_ds.get("PatientBirthDate", "") if copy_tags else ""
    ds.PatientSex = orig_ds.get("PatientSex", "") if copy_tags else ""

    # Study Module, mandatory
    # Copy over from the original DICOM metadata.
    ds.StudyDate = orig_ds.get("StudyDate", "") if copy_tags else date_now_dcm
    ds.StudyTime = orig_ds.get("StudyTime", "") if copy_tags else time_now_dcm
    ds.AccessionNumber = orig_ds.get("AccessionNumber", "") if copy_tags else ""
    ds.StudyDescription = orig_ds.get("StudyDescription", "") if copy_tags else "AI results."
    ds.StudyInstanceUID = my_study_instance_uid
    ds.StudyID = orig_ds.get("StudyID", "") if copy_tags else "1"
    ds.ReferringPhysicianName = orig_ds.get("ReferringPhysicianName", "") if copy_tags else ""

    # Equipment Module, mandatory
    if equipment_info:
        ds.Manufacturer = equipment_info.manufacturer
        ds.ManufacturerModelName = equipment_info.manufacturer_model
        # NOTE: this value is overwritten in the Series Module section below with a random 4 digit number.
        ds.SeriesNumber = equipment_info.series_number
        ds.SoftwareVersions = equipment_info.software_version_number

    # SOP Common Module, mandatory
    ds.InstanceCreationDate = date_now_dcm
    ds.InstanceCreationTime = time_now_dcm
    ds.SOPClassUID = sop_class_uid
    ds.SOPInstanceUID = my_sop_instance_uid
    ds.InstanceNumber = "1"
    ds.SpecificCharacterSet = "ISO_IR 100"

    # Series Module, mandatory
    ds.Modality = modality_type
    ds.SeriesInstanceUID = my_series_instance_uid
    ds.SeriesNumber = my_series_number
    ds.SeriesDescription = my_series_description
    ds.SeriesDate = date_now_dcm
    ds.SeriesTime = time_now_dcm
    # Body part copied over, although not mandatory depending on modality
    ds.BodyPartExamined = orig_ds.get("BodyPartExamined", "") if copy_tags else ""
    ds.RequestedProcedureID = orig_ds.get("RequestedProcedureID", "") if copy_tags else ""

    # Contributing Equipment Sequence
    # The Creator shall describe each algorithm that was used to generate the results in the
    # Contributing Equipment Sequence (0018,A001). Multiple items may be included. The Creator
    # shall encode the following details in the Contributing Equipment Sequence:
    #  - Purpose of Reference Code Sequence (0040,A170) shall be (Newcode1, 99IHE, 1630 "Processing Algorithm")
    #  - Manufacturer (0008,0070)
    #  - Manufacturer's Model Name (0008,1090)
    #  - Software Versions (0018,1020)
    #  - Device UID (0018,1002)

    if model_info:
        # First create the Purpose of Reference Code Sequence
        seq_purpose_of_reference_code = Sequence()
        ds_purpose_of_reference_code = Dataset()
        ds_purpose_of_reference_code.CodeValue = "Newcode1"
        ds_purpose_of_reference_code.CodingSchemeDesignator = "99IHE"
        # The code meaning is the plain phrase; no quote characters belong in the stored value.
        ds_purpose_of_reference_code.CodeMeaning = "Processing Algorithm"
        seq_purpose_of_reference_code.append(ds_purpose_of_reference_code)

        seq_contributing_equipment = Sequence()
        ds_contributing_equipment = Dataset()
        ds_contributing_equipment.PurposeOfReferenceCodeSequence = seq_purpose_of_reference_code
        # (121014, DCM, "Device Observer Manufacturer")
        ds_contributing_equipment.Manufacturer = model_info.creator
        # (121015, DCM, "Device Observer Model Name")
        ds_contributing_equipment.ManufacturerModelName = model_info.name
        # (111003, DCM, "Algorithm Version")
        ds_contributing_equipment.SoftwareVersions = model_info.version
        ds_contributing_equipment.DeviceUID = model_info.uid  # (121012, DCM, "Device Observer UID")
        seq_contributing_equipment.append(ds_contributing_equipment)
        ds.ContributingEquipmentSequence = seq_contributing_equipment

    # Lazy %-style argument avoids rendering the whole data set when debug logging is disabled.
    logging.debug("DICOM common modules written:\n%s", ds)

    return ds