Skip to content

Commit

Permalink
Merge pull request #67 from bhaskar2443053/60_xray_processor
Browse files Browse the repository at this point in the history
xray processor
  • Loading branch information
Piyush13y authored Nov 10, 2022
2 parents 2b8fd0d + 49c1555 commit 68cdac6
Show file tree
Hide file tree
Showing 8 changed files with 215 additions and 20 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ jobs:
- { dep: forte-wrapper, testfile: tests/fortex/health/processors/negation_context_analysis_test.py }
- { dep: scispacy, testfile: tests/fortex/health/processors/scispacy_processor_test.py }
- { testfile: tests/fortex/health/readers/mimic3_note_reader_test.py }
- { testfile: tests/fortex/health/readers/xray_image_reader_test.py }
- { testfile: tests/fortex/health/processors/xray_processor_test.py }
exclude:
- python-version: 3.6
torch-version: 1.7.1
Expand Down
Binary file added examples/xray/sample_data/normal_xray_image.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
25 changes: 13 additions & 12 deletions fortex/health/processors/icd_coding_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
from typing import Dict, Set
import importlib

from transformers import pipeline
from forte.common import Resources
from forte.common.configuration import Config
from forte.data.data_pack import DataPack
from forte.processors.base import PackProcessor

from transformers import AutoTokenizer
from transformers import BertForSequenceClassification
from ftx.medical.clinical_ontology import MedicalArticle


Expand All @@ -43,16 +43,13 @@ class ICDCodingProcessor(PackProcessor):

def __init__(self):
super().__init__()
self.extractor = None
self.tokenizer = None
self.model = None

def set_up(self): # , configs: Config
device_num = self.configs["cuda_devices"]
self.extractor = pipeline( # using Bert for SequenceClassification
"sentiment-analysis", # this is the actual pipeline name for Sequence-Classification
model=self.configs.model_name,
tokenizer=self.configs.model_name,
framework="pt",
device=device_num,
self.tokenizer = AutoTokenizer.from_pretrained(self.configs.model_name)
self.model = BertForSequenceClassification.from_pretrained(
self.configs.model_name
)

def initialize(self, resources: Resources, configs: Config):
Expand All @@ -76,9 +73,13 @@ def _process(self, input_pack: DataPack):
print("Found an entry greater than 512 in length, skipping..")
continue

result = self.extractor(inputs=entry_specified.text)
encoded_input = self.tokenizer(
entry_specified.text, return_tensors="pt"
)
output = self.model(**encoded_input)
result = output.logits.detach().cpu().numpy()[0].argsort()[::-1][:5]

icd_code = result[0]["label"]
icd_code = self.model.config.id2label[result[0]]
article = MedicalArticle(
pack=input_pack,
begin=entry_specified.span.begin,
Expand Down
125 changes: 125 additions & 0 deletions fortex/health/processors/xray_image_processor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Copyright 2022 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
XrayImageProcessor
"""
from typing import Dict, Set
import PIL
from transformers import pipeline
from forte.data.data_pack import DataPack
from forte.common.resources import Resources
from forte.common.configuration import Config
from forte.processors.base import PackProcessor
from ft.onto.base_ontology import Classification


# Public API of this module: only the processor class is exported by
# `from ... import *`.
__all__ = [
    "XrayImageProcessor",
]


class XrayImageProcessor(PackProcessor):
    r"""
    Classify the x-ray image stored in a data pack with a Hugging Face
    image-classification pipeline.

    Implementation of this XrayImageProcessor has been based on the fine-tuned
    version of google/vit-base-patch16-224-in21k
    on the chest-xray-pneumonia dataset.
    The finetuned model nickmuchi/vit-finetuned-chest-xray-pneumonia
    (of huggingface transformers) reports the following results on its
    evaluation set:
    Loss: 0.1271
    Accuracy: 0.9551
    Referred link:
    https://huggingface.co/nickmuchi/vit-finetuned-chest-xray-pneumonia#vit-finetuned-chest-xray-pneumonia
    """

    def __init__(self):
        super().__init__()
        # Image-classification pipeline; created in `set_up` once the
        # configuration is available.
        self.extractor = None

    def set_up(self):
        r"""
        Build the image-classification pipeline from the processor configs.

        `model_name` is used both as the model and as the feature extractor,
        and `cuda_devices` is forwarded as the pipeline `device`.
        """
        device_num = self.configs["cuda_devices"]
        self.extractor = pipeline(
            "image-classification",
            model=self.configs.model_name,
            feature_extractor=self.configs.model_name,
            framework="pt",
            device=device_num,
        )

    def initialize(self, resources: Resources, configs: Config):
        r"""
        Initialize the base processor, then load the classification pipeline.

        Args:
            resources: shared resources passed through to the base class.
            configs: processor configuration passed through to the base class.
        """
        super().initialize(resources, configs)
        self.set_up()

    def _process(self, input_pack: DataPack):
        r"""
        The model predicts the probability score for
        the two classes 'PNEUMONIA' and 'NORMAL', based on
        an xray image input.

        The scores are stored as a ``{label: score}`` mapping in the
        `classification_result` attribute of a new `Classification` entry
        created on `input_pack`.

        Args:
            input_pack: the data pack whose `image` attribute holds the
                image array to classify.
        """
        # A bare `import PIL` does not load the `PIL.Image` submodule, so
        # `PIL.Image` is only reachable if some other library imported it
        # first. Import it explicitly so this method does not depend on that.
        from PIL import Image  # pylint: disable=import-outside-toplevel

        image_data = input_pack.image
        pil_img = Image.fromarray(image_data)
        vit_result = self.extractor(pil_img)

        # Each pipeline result item carries a "label" and its "score".
        result_dict = {item["label"]: item["score"] for item in vit_result}

        class_labels: Classification = Classification(pack=input_pack)
        class_labels.classification_result = result_dict

    @classmethod
    def default_configs(cls):
        r"""
        This defines a basic config structure for `XrayImageProcessor`.
        Following are the keys for this dictionary:
         - `model_name`: the huggingface transformer model name to be
            used for classification,
         - `cuda_devices`: This is the GPU device id, if set to "-1" CPU
            will be used for the computation.
        Returns: A dictionary with the default config for this processor.
        """
        return {
            "model_name": "nickmuchi/vit-finetuned-chest-xray-pneumonia",
            "cuda_devices": -1,
        }

    def expected_types_and_attributes(self):
        r"""
        Method to add user specified expected type which would be checked
        before running the processor if the pipeline is initialized with
        `enforce_consistency=True` or
        :meth:`~forte.pipeline.Pipeline.enforce_consistency` was enabled for
        the pipeline.
        """
        return {
            "ft.onto.base_ontology.ImagePayload": [],
        }

    def record(self, record_meta: Dict[str, Set[str]]):
        r"""
        Method to add output type record of `XrayImageProcessor` which
        is `ft.onto.base_ontology.Classification` with attribute
        `classification_result`
        to :attr:`forte.data.data_pack.Meta.record`.
        Args:
            record_meta: the field in the datapack for type record that need to
                fill in for consistency checking.
        """
        record_meta["ft.onto.base_ontology.Classification"] = {
            "classification_result",
        }
6 changes: 4 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,16 @@
),
namespace_packages=["fortex"],
install_requires=[
'forte~=0.2.0',
'forte==0.3.0.dev3',
],
extras_require={
"test": [
"ddt",
"testfixtures",
"transformers==4.2.2",
"transformers==4.18.0",
"protobuf==3.19.4",
"Pillow==8.4.0",
'forte @ git+https://github.com/asyml/forte',
],
"scispacy_processor": [
"scispacy==0.5.0",
Expand Down
12 changes: 6 additions & 6 deletions tests/fortex/health/processors/icd_coding_processor_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
"""
Unit tests for ICDCodingProcessor
"""

import unittest

from forte.data.data_pack import DataPack
Expand Down Expand Up @@ -44,15 +43,16 @@ def setUp(self):
self.nlp.initialize()

def test_huggingface_ICDCode_processor(self):
sentences = [
"subarachnoid hemorrhage scalp laceration service: surgery major surgical or invasive.",
# "Other related medical statements.", # if this line is added the classification changed to T34.99
]
document = "".join(sentences)

document = "subarachnoid hemorrhage scalp laceration service: surgery major surgical or invasive"
print(document)
pack = self.nlp.process(document)

expected_code = "H59.11"

for idx, icd_coding_item in enumerate(pack.get(MedicalArticle)):
self.assertEqual(icd_coding_item.icd_code, expected_code)


if __name__ == "__main__":
unittest.main()
65 changes: 65 additions & 0 deletions tests/fortex/health/processors/xray_processor_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright 2022 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Unit tests for XrayImageProcessor
"""

import os
import unittest
from ddt import ddt, data

from forte.pipeline import Pipeline
from forte.data.data_pack import DataPack
from fortex.health.readers.xray_image_reader import XrayImageReader
from fortex.health.processors.xray_image_processor import XrayImageProcessor
from ft.onto.base_ontology import Classification

import numpy as np
from PIL import Image


class XrayImageProcessorPipelineTest(unittest.TestCase):
    r"""
    Pipeline test that reads the sample x-ray images with `XrayImageReader`
    and checks the scores produced by `XrayImageProcessor`.
    """

    def setUp(self):
        # Sample images live in `examples/xray/sample_data/`, four directory
        # levels above the directory containing this test file.
        self.orig_image_pth: str = os.path.abspath(
            os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                os.pardir,
                os.pardir,
                os.pardir,
                os.pardir,
                "examples/xray/sample_data/",
            )
        )

        self.reader = XrayImageReader()
        self.pl = Pipeline[DataPack]()
        self.pl.set_reader(self.reader)
        self.pl.add(XrayImageProcessor())
        self.pl.initialize()

    def test_processor(self):
        r"""
        Each sample whose pack name contains "pneumonia" or "normal" must
        score at least 0.5 for the matching class.
        """
        # Count the outputs actually verified so the test cannot pass
        # vacuously when the dataset or the classification results are empty.
        checked = 0
        for pack in self.pl.process_dataset(self.orig_image_pth):
            for output in pack.get(Classification):
                scores = output.classification_result
                if "pneumonia" in pack.pack_name:
                    self.assertGreaterEqual(scores["PNEUMONIA"], 0.5)
                    checked += 1
                elif "normal" in pack.pack_name:
                    self.assertGreaterEqual(scores["NORMAL"], 0.5)
                    checked += 1

        self.assertGreater(
            checked, 0, "no labelled x-ray sample produced a classification"
        )


# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

0 comments on commit 68cdac6

Please sign in to comment.