"""A medical image analysis pipeline.
The pipeline is used for brain tissue segmentation using a decision forest classifier.
"""
import argparse
import datetime
import os
import sys
import timeit

import SimpleITK as sitk
import sklearn.ensemble as sk_ensemble
import numpy as np

import pymia.data.conversion as conversion
import pymia.evaluation.writer as writer

try:
    import mialab.data.structure as structure
    import mialab.utilities.file_access_utilities as futil
    import mialab.utilities.pipeline_utilities as putil
except ImportError:
    # Append the MIALab root directory to the Python path
    # (the script lives one level below the repository root)
    sys.path.insert(0, os.path.join(os.path.dirname(sys.argv[0]), '..'))
    import mialab.data.structure as structure
    import mialab.utilities.file_access_utilities as futil
    import mialab.utilities.pipeline_utilities as putil

LOADING_KEYS = [structure.BrainImageTypes.T1w,
                structure.BrainImageTypes.T2w,
                structure.BrainImageTypes.GroundTruth,
                structure.BrainImageTypes.BrainMask,
                structure.BrainImageTypes.RegistrationTransform]  # the list of data we will load


def main(result_dir: str, data_atlas_dir: str, data_train_dir: str, data_test_dir: str):
    """Brain tissue segmentation using decision forests.

    The main routine executes the medical image analysis pipeline:

        - Image loading
        - Registration
        - Pre-processing
        - Feature extraction
        - Decision forest classifier model building
        - Segmentation using the decision forest classifier model on unseen images
        - Post-processing of the segmentation
        - Evaluation of the segmentation
    """
    # load atlas images
    putil.load_atlas_images(data_atlas_dir)
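    # Note: the loaded atlas presumably serves as the common reference space, i.e. the
    # registration pre-processing step maps each subject into it and the coordinate
    # features are expressed in that space (an assumption based on the parameter
    # names below; the details live in the mialab utilities).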

    print('-' * 5, 'Training...')

    # crawl the training image directories
    crawler = futil.FileSystemDataCrawler(data_train_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    pre_process_params = {'skullstrip_pre': True,
                          'normalization_pre': True,
                          'registration_pre': True,
                          'coordinates_feature': True,
                          'intensity_feature': True,
                          'gradient_intensity_feature': True,
                          'resampling_pre': True,
                          'wiener_denoising_pre': True}

    # load images for training and pre-process
    images = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)
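    # multi_process=False runs the pre-processing sequentially; judging by the flag
    # name, the multi-process variant is faster but sequential execution keeps stack
    # traces and debugging much simpler.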

    # generate feature matrix and label vector
    data_train = np.concatenate([img.feature_matrix[0] for img in images])
    labels_train = np.concatenate([img.feature_matrix[1] for img in images]).squeeze()
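    # feature_matrix[0] holds the per-voxel features with shape (n_voxels, n_features)
    # and feature_matrix[1] the corresponding ground-truth labels; squeeze() turns the
    # (n_voxels, 1) label column into the 1-D vector scikit-learn expects.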

    # TODO fine-tune random forest
    forest = sk_ensemble.RandomForestClassifier(
        max_features=images[0].feature_matrix[0].shape[1],  # consider all features at every split
        n_estimators=50,
        max_depth=5,
        criterion='gini',
        min_samples_leaf=3,
        min_samples_split=2,
        bootstrap=True)
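
    # A minimal sketch of how the TODO above could be tackled, left as a comment so
    # the pipeline behaviour is unchanged; the parameter grid is an illustrative
    # assumption, not a validated choice:
    #
    #   import sklearn.model_selection as sk_model_selection
    #   param_grid = {'n_estimators': [50, 100, 200], 'max_depth': [5, 10, 20]}
    #   search = sk_model_selection.GridSearchCV(forest, param_grid, cv=3, n_jobs=-1)
    #   search.fit(data_train, labels_train)
    #   forest = search.best_estimator_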

    start_time = timeit.default_timer()
    forest.fit(data_train, labels_train)
    print(' Time elapsed:', timeit.default_timer() - start_time, 's')

    # create a result directory with timestamp
    t = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    result_dir = os.path.join(result_dir, t)
    os.makedirs(result_dir, exist_ok=True)

    print('-' * 5, 'Testing...')

    # initialize evaluator
    evaluator = putil.init_evaluator()

    # crawl the test image directories
    crawler = futil.FileSystemDataCrawler(data_test_dir,
                                          LOADING_KEYS,
                                          futil.BrainImageFilePathGenerator(),
                                          futil.DataDirectoryFilter())

    # load images for testing and pre-process
    pre_process_params['training'] = False
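    # The 'training' flag presumably switches the pre-processing into test mode,
    # e.g. skipping label-dependent steps (an assumption based on the key name;
    # the exact behaviour lives in putil.pre_process_batch).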
    images_test = putil.pre_process_batch(crawler.data, pre_process_params, multi_process=False)

    images_prediction = []
    images_probabilities = []

    for img in images_test:
        print('-' * 10, 'Testing', img.id_)

        start_time = timeit.default_timer()
        predictions = forest.predict(img.feature_matrix[0])
        probabilities = forest.predict_proba(img.feature_matrix[0])
        print(' Time elapsed:', timeit.default_timer() - start_time, 's')

        # convert prediction and probabilities back to SimpleITK images
        image_prediction = conversion.NumpySimpleITKImageBridge.convert(predictions.astype(np.uint8),
                                                                        img.image_properties)
        image_probabilities = conversion.NumpySimpleITKImageBridge.convert(probabilities, img.image_properties)
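        # predict_proba returns an (n_voxels, n_classes) array, so the converted
        # probability image is presumably a multi-component (vector) image, while the
        # uint8 prediction image carries one label per voxel.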

        # evaluate segmentation without post-processing
        evaluator.evaluate(image_prediction, img.images[structure.BrainImageTypes.GroundTruth], img.id_)

        images_prediction.append(image_prediction)
        images_probabilities.append(image_probabilities)

    # post-process segmentation and evaluate with post-processing
    post_process_params = {'simple_post': True,
                           'crf_post': False}
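    # 'simple_post' presumably applies a basic clean-up of the label map, while
    # 'crf_post' would enable a conditional-random-field refinement driven by the
    # class probabilities (an assumption from the key names; see putil.post_process_batch).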
    images_post_processed = putil.post_process_batch(images_test, images_prediction, images_probabilities,
                                                     post_process_params, multi_process=True)

    for i, img in enumerate(images_test):
        evaluator.evaluate(images_post_processed[i], img.images[structure.BrainImageTypes.GroundTruth],
                           img.id_ + '-PP')

        # save results
        sitk.WriteImage(images_prediction[i], os.path.join(result_dir, images_test[i].id_ + '_SEG.mha'), True)
        sitk.WriteImage(images_post_processed[i], os.path.join(result_dir, images_test[i].id_ + '_SEG-PP.mha'), True)

    # use two writers to report the results
    os.makedirs(result_dir, exist_ok=True)  # safeguard; the directory was already created above
    result_file = os.path.join(result_dir, 'results.csv')
    writer.CSVWriter(result_file).write(evaluator.results)

    print('\nSubject-wise results...')
    writer.ConsoleWriter().write(evaluator.results)

    # report also the mean and standard deviation among all subjects
    result_summary_file = os.path.join(result_dir, 'results_summary.csv')
    functions = {'MEAN': np.mean, 'STD': np.std}
    writer.CSVStatisticsWriter(result_summary_file, functions=functions).write(evaluator.results)
    print('\nAggregated statistic results...')
    writer.ConsoleStatisticsWriter(functions=functions).write(evaluator.results)

    # clear results such that the evaluator is ready for the next evaluation
    evaluator.clear()
if __name__ == "__main__":
"""The program's entry point."""
script_dir = os.path.dirname(sys.argv[0])
parser = argparse.ArgumentParser(description='Medical image analysis pipeline for brain tissue segmentation')
parser.add_argument(
'--result_dir',
type=str,
default=os.path.normpath(os.path.join(script_dir, './mia-result')),
help='Directory for results.'
)
parser.add_argument(
'--data_atlas_dir',
type=str,
default=os.path.normpath(os.path.join(script_dir, '../data/atlas')),
help='Directory with atlas data.'
)
parser.add_argument(
'--data_train_dir',
type=str,
default=os.path.normpath(os.path.join(script_dir, '../data/train/')),
help='Directory with training data.'
)
parser.add_argument(
'--data_test_dir',
type=str,
default=os.path.normpath(os.path.join(script_dir, '../data/test/')),
help='Directory with testing data.'
)
args = parser.parse_args()
main(args.result_dir, args.data_atlas_dir, args.data_train_dir, args.data_test_dir)