Commit

Add files via upload
JZK00 authored Mar 2, 2021
1 parent 94278c1 commit ba381c5
Showing 13 changed files with 393 additions and 0 deletions.
Binary file added BM/Label_T1.nii.gz
Binary file not shown.
Binary file added BM/Label_T1C.nii.gz
Binary file not shown.
Binary file added BM/Label_T2.nii.gz
Binary file not shown.
Binary file added BM/example_T1.nii
Binary file not shown.
Binary file added BM/example_T1C.nii
Binary file not shown.
Binary file added BM/example_T2.nii
Binary file not shown.
150 changes: 150 additions & 0 deletions DL.py
@@ -0,0 +1,150 @@
# Deep learning-based radiomics

from __future__ import print_function
import collections
import logging
import os

import numpy as np
import pandas
import SimpleITK as sitk
from scipy.ndimage import zoom
from tensorflow.keras.applications.imagenet_utils import preprocess_input
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image

# CNN: ImageNet-pretrained ResNet50, using the output of its last layer as the deep feature vector
base_model = ResNet50(weights='imagenet', include_top=True)
model = Model(inputs=base_model.input, outputs=base_model.layers[-1].output)
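# Note (added): with include_top=True, base_model.layers[-1] is ResNet50's final 1000-way
# prediction layer, so featureextraction() below returns 1000 deep features per case.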

# Feature Extraction
# Cropping box
def maskcroppingbox(images_array, use2D=False):
    images_array_2 = np.argwhere(images_array)
    (zstart, ystart, xstart), (zstop, ystop, xstop) = images_array_2.min(axis=0), images_array_2.max(axis=0) + 1
    empty = []
    for i in range(zstart, zstop):
        test_array = images_array[i, :, :]
        num = test_array.sum()
        empty.append(num)
    max_index = empty.index(max(empty)) + 1
    return (zstart, ystart, xstart), (zstop, ystop, xstop), max_index
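# Note (added): maskcroppingbox returns the mask's 3D bounding box plus the index of the slice
# with the largest mask area; the +1 offset matches the one-slice padding applied when cropping
# in featureextraction(), so that slice and its two neighbours can be stacked as three channels.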


def featureextraction(imageFilepath, maskFilepath):
    image_array = sitk.GetArrayFromImage(imageFilepath)
    mask_array = sitk.GetArrayFromImage(maskFilepath)
    (zstart, ystart, xstart), (zstop, ystop, xstop), maxIndex = maskcroppingbox(mask_array, use2D=False)
    roi_images = image_array[zstart - 1:zstop + 1, ystart:ystop, xstart:xstop].transpose((2, 1, 0))
    roi_images1 = zoom(roi_images, zoom=[224 / roi_images.shape[0], 224 / roi_images.shape[1], 1], order=3)
    roi_images2 = np.array(roi_images1, dtype=float)
    x = image.img_to_array(roi_images2)

    x_order = np.asarray(x[:, :, maxIndex])
    x_befor = np.asarray(x[:, :, maxIndex - 1])
    x_later = np.asarray(x[:, :, maxIndex + 1])
    a = np.asarray(x_order)
    b = np.asarray(x_befor)
    c = np.asarray(x_later)
    mylist = [b, a, c]
    x = np.asarray(mylist)
    x = np.expand_dims(x, axis=0)

    # print('multi channel x shape:', x.shape)  # multi channel x shape: (1, 3, 224, 224)
    x = np.transpose(x, (0, 2, 3, 1))
    # print('x1.shape:', x.shape)  # x1.shape: (1, 224, 224, 3)
    x = preprocess_input(x)
    base_model_pool_features = model.predict(x)
    features = base_model_pool_features[0]
    deeplearningfeatures = collections.OrderedDict()
    for ind_, f_ in enumerate(features):
        deeplearningfeatures[str(ind_)] = f_
    return deeplearningfeatures

def main():
    outPath = 'D:/BM-GBM/'

    inputCSV = os.path.join(outPath, 'example.csv')
    outputFilepath = os.path.join(outPath, 'example-DLR.csv')

    # Configure logging
    rLogger = logging.getLogger('DLradiomics')

    # Initialize logging for batch log messages
    logger = rLogger.getChild('batch')

    # logger.info('pyradiomics version: %s', radiomics.__version__)
    logger.info('Loading CSV')

    # ####### Up to this point, this script is equal to the 'regular' batch processing script ########

    try:
        # Use pandas to read and transpose ('.T') the input data
        # The transposition is needed so that each column represents one test case. This makes it easier to
        # iterate over the input cases
        flists = pandas.read_csv(inputCSV).T
    except Exception:
        logger.error('CSV READ FAILED', exc_info=True)
        exit(-1)

    logger.info('Loading Done')
    logger.info('Patients: %d', len(flists.columns))

    # Instantiate a pandas data frame to hold the results of all patients
    results = pandas.DataFrame()

    for entry in flists:  # Loop over all columns (i.e. the test cases)
        logger.info("(%d/%d) Processing Patient (Image: %s, Mask: %s)",
                    entry + 1,
                    len(flists),
                    flists[entry]['Image'],
                    flists[entry]['Mask'])

        imageFilepath = flists[entry]['Image']
        maskFilepath = flists[entry]['Mask']
        label = flists[entry].get('Label', None)

        if str(label).isdigit():
            label = int(label)
        else:
            label = None

        if (imageFilepath is not None) and (maskFilepath is not None):
            featureVector = flists[entry]  # This is a pandas Series
            featureVector['Image'] = os.path.basename(imageFilepath)
            featureVector['Mask'] = os.path.basename(maskFilepath)

            try:
                # featureextraction returns the result as an ordered dictionary, which can easily be converted to
                # a pandas Series. The keys in the dictionary are used as the index (labels for the rows), with the
                # feature values as the values in the rows.
                # color_channel = 0
                im = sitk.ReadImage(imageFilepath)
                mask = sitk.ReadImage(maskFilepath)

                result = pandas.Series(featureextraction(im, mask))
                featureVector = featureVector.append(result)

            except Exception:
                logger.error('FEATURE EXTRACTION FAILED:', exc_info=True)

            # To add the calculated features for this case to our data frame, the series must have a name (which
            # will be the name of the column).
            featureVector.name = entry
            # By specifying an 'outer' join, all calculated features are added to the data frame, including those
            # not calculated for previous cases. This also ensures we don't end up with an empty frame, as for the
            # first patient it is 'joined' with the empty data frame.
            results = results.join(featureVector, how='outer')  # If feature extraction failed, results will be all NaN

    logger.info('Extraction complete, writing CSV')
    # .T transposes the data frame, so that each line represents one patient, with the extracted features as columns
    results.T.to_csv(outputFilepath, index=False, na_rep='NaN')
    logger.info('CSV writing complete')


if __name__ == '__main__':
    main()
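A minimal usage sketch for DL.py (not part of this commit): it assumes DL.py is importable from the working directory and that the BM example image and label from this commit exist at the paths listed in example.csv.

import SimpleITK as sitk
from DL import featureextraction  # importing DL also builds the ResNet50 model

im = sitk.ReadImage(r'D:/BM-GBM/BM/example_T1.nii')
mask = sitk.ReadImage(r'D:/BM-GBM/BM/Label_T1.nii.gz')

features = featureextraction(im, mask)   # OrderedDict with one entry per ResNet50 output
print(len(features))                     # expected: 1000 deep features
print(list(features.items())[:5])        # first few (index, value) pairs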
Binary file added GBM/example_T1.nii
Binary file not shown.
7 changes: 7 additions & 0 deletions example-DLR.csv

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions example-HCR.csv

Large diffs are not rendered by default.

7 changes: 7 additions & 0 deletions example.csv
@@ -0,0 +1,7 @@
ID,Image,Mask,Y,MRI
1,D:\BM-GBM\BM\example_T1.nii,D:\BM-GBM\BM\Label_T1.nii.gz,BM,T1
2,D:\BM-GBM\BM\example_T2.nii,D:\BM-GBM\BM\Label_T2.nii.gz,BM,T2
3,D:\BM-GBM\BM\example_T1C.nii,D:\BM-GBM\BM\Label_T1C.nii.gz,BM,T1C
4,D:\BM-GBM\GBM\example_T1.nii,D:\BM-GBM\GBM\label_T1.nii.gz,GBM,T1
5,D:\BM-GBM\GBM\example_T2.nii,D:\BM-GBM\GBM\label_T2.nii.gz,GBM,T2
6,D:\BM-GBM\GBM\example_T1C.nii,D:\BM-GBM\GBM\label_T1C.nii.gz,GBM,T1C
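Both DL.py and extractor.py read this CSV with pandas and transpose it so that each column becomes one case; a minimal sketch of that pattern (standalone, assuming example.csv is in the working directory):

import pandas

flists = pandas.read_csv('example.csv').T   # transpose: one column per case (0..5)
for entry in flists:
    print(entry, flists[entry]['Image'], flists[entry]['Mask'], flists[entry]['MRI'])
# There is no 'Label' column, so both scripts fall back to label = None;
# PyRadiomics then uses the 'label: 1' default from tumor.yaml.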
123 changes: 123 additions & 0 deletions extractor.py
@@ -0,0 +1,123 @@
# Hand-crafted radiomics

from __future__ import print_function
import logging
import os
import pandas
import SimpleITK as sitk
import radiomics
from radiomics import featureextractor

def main():
    # Input data
    outPath = r'D:/BM-GBM/'
    inputCSV = os.path.join(outPath, 'example.csv')
    outputFilepath = os.path.join(outPath, 'example-HCR.csv')
    params = os.path.join(outPath, 'tumor.yaml')

    # Configure logging
    rLogger = logging.getLogger('radiomics')

    # Set logging level
    # rLogger.setLevel(logging.INFO)  # Not needed, default log level of logger is INFO

    # Create handler for writing to log file
    # handler = logging.FileHandler(filename=progress_filename, mode='w')
    # handler.setFormatter(logging.Formatter('%(levelname)s:%(name)s: %(message)s'))
    # rLogger.addHandler(handler)

    # Initialize logging for batch log messages
    logger = rLogger.getChild('batch')

    # Set verbosity level for output to stderr (default level = WARNING)
    # radiomics.setVerbosity(logging.INFO)
    #
    # logger.info('pyradiomics version: %s', radiomics.__version__)
    logger.info('Loading CSV')

    # ####### Up to this point, this script is equal to the 'regular' batch processing script ########

    try:
        # Use pandas to read and transpose ('.T') the input data
        # The transposition is needed so that each column represents one test case. This makes it easier to
        # iterate over the input cases
        flists = pandas.read_csv(inputCSV).T
    except Exception:
        logger.error('CSV READ FAILED', exc_info=True)
        exit(-1)

    logger.info('Loading Done')
    logger.info('Patients: %d', len(flists.columns))

    if os.path.isfile(params):
        extractor = featureextractor.RadiomicsFeatureExtractor(params)
    else:  # Parameter file not found, use hardcoded settings instead
        settings = {}
        settings['binWidth'] = 25
        settings['resampledPixelSpacing'] = None  # [3, 3, 3]
        settings['interpolator'] = sitk.sitkBSpline
        settings['enableCExtensions'] = True

        extractor = featureextractor.RadiomicsFeatureExtractor(**settings)
        # extractor.enableInputImages(wavelet={'level': 2})

    logger.info('Enabled input images types: %s', extractor.enabledImagetypes)
    logger.info('Enabled features: %s', extractor.enabledFeatures)
    logger.info('Current settings: %s', extractor.settings)

    # Instantiate a pandas data frame to hold the results of all patients
    results = pandas.DataFrame()

    for entry in flists:  # Loop over all columns (i.e. the test cases)
        logger.info("(%d/%d) Processing Patient (Image: %s, Mask: %s)",
                    entry + 1,
                    len(flists),
                    flists[entry]['Image'],
                    flists[entry]['Mask'])

        imageFilepath = flists[entry]['Image']
        maskFilepath = flists[entry]['Mask']
        label = flists[entry].get('Label', None)

        if str(label).isdigit():
            label = int(label)
        else:
            label = None

        if (imageFilepath is not None) and (maskFilepath is not None):
            featureVector = flists[entry]  # This is a pandas Series
            featureVector['Image'] = os.path.basename(imageFilepath)
            featureVector['Mask'] = os.path.basename(maskFilepath)

            try:
                # PyRadiomics returns the result as an ordered dictionary, which can easily be converted to a
                # pandas Series. The keys in the dictionary are used as the index (labels for the rows), with the
                # feature values as the values in the rows.
                result = pandas.Series(extractor.execute(imageFilepath, maskFilepath, label))
                featureVector = featureVector.append(result)
                # For multi-channel (vector) images, extract a single channel first:
                # color_channel = 0
                # im = sitk.ReadImage(imageFilepath)
                # selector = sitk.VectorIndexSelectionCastImageFilter()
                # selector.SetIndex(color_channel)
                # im = selector.Execute(im)
                # result = pandas.Series(extractor.execute(im, maskFilepath, label))
                # featureVector = featureVector.append(result)
            except Exception:
                logger.error('FEATURE EXTRACTION FAILED:', exc_info=True)

            # To add the calculated features for this case to our data frame, the series must have a name (which
            # will be the name of the column).
            featureVector.name = entry
            # By specifying an 'outer' join, all calculated features are added to the data frame, including those
            # not calculated for previous cases. This also ensures we don't end up with an empty frame, as for the
            # first patient it is 'joined' with the empty data frame.
            results = results.join(featureVector, how='outer')  # If feature extraction failed, results will be all NaN

    logger.info('Extraction complete, writing CSV')
    # .T transposes the data frame, so that each line represents one patient, with the extracted features as columns
    results.T.to_csv(outputFilepath, index=False, na_rep='NaN')
    logger.info('CSV writing complete')


if __name__ == '__main__':
    main()
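A minimal single-case sketch for the hand-crafted pipeline (not part of this commit), assuming tumor.yaml and the BM example files are present at the paths used above:

from radiomics import featureextractor

extractor = featureextractor.RadiomicsFeatureExtractor(r'D:/BM-GBM/tumor.yaml')
result = extractor.execute(r'D:/BM-GBM/BM/example_T1.nii', r'D:/BM-GBM/BM/Label_T1.nii.gz')
for key, value in list(result.items())[:10]:   # diagnostics_* entries come first, then feature values
    print(key, value)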
99 changes: 99 additions & 0 deletions tumor.yaml
@@ -0,0 +1,99 @@
#######http://www.mpip.sdnu.edu.cn/##############################################################
#######https://pyradiomics.readthedocs.io/en/latest/index.html###################################
#######MRI-based Radiomics Extraction using PyRadiomics #########################################

imageType:
  Original: {}
  LoG:
    # If the in-plane spacing is large (> 2mm), consider removing sigma value 1.
    sigma: [1.0, 3.0, 5.0]
  Wavelet: {}
    #start_level: 0
    #level: 3
    #wavelet: 'coif1'  # There are many other wavelet algorithms.
  #Square: {}
  #SquareRoot: {}
  #Logarithm: {}
  #Exponential: {}
  #Gradient: {}
  #LocalBinaryPattern2D: {}
  #LocalBinaryPattern3D: {}

featureClass:
  shape:
  firstorder:
  glcm:  # Disable SumAverage by specifying all other GLCM features available
    - 'Autocorrelation'
    - 'JointAverage'
    - 'ClusterProminence'
    - 'ClusterShade'
    - 'ClusterTendency'
    - 'Contrast'
    - 'Correlation'
    - 'DifferenceAverage'
    - 'DifferenceEntropy'
    - 'DifferenceVariance'
    - 'JointEnergy'
    - 'JointEntropy'
    - 'Imc1'
    - 'Imc2'
    - 'Idm'
    - 'Idmn'
    - 'Id'
    - 'Idn'
    - 'InverseVariance'
    - 'MaximumProbability'
    - 'SumEntropy'
    - 'SumSquares'
  glrlm:
  glszm:
  gldm:
  ngtdm:

setting:
  # Normalization:
  # MR signal is usually relative, with large differences between scanners and vendors. By normalizing the image before
  # feature calculation, this confounding effect may be reduced. However, if only one specific scanner is used, or the
  # images reflect some absolute world value (e.g. ADC maps, T2 maps (NOT T2-weighted)), consider disabling the
  # normalization.
  normalize: true
  normalizeScale: 100  # This allows you to use more or less the same bin width.

  # Resampling:
  # Not enabled in this example. However, because texture calculation assumes isotropic spacing, a forced 2D extraction
  # is used, therefore only requiring the voxels to be isotropic in-plane. Enable pre-cropping to reduce the memory
  # footprint and speed up applying the filters.
  preCrop: true

  # Forced 2D extraction:
  # This allows texture features to be calculated using anisotropic voxels (although it assumes that voxels are
  # isotropic in-plane). It is an alternative to resampling the image to isotropic voxels.
  #force2D: true
  #force2Ddimension: 0  # axial slices; for coronal slices use dimension 1, and for sagittal, dimension 2.

  # Mask validation:
  # correctMask and geometryTolerance are not needed, as both image and mask are resampled. If you expect very small
  # masks, consider enabling a size constraint by uncommenting the settings below:
  #minimumROIDimensions: 2
  #minimumROISize: 50

  # Image discretization:
  # The ideal number of bins is somewhere in the order of 16-128 bins. A possible way to define a good binWidth is to
  # extract firstorder:Range from the dataset to analyze, and choose a binWidth such that range/binWidth stays
  # approximately in this range of bins.
  binWidth: 5

  # First order specific settings:
  # When normalizing, gray values below the mean will be negative. Shifting by 300 (3 StdDevs * 100) ensures that the
  # majority of voxels is positive (only outliers > 3 SD below the mean will be negative).
  voxelArrayShift: 300

  # Misc:
  # Default label value. Labels can also be defined in the call to featureextractor.execute, as a command line
  # argument, or in a column "Label" in the input CSV (batch processing).
  label: 1

#voxelSetting:
# kernelRadius: 2
# maskedKernel: true
# initValue: nan
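A quick arithmetic check of the binWidth heuristic above, using a hypothetical firstorder:Range value (extract the real value from your own normalized data):

observed_range = 450.0              # hypothetical firstorder:Range after normalization (scale 100)
bin_width = 5                       # binWidth from tumor.yaml
n_bins = observed_range / bin_width
print(n_bins)                       # 90.0 -> within the suggested 16-128 bin range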
