Skip to content

DenseNet121_CWBCE_E20_B32

Tobias Schmidt edited this page Oct 19, 2020 · 1 revision

DenseNet121_CWBCE_E20_B32

Version: 1

Trained DenseNet121 architecture using the 'CWBCE_E20_B32' benchmark. The benchmark was initialized for the chexpert_full dataset with a batch size of 32, shuffle set to True, and images rescaled to dimension (256, 256). The training was done for 20 epochs using the Adam optimizer and weighted_binary_crossentropy loss. A total of 14 labels/pathologies were included in the training and encoded using the 'uzeroes' method. The training set included 141807 samples, the validation set 36980, and the test set 44627.

from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import json
import os
import re
import pprint
# The experiment record is handed to the notebook as a JSON document in the
# EXP_DATA environment variable; "history" holds the per-epoch metric series.
raw_experiment = os.environ['EXP_DATA']
data = json.loads(raw_experiment)
history = data['history']

Model and Benchmark Summary

# Print the description one sentence per paragraph.
# Fragments are stripped so sentences do not start with the space that followed
# the previous period, and empty fragments (e.g. after the final period) are
# skipped instead of blindly dropping the last fragment, so a description that
# does not end with "." keeps its last sentence.
for sentence in data["description"].split("."):
    sentence = sentence.strip()
    if sentence:
        print(sentence + ".\n")
Trained DenseNet121 architecture using the 'CWBCE_E20_B32' benchmark.

 The benchmark was initialized for the chexpert_full dataset with batch size of 32, shuffel set to True and images rescaled to dimension (256, 256).


The training was done for 20 epochs using the Adam optimizer and weighted_binary_crossentropy loss.


A total of 14 labels/pathologies were included in the training and encoded using the 'uzeroes' method.


The traing set included 141807 number of sample, the validation set 36980, and the test set 44627.

Extract and format metrics to be plotted

# If any metrics were renamed, add the new name here as {"default_name": "new_name"}.
metric_custom_names={"auc":"AUC_ROC"}

# Raw strings are required for the replacement templates: in a normal string
# literal "\g" is an invalid escape sequence (a warning today, an error in
# future Python versions).
# Display names: separate camelCase words with a space.
metric_names = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1> \g<2>", name) for name in data["benchmark"]["metrics"]]
# History keys: camelCase converted to lower-case snake_case.
metric_keys = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1>_\g<2>", name).lower() for name in data["benchmark"]["metrics"]]

for default_name, custom_name in metric_custom_names.items():
    # Only swap in the custom name when the default one was not recorded in the
    # training history but is one of the benchmark's metrics.
    if default_name not in history and default_name in metric_keys:
        metric_keys[metric_keys.index(default_name)] = custom_name

Plot training & validation accuracy values

def print_or_plot_metric(metric_key, metric_name, figure_name):
    """Print a metric that has a single recorded epoch, otherwise plot it.

    Reads the module-level ``history`` mapping.

    Args:
        metric_key: Key of the training series in ``history``; the validation
            series is looked up under ``'val_' + metric_key``.
        metric_name: Human-readable metric name used in the printed text and
            passed on as the plot's y-axis label.
        figure_name: Plot title, used only when the metric is plotted.

    Raises:
        KeyError: If ``metric_key`` is not present in ``history``.
    """
    if len(history[metric_key]) == 1:
        print("Data for {m_name} only available for a single epoch. \nSkipping plot and printing data...".format(m_name=metric_name))
        print('Train {}: '.format(metric_name), history[metric_key])
        val_key = 'val_' + metric_key
        # The validation series is optional (plot_epoch_metric treats it the
        # same way), so only print it when it was actually recorded.
        if val_key in history:
            print('Validation {}: '.format(metric_name), history[val_key])
        print()
    else:
        plot_epoch_metric(metric_key, metric_name, figure_name)
        
def plot_epoch_metric(metric_key, metric_name, figure_name):
    """Plot a metric's per-epoch training curve, plus validation when recorded.

    Reads the module-level ``history`` mapping. The validation curve and the
    legend are drawn only if ``'val_' + metric_key`` is present in ``history``.

    Args:
        metric_key: Key of the training series in ``history``.
        metric_name: Label for the y-axis.
        figure_name: Title of the figure.
    """
    val_key = 'val_' + metric_key
    has_validation = val_key in history

    figure(num=None, figsize=(10, 6))
    plt.plot(history[metric_key])
    if has_validation:
        plt.plot(history[val_key])

    plt.title(figure_name)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    if has_validation:
        plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

# Render every benchmark metric, pairing each history key with its display name.
for metric_key, metric_name in zip(metric_keys, metric_names):
    print_or_plot_metric(metric_key, metric_name, "Model " + metric_name)

png

png

png

png

png

Plot training & validation loss values

# Training/validation loss over the epochs (printed instead when only one epoch exists).
print_or_plot_metric(metric_key="loss", metric_name="Loss", figure_name="Model loss")

png

# The learning-rate series is optional; plot it only when it was recorded.
if "lr" in history:
    plot_epoch_metric(metric_key="lr", metric_name="Learning Rate", figure_name="Learning Rate")

png

Classification Report

# Show the stored classification report, if the experiment has a non-empty one.
report = data.get('classification_report')
if report:
    print(report)
                            precision    recall  f1-score   support

                No Finding       0.10      0.27      0.14      4330
Enlarged Cardiomediastinum       0.05      0.30      0.08      2113
              Cardiomegaly       0.12      0.25      0.16      5240
              Lung Opacity       0.47      0.55      0.51     21148
               Lung Lesion       0.04      0.22      0.07      1917
                     Edema       0.23      0.37      0.28     10309
             Consolidation       0.07      0.44      0.12      3019
                 Pneumonia       0.03      0.21      0.05      1198
               Atelectasis       0.15      0.47      0.22      6573
              Pneumothorax       0.08      0.23      0.12      3889
          Pleural Effusion       0.39      0.46      0.42     17361
             Pleural Other       0.02      0.11      0.03       705
                  Fracture       0.04      0.21      0.07      1850
           Support Devices       0.51      0.55      0.53     23041

                 micro avg       0.22      0.44      0.29    102693
                 macro avg       0.16      0.33      0.20    102693
              weighted avg       0.33      0.44      0.37    102693
               samples avg       0.21      0.41      0.26    102693

Test Scores

# List every score recorded for the test set, one per line.
test_scores = data.get('test')
if test_scores:
    for score_name, score in test_scores.items():
        print('Test {}: '.format(score_name), score)
Test loss:  1.3472518920898438
Test auc:  0.7710159420967102
Test precision:  0.3716121017932892
Test recall:  0.7464773654937744
Test f2_score:  0.6211581230163574
Test binary_accuracy:  0.7508498430252075

Benchmark Details

# Dump the full benchmark configuration with a 4-space indent.
pp = pprint.PrettyPrinter(indent=4)
if "benchmark" in data:
    pp.pprint(data["benchmark"])
{   'batch_size': 32,
    'benchmark_name': 'CWBCE_E20_B32',
    'dataset_folder': 'data/chexpert/full',
    'dataset_name': 'chexpert_full',
    'dim': [256, 256],
    'drop_last': True,
    'epochs': 20,
    'label_columns': [   'No Finding',
                         'Enlarged Cardiomediastinum',
                         'Cardiomegaly',
                         'Lung Opacity',
                         'Lung Lesion',
                         'Edema',
                         'Consolidation',
                         'Pneumonia',
                         'Atelectasis',
                         'Pneumothorax',
                         'Pleural Effusion',
                         'Pleural Other',
                         'Fracture',
                         'Support Devices'],
    'loss': 'weighted_binary_crossentropy',
    'metrics': ['auc', 'precision', 'recall', 'f2_score', 'binary_accuracy'],
    'models_dir': 'models',
    'n_channels': 3,
    'nan_replacement': 0,
    'optimizer': 'Adam',
    'path_column': 'Path',
    'path_column_prefix': '',
    'shuffle': True,
    'test_num_samples': 44627,
    'train_num_samples': 141807,
    'u_enc': 'uzeroes',
    'unc_value': -1,
    'valid_num_samples': 36980}

DenseNet121_CWBCE_E20_B32

Version: 1

Trained DenseNet121 architecture using the 'CWBCE_E20_B32' benchmark. The benchmark was initialized for the chexpert_full dataset with a batch size of 32, shuffle set to True, and images rescaled to dimension (256, 256). The training was done for 20 epochs using the Adam optimizer and weighted_binary_crossentropy loss. A total of 14 labels/pathologies were included in the training and encoded using the 'uzeroes' method. The training set included 141807 samples, the validation set 36980, and the test set 44627.

from pathlib import Path
from dotenv import load_dotenv, find_dotenv
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
import json
import os
import re
import pprint

# Make sure the working directory is the repository root ("idp-radio-1").
# NOTE(review): presumably needed so the `src.*` imports and the relative
# dataset paths below resolve - confirm.
basepath = Path(os.getcwd())
if basepath.name != "idp-radio-1":
    # Not in the root: move two directory levels up and print the new location.
    # Assumes the notebook lives exactly two levels below the root - TODO confirm.
    os.chdir(basepath.parent.parent)
    print(os.getcwd())
# Locate a .env file and load its variables into the process environment.
load_dotenv(find_dotenv())

from src.preprocessing.split.train_test_split import train_test_split
/srv/idp-radio-1
# The experiment record is handed to the notebook as a JSON document in the
# EXP_DATA environment variable; "history" holds the per-epoch metric series.
raw_experiment = os.environ['EXP_DATA']
data = json.loads(raw_experiment)
history = data['history']

Model and Benchmark Summary

# Print the description one sentence per paragraph.
# Fragments are stripped so sentences do not start with the space that followed
# the previous period, and empty fragments (e.g. after the final period) are
# skipped instead of blindly dropping the last fragment, so a description that
# does not end with "." keeps its last sentence.
for sentence in data["description"].split("."):
    sentence = sentence.strip()
    if sentence:
        print(sentence + ".\n")
Trained DenseNet121 architecture using the 'CWBCE_E20_B32' benchmark.

 The benchmark was initialized for the chexpert_full dataset with batch size of 32, shuffel set to True and images rescaled to dimension (256, 256).


The training was done for 20 epochs using the Adam optimizer and weighted_binary_crossentropy loss.


A total of 14 labels/pathologies were included in the training and encoded using the 'uzeroes' method.


The traing set included 141807 number of sample, the validation set 36980, and the test set 44627.

Extract and format metrics to be plotted

# If any metrics were renamed, add the new name here as {"default_name": "new_name"}.
metric_custom_names={"auc":"AUC_ROC"}

# Raw strings are required for the replacement templates: in a normal string
# literal "\g" is an invalid escape sequence (a warning today, an error in
# future Python versions).
# Display names: separate camelCase words with a space.
metric_names = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1> \g<2>", name) for name in data["benchmark"]["metrics"]]
# History keys: camelCase converted to lower-case snake_case.
metric_keys = [re.sub(r"([a-z0-9])([A-Z])", r"\g<1>_\g<2>", name).lower() for name in data["benchmark"]["metrics"]]

for default_name, custom_name in metric_custom_names.items():
    # Only swap in the custom name when the default one was not recorded in the
    # training history but is one of the benchmark's metrics.
    if default_name not in history and default_name in metric_keys:
        metric_keys[metric_keys.index(default_name)] = custom_name

Plot training & validation accuracy values

def print_or_plot_metric(metric_key, metric_name, figure_name):
    """Print a metric that has a single recorded epoch, otherwise plot it.

    Reads the module-level ``history`` mapping.

    Args:
        metric_key: Key of the training series in ``history``; the validation
            series is looked up under ``'val_' + metric_key``.
        metric_name: Human-readable metric name used in the printed text and
            passed on as the plot's y-axis label.
        figure_name: Plot title, used only when the metric is plotted.

    Raises:
        KeyError: If ``metric_key`` is not present in ``history``.
    """
    if len(history[metric_key]) == 1:
        print("Data for {m_name} only available for a single epoch. \nSkipping plot and printing data...".format(m_name=metric_name))
        print('Train {}: '.format(metric_name), history[metric_key])
        val_key = 'val_' + metric_key
        # The validation series is optional (plot_epoch_metric treats it the
        # same way), so only print it when it was actually recorded.
        if val_key in history:
            print('Validation {}: '.format(metric_name), history[val_key])
        print()
    else:
        plot_epoch_metric(metric_key, metric_name, figure_name)
        
def plot_epoch_metric(metric_key, metric_name, figure_name):
    """Plot a metric's per-epoch training curve, plus validation when recorded.

    Reads the module-level ``history`` mapping. The validation curve and the
    legend are drawn only if ``'val_' + metric_key`` is present in ``history``.

    Args:
        metric_key: Key of the training series in ``history``.
        metric_name: Label for the y-axis.
        figure_name: Title of the figure.
    """
    val_key = 'val_' + metric_key
    has_validation = val_key in history

    figure(num=None, figsize=(10, 6))
    plt.plot(history[metric_key])
    if has_validation:
        plt.plot(history[val_key])

    plt.title(figure_name)
    plt.ylabel(metric_name)
    plt.xlabel('Epoch')
    if has_validation:
        plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

# Render every benchmark metric, pairing each history key with its display name.
for metric_key, metric_name in zip(metric_keys, metric_names):
    print_or_plot_metric(metric_key, metric_name, "Model " + metric_name)

png

png

png

png

png

Plot training & validation loss values

# Training/validation loss over the epochs (printed instead when only one epoch exists).
print_or_plot_metric(metric_key="loss", metric_name="Loss", figure_name="Model loss")

png

# The learning-rate series is optional; plot it only when it was recorded.
if "lr" in history:
    plot_epoch_metric(metric_key="lr", metric_name="Learning Rate", figure_name="Learning Rate")

png

Classification Report

# Show the stored classification report, if the experiment has a non-empty one.
report = data.get('classification_report')
if report:
    print(report)
                            precision    recall  f1-score   support

                No Finding       0.10      0.27      0.14      4330
Enlarged Cardiomediastinum       0.05      0.30      0.08      2113
              Cardiomegaly       0.12      0.25      0.16      5240
              Lung Opacity       0.47      0.55      0.51     21148
               Lung Lesion       0.04      0.22      0.07      1917
                     Edema       0.23      0.37      0.28     10309
             Consolidation       0.07      0.44      0.12      3019
                 Pneumonia       0.03      0.21      0.05      1198
               Atelectasis       0.15      0.47      0.22      6573
              Pneumothorax       0.08      0.23      0.12      3889
          Pleural Effusion       0.39      0.46      0.42     17361
             Pleural Other       0.02      0.11      0.03       705
                  Fracture       0.04      0.21      0.07      1850
           Support Devices       0.51      0.55      0.53     23041

                 micro avg       0.22      0.44      0.29    102693
                 macro avg       0.16      0.33      0.20    102693
              weighted avg       0.33      0.44      0.37    102693
               samples avg       0.21      0.41      0.26    102693

Test Scores

# List every score recorded for the test set, one per line.
test_scores = data.get('test')
if test_scores:
    for score_name, score in test_scores.items():
        print('Test {}: '.format(score_name), score)
Test loss:  1.3472518920898438
Test auc:  0.7710159420967102
Test precision:  0.3716121017932892
Test recall:  0.7464773654937744
Test f2_score:  0.6211581230163574
Test binary_accuracy:  0.7508498430252075

Benchmark Details

# Dump the full benchmark configuration with a 4-space indent.
pp = pprint.PrettyPrinter(indent=4)
if "benchmark" in data:
    pp.pprint(data["benchmark"])
{   'batch_size': 32,
    'benchmark_name': 'CWBCE_E20_B32',
    'dataset_folder': 'data/chexpert/full',
    'dataset_name': 'chexpert_full',
    'dim': [256, 256],
    'drop_last': True,
    'epochs': 20,
    'label_columns': [   'No Finding',
                         'Enlarged Cardiomediastinum',
                         'Cardiomegaly',
                         'Lung Opacity',
                         'Lung Lesion',
                         'Edema',
                         'Consolidation',
                         'Pneumonia',
                         'Atelectasis',
                         'Pneumothorax',
                         'Pleural Effusion',
                         'Pleural Other',
                         'Fracture',
                         'Support Devices'],
    'loss': 'weighted_binary_crossentropy',
    'metrics': ['auc', 'precision', 'recall', 'f2_score', 'binary_accuracy'],
    'models_dir': 'models',
    'n_channels': 3,
    'nan_replacement': 0,
    'optimizer': 'Adam',
    'path_column': 'Path',
    'path_column_prefix': '',
    'shuffle': True,
    'test_num_samples': 44627,
    'train_num_samples': 141807,
    'u_enc': 'uzeroes',
    'unc_value': -1,
    'valid_num_samples': 36980}

Data Distribution

# Re-create the train/validation/test split, but only for benchmarks that
# recorded the seed the split was made with.
if 'benchmark' in data and 'split_seed' in data['benchmark']:
    benchmark = data['benchmark']

    dataset_path = Path(benchmark['dataset_folder'])
    # Optional settings fall back to these defaults when the benchmark omits them.
    train_labels = benchmark.get('train_labels', 'train.csv')
    split_test_size = benchmark.get('split_test_size', 0.2)
    split_valid_size = benchmark.get('split_valid_size', 0.2)
    split_group = benchmark.get('split_group', 'patient_id')
    split_seed = benchmark['split_seed']

    all_labels = pd.read_csv(dataset_path / train_labels)
    # First split off the test set, then split validation off the remaining training data.
    train_labels, test_labels = train_test_split(
        all_labels, test_size=split_test_size, group=split_group, seed=split_seed
    )
    train_labels, validation_labels = train_test_split(
        train_labels, test_size=split_valid_size, group=split_group, seed=split_seed
    )
from src.datasets.u_encoding import uencode

def get_distribution(labels):
    """Count positive and negative entries per pathology label.

    Reads the module-level ``benchmark`` mapping: missing values are filled
    with ``benchmark['nan_replacement']`` when that key exists, the values are
    passed through ``uencode`` using ``benchmark['u_enc']`` and
    ``benchmark['unc_value']``, and only ``benchmark['label_columns']`` are
    counted.

    Args:
        labels: DataFrame of label rows. NOTE(review): presumably one of the
            frames returned by ``train_test_split`` - confirm.

    Returns:
        DataFrame indexed by ``Pathology`` with the columns ``Positive``,
        ``Positive %``, ``Negative`` and ``Negative %``. Counts are the number
        of rows equal to 1.0 and 0.0 respectively; percentages are relative to
        the number of rows and rounded to whole numbers.
    """
    if 'nan_replacement' in benchmark.keys():
        labels = labels.fillna(benchmark['nan_replacement'])

    encoded = uencode(benchmark['u_enc'], labels.to_numpy(), unc_value=benchmark['unc_value'])
    encoded = pd.DataFrame(encoded, columns=labels.columns)

    labels = encoded[benchmark['label_columns']]
    total = labels.shape[0]

    rows = {'Pathology': [], 'Positive': [], 'Positive %': [], 'Negative': [], 'Negative %': [],}
    for label in labels.columns:
        # Number of rows per distinct value of this label column.
        counts = labels.groupby(label).size()
        rows['Pathology'].append(label)

        positive = counts[1.0] if 1.0 in counts else 0
        positive_percent = positive / total * 100
        rows['Positive'].append(positive)
        rows['Positive %'].append(round(positive_percent))

        negative = counts[-0.0] if -0.0 in counts else 0
        negative_percent = negative / total * 100
        rows['Negative'].append(negative)
        rows['Negative %'].append(round(negative_percent))

    return pd.DataFrame(rows).set_index('Pathology')
# Plot the label distributions, only when the split could be re-created above.
if 'benchmark' in data and 'split_seed' in data['benchmark']:
    train = get_distribution(train_labels)
    val = get_distribution(validation_labels)
    test = get_distribution(test_labels)

    bar_style = {'kind': 'bar', 'figsize': (10, 7)}

    # Positive share per pathology, side by side for the three sets. The first
    # merge suffixes the clashing column names with _x/_y; the test column
    # keeps its plain name.
    positives = train[['Positive %']].merge(val[['Positive %']], left_index=True, right_index=True)
    positives = positives.merge(test[['Positive %']], left_index=True, right_index=True)
    positives = positives.rename(columns={"Positive %_x": "Positives Train", "Positive %_y": "Positives Validation", "Positive %": "Positives Test", })
    positives.copy().plot(title="Positive Labels Distribution", **bar_style)

    # Same comparison for the negative share.
    negatives = train[['Negative %']].merge(val[['Negative %']], left_index=True, right_index=True)
    negatives = negatives.merge(test[['Negative %']], left_index=True, right_index=True)
    negatives = negatives.rename(columns={"Negative %_x": "Negative Train", "Negative %_y": "Negative Validation", "Negative %": "Negative Test", })
    negatives.copy().plot(title="Negative Labels Distribution", **bar_style)

    # Positive vs. negative share within each individual set.
    train[['Positive %', 'Negative %']].copy().plot(title="Training set", **bar_style)
    val[['Positive %', 'Negative %']].copy().plot(title="Validation set", **bar_style)
    test[['Positive %', 'Negative %']].copy().plot(title="Test set", **bar_style)
Clone this wiki locally