TensorMap interpretation field (#101)
* default tensor from file mostly removed, group is gone, source becomes path_prefix
lucidtronix authored Feb 4, 2020
1 parent 431e458 commit eea8602
Showing 15 changed files with 7,094 additions and 7,999 deletions.
761 changes: 159 additions & 602 deletions ml4cvd/TensorMap.py

Large diffs are not rendered by default.
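
Since the largest diff is not rendered, here is a hedged sketch of the headline change: TensorMaps now carry an explicit `Interpretation` enum instead of the old free-form `group` string, and `source` becomes `path_prefix`. The constructor details below are assumptions inferred from the call sites visible in models.py and the commit message, not from the hidden TensorMap.py diff itself; the shape is illustrative.

```python
from ml4cvd.TensorMap import TensorMap, Interpretation

# Hypothetical declaration in the new style; keyword names beyond
# `interpretation` and `path_prefix` are assumptions.
ecg_text = TensorMap('ecg_rest_text', shape=(100, 26),
                     interpretation=Interpretation.LANGUAGE,
                     path_prefix='ecg_rest_text')
```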

7 changes: 1 addition & 6 deletions ml4cvd/arguments.py
@@ -23,7 +23,7 @@
 from ml4cvd.TensorMap import TensorMap
 from ml4cvd.tensor_maps_by_hand import TMAPS
 from ml4cvd.defines import IMPUTATION_RANDOM, IMPUTATION_MEAN
-from ml4cvd.tensor_map_maker import generate_multi_field_continuous_tensor_map, generate_continuous_tensor_map_from_file
+from ml4cvd.tensor_map_maker import generate_continuous_tensor_map_from_file


 def parse_args():
@@ -38,7 +38,6 @@ def parse_args():
     # Tensor Map arguments
     parser.add_argument('--input_tensors', default=[], nargs='+')
     parser.add_argument('--output_tensors', default=[], nargs='+')
-    parser.add_argument('--input_continuous_tensors', default=[], nargs='+', help='Continuous tensor maps to be combined.')
     parser.add_argument('--tensor_maps_in', default=[], help='Do not set this directly. Use input_tensors')
     parser.add_argument('--tensor_maps_out', default=[], help='Do not set this directly. Use output_tensors')

@@ -194,10 +193,6 @@ def _process_args(args):
         f.write(k + ' = ' + str(v) + '\n')
     load_config(args.logging_level, os.path.join(args.output_folder, args.id), 'log_' + now_string, args.min_sample_id)
     args.tensor_maps_in = [_get_tmap(it) for it in args.input_tensors]
-    if len(args.input_continuous_tensors) > 0:
-        multi_field_tensor_map = [generate_multi_field_continuous_tensor_map(args.input_continuous_tensors, args.include_missing_continuous_channel,
-                                                                             args.imputation_method_for_continuous_fields)]
-        args.tensor_maps_in.extend(multi_field_tensor_map)

     args.tensor_maps_out = []
     if args.continuous_file is not None:
14 changes: 8 additions & 6 deletions ml4cvd/defines.py
@@ -2,15 +2,16 @@
 from enum import Enum, auto


-class DataSetType(Enum):
-    FLOAT_ARRAY = auto()
+class StorageType(Enum):
     CONTINUOUS = auto()
-    CATEGORICAL = auto()
+    CATEGORICAL_INDEX = auto()
+    CATEGORICAL_FLAG = auto()
+    ONE_HOT = auto()
     STRING = auto()
-    SERIES = auto()
+    BYTE_STRING = auto()

     def __str__(self):
-        """DataSetType.FLOAT_ARRAY becomes float_array"""
+        """StorageType.CONTINUOUS becomes continuous"""
         return str.lower(super().__str__().split('.')[1])

@@ -23,6 +24,7 @@ def __str__(self):
 PDF_EXT = '.pdf'
 TENSOR_EXT = '.hd5'

+STOP_CHAR = '!'
 JOIN_CHAR = '_'
 CONCAT_CHAR = '-'
 HD5_GROUP_CHAR = '/'
@@ -43,7 +45,7 @@ def __str__(self):
 MRI_PATIENT_ORIENTATION = 'mri_patient_orientation'
 MRI_SEGMENTED_CHANNEL_MAP = {'background': 0, 'ventricle': 1, 'myocardium': 2}
 MRI_ANNOTATION_CHANNEL_MAP = {'good': 0, 'included-lvot': 1, 'mistrace': 2, 'phantom-apex': 3, 'hardclip': 4}
-MRI_LAX_3CH_SEGMENTED_CHANNEL_MAP = {'background': 0, 'LV_A_S': 1, 'left_atrium': 2, 'LV_I_P': 3, 'LV_Pap': 4, 'LV_Cavity': 5}
+MRI_LAX_3CH_SEGMENTED_CHANNEL_MAP = {'background': 0, 'LV_anteroseptum': 1, 'left_atrium': 2, 'LV_inferior_wall': 3, 'LV_Papillary': 4, 'LV_Cavity': 5}
 MRI_LAX_4CH_SEGMENTED_CHANNEL_MAP = {'background': 0, 'RV_free_wall': 1, 'RA_free_wall': 2, 'LA_free_wall': 3, 'LV_anterolateral_wall': 4,
                                      'interventricular_septum': 5, 'interatrial_septum': 6, 'crista_terminalis': 7, 'RA_cavity': 8, 'RV_cavity': 9,
                                      'LA_cavity': 10, 'LV_cavity': 11, 'descending_aorta': 12, 'thoracic_cavity': 13}
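
As a quick, runnable illustration of the `__str__` override on the renamed enum (members are those added above; the behavior follows directly from the code):

```python
from enum import Enum, auto

class StorageType(Enum):
    CONTINUOUS = auto()
    ONE_HOT = auto()

    def __str__(self):
        # 'StorageType.ONE_HOT' -> split on '.' -> 'ONE_HOT' -> lowercase
        return str.lower(super().__str__().split('.')[1])

print(str(StorageType.ONE_HOT))  # prints: one_hot
```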
18 changes: 10 additions & 8 deletions ml4cvd/explorations.py
@@ -53,11 +53,13 @@ def sort_csv(input_csv_file, value_csv):
 def predictions_to_pngs(predictions: np.ndarray, tensor_maps_in: List[TensorMap], tensor_maps_out: List[TensorMap], data: Dict[str, np.ndarray],
                         labels: Dict[str, np.ndarray], paths: List[str], folder: str) -> None:
     input_map = tensor_maps_in[0]
+    if not os.path.exists(folder):
+        os.makedirs(folder)
     for y, tm in zip(predictions, tensor_maps_out):
         if not isinstance(predictions, list):  # When a model has a single output, model.predict returns an ndarray; otherwise it returns a list
             y = predictions
         for im in tensor_maps_in:
-            if tm.is_categorical_any() and im.dependent_map == tm:
+            if tm.is_categorical() and im.dependent_map == tm:
                 input_map = im
             elif len(tm.shape) == len(im.shape):
                 input_map = im
@@ -80,7 +82,7 @@ def predictions_to_pngs(predictions: np.ndarray, tensor_maps_in: List[TensorMap],
         elif len(tm.shape) == 3:
             for i in range(y.shape[0]):
                 sample_id = os.path.basename(paths[i]).replace(TENSOR_EXT, '')
-                if tm.is_categorical_any():
+                if tm.is_categorical():
                     plt.imsave(f"{folder}{sample_id}_truth_{i:02d}{IMAGE_EXT}", np.argmax(labels[tm.output_name()][i], axis=-1), cmap='gray')
                     plt.imsave(f"{folder}{sample_id}_prediction_{i:02d}{IMAGE_EXT}", np.argmax(y[i], axis=-1), cmap='gray')
                     if input_map is not None:
@@ -97,7 +99,7 @@ def predictions_to_pngs(predictions: np.ndarray, tensor_maps_in: List[TensorMap],
             for i in range(y.shape[0]):
                 sample_id = os.path.basename(paths[i]).replace(TENSOR_EXT, '')
                 for j in range(y.shape[3]):
-                    if tm.is_categorical_any():
+                    if tm.is_categorical():
                         truth = np.argmax(labels[tm.output_name()][i, :, :, j, :], axis=-1)
                         prediction = np.argmax(y[i, :, :, j, :], axis=-1)
                         true_donut = np.ma.masked_where(truth == 2, data[im.input_name()][i, :, :, j, 0])
@@ -134,12 +136,12 @@ def plot_while_learning(model, tensor_maps_in: List[TensorMap], tensor_maps_out:
             vmin = np.min(mri_in)
             vmax = np.max(mri_in)
             logging.info(f"epoch:{i} write segmented mris y shape:{y.shape} label shape:{test_labels[tm.output_name()].shape} to folder:{folder}")
-            if tm.is_categorical_any() and len(tm.shape) == 3:
+            if tm.is_categorical() and len(tm.shape) == 3:
                 for yi in range(y.shape[0]):
                     plt.imsave(f"{folder}batch_{yi}_truth_epoch_{i:03d}{IMAGE_EXT}", np.argmax(test_labels[tm.output_name()][yi], axis=-1), cmap='gray')
                     plt.imsave(f"{folder}batch_{yi}_prediction_epoch_{i:03d}{IMAGE_EXT}", np.argmax(y[yi], axis=-1), cmap='gray')
                     plt.imsave(f"{folder}batch_{yi}_mri_epoch_{i:03d}{IMAGE_EXT}", mri_in[yi, :, :, 0], cmap='gray', vmin=vmin, vmax=vmax)
-            elif tm.is_categorical_any() and len(tm.shape) == 4:
+            elif tm.is_categorical() and len(tm.shape) == 4:
                 for yi in range(y.shape[0]):
                     for j in range(y.shape[3]):
                         truth = np.argmax(test_labels[tm.output_name()][yi, :, :, j, :], axis=-1)
@@ -341,7 +343,7 @@ def test_labels_to_label_map(test_labels: Dict[TensorMap, np.ndarray], examples:
         if tm.is_continuous():
             label_dict[tm][i] = tm.rescale(test_labels[tm][i])
             continuous_labels.append(tm)
-        elif tm.is_categorical_any():
+        elif tm.is_categorical():
             label_dict[tm][i] = np.argmax(test_labels[tm][i])
             categorical_labels.append(tm)

@@ -364,7 +366,7 @@ def infer_with_pixels(args):
     for ot, otm in zip(args.output_tensors, args.tensor_maps_out):
         if len(otm.shape) == 1 and otm.is_continuous():
             header.extend([ot+'_prediction', ot+'_actual'])
-        elif len(otm.shape) == 1 and otm.is_categorical_any():
+        elif len(otm.shape) == 1 and otm.is_categorical():
             channel_columns = []
             for k in otm.channel_map:
                 channel_columns.append(ot + '_' + k + '_prediction')
Expand Down Expand Up @@ -395,7 +397,7 @@ def infer_with_pixels(args):
csv_row.append("NA")
else:
csv_row.append(str(tm.rescale(true_label[tm.output_name()])[0][0]))
elif len(tm.shape) == 1 and tm.is_categorical_any():
elif len(tm.shape) == 1 and tm.is_categorical():
for k in tm.channel_map:
csv_row.append(str(y[0][tm.channel_map[k]]))
csv_row.append(str(true_label[tm.output_name()][0][tm.channel_map[k]]))
Expand Down
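
All of the segmentation branches above decode one-hot model output the same way before handing it to `plt.imsave`; a minimal self-contained sketch of that step:

```python
import numpy as np

# Fake one-hot segmentation of shape (height, width, channels).
one_hot = np.zeros((4, 4, 3))
one_hot[:2, :, 1] = 1.0   # top half labeled class 1
one_hot[2:, :, 2] = 1.0   # bottom half labeled class 2

# argmax over the channel axis yields an integer class map, ready for imsave(..., cmap='gray').
label_image = np.argmax(one_hot, axis=-1)
print(label_image.shape)  # (4, 4)
```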
20 changes: 14 additions & 6 deletions ml4cvd/metrics.py
@@ -50,10 +50,6 @@ def angle_between_batches(tensors):
     return tf.acos(numerator / (l0*l1))


-def sum_pred_loss(y_true, y_pred):
-    return K.sum(y_pred, axis=-1)
-
-
 def two_batch_euclidean(tensors):
     return K.sqrt(K.sum(K.square(tensors[0] - tensors[1]), axis=-1, keepdims=True) + K.epsilon())

@@ -110,8 +106,20 @@ def ignore_sentinel_logcosh(y_true, y_pred):
     return ignore_sentinel_logcosh


-def sum_pred_loss(y_true, y_pred):
-    return K.sum(y_pred, axis=-1)
+def y_true_times_mse(y_true, y_pred):
+    return K.maximum(y_true, 1.0)*mean_squared_error(y_true, y_pred)
+
+
+def y_true_squared_times_mse(y_true, y_pred):
+    return K.maximum(1.0+y_true, 1.0)*K.maximum(1.0+y_true, 1.0)*mean_squared_error(y_true, y_pred)
+
+
+def y_true_cubed_times_mse(y_true, y_pred):
+    return K.maximum(y_true, 1.0)*K.maximum(y_true, 1.0)*K.maximum(y_true, 1.0)*mean_squared_error(y_true, y_pred)
+
+
+def y_true_squared_times_logcosh(y_true, y_pred):
+    return K.maximum(1.0+y_true, 1.0)*K.maximum(1.0+y_true, 1.0)*logcosh(y_true, y_pred)


 def two_batch_euclidean(tensors):
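
The four new `y_true_*` losses follow one pattern: scale a standard regression loss by a floor-clipped function of the label, so examples with large true values weigh more heavily in training. A hedged usage sketch; the commented compile call is a placeholder and `model` is assumed to exist elsewhere:

```python
from keras import backend as K
from keras.losses import mean_squared_error

def y_true_squared_times_mse(y_true, y_pred):
    # Weight the MSE by max(1 + y_true, 1)^2: labels near zero get weight ~1,
    # large labels get quadratically larger weight.
    weight = K.maximum(1.0 + y_true, 1.0)
    return weight * weight * mean_squared_error(y_true, y_pred)

# model.compile(optimizer='adam', loss=y_true_squared_times_mse)  # placeholder model
```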
42 changes: 21 additions & 21 deletions ml4cvd/models.py
@@ -1,11 +1,12 @@
# models.py
# This file defines model factories.
# Model factories connect input TensorMaps to output TensorMaps with computational graphs.

# Imports
import os
import h5py
import time
import logging
import operator
import numpy as np
from collections import defaultdict
from typing import Dict, List, Tuple, Iterable, Union, Optional
@@ -24,12 +25,12 @@
 from keras.layers import Conv1D, Conv2D, Conv3D, UpSampling1D, UpSampling2D, UpSampling3D, MaxPooling1D
 from keras.layers import MaxPooling2D, MaxPooling3D, AveragePooling1D, AveragePooling2D, AveragePooling3D, Layer

-from ml4cvd.TensorMap import TensorMap
 from ml4cvd.metrics import get_metric_dict
-from ml4cvd.optimizers import get_optimizer
 from ml4cvd.plots import plot_metric_history
+from ml4cvd.TensorMap import TensorMap, Interpretation
 from ml4cvd.defines import JOIN_CHAR, IMAGE_EXT, TENSOR_EXT, ECG_CHAR_2_IDX
+from ml4cvd.optimizers import get_optimizer
 from ml4cvd.lookahead import Lookahead


 CHANNEL_AXIS = -1  # Set to 1 for Theano backend

@@ -202,15 +203,15 @@ def make_character_model(tensor_maps_in: List[TensorMap], tensor_maps_out: List[

     input_layers = []
     for it in tensor_maps_in:
-        if it.is_hidden_layer():
+        if it.is_embedding():
             embed_in = Input(shape=it.shape, name=it.input_name())
             input_layers.append(embed_in)
-        elif it.is_ecg_text():
+        elif it.is_language():
             burn_in = Input(shape=it.shape, name=it.input_name())
             input_layers.append(burn_in)
             repeater = RepeatVector(it.shape[0])
         else:
-            logging.warning(f"character model can't handle {it.name} from group:{it.group}")
+            logging.warning(f"character model can't handle input TensorMap:{it.name} with interpretation:{it.interpretation}")

     logging.info(f"inputs: {[il.name for il in input_layers]}")
     wave_embeds = repeater(embed_in)
@@ -634,7 +635,7 @@ def make_multimodal_multitask_model(tensor_maps_in: List[TensorMap] = None,
             dense_pool_layers = _pool_layers_from_kind_and_dimension(len(tm.shape), pool_type, len(dense_blocks), pool_x, pool_y, pool_z)
             last_conv = _dense_block(last_conv, layers, block_size, dense_conv_fxns, dense_pool_layers, len(tm.shape), activation, conv_normalize,
                                      conv_regularize, conv_dropout)
-            input_multimodal.append(Flatten()(last_conv))
+            input_multimodal.append(Flatten(name=f'embed_{tm.output_name()}')(last_conv))
         else:
             mlp_input = input_tensors[j]
             mlp = _dense_layer(mlp_input, layers, tm.annotation_units, activation, conv_normalize)
@@ -689,15 +690,15 @@ def make_multimodal_multitask_model(tensor_maps_in: List[TensorMap] = None,
             last_conv = conv_layer(filters=all_filters[-(1 + i)], kernel_size=kernel, padding=padding)(last_conv)

         conv_label = conv_layer(tm.shape[channel_axis], _one_by_n_kernel(len(tm.shape)), activation="linear")(last_conv)
-        output_predictions[tm.output_name()] = Activation(tm.activation, name=tm.output_name())(conv_label)
+        output_predictions[tm] = Activation(tm.activation, name=tm.output_name())(conv_label)
     elif tm.parents is not None:
-        parented_activation = concatenate([multimodal_activation] + [output_predictions[p.output_name()] for p in tm.parents])
+        parented_activation = concatenate([multimodal_activation] + [output_predictions[p] for p in tm.parents])
         parented_activation = _dense_layer(parented_activation, layers, tm.annotation_units, activation, conv_normalize)
-        output_predictions[tm.output_name()] = Dense(units=tm.shape[0], activation=tm.activation, name=tm.output_name())(parented_activation)
-    elif tm.is_categorical_any():
-        output_predictions[tm.output_name()] = Dense(units=tm.shape[0], activation='softmax', name=tm.output_name())(multimodal_activation)
+        output_predictions[tm] = Dense(units=tm.shape[0], activation=tm.activation, name=tm.output_name())(parented_activation)
+    elif tm.is_categorical():
+        output_predictions[tm] = Dense(units=tm.shape[0], activation='softmax', name=tm.output_name())(multimodal_activation)
     else:
-        output_predictions[tm.output_name()] = Dense(units=tm.shape[0], activation=tm.activation, name=tm.output_name())(multimodal_activation)
+        output_predictions[tm] = Dense(units=tm.shape[0], activation=tm.activation, name=tm.output_name())(multimodal_activation)

     m = Model(inputs=input_tensors, outputs=list(output_predictions.values()))
     m.summary()
@@ -1129,13 +1130,12 @@ def _gradients_from_output(model, output_layer, output_index):
     return iterate


-def _get_tensor_maps_for_characters(tensor_maps_in: List[TensorMap], base_model: Model):
-    embed_model = make_hidden_layer_model(base_model, tensor_maps_in, 'embed')
-    tm_embed = TensorMap('embed', shape=(64,), group='hidden_layer', required_inputs=tensor_maps_in.copy(), model=embed_model)
-    tm_char = TensorMap('ecg_rest_next_char', shape=(len(ECG_CHAR_2_IDX),), channel_map=ECG_CHAR_2_IDX, activation='softmax', loss='categorical_crossentropy',
-                        loss_weight=1.0, cacheable=False)
-    tm_burn_in = TensorMap('ecg_rest_text', shape=(100, len(ECG_CHAR_2_IDX)), group='ecg_text', channel_map={'context': 0, 'alphabet': 1},
-                           dependent_map=tm_char, cacheable=False)
+def _get_tensor_maps_for_characters(tensor_maps_in: List[TensorMap], base_model: Model, embed_name='embed', embed_size=64, burn_in=100):
+    embed_model = make_hidden_layer_model(base_model, tensor_maps_in, embed_name)
+    tm_embed = TensorMap(embed_name, shape=(embed_size,), interpretation=Interpretation.EMBEDDING, parents=tensor_maps_in.copy(), model=embed_model)
+    tm_char = TensorMap('ecg_rest_next_char', shape=(len(ECG_CHAR_2_IDX),), interpretation=Interpretation.LANGUAGE, channel_map=ECG_CHAR_2_IDX, cacheable=False)
+    tm_burn_in = TensorMap('ecg_rest_text', shape=(burn_in, len(ECG_CHAR_2_IDX)), interpretation=Interpretation.LANGUAGE,
+                           channel_map={'context': 0, 'alphabet': 1}, dependent_map=tm_char, cacheable=False)
     return [tm_embed, tm_burn_in], [tm_char]

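
A notable pattern change above: `output_predictions` is now keyed by `TensorMap` objects rather than their output names, so a map with `parents` can look up its parents' activations directly. For that to work, `TensorMap` must be hashable; a minimal stand-in sketch of the idea (a simplified class, not the real TensorMap):

```python
class TinyMap:
    """Stand-in for TensorMap, hashable by name so instances can key a dict."""
    def __init__(self, name, parents=None):
        self.name = name
        self.parents = parents or []

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        return isinstance(other, TinyMap) and self.name == other.name

parent = TinyMap('ecg_rest')
child = TinyMap('ecg_rest_next_char', parents=[parent])
output_predictions = {parent: 'parent_activation_tensor'}
# The new style: index by the TensorMap itself, no output_name() round-trip.
print([output_predictions[p] for p in child.parents])  # ['parent_activation_tensor']
```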
14 changes: 7 additions & 7 deletions ml4cvd/plots.py
@@ -78,37 +78,37 @@ def evaluate_predictions(tm: TensorMap, y_predictions: np.ndarray, y_truth: np.n
     :return: Dictionary of performance metrics with string keys for labels and float values
     """
     performance_metrics = {}
-    if tm.is_categorical_any() and len(tm.shape) == 1:
+    if tm.is_categorical() and tm.axes() == 1:
         logging.info(f"For tm:{tm.name} with channel map:{tm.channel_map} examples:{y_predictions.shape[0]}")
         logging.info(f"\nSum Truth:{np.sum(y_truth, axis=0)} \nSum pred :{np.sum(y_predictions, axis=0)}")
         plot_precision_recall_per_class(y_predictions, y_truth, tm.channel_map, title, folder)
         performance_metrics.update(plot_roc_per_class(y_predictions, y_truth, tm.channel_map, title, folder))
         rocs.append((y_predictions, y_truth, tm.channel_map))
-    elif tm.is_categorical() and len(tm.shape) == 2:
+    elif tm.is_categorical() and tm.axes() == 2:
         melt_shape = (y_predictions.shape[0] * y_predictions.shape[1], y_predictions.shape[2])
         idx = np.random.choice(np.arange(melt_shape[0]), max_melt, replace=False)
         y_predictions = y_predictions.reshape(melt_shape)[idx]
         y_truth = y_truth.reshape(melt_shape)[idx]
         performance_metrics.update(plot_roc_per_class(y_predictions, y_truth, tm.channel_map, title, folder))
         performance_metrics.update(plot_precision_recall_per_class(y_predictions, y_truth, tm.channel_map, title, folder))
         rocs.append((y_predictions, y_truth, tm.channel_map))
-    elif tm.is_categorical() and len(tm.shape) == 3:
+    elif tm.is_categorical() and tm.axes() == 3:
         melt_shape = (y_predictions.shape[0] * y_predictions.shape[1] * y_predictions.shape[2], y_predictions.shape[3])
         idx = np.random.choice(np.arange(melt_shape[0]), max_melt, replace=False)
         y_predictions = y_predictions.reshape(melt_shape)[idx]
         y_truth = y_truth.reshape(melt_shape)[idx]
         performance_metrics.update(plot_roc_per_class(y_predictions, y_truth, tm.channel_map, title, folder))
         performance_metrics.update(plot_precision_recall_per_class(y_predictions, y_truth, tm.channel_map, title, folder))
         rocs.append((y_predictions, y_truth, tm.channel_map))
-    elif tm.is_categorical_any() and len(tm.shape) == 4:
+    elif tm.is_categorical() and tm.axes() == 4:
         melt_shape = (y_predictions.shape[0] * y_predictions.shape[1] * y_predictions.shape[2] * y_predictions.shape[3], y_predictions.shape[4])
         idx = np.random.choice(np.arange(melt_shape[0]), max_melt, replace=False)
         y_predictions = y_predictions.reshape(melt_shape)[idx]
         y_truth = y_truth.reshape(melt_shape)[idx]
         performance_metrics.update(plot_roc_per_class(y_predictions, y_truth, tm.channel_map, title, folder))
         performance_metrics.update(plot_precision_recall_per_class(y_predictions, y_truth, tm.channel_map, title, folder))
         rocs.append((y_predictions, y_truth, tm.channel_map))
-    elif tm.is_proportional_hazard():
+    elif tm.is_cox_proportional_hazard():
         plot_survival(y_predictions, y_truth, title, prefix=folder)
         plot_survival_curves(y_predictions, y_truth, title, prefix=folder, paths=test_paths)
     elif len(tm.shape) > 1:
@@ -171,7 +171,7 @@ def plot_metric_history(history, title, prefix='./figures/'):

 def plot_scatter(prediction, truth, title, prefix='./figures/', paths=None, top_k=3, alpha=0.5):
     margin = float((np.max(truth)-np.min(truth))/100)
-    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(SUBPLOT_SIZE, 2 * SUBPLOT_SIZE), sharex='all')
+    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(SUBPLOT_SIZE, 2 * SUBPLOT_SIZE))
     ax1.plot([np.min(truth), np.max(truth)], [np.min(truth), np.max(truth)], linewidth=2)
     ax1.plot([np.min(prediction), np.max(prediction)], [np.min(prediction), np.max(prediction)], linewidth=4)
     pearson = np.corrcoef(prediction.flatten(), truth.flatten())[1, 0]  # corrcoef returns full covariance matrix
@@ -190,7 +190,7 @@ def plot_scatter(prediction, truth, title, prefix='./figures/', paths=None, top_
     ax1.set_xlabel('Predictions')
     ax1.set_ylabel('Actual')
     ax1.set_title(title + '\n')
-    ax1.legend(loc="upper left")
+    ax1.legend(loc="lower right")

     sns.distplot(prediction, label='Predicted', color='r', ax=ax2)
     sns.distplot(truth, label='Truth', color='b', ax=ax2)
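
The repeated `len(tm.shape)` → `tm.axes()` substitution above implies TensorMap gained a small accessor. Its body is not shown in this commit, but from the call sites it is presumably equivalent to this one-liner (a hypothetical reconstruction, not the actual method):

```python
def axes(self) -> int:
    # Hypothetical TensorMap method inferred from the plots.py call sites:
    # the number of dimensions of the tensor this map produces.
    return len(self.shape)
```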
