From 7a6ba15980f043f317227dc2ff4da03c02f120c7 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Fri, 29 May 2020 18:43:32 +0200 Subject: [PATCH 01/20] Add predicted box as output --- tf2_yolov4/heads/yolov3_head.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tf2_yolov4/heads/yolov3_head.py b/tf2_yolov4/heads/yolov3_head.py index f3fa32f..ff28a6b 100644 --- a/tf2_yolov4/heads/yolov3_head.py +++ b/tf2_yolov4/heads/yolov3_head.py @@ -178,6 +178,7 @@ def yolov3_boxes_regression(feats_per_stage, anchors_per_stage): box_xy = tf.sigmoid(box_xy) objectness = tf.sigmoid(objectness) class_probs = tf.sigmoid(class_probs) + predicted_box = tf.concat((box_xy, box_wh), axis=-1) grid = tf.meshgrid(tf.range(grid_size_y), tf.range(grid_size_x)) grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) # [gy, gx, 1, 2] @@ -191,7 +192,7 @@ def yolov3_boxes_regression(feats_per_stage, anchors_per_stage): box_x2y2 = box_xy + box_wh / 2 bbox = tf.concat([box_x1y1, box_x2y2], axis=-1) - return bbox, objectness, class_probs + return bbox, objectness, class_probs, predicted_box def yolo_nms(yolo_feats, yolo_max_boxes, yolo_iou_threshold, yolo_score_threshold): From 6290708aa11213a4c86803a4bf966380e0981aee Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Fri, 29 May 2020 18:43:40 +0200 Subject: [PATCH 02/20] Add ugly training script --- scripts/train.py | 201 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 201 insertions(+) create mode 100644 scripts/train.py diff --git a/scripts/train.py b/scripts/train.py new file mode 100644 index 0000000..65070e2 --- /dev/null +++ b/scripts/train.py @@ -0,0 +1,201 @@ +""" +Training script for Pascal VOC using tf2-yolov4 +""" +import numpy as np +import tensorflow as tf +import tensorflow_datasets as tfds + +from tf2_yolov4.anchors import YOLOV4_ANCHORS +from tf2_yolov4.heads.yolov3_head import yolov3_boxes_regression +from tf2_yolov4.model import YOLOv4 + +YOLOV4_ANCHORS_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + +INPUT_SHAPE = (608, 608, 3) +BATCH_SIZE = 1 +BOUNDING_BOXES_FIXED_NUMBER = 10 + + +def broadcast_iou(box_1, box_2): + # box_1: (..., (x1, y1, x2, y2)) + # box_2: (N, (x1, y1, x2, y2)) + + # broadcast boxes + box_1 = tf.expand_dims(box_1, -2) + box_2 = tf.expand_dims(box_2, 0) + # new_shape: (..., N, (x1, y1, x2, y2)) + new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2)) + box_1 = tf.broadcast_to(box_1, new_shape) + box_2 = tf.broadcast_to(box_2, new_shape) + + int_w = tf.maximum(tf.minimum(box_1[..., 2], box_2[..., 2]) - + tf.maximum(box_1[..., 0], box_2[..., 0]), 0) + int_h = tf.maximum(tf.minimum(box_1[..., 3], box_2[..., 3]) - + tf.maximum(box_1[..., 1], box_2[..., 1]), 0) + int_area = int_w * int_h + box_1_area = (box_1[..., 2] - box_1[..., 0]) * (box_1[..., 3] - box_1[..., 1]) + box_2_area = (box_2[..., 2] - box_2[..., 0]) * (box_2[..., 3] - box_2[..., 1]) + return int_area / (box_1_area + box_2_area - int_area) + + +def YoloLoss(anchors, ignore_thresh=0.5): + def yolo_loss(y_true, y_pred): + # 1. transform all pred outputs + # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls)) + pred_box, pred_obj, pred_class, pred_xywh = yolov3_boxes_regression(y_pred, anchors) + pred_xy = pred_xywh[..., 0:2] + pred_wh = pred_xywh[..., 2:4] + + # 2. 
transform all true outputs + # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls)) + true_box, true_obj, true_class_idx = tf.split( + y_true, (4, 1, 1), axis=-1) + true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2 + true_wh = true_box[..., 2:4] - true_box[..., 0:2] + + # give higher weights to small boxes + box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1] + + # 3. inverting the pred box equations + grid_size = tf.shape(y_true)[1] + grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size)) + grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) + true_xy = true_xy * tf.cast(grid_size, tf.float32) - tf.cast(grid, tf.float32) + true_wh = tf.math.log(true_wh / anchors) + true_wh = tf.where(tf.math.is_inf(true_wh), + tf.zeros_like(true_wh), true_wh) + + # 4. calculate all masks + obj_mask = tf.squeeze(true_obj, -1) + # ignore false positive when iou is over threshold + best_iou = tf.map_fn( + lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask( + x[1], tf.cast(x[2], tf.bool))), axis=-1), + (pred_box, true_box, obj_mask), + tf.float32) + ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32) + + # 5. calculate all losses + xy_loss = obj_mask * box_loss_scale * tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1) + wh_loss = obj_mask * box_loss_scale * tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1) + obj_loss = tf.keras.losses.binary_crossentropy(true_obj, pred_obj) + obj_loss = obj_mask * obj_loss + (1 - obj_mask) * ignore_mask * obj_loss + # TODO: use binary_crossentropy instead + class_loss = obj_mask * tf.keras.losses.sparse_categorical_crossentropy(true_class_idx, pred_class) + + # 6. sum over (batch, gridx, gridy, anchors) => (batch, 1) + xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3)) + wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3)) + obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3)) + class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3)) + + return xy_loss + wh_loss + obj_loss + class_loss + return yolo_loss + + +@tf.function +def transform_targets_for_output(y_true, grid_size, anchor_idxs): + # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor)) + N = tf.shape(y_true)[0] + + # y_true_out: (N, grid, grid, anchors, [x, y, w, h, obj, class]) + y_true_out = tf.zeros( + (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6)) + + anchor_idxs = tf.cast(anchor_idxs, tf.int32) + + indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True) + updates = tf.TensorArray(tf.float32, 1, dynamic_size=True) + idx = 0 + for i in tf.range(N): + for j in tf.range(tf.shape(y_true)[1]): + if tf.equal(y_true[i][j][2], 0): + continue + anchor_eq = tf.equal( + anchor_idxs, tf.cast(y_true[i][j][5], tf.int32)) + + if tf.reduce_any(anchor_eq): + box = y_true[i][j][0:4] + box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2 + + anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32) + grid_xy = tf.cast(box_xy // (1/grid_size), tf.int32) + + # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class) + indexes = indexes.write( + idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]]) + updates = updates.write( + idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]]) + idx += 1 + + return tf.tensor_scatter_nd_update(y_true_out, indexes.stack(), updates.stack()) + + +def transform_targets(y_train, anchors, anchor_masks, size): + y_outs = [] + grid_size = size // 32 + + # calculate anchor index for true boxes + anchors = tf.cast(anchors, tf.float32) + anchor_area = anchors[..., 0] * anchors[..., 1] + box_wh = y_train[..., 2:4] - y_train[..., 0:2] + box_wh = 
tf.tile(tf.expand_dims(box_wh, -2), + (1, 1, tf.shape(anchors)[0], 1)) + box_area = box_wh[..., 0] * box_wh[..., 1] + intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * tf.minimum(box_wh[..., 1], anchors[..., 1]) + iou = intersection / (box_area + anchor_area - intersection) + anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32) + anchor_idx = tf.expand_dims(anchor_idx, axis=-1) + + y_train = tf.concat([y_train, anchor_idx], axis=-1) + + for anchor_idxs in anchor_masks: + y_outs.append(transform_targets_for_output( + y_train, grid_size, anchor_idxs)) + grid_size *= 2 + + return tuple(y_outs) + + +def pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_number): + box_number = tf.shape(bounding_boxes)[0] + paddings = [[0, pad_number - box_number], [0, 0]] + + return tf.pad(bounding_boxes, paddings, constant_values=0.) + + +ds_train = tfds.load('voc', split='train', shuffle_files=True) +ds_train = ds_train.map(lambda el: (el["image"], el["objects"])) +ds_train = ds_train.map( + lambda image, object: ( + image, + tf.concat([object["bbox"], tf.expand_dims(tf.cast(object["label"], tf.float32), axis=-1)], axis=-1) + ) +) +ds_train = ds_train.map( + lambda image, bounding_boxes: ( + image, + pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_number=BOUNDING_BOXES_FIXED_NUMBER), + ) +) +ds_train = ds_train.batch(BATCH_SIZE) +ds_train = ds_train.map( + lambda image, bounding_box_with_class: ( + tf.image.resize(image, INPUT_SHAPE[:2]) / 255., + transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ + bounding_box_with_class, + np.concatenate(YOLOV4_ANCHORS, axis=0), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of + YOLOV4_ANCHORS_MASKS, + INPUT_SHAPE[0], # Assumes square input + ) + ) +) + +model = YOLOv4(input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, num_classes=80, training=True) + +optimizer = tf.keras.optimizers.Adam(lr=1e-4) +loss = [YoloLoss(np.concatenate(YOLOV4_ANCHORS, axis=0)[mask]) for mask in YOLOV4_ANCHORS_MASKS] + +model.compile(optimizer=optimizer, loss=loss) + +history = model.fit(ds_train, epochs=2) From 399d7659f16a9295140ae909a9a6c6c320b20d5a Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sun, 31 May 2020 14:28:21 +0200 Subject: [PATCH 03/20] Black --- scripts/train.py | 74 ++++++++++++++++++++++++++++++------------------ 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 65070e2..8c65ab3 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -28,10 +28,16 @@ def broadcast_iou(box_1, box_2): box_1 = tf.broadcast_to(box_1, new_shape) box_2 = tf.broadcast_to(box_2, new_shape) - int_w = tf.maximum(tf.minimum(box_1[..., 2], box_2[..., 2]) - - tf.maximum(box_1[..., 0], box_2[..., 0]), 0) - int_h = tf.maximum(tf.minimum(box_1[..., 3], box_2[..., 3]) - - tf.maximum(box_1[..., 1], box_2[..., 1]), 0) + int_w = tf.maximum( + tf.minimum(box_1[..., 2], box_2[..., 2]) + - tf.maximum(box_1[..., 0], box_2[..., 0]), + 0, + ) + int_h = tf.maximum( + tf.minimum(box_1[..., 3], box_2[..., 3]) + - tf.maximum(box_1[..., 1], box_2[..., 1]), + 0, + ) int_area = int_w * int_h box_1_area = (box_1[..., 2] - box_1[..., 0]) * (box_1[..., 3] - box_1[..., 1]) box_2_area = (box_2[..., 2] - box_2[..., 0]) * (box_2[..., 3] - box_2[..., 1]) @@ -42,14 +48,15 @@ def YoloLoss(anchors, ignore_thresh=0.5): def yolo_loss(y_true, y_pred): # 1. 
transform all pred outputs # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls)) - pred_box, pred_obj, pred_class, pred_xywh = yolov3_boxes_regression(y_pred, anchors) + pred_box, pred_obj, pred_class, pred_xywh = yolov3_boxes_regression( + y_pred, anchors + ) pred_xy = pred_xywh[..., 0:2] pred_wh = pred_xywh[..., 2:4] # 2. transform all true outputs # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls)) - true_box, true_obj, true_class_idx = tf.split( - y_true, (4, 1, 1), axis=-1) + true_box, true_obj, true_class_idx = tf.split(y_true, (4, 1, 1), axis=-1) true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2 true_wh = true_box[..., 2:4] - true_box[..., 0:2] @@ -62,26 +69,38 @@ def yolo_loss(y_true, y_pred): grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) true_xy = true_xy * tf.cast(grid_size, tf.float32) - tf.cast(grid, tf.float32) true_wh = tf.math.log(true_wh / anchors) - true_wh = tf.where(tf.math.is_inf(true_wh), - tf.zeros_like(true_wh), true_wh) + true_wh = tf.where(tf.math.is_inf(true_wh), tf.zeros_like(true_wh), true_wh) # 4. calculate all masks obj_mask = tf.squeeze(true_obj, -1) # ignore false positive when iou is over threshold best_iou = tf.map_fn( - lambda x: tf.reduce_max(broadcast_iou(x[0], tf.boolean_mask( - x[1], tf.cast(x[2], tf.bool))), axis=-1), + lambda x: tf.reduce_max( + broadcast_iou(x[0], tf.boolean_mask(x[1], tf.cast(x[2], tf.bool))), + axis=-1, + ), (pred_box, true_box, obj_mask), - tf.float32) + tf.float32, + ) ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32) # 5. calculate all losses - xy_loss = obj_mask * box_loss_scale * tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1) - wh_loss = obj_mask * box_loss_scale * tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1) + xy_loss = ( + obj_mask + * box_loss_scale + * tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1) + ) + wh_loss = ( + obj_mask + * box_loss_scale + * tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1) + ) obj_loss = tf.keras.losses.binary_crossentropy(true_obj, pred_obj) obj_loss = obj_mask * obj_loss + (1 - obj_mask) * ignore_mask * obj_loss # TODO: use binary_crossentropy instead - class_loss = obj_mask * tf.keras.losses.sparse_categorical_crossentropy(true_class_idx, pred_class) + class_loss = obj_mask * tf.keras.losses.sparse_categorical_crossentropy( + true_class_idx, pred_class + ) # 6. 
sum over (batch, gridx, gridy, anchors) => (batch, 1) xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3)) @@ -90,6 +109,7 @@ def yolo_loss(y_true, y_pred): class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3)) return xy_loss + wh_loss + obj_loss + class_loss + return yolo_loss @@ -99,8 +119,7 @@ def transform_targets_for_output(y_true, grid_size, anchor_idxs): N = tf.shape(y_true)[0] # y_true_out: (N, grid, grid, anchors, [x, y, w, h, obj, class]) - y_true_out = tf.zeros( - (N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6)) + y_true_out = tf.zeros((N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6)) anchor_idxs = tf.cast(anchor_idxs, tf.int32) @@ -111,21 +130,22 @@ def transform_targets_for_output(y_true, grid_size, anchor_idxs): for j in tf.range(tf.shape(y_true)[1]): if tf.equal(y_true[i][j][2], 0): continue - anchor_eq = tf.equal( - anchor_idxs, tf.cast(y_true[i][j][5], tf.int32)) + anchor_eq = tf.equal(anchor_idxs, tf.cast(y_true[i][j][5], tf.int32)) if tf.reduce_any(anchor_eq): box = y_true[i][j][0:4] box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2 anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32) - grid_xy = tf.cast(box_xy // (1/grid_size), tf.int32) + grid_xy = tf.cast(box_xy // (1 / grid_size), tf.int32) # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class) indexes = indexes.write( - idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]]) + idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]] + ) updates = updates.write( - idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]]) + idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]] + ) idx += 1 return tf.tensor_scatter_nd_update(y_true_out, indexes.stack(), updates.stack()) @@ -139,10 +159,11 @@ def transform_targets(y_train, anchors, anchor_masks, size): anchors = tf.cast(anchors, tf.float32) anchor_area = anchors[..., 0] * anchors[..., 1] box_wh = y_train[..., 2:4] - y_train[..., 0:2] - box_wh = tf.tile(tf.expand_dims(box_wh, -2), - (1, 1, tf.shape(anchors)[0], 1)) + box_wh = tf.tile(tf.expand_dims(box_wh, -2), (1, 1, tf.shape(anchors)[0], 1)) box_area = box_wh[..., 0] * box_wh[..., 1] - intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * tf.minimum(box_wh[..., 1], anchors[..., 1]) + intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * tf.minimum( + box_wh[..., 1], anchors[..., 1] + ) iou = intersection / (box_area + anchor_area - intersection) anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32) anchor_idx = tf.expand_dims(anchor_idx, axis=-1) @@ -150,8 +171,7 @@ def transform_targets(y_train, anchors, anchor_masks, size): y_train = tf.concat([y_train, anchor_idx], axis=-1) for anchor_idxs in anchor_masks: - y_outs.append(transform_targets_for_output( - y_train, grid_size, anchor_idxs)) + y_outs.append(transform_targets_for_output(y_train, grid_size, anchor_idxs)) grid_size *= 2 return tuple(y_outs) From 88e235e58e1931b0de5e74946541ad342ac31b7a Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sun, 31 May 2020 14:28:36 +0200 Subject: [PATCH 04/20] Add callbacks and validation data --- scripts/train.py | 90 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 62 insertions(+), 28 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 8c65ab3..1fc9e4d 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -181,41 +181,75 @@ def pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_num box_number = tf.shape(bounding_boxes)[0] paddings = [[0, pad_number - box_number], [0, 0]] - return tf.pad(bounding_boxes, paddings, constant_values=0.) 
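# A minimal illustrative sketch of what the padding helper above does, with
# made-up rows of four box coordinates plus a class id: three boxes are padded
# with all-zero rows up to the requested fixed size.
example_boxes = tf.constant(
    [[0.1, 0.2, 0.4, 0.5, 7.0],
     [0.3, 0.1, 0.9, 0.8, 11.0],
     [0.0, 0.0, 0.2, 0.2, 3.0]]
)
padded_boxes = pad_bounding_boxes_to_fixed_number_of_bounding_boxes(
    example_boxes, pad_number=10
)
assert padded_boxes.shape == (10, 5)  # 3 real boxes followed by 7 zero rows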
- - -ds_train = tfds.load('voc', split='train', shuffle_files=True) -ds_train = ds_train.map(lambda el: (el["image"], el["objects"])) -ds_train = ds_train.map( - lambda image, object: ( - image, - tf.concat([object["bbox"], tf.expand_dims(tf.cast(object["label"], tf.float32), axis=-1)], axis=-1) + return tf.pad(bounding_boxes, paddings, constant_values=0.0) + + +def prepare_dataset(dataset): + dataset = dataset.map(lambda el: (el["image"], el["objects"])) + dataset = dataset.map( + lambda image, object: ( + image, + tf.concat( + [ + object["bbox"], + tf.expand_dims(tf.cast(object["label"], tf.float32), axis=-1), + ], + axis=-1, + ), + ) ) -) -ds_train = ds_train.map( - lambda image, bounding_boxes: ( - image, - pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_number=BOUNDING_BOXES_FIXED_NUMBER), + dataset = dataset.map( + lambda image, bounding_boxes: ( + image, + pad_bounding_boxes_to_fixed_number_of_bounding_boxes( + bounding_boxes, pad_number=BOUNDING_BOXES_FIXED_NUMBER + ), + ) ) -) -ds_train = ds_train.batch(BATCH_SIZE) -ds_train = ds_train.map( - lambda image, bounding_box_with_class: ( - tf.image.resize(image, INPUT_SHAPE[:2]) / 255., - transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ - bounding_box_with_class, - np.concatenate(YOLOV4_ANCHORS, axis=0), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of - YOLOV4_ANCHORS_MASKS, - INPUT_SHAPE[0], # Assumes square input + dataset = dataset.batch(BATCH_SIZE) + dataset = dataset.map( + lambda image, bounding_box_with_class: ( + tf.image.resize(image, INPUT_SHAPE[:2]) / 255.0, + transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ + bounding_box_with_class, + np.concatenate( + YOLOV4_ANCHORS, axis=0 + ), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of anchors + YOLOV4_ANCHORS_MASKS, + INPUT_SHAPE[0], # Assumes square input + ), ) ) -) -model = YOLOv4(input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, num_classes=80, training=True) + return dataset + + +voc_dataset = tfds.load("voc", shuffle_files=True) +ds_train, ds_test = voc_dataset["train"], voc_dataset["test"] +ds_train = prepare_dataset(ds_train) +ds_test = prepare_dataset(ds_test) + +model = YOLOv4( + input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, num_classes=80, training=True +) optimizer = tf.keras.optimizers.Adam(lr=1e-4) -loss = [YoloLoss(np.concatenate(YOLOV4_ANCHORS, axis=0)[mask]) for mask in YOLOV4_ANCHORS_MASKS] +loss = [ + YoloLoss(np.concatenate(YOLOV4_ANCHORS, axis=0)[mask]) + for mask in YOLOV4_ANCHORS_MASKS +] model.compile(optimizer=optimizer, loss=loss) -history = model.fit(ds_train, epochs=2) +history = model.fit( + ds_train, + validation_data=ds_test, + validation_steps=100, + epochs=2, + callbacks=[ + tf.keras.callbacks.TensorBoard(log_dir="./logs"), + tf.keras.callbacks.ModelCheckpoint( + "yolov4_best.h5", save_best_only=True, save_weights_only=True + ), + ], +) From 3aced1e0ed62a4afe3728079f3c419edeb1f0a71 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sun, 31 May 2020 16:03:02 +0200 Subject: [PATCH 05/20] Update training script --- scripts/train.py | 35 ++++++++++++++++++++++++----------- tests/test_model.py | 2 +- 2 files changed, 25 insertions(+), 12 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 1fc9e4d..e9d834a 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -12,8 +12,9 @@ YOLOV4_ANCHORS_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] INPUT_SHAPE = (608, 608, 3) -BATCH_SIZE = 1 -BOUNDING_BOXES_FIXED_NUMBER = 10 
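# Sketch of how YOLOV4_ANCHORS_MASKS (defined a few lines above) is used later
# in this script: the three anchor groups are flattened into one array and each
# mask picks the three anchors of one output stage, which is why model.compile
# receives a list of three YoloLoss instances. The (9, 2) shape is an
# assumption (three groups of three (width, height) anchors).
flat_anchors = np.concatenate(YOLOV4_ANCHORS, axis=0)          # expected (9, 2)
coarse_stage_anchors = flat_anchors[YOLOV4_ANCHORS_MASKS[0]]   # mask [6, 7, 8]
fine_stage_anchors = flat_anchors[YOLOV4_ANCHORS_MASKS[2]]     # mask [0, 1, 2]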
+BATCH_SIZE = 16 +BOUNDING_BOXES_FIXED_NUMBER = 50 +PASCAL_VOC_NUM_CLASSES = 20 def broadcast_iou(box_1, box_2): @@ -184,7 +185,7 @@ def pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_num return tf.pad(bounding_boxes, paddings, constant_values=0.0) -def prepare_dataset(dataset): +def prepare_dataset(dataset, shuffle=True): dataset = dataset.map(lambda el: (el["image"], el["objects"])) dataset = dataset.map( lambda image, object: ( @@ -196,7 +197,8 @@ def prepare_dataset(dataset): ], axis=-1, ), - ) + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, ) dataset = dataset.map( lambda image, bounding_boxes: ( @@ -204,8 +206,18 @@ def prepare_dataset(dataset): pad_bounding_boxes_to_fixed_number_of_bounding_boxes( bounding_boxes, pad_number=BOUNDING_BOXES_FIXED_NUMBER ), - ) + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, + ) + dataset = dataset.map( + lambda image, bounding_box: ( + tf.image.resize(image, INPUT_SHAPE[:2]) / 255.0, + bounding_box, + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, ) + if shuffle: + dataset = dataset.shuffle(buffer_size=1000) dataset = dataset.batch(BATCH_SIZE) dataset = dataset.map( lambda image, bounding_box_with_class: ( @@ -218,7 +230,8 @@ def prepare_dataset(dataset): YOLOV4_ANCHORS_MASKS, INPUT_SHAPE[0], # Assumes square input ), - ) + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, ) return dataset @@ -226,11 +239,11 @@ def prepare_dataset(dataset): voc_dataset = tfds.load("voc", shuffle_files=True) ds_train, ds_test = voc_dataset["train"], voc_dataset["test"] -ds_train = prepare_dataset(ds_train) -ds_test = prepare_dataset(ds_test) +ds_train = prepare_dataset(ds_train, shuffle=True) +ds_test = prepare_dataset(ds_test, shuffle=False) model = YOLOv4( - input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, num_classes=80, training=True + input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, num_classes=PASCAL_VOC_NUM_CLASSES, training=True ) optimizer = tf.keras.optimizers.Adam(lr=1e-4) @@ -244,8 +257,8 @@ def prepare_dataset(dataset): history = model.fit( ds_train, validation_data=ds_test, - validation_steps=100, - epochs=2, + validation_steps=10, + epochs=100, callbacks=[ tf.keras.callbacks.TensorBoard(log_dir="./logs"), tf.keras.callbacks.ModelCheckpoint( diff --git a/tests/test_model.py b/tests/test_model.py index 945a86a..9e6be5c 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -37,7 +37,7 @@ def test_model_should_predict_valid_shapes_at_inference( @pytest.mark.parametrize("input_shape", [(32, 33, 3), (33, 32, 3)]) def test_model_instanciation_should_fail_with_input_shapes_not_multiple_of_32( - input_shape + input_shape, ): with pytest.raises(ValueError): YOLOv4(input_shape, 80, []) From 3c3921245a63fdace9faa9c11012d6440c3d0b27 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Wed, 3 Jun 2020 18:08:29 +0200 Subject: [PATCH 06/20] Progressive training with frozen layers --- scripts/train.py | 60 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 54 insertions(+), 6 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index e9d834a..c62a48f 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -1,6 +1,9 @@ """ Training script for Pascal VOC using tf2-yolov4 """ +from datetime import datetime +from pathlib import Path + import numpy as np import tensorflow as tf import tensorflow_datasets as tfds @@ -12,10 +15,12 @@ YOLOV4_ANCHORS_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] INPUT_SHAPE = (608, 608, 3) -BATCH_SIZE = 16 +BATCH_SIZE = 8 BOUNDING_BOXES_FIXED_NUMBER = 50 
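# Hypothetical helper (name and usage are an illustrative assumption) to
# sanity-check the freeze/unfreeze phases added further down: the
# trainable-parameter count should shrink once the backbone and neck are
# frozen and grow back when they are unfrozen and the model is recompiled.
def count_trainable_params(keras_model):
    return int(
        sum(tf.keras.backend.count_params(w) for w in keras_model.trainable_weights)
    )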
PASCAL_VOC_NUM_CLASSES = 20 +LOG_DIR = Path("./logs") / datetime.now().strftime("%m-%d-%Y %H:%M:%S") + def broadcast_iou(box_1, box_2): # box_1: (..., (x1, y1, x2, y2)) @@ -245,24 +250,67 @@ def prepare_dataset(dataset, shuffle=True): model = YOLOv4( input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, num_classes=PASCAL_VOC_NUM_CLASSES, training=True ) +darknet_weights = Path("./yolov4.h5") +if darknet_weights.exists(): + model.load_weights(str(darknet_weights), by_name=True, skip_mismatch=True) + print("Darknet weights loaded.") -optimizer = tf.keras.optimizers.Adam(lr=1e-4) +optimizer = tf.keras.optimizers.Adam(1e-4) loss = [ YoloLoss(np.concatenate(YOLOV4_ANCHORS, axis=0)[mask]) for mask in YOLOV4_ANCHORS_MASKS ] +model.summary() +# Start training: 5 epochs with backbone + neck frozen +ALL_FROZEN_EPOCH_NUMBER = 10 +for layer in model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers: + layer.trainable = False +model.compile(optimizer=optimizer, loss=loss) +history = model.fit( + ds_train, + validation_data=ds_test, + validation_steps=10, + epochs=ALL_FROZEN_EPOCH_NUMBER, + callbacks=[ + tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), + tf.keras.callbacks.ModelCheckpoint( + "yolov4_all_frozen.h5", save_best_only=True, save_weights_only=True + ), + ], +) +# Keep training: 10 epochs with backbone frozen -- unfreeze neck +BACKBONE_FROZEN_EPOCH_NUMBER = 10 +for layer in model.get_layer("YOLOv4_neck").layers: + layer.trainable = False +model.compile(optimizer=optimizer, loss=loss) +history = model.fit( + ds_train, + validation_data=ds_test, + validation_steps=10, + epochs=BACKBONE_FROZEN_EPOCH_NUMBER + ALL_FROZEN_EPOCH_NUMBER, + initial_epoch=ALL_FROZEN_EPOCH_NUMBER, + callbacks=[ + tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), + tf.keras.callbacks.ModelCheckpoint( + "yolov4_backbone_frozen.h5", save_best_only=True, save_weights_only=True, verbose=True, + ), + ], +) +# Final training: 35 epochs with all weights unfrozen +for layer in model.get_layer("CSPDarknet53").layers: + layer.trainable = True model.compile(optimizer=optimizer, loss=loss) - history = model.fit( ds_train, validation_data=ds_test, validation_steps=10, - epochs=100, + epochs=50, + initial_epoch=ALL_FROZEN_EPOCH_NUMBER + BACKBONE_FROZEN_EPOCH_NUMBER, callbacks=[ - tf.keras.callbacks.TensorBoard(log_dir="./logs"), + tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( - "yolov4_best.h5", save_best_only=True, save_weights_only=True + "yolov4_full.h5", save_best_only=True, save_weights_only=True ), ], ) From b4afbc38671e8e7fc61d501f6c1497a3ef433eca Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Wed, 3 Jun 2020 18:14:49 +0200 Subject: [PATCH 07/20] Stop resizing and dividing by 255. 
twice --- scripts/train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/train.py b/scripts/train.py index c62a48f..a8ac34f 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -226,7 +226,7 @@ def prepare_dataset(dataset, shuffle=True): dataset = dataset.batch(BATCH_SIZE) dataset = dataset.map( lambda image, bounding_box_with_class: ( - tf.image.resize(image, INPUT_SHAPE[:2]) / 255.0, + image, transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ bounding_box_with_class, np.concatenate( From 1ebef2a42a32438e8281e5c5bed32f527f16c64e Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Wed, 3 Jun 2020 18:25:15 +0200 Subject: [PATCH 08/20] Script executed if __main__ --- scripts/train.py | 145 ++++++++++++++++++++++++----------------------- 1 file changed, 73 insertions(+), 72 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index a8ac34f..db9f95c 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -242,75 +242,76 @@ def prepare_dataset(dataset, shuffle=True): return dataset -voc_dataset = tfds.load("voc", shuffle_files=True) -ds_train, ds_test = voc_dataset["train"], voc_dataset["test"] -ds_train = prepare_dataset(ds_train, shuffle=True) -ds_test = prepare_dataset(ds_test, shuffle=False) - -model = YOLOv4( - input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, num_classes=PASCAL_VOC_NUM_CLASSES, training=True -) -darknet_weights = Path("./yolov4.h5") -if darknet_weights.exists(): - model.load_weights(str(darknet_weights), by_name=True, skip_mismatch=True) - print("Darknet weights loaded.") - -optimizer = tf.keras.optimizers.Adam(1e-4) -loss = [ - YoloLoss(np.concatenate(YOLOV4_ANCHORS, axis=0)[mask]) - for mask in YOLOV4_ANCHORS_MASKS -] - -model.summary() -# Start training: 5 epochs with backbone + neck frozen -ALL_FROZEN_EPOCH_NUMBER = 10 -for layer in model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers: - layer.trainable = False -model.compile(optimizer=optimizer, loss=loss) -history = model.fit( - ds_train, - validation_data=ds_test, - validation_steps=10, - epochs=ALL_FROZEN_EPOCH_NUMBER, - callbacks=[ - tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), - tf.keras.callbacks.ModelCheckpoint( - "yolov4_all_frozen.h5", save_best_only=True, save_weights_only=True - ), - ], -) -# Keep training: 10 epochs with backbone frozen -- unfreeze neck -BACKBONE_FROZEN_EPOCH_NUMBER = 10 -for layer in model.get_layer("YOLOv4_neck").layers: - layer.trainable = False -model.compile(optimizer=optimizer, loss=loss) -history = model.fit( - ds_train, - validation_data=ds_test, - validation_steps=10, - epochs=BACKBONE_FROZEN_EPOCH_NUMBER + ALL_FROZEN_EPOCH_NUMBER, - initial_epoch=ALL_FROZEN_EPOCH_NUMBER, - callbacks=[ - tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), - tf.keras.callbacks.ModelCheckpoint( - "yolov4_backbone_frozen.h5", save_best_only=True, save_weights_only=True, verbose=True, - ), - ], -) -# Final training: 35 epochs with all weights unfrozen -for layer in model.get_layer("CSPDarknet53").layers: - layer.trainable = True -model.compile(optimizer=optimizer, loss=loss) -history = model.fit( - ds_train, - validation_data=ds_test, - validation_steps=10, - epochs=50, - initial_epoch=ALL_FROZEN_EPOCH_NUMBER + BACKBONE_FROZEN_EPOCH_NUMBER, - callbacks=[ - tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), - tf.keras.callbacks.ModelCheckpoint( - "yolov4_full.h5", save_best_only=True, save_weights_only=True - ), - ], -) +if __name__ == "__main__": + voc_dataset = tfds.load("voc", shuffle_files=True) + 
ds_train, ds_test = voc_dataset["train"], voc_dataset["test"] + ds_train = prepare_dataset(ds_train, shuffle=True) + ds_test = prepare_dataset(ds_test, shuffle=False) + + model = YOLOv4( + input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, num_classes=PASCAL_VOC_NUM_CLASSES, training=True + ) + darknet_weights = Path("./yolov4.h5") + if darknet_weights.exists(): + model.load_weights(str(darknet_weights), by_name=True, skip_mismatch=True) + print("Darknet weights loaded.") + + optimizer = tf.keras.optimizers.Adam(1e-4) + loss = [ + YoloLoss(np.concatenate(YOLOV4_ANCHORS, axis=0)[mask]) + for mask in YOLOV4_ANCHORS_MASKS + ] + + model.summary() + # Start training: 5 epochs with backbone + neck frozen + ALL_FROZEN_EPOCH_NUMBER = 10 + for layer in model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers: + layer.trainable = False + model.compile(optimizer=optimizer, loss=loss) + history = model.fit( + ds_train, + validation_data=ds_test, + validation_steps=10, + epochs=ALL_FROZEN_EPOCH_NUMBER, + callbacks=[ + tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), + tf.keras.callbacks.ModelCheckpoint( + "yolov4_all_frozen.h5", save_best_only=True, save_weights_only=True + ), + ], + ) + # Keep training: 10 epochs with backbone frozen -- unfreeze neck + BACKBONE_FROZEN_EPOCH_NUMBER = 10 + for layer in model.get_layer("YOLOv4_neck").layers: + layer.trainable = False + model.compile(optimizer=optimizer, loss=loss) + history = model.fit( + ds_train, + validation_data=ds_test, + validation_steps=10, + epochs=BACKBONE_FROZEN_EPOCH_NUMBER + ALL_FROZEN_EPOCH_NUMBER, + initial_epoch=ALL_FROZEN_EPOCH_NUMBER, + callbacks=[ + tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), + tf.keras.callbacks.ModelCheckpoint( + "yolov4_backbone_frozen.h5", save_best_only=True, save_weights_only=True, verbose=True, + ), + ], + ) + # Final training: 35 epochs with all weights unfrozen + for layer in model.get_layer("CSPDarknet53").layers: + layer.trainable = True + model.compile(optimizer=optimizer, loss=loss) + history = model.fit( + ds_train, + validation_data=ds_test, + validation_steps=10, + epochs=50, + initial_epoch=ALL_FROZEN_EPOCH_NUMBER + BACKBONE_FROZEN_EPOCH_NUMBER, + callbacks=[ + tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), + tf.keras.callbacks.ModelCheckpoint( + "yolov4_full.h5", save_best_only=True, save_weights_only=True + ), + ], + ) From d3184bb5c92803a9cbae5d535aff5ee4d0ef5416 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sat, 6 Jun 2020 10:38:05 +0200 Subject: [PATCH 09/20] Change bbox order to match what YOLO expects --- scripts/train.py | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index db9f95c..1e747fc 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -21,6 +21,10 @@ LOG_DIR = Path("./logs") / datetime.now().strftime("%m-%d-%Y %H:%M:%S") +ALL_FROZEN_EPOCH_NUMBER = 10 +BACKBONE_FROZEN_EPOCH_NUMBER = 10 +TOTAL_NUMBER_OF_EPOCHS = 50 + def broadcast_iou(box_1, box_2): # box_1: (..., (x1, y1, x2, y2)) @@ -197,7 +201,15 @@ def prepare_dataset(dataset, shuffle=True): image, tf.concat( [ - object["bbox"], + tf.stack( + [ + object["bbox"][:, 1], + object["bbox"][:, 0], + object["bbox"][:, 3], + object["bbox"][:, 2], + ], + axis=-1, + ), tf.expand_dims(tf.cast(object["label"], tf.float32), axis=-1), ], axis=-1, @@ -249,7 +261,10 @@ def prepare_dataset(dataset, shuffle=True): ds_test = prepare_dataset(ds_test, shuffle=False) model = YOLOv4( - input_shape=INPUT_SHAPE, 
anchors=YOLOV4_ANCHORS, num_classes=PASCAL_VOC_NUM_CLASSES, training=True + input_shape=INPUT_SHAPE, + anchors=YOLOV4_ANCHORS, + num_classes=PASCAL_VOC_NUM_CLASSES, + training=True, ) darknet_weights = Path("./yolov4.h5") if darknet_weights.exists(): @@ -264,8 +279,9 @@ def prepare_dataset(dataset, shuffle=True): model.summary() # Start training: 5 epochs with backbone + neck frozen - ALL_FROZEN_EPOCH_NUMBER = 10 - for layer in model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers: + for layer in ( + model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers + ): layer.trainable = False model.compile(optimizer=optimizer, loss=loss) history = model.fit( @@ -281,9 +297,8 @@ def prepare_dataset(dataset, shuffle=True): ], ) # Keep training: 10 epochs with backbone frozen -- unfreeze neck - BACKBONE_FROZEN_EPOCH_NUMBER = 10 for layer in model.get_layer("YOLOv4_neck").layers: - layer.trainable = False + layer.trainable = True model.compile(optimizer=optimizer, loss=loss) history = model.fit( ds_train, @@ -294,7 +309,10 @@ def prepare_dataset(dataset, shuffle=True): callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( - "yolov4_backbone_frozen.h5", save_best_only=True, save_weights_only=True, verbose=True, + "yolov4_backbone_frozen.h5", + save_best_only=True, + save_weights_only=True, + verbose=True, ), ], ) @@ -306,12 +324,18 @@ def prepare_dataset(dataset, shuffle=True): ds_train, validation_data=ds_test, validation_steps=10, - epochs=50, + epochs=TOTAL_NUMBER_OF_EPOCHS, initial_epoch=ALL_FROZEN_EPOCH_NUMBER + BACKBONE_FROZEN_EPOCH_NUMBER, callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( "yolov4_full.h5", save_best_only=True, save_weights_only=True ), + tf.keras.callbacks.ModelCheckpoint( + "yolov4_train_loss.h5", + save_best_only=True, + save_weights_only=True, + monitor="loss", + ), ], ) From 0da167ea273f257d1d3adbe2eeb6f9c9618df8ac Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sat, 6 Jun 2020 10:46:54 +0200 Subject: [PATCH 10/20] Try reversing the anchors --- scripts/train.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 1e747fc..44ae69f 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -242,7 +242,7 @@ def prepare_dataset(dataset, shuffle=True): transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ bounding_box_with_class, np.concatenate( - YOLOV4_ANCHORS, axis=0 + list(reversed(YOLOV4_ANCHORS)), axis=0 ), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of anchors YOLOV4_ANCHORS_MASKS, INPUT_SHAPE[0], # Assumes square input @@ -273,7 +273,7 @@ def prepare_dataset(dataset, shuffle=True): optimizer = tf.keras.optimizers.Adam(1e-4) loss = [ - YoloLoss(np.concatenate(YOLOV4_ANCHORS, axis=0)[mask]) + YoloLoss(np.concatenate(list(reversed(YOLOV4_ANCHORS)), axis=0)[mask]) for mask in YOLOV4_ANCHORS_MASKS ] From b7436abec528a847c37fd0dd289920a2f034fc01 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sat, 6 Jun 2020 15:04:35 +0200 Subject: [PATCH 11/20] Normalized anchors --- scripts/train.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 44ae69f..ed3e0f5 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -8,17 +8,18 @@ import tensorflow as tf import tensorflow_datasets as tfds -from tf2_yolov4.anchors import YOLOV4_ANCHORS +from tf2_yolov4.anchors import 
YOLOV4_ANCHORS, compute_normalized_anchors from tf2_yolov4.heads.yolov3_head import yolov3_boxes_regression from tf2_yolov4.model import YOLOv4 -YOLOV4_ANCHORS_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] - INPUT_SHAPE = (608, 608, 3) BATCH_SIZE = 8 BOUNDING_BOXES_FIXED_NUMBER = 50 PASCAL_VOC_NUM_CLASSES = 20 +YOLOV4_ANCHORS_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] +YOLOV4_ANCHORS_NORMALIZED = compute_normalized_anchors(YOLOV4_ANCHORS, INPUT_SHAPE) + LOG_DIR = Path("./logs") / datetime.now().strftime("%m-%d-%Y %H:%M:%S") ALL_FROZEN_EPOCH_NUMBER = 10 @@ -242,7 +243,7 @@ def prepare_dataset(dataset, shuffle=True): transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ bounding_box_with_class, np.concatenate( - list(reversed(YOLOV4_ANCHORS)), axis=0 + list(reversed(YOLOV4_ANCHORS_NORMALIZED)), axis=0 ), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of anchors YOLOV4_ANCHORS_MASKS, INPUT_SHAPE[0], # Assumes square input @@ -273,7 +274,9 @@ def prepare_dataset(dataset, shuffle=True): optimizer = tf.keras.optimizers.Adam(1e-4) loss = [ - YoloLoss(np.concatenate(list(reversed(YOLOV4_ANCHORS)), axis=0)[mask]) + YoloLoss( + np.concatenate(list(reversed(YOLOV4_ANCHORS_NORMALIZED)), axis=0)[mask] + ) for mask in YOLOV4_ANCHORS_MASKS ] From b6f362092794e3491904a5ed7e4fce700d21524c Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sat, 6 Jun 2020 15:04:48 +0200 Subject: [PATCH 12/20] Save models.h5 inside log dir --- scripts/train.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index ed3e0f5..6a3190e 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -295,7 +295,7 @@ def prepare_dataset(dataset, shuffle=True): callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( - "yolov4_all_frozen.h5", save_best_only=True, save_weights_only=True + str(LOG_DIR / "yolov4_all_frozen.h5"), save_best_only=True, save_weights_only=True ), ], ) @@ -312,7 +312,7 @@ def prepare_dataset(dataset, shuffle=True): callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( - "yolov4_backbone_frozen.h5", + str(LOG_DIR / "yolov4_backbone_frozen.h5"), save_best_only=True, save_weights_only=True, verbose=True, @@ -332,10 +332,12 @@ def prepare_dataset(dataset, shuffle=True): callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( - "yolov4_full.h5", save_best_only=True, save_weights_only=True + str(LOG_DIR / "yolov4_full.h5"), + save_best_only=True, + save_weights_only=True, ), tf.keras.callbacks.ModelCheckpoint( - "yolov4_train_loss.h5", + str(LOG_DIR / "yolov4_train_loss.h5"), save_best_only=True, save_weights_only=True, monitor="loss", From 41b9370433a232688bc24a09fc87816b75c53a85 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sat, 6 Jun 2020 15:23:19 +0200 Subject: [PATCH 13/20] Add test script to plot box, run black --- scripts/test.py | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ scripts/train.py | 4 +- 2 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 scripts/test.py diff --git a/scripts/test.py b/scripts/test.py new file mode 100644 index 0000000..a3b80ec --- /dev/null +++ b/scripts/test.py @@ -0,0 +1,95 @@ +import matplotlib.pyplot as plt +import tensorflow as tf + +from tf2_yolov4.anchors import YOLOV4_ANCHORS +from tf2_yolov4.model import YOLOv4 + +HEIGHT, WIDTH = (608, 608) + +image = tf.io.read_file("../notebooks/images/cars.jpg") +image = 
tf.image.decode_image(image) +image = tf.image.resize(image, (HEIGHT, WIDTH)) +images = tf.expand_dims(image, axis=0) / 255.0 + +model = YOLOv4( + input_shape=(HEIGHT, WIDTH, 3), + anchors=YOLOV4_ANCHORS, + num_classes=20, + training=False, + yolo_max_boxes=100, + yolo_iou_threshold=0.5, + yolo_score_threshold=0.5, +) + +model.load_weights("../yolov4_full.h5") +model.summary() + +boxes, scores, classes, valid_detections = model.predict(images) + +CLASSES = [ + "aeroplane", + "bicycle", + "bird", + "boat", + "bottle", + "bus", + "car", + "cat", + "chair", + "cow", + "diningtable", + "dog", + "horse", + "motorbike", + "person", + "pottedplant", + "sheep", + "sofa", + "train", + "tvmonitor", +] + +# colors for visualization +COLORS = [ + [0.000, 0.447, 0.741], + [0.850, 0.325, 0.098], + [0.929, 0.694, 0.125], + [0.494, 0.184, 0.556], + [0.466, 0.674, 0.188], + [0.301, 0.745, 0.933], +] + + +def plot_results(pil_img, boxes, scores, classes): + plt.figure(figsize=(16, 10)) + plt.imshow(pil_img) + ax = plt.gca() + + for (xmin, ymin, xmax, ymax), score, cl in zip( + boxes.tolist(), scores.tolist(), classes.tolist() + ): + if score > 0: + ax.add_patch( + plt.Rectangle( + (xmin, ymin), + xmax - xmin, + ymax - ymin, + fill=False, + color=COLORS[cl % 6], + linewidth=3, + ) + ) + text = f"{CLASSES[cl]}: {score:0.2f}" + ax.text( + xmin, ymin, text, fontsize=15, bbox=dict(facecolor="yellow", alpha=0.5) + ) + plt.axis("off") + plt.show() + + +plot_results( + images[0], + boxes[0] * [WIDTH, HEIGHT, WIDTH, HEIGHT], + scores[0], + classes[0].astype(int), +) diff --git a/scripts/train.py b/scripts/train.py index 6a3190e..71ace58 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -295,7 +295,9 @@ def prepare_dataset(dataset, shuffle=True): callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( - str(LOG_DIR / "yolov4_all_frozen.h5"), save_best_only=True, save_weights_only=True + str(LOG_DIR / "yolov4_all_frozen.h5"), + save_best_only=True, + save_weights_only=True, ), ], ) From ab7762893ffe1ca221423ecd887f9e87487c7912 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Fri, 12 Jun 2020 11:13:42 +0200 Subject: [PATCH 14/20] Impact output order change in network on training script --- scripts/test.py | 2 +- scripts/train.py | 109 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 83 insertions(+), 28 deletions(-) diff --git a/scripts/test.py b/scripts/test.py index a3b80ec..3bc898a 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -18,7 +18,7 @@ training=False, yolo_max_boxes=100, yolo_iou_threshold=0.5, - yolo_score_threshold=0.5, + yolo_score_threshold=0.15, ) model.load_weights("../yolov4_full.h5") diff --git a/scripts/train.py b/scripts/train.py index 71ace58..c0b301b 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -12,7 +12,7 @@ from tf2_yolov4.heads.yolov3_head import yolov3_boxes_regression from tf2_yolov4.model import YOLOv4 -INPUT_SHAPE = (608, 608, 3) +INPUT_SHAPE = (416, 416, 3) BATCH_SIZE = 8 BOUNDING_BOXES_FIXED_NUMBER = 50 PASCAL_VOC_NUM_CLASSES = 20 @@ -22,7 +22,7 @@ LOG_DIR = Path("./logs") / datetime.now().strftime("%m-%d-%Y %H:%M:%S") -ALL_FROZEN_EPOCH_NUMBER = 10 +ALL_FROZEN_EPOCH_NUMBER = 15 BACKBONE_FROZEN_EPOCH_NUMBER = 10 TOTAL_NUMBER_OF_EPOCHS = 50 @@ -185,7 +185,7 @@ def transform_targets(y_train, anchors, anchor_masks, size): y_outs.append(transform_targets_for_output(y_train, grid_size, anchor_idxs)) grid_size *= 2 - return tuple(y_outs) + return tuple(reversed(y_outs)) def 
pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_number): @@ -195,7 +195,43 @@ def pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_num return tf.pad(bounding_boxes, paddings, constant_values=0.0) -def prepare_dataset(dataset, shuffle=True): +def random_flip_right_with_bounding_boxes(images, bounding_boxes): + apply_flip = tf.random.uniform(shape=[]) > 0.5 + if apply_flip: + images = tf.image.flip_left_right(images) + bounding_boxes = tf.stack( + [ + 1.0 - bounding_boxes[..., 2], + bounding_boxes[..., 1], + 1.0 - bounding_boxes[..., 0], + bounding_boxes[..., 3], + bounding_boxes[..., 4], + ], + axis=-1, + ) + + return images, bounding_boxes + + +def augment_images(images, bounding_boxes): + # Image transformations that do not affect bounding boxes + images = tf.image.random_hue(images, 0.15) + images = tf.image.random_brightness(images, 0.15) + + # Transformations that affect bounding boxes + images, bounding_boxes = random_flip_right_with_bounding_boxes( + images, bounding_boxes + ) + + return images, bounding_boxes + + +def prepare_dataset( + dataset, + shuffle=True, + apply_data_augmentation=False, + transform_to_bbox_by_stage=True, +): dataset = dataset.map(lambda el: (el["image"], el["objects"])) dataset = dataset.map( lambda image, object: ( @@ -218,15 +254,6 @@ def prepare_dataset(dataset, shuffle=True): ), num_parallel_calls=tf.data.experimental.AUTOTUNE, ) - dataset = dataset.map( - lambda image, bounding_boxes: ( - image, - pad_bounding_boxes_to_fixed_number_of_bounding_boxes( - bounding_boxes, pad_number=BOUNDING_BOXES_FIXED_NUMBER - ), - ), - num_parallel_calls=tf.data.experimental.AUTOTUNE, - ) dataset = dataset.map( lambda image, bounding_box: ( tf.image.resize(image, INPUT_SHAPE[:2]) / 255.0, @@ -234,32 +261,57 @@ def prepare_dataset(dataset, shuffle=True): ), num_parallel_calls=tf.data.experimental.AUTOTUNE, ) + if apply_data_augmentation: + dataset = dataset.map( + augment_images, num_parallel_calls=tf.data.experimental.AUTOTUNE + ) if shuffle: dataset = dataset.shuffle(buffer_size=1000) - dataset = dataset.batch(BATCH_SIZE) dataset = dataset.map( - lambda image, bounding_box_with_class: ( + lambda image, bounding_boxes: ( image, - transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ - bounding_box_with_class, - np.concatenate( - list(reversed(YOLOV4_ANCHORS_NORMALIZED)), axis=0 - ), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of anchors - YOLOV4_ANCHORS_MASKS, - INPUT_SHAPE[0], # Assumes square input + pad_bounding_boxes_to_fixed_number_of_bounding_boxes( + bounding_boxes, pad_number=BOUNDING_BOXES_FIXED_NUMBER ), ), num_parallel_calls=tf.data.experimental.AUTOTUNE, ) + dataset = dataset.batch(BATCH_SIZE) - return dataset + if transform_to_bbox_by_stage: + dataset = dataset.map( + lambda image, bounding_box_with_class: ( + image, + transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ + bounding_box_with_class, + np.concatenate( + list(YOLOV4_ANCHORS_NORMALIZED), axis=0 + ), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of anchors + YOLOV4_ANCHORS_MASKS, + INPUT_SHAPE[0], # Assumes square input + ), + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, + ) + dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + return dataset.repeat() if __name__ == "__main__": - voc_dataset = tfds.load("voc", shuffle_files=True) + voc_dataset, infos = tfds.load("voc", with_info=True, shuffle_files=True) ds_train, ds_test = 
voc_dataset["train"], voc_dataset["test"] - ds_train = prepare_dataset(ds_train, shuffle=True) - ds_test = prepare_dataset(ds_test, shuffle=False) + ds_train = prepare_dataset( + ds_train, + shuffle=True, + apply_data_augmentation=True, + transform_to_bbox_by_stage=True, + ) + ds_test = prepare_dataset( + ds_test, + shuffle=False, + apply_data_augmentation=False, + transform_to_bbox_by_stage=True, + ) model = YOLOv4( input_shape=INPUT_SHAPE, @@ -275,7 +327,7 @@ def prepare_dataset(dataset, shuffle=True): optimizer = tf.keras.optimizers.Adam(1e-4) loss = [ YoloLoss( - np.concatenate(list(reversed(YOLOV4_ANCHORS_NORMALIZED)), axis=0)[mask] + np.concatenate(list(YOLOV4_ANCHORS_NORMALIZED), axis=0)[mask] ) for mask in YOLOV4_ANCHORS_MASKS ] @@ -289,6 +341,7 @@ def prepare_dataset(dataset, shuffle=True): model.compile(optimizer=optimizer, loss=loss) history = model.fit( ds_train, + steps_per_epoch=infos.splits["train"].num_examples // BATCH_SIZE, validation_data=ds_test, validation_steps=10, epochs=ALL_FROZEN_EPOCH_NUMBER, @@ -307,6 +360,7 @@ def prepare_dataset(dataset, shuffle=True): model.compile(optimizer=optimizer, loss=loss) history = model.fit( ds_train, + steps_per_epoch=infos.splits["train"].num_examples // BATCH_SIZE, validation_data=ds_test, validation_steps=10, epochs=BACKBONE_FROZEN_EPOCH_NUMBER + ALL_FROZEN_EPOCH_NUMBER, @@ -327,6 +381,7 @@ def prepare_dataset(dataset, shuffle=True): model.compile(optimizer=optimizer, loss=loss) history = model.fit( ds_train, + steps_per_epoch=infos.splits["train"].num_examples // BATCH_SIZE, validation_data=ds_test, validation_steps=10, epochs=TOTAL_NUMBER_OF_EPOCHS, From 3a4f70cc7b2c2214a80c3e0f438c8d515903fe5e Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Sun, 21 Jun 2020 16:45:07 +0200 Subject: [PATCH 15/20] Pascal VOC 2012 --- scripts/test.py | 6 +++++- scripts/train.py | 21 ++++++++++++--------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/scripts/test.py b/scripts/test.py index 3bc898a..91efe95 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -20,8 +20,12 @@ yolo_iou_threshold=0.5, yolo_score_threshold=0.15, ) +for layer in ( + model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers +): + layer.trainable = False -model.load_weights("../yolov4_full.h5") +model.load_weights("../yolov4_all_frozen.h5") model.summary() boxes, scores, classes, valid_detections = model.predict(images) diff --git a/scripts/train.py b/scripts/train.py index c0b301b..cab7c39 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -14,7 +14,7 @@ INPUT_SHAPE = (416, 416, 3) BATCH_SIZE = 8 -BOUNDING_BOXES_FIXED_NUMBER = 50 +BOUNDING_BOXES_FIXED_NUMBER = 60 PASCAL_VOC_NUM_CLASSES = 20 YOLOV4_ANCHORS_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] @@ -298,8 +298,8 @@ def prepare_dataset( if __name__ == "__main__": - voc_dataset, infos = tfds.load("voc", with_info=True, shuffle_files=True) - ds_train, ds_test = voc_dataset["train"], voc_dataset["test"] + voc_dataset, infos = tfds.load("voc/2012", with_info=True, shuffle_files=True) + ds_train, ds_test = voc_dataset["train"], voc_dataset["validation"] ds_train = prepare_dataset( ds_train, shuffle=True, @@ -313,6 +313,9 @@ def prepare_dataset( transform_to_bbox_by_stage=True, ) + steps_per_epoch = infos.splits["train"].num_examples // BATCH_SIZE + validation_steps = infos.splits["validation"].num_examples // BATCH_SIZE + model = YOLOv4( input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, @@ -341,9 +344,9 @@ def prepare_dataset( model.compile(optimizer=optimizer, loss=loss) history 
= model.fit( ds_train, - steps_per_epoch=infos.splits["train"].num_examples // BATCH_SIZE, + steps_per_epoch=steps_per_epoch, validation_data=ds_test, - validation_steps=10, + validation_steps=validation_steps, epochs=ALL_FROZEN_EPOCH_NUMBER, callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), @@ -360,9 +363,9 @@ def prepare_dataset( model.compile(optimizer=optimizer, loss=loss) history = model.fit( ds_train, - steps_per_epoch=infos.splits["train"].num_examples // BATCH_SIZE, + steps_per_epoch=steps_per_epoch, validation_data=ds_test, - validation_steps=10, + validation_steps=validation_steps, epochs=BACKBONE_FROZEN_EPOCH_NUMBER + ALL_FROZEN_EPOCH_NUMBER, initial_epoch=ALL_FROZEN_EPOCH_NUMBER, callbacks=[ @@ -381,9 +384,9 @@ def prepare_dataset( model.compile(optimizer=optimizer, loss=loss) history = model.fit( ds_train, - steps_per_epoch=infos.splits["train"].num_examples // BATCH_SIZE, + steps_per_epoch=steps_per_epoch, validation_data=ds_test, - validation_steps=10, + validation_steps=validation_steps, epochs=TOTAL_NUMBER_OF_EPOCHS, initial_epoch=ALL_FROZEN_EPOCH_NUMBER + BACKBONE_FROZEN_EPOCH_NUMBER, callbacks=[ From a7ba2421b3da40e166a5f888b292113d5998c883 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Fri, 3 Jul 2020 11:07:19 +0200 Subject: [PATCH 16/20] Fix training loop. Improve test scripts. --- scripts/test.py | 102 +++++++++++++++++--------------- scripts/train.py | 13 ++-- tf2_yolov4/heads/yolov3_head.py | 4 +- 3 files changed, 62 insertions(+), 57 deletions(-) diff --git a/scripts/test.py b/scripts/test.py index 91efe95..01d166d 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -5,6 +5,35 @@ from tf2_yolov4.model import YOLOv4 HEIGHT, WIDTH = (608, 608) +INPUT_SHAPE = (HEIGHT, WIDTH, 3) + +PASCAL_VOC_CLASSES = [ + "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", + "car", "cat", "chair", "cow", "diningtable", "dog", "horse", + "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor", +] + +COCO_CLASSES = [ + 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', + 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', + 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', + 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', + 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', + 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', + 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', + 'chair', 'couch', 'potted plant', 'bed', 'dining table', + 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', + 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', + 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', + 'toothbrush' +] + +# Switch this variable between PASCAL_VOC_CLASSES and COCO_CLASSES depending +# on your training, or define your own set of classes. 
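# For instance, a hypothetical two-class detector would simply use
#   CLASSES = ["vehicle", "pedestrian"]
# and the num_classes passed to YOLOv4 below is derived as len(CLASSES).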
+CLASSES = PASCAL_VOC_CLASSES + image = tf.io.read_file("../notebooks/images/cars.jpg") image = tf.image.decode_image(image) @@ -14,45 +43,17 @@ model = YOLOv4( input_shape=(HEIGHT, WIDTH, 3), anchors=YOLOV4_ANCHORS, - num_classes=20, + num_classes=len(CLASSES), training=False, yolo_max_boxes=100, yolo_iou_threshold=0.5, yolo_score_threshold=0.15, ) -for layer in ( - model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers -): - layer.trainable = False - -model.load_weights("../yolov4_all_frozen.h5") +model.load_weights("../yolov4_full.h5") model.summary() boxes, scores, classes, valid_detections = model.predict(images) -CLASSES = [ - "aeroplane", - "bicycle", - "bird", - "boat", - "bottle", - "bus", - "car", - "cat", - "chair", - "cow", - "diningtable", - "dog", - "horse", - "motorbike", - "person", - "pottedplant", - "sheep", - "sofa", - "train", - "tvmonitor", -] - # colors for visualization COLORS = [ [0.000, 0.447, 0.741], @@ -69,24 +70,31 @@ def plot_results(pil_img, boxes, scores, classes): plt.imshow(pil_img) ax = plt.gca() - for (xmin, ymin, xmax, ymax), score, cl in zip( - boxes.tolist(), scores.tolist(), classes.tolist() - ): - if score > 0: - ax.add_patch( - plt.Rectangle( - (xmin, ymin), - xmax - xmin, - ymax - ymin, - fill=False, - color=COLORS[cl % 6], - linewidth=3, - ) - ) - text = f"{CLASSES[cl]}: {score:0.2f}" - ax.text( - xmin, ymin, text, fontsize=15, bbox=dict(facecolor="yellow", alpha=0.5) + predictions_with_positive_score = [ + (box, score, box_class) + for box, score, box_class in zip( + boxes.tolist(), scores.tolist(), classes.tolist() + ) + if score > 0 + ] + for (xmin, ymin, xmax, ymax), score, cl in predictions_with_positive_score: + color = COLORS[cl % 6] + ax.add_patch( + plt.Rectangle( + (xmin, ymin), + xmax - xmin, + ymax - ymin, + fill=False, + color=color, + linewidth=3, ) + ) + text = f"{CLASSES[cl]}: {score:0.2f}" + ax.text( + xmin, ymin, text, color="white", + fontsize=15, fontweight="bold", + bbox=dict(facecolor=color, alpha=0.7), + ) plt.axis("off") plt.show() diff --git a/scripts/train.py b/scripts/train.py index cab7c39..084ea33 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -12,7 +12,7 @@ from tf2_yolov4.heads.yolov3_head import yolov3_boxes_regression from tf2_yolov4.model import YOLOv4 -INPUT_SHAPE = (416, 416, 3) +INPUT_SHAPE = (608, 608, 3) BATCH_SIZE = 8 BOUNDING_BOXES_FIXED_NUMBER = 60 PASCAL_VOC_NUM_CLASSES = 20 @@ -185,7 +185,7 @@ def transform_targets(y_train, anchors, anchor_masks, size): y_outs.append(transform_targets_for_output(y_train, grid_size, anchor_idxs)) grid_size *= 2 - return tuple(reversed(y_outs)) + return tuple(y_outs) def pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_number): @@ -322,20 +322,17 @@ def prepare_dataset( num_classes=PASCAL_VOC_NUM_CLASSES, training=True, ) - darknet_weights = Path("./yolov4.h5") + darknet_weights = Path(__file__).parent / "yolov4.h5" if darknet_weights.exists(): model.load_weights(str(darknet_weights), by_name=True, skip_mismatch=True) print("Darknet weights loaded.") optimizer = tf.keras.optimizers.Adam(1e-4) loss = [ - YoloLoss( - np.concatenate(list(YOLOV4_ANCHORS_NORMALIZED), axis=0)[mask] - ) + YoloLoss(np.concatenate(list(YOLOV4_ANCHORS_NORMALIZED), axis=0)[mask]) for mask in YOLOV4_ANCHORS_MASKS ] - model.summary() # Start training: 5 epochs with backbone + neck frozen for layer in ( model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers @@ -378,7 +375,7 @@ def prepare_dataset( ), ], ) - # Final 
training: 35 epochs with all weights unfrozen + # Final training for layer in model.get_layer("CSPDarknet53").layers: layer.trainable = True model.compile(optimizer=optimizer, loss=loss) diff --git a/tf2_yolov4/heads/yolov3_head.py b/tf2_yolov4/heads/yolov3_head.py index ff28a6b..e2bcaeb 100644 --- a/tf2_yolov4/heads/yolov3_head.py +++ b/tf2_yolov4/heads/yolov3_head.py @@ -95,7 +95,7 @@ def yolov3_head( if training: return tf.keras.Model( [input_1, input_2, input_3], - [output_1, output_2, output_3], + [output_3, output_2, output_1], name="YOLOv3_head", ) @@ -120,7 +120,7 @@ def yolov3_head( yolo_score_threshold=yolo_score_threshold, ), name="yolov4_nms", - )([predictions_1, predictions_2, predictions_3]) + )([predictions_3, predictions_2, predictions_1]) return tf.keras.Model([input_1, input_2, input_3], output, name="YOLOv3_head") From 1239ea11fc85bc2ecfbb8d3c712f265db63f0e59 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Fri, 3 Jul 2020 14:52:40 +0200 Subject: [PATCH 17/20] Improve it --- scripts/train.py | 351 ++++++----------------------------------- tf2_yolov4/anchors.py | 2 + tf2_yolov4/datasets.py | 188 ++++++++++++++++++++++ tf2_yolov4/losses.py | 101 ++++++++++++ 4 files changed, 342 insertions(+), 300 deletions(-) create mode 100644 tf2_yolov4/datasets.py create mode 100644 tf2_yolov4/losses.py diff --git a/scripts/train.py b/scripts/train.py index 084ea33..29d5445 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -4,332 +4,64 @@ from datetime import datetime from pathlib import Path +import click import numpy as np import tensorflow as tf import tensorflow_datasets as tfds -from tf2_yolov4.anchors import YOLOV4_ANCHORS, compute_normalized_anchors -from tf2_yolov4.heads.yolov3_head import yolov3_boxes_regression +from tf2_yolov4.anchors import ( + YOLOV4_ANCHORS, + YOLOV4_ANCHORS_MASKS, + compute_normalized_anchors, +) +from tf2_yolov4.datasets import prepare_dataset +from tf2_yolov4.losses import YoloV3Loss from tf2_yolov4.model import YOLOv4 INPUT_SHAPE = (608, 608, 3) -BATCH_SIZE = 8 -BOUNDING_BOXES_FIXED_NUMBER = 60 -PASCAL_VOC_NUM_CLASSES = 20 -YOLOV4_ANCHORS_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] -YOLOV4_ANCHORS_NORMALIZED = compute_normalized_anchors(YOLOV4_ANCHORS, INPUT_SHAPE) -LOG_DIR = Path("./logs") / datetime.now().strftime("%m-%d-%Y %H:%M:%S") +def launch_training(batch_size, weights_path, all_frozen_epoch_number, backbone_frozen_epoch_number, num_epochs, dataset_name="voc"): + LOG_DIR = Path("./logs") / dataset_name / datetime.now().strftime("%m-%d-%Y %H:%M:%S") -ALL_FROZEN_EPOCH_NUMBER = 15 -BACKBONE_FROZEN_EPOCH_NUMBER = 10 -TOTAL_NUMBER_OF_EPOCHS = 50 + voc_dataset, infos = tfds.load(dataset_name, with_info=True, shuffle_files=True) - -def broadcast_iou(box_1, box_2): - # box_1: (..., (x1, y1, x2, y2)) - # box_2: (N, (x1, y1, x2, y2)) - - # broadcast boxes - box_1 = tf.expand_dims(box_1, -2) - box_2 = tf.expand_dims(box_2, 0) - # new_shape: (..., N, (x1, y1, x2, y2)) - new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2)) - box_1 = tf.broadcast_to(box_1, new_shape) - box_2 = tf.broadcast_to(box_2, new_shape) - - int_w = tf.maximum( - tf.minimum(box_1[..., 2], box_2[..., 2]) - - tf.maximum(box_1[..., 0], box_2[..., 0]), - 0, - ) - int_h = tf.maximum( - tf.minimum(box_1[..., 3], box_2[..., 3]) - - tf.maximum(box_1[..., 1], box_2[..., 1]), - 0, - ) - int_area = int_w * int_h - box_1_area = (box_1[..., 2] - box_1[..., 0]) * (box_1[..., 3] - box_1[..., 1]) - box_2_area = (box_2[..., 2] - box_2[..., 0]) * (box_2[..., 3] - box_2[..., 
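# Note (illustration added for clarity, not part of the patch): with the head
# outputs reordered to [output_3, output_2, output_1] above and transform_targets
# no longer reversed, predictions, anchor masks and targets all run from the
# coarsest stage to the finest. For a 608x608 input:
for stride, mask in zip((32, 16, 8), ((6, 7, 8), (3, 4, 5), (0, 1, 2))):
    print(f"stride {stride}: grid {608 // stride}x{608 // stride}, anchor indices {mask}")
# stride 32: grid 19x19, anchor indices (6, 7, 8)
# stride 16: grid 38x38, anchor indices (3, 4, 5)
# stride 8: grid 76x76, anchor indices (0, 1, 2)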
1]) - return int_area / (box_1_area + box_2_area - int_area) - - -def YoloLoss(anchors, ignore_thresh=0.5): - def yolo_loss(y_true, y_pred): - # 1. transform all pred outputs - # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls)) - pred_box, pred_obj, pred_class, pred_xywh = yolov3_boxes_regression( - y_pred, anchors - ) - pred_xy = pred_xywh[..., 0:2] - pred_wh = pred_xywh[..., 2:4] - - # 2. transform all true outputs - # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls)) - true_box, true_obj, true_class_idx = tf.split(y_true, (4, 1, 1), axis=-1) - true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2 - true_wh = true_box[..., 2:4] - true_box[..., 0:2] - - # give higher weights to small boxes - box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1] - - # 3. inverting the pred box equations - grid_size = tf.shape(y_true)[1] - grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size)) - grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) - true_xy = true_xy * tf.cast(grid_size, tf.float32) - tf.cast(grid, tf.float32) - true_wh = tf.math.log(true_wh / anchors) - true_wh = tf.where(tf.math.is_inf(true_wh), tf.zeros_like(true_wh), true_wh) - - # 4. calculate all masks - obj_mask = tf.squeeze(true_obj, -1) - # ignore false positive when iou is over threshold - best_iou = tf.map_fn( - lambda x: tf.reduce_max( - broadcast_iou(x[0], tf.boolean_mask(x[1], tf.cast(x[2], tf.bool))), - axis=-1, - ), - (pred_box, true_box, obj_mask), - tf.float32, - ) - ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32) - - # 5. calculate all losses - xy_loss = ( - obj_mask - * box_loss_scale - * tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1) - ) - wh_loss = ( - obj_mask - * box_loss_scale - * tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1) - ) - obj_loss = tf.keras.losses.binary_crossentropy(true_obj, pred_obj) - obj_loss = obj_mask * obj_loss + (1 - obj_mask) * ignore_mask * obj_loss - # TODO: use binary_crossentropy instead - class_loss = obj_mask * tf.keras.losses.sparse_categorical_crossentropy( - true_class_idx, pred_class - ) - - # 6. 
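# Quick numeric check of box_loss_scale above (added for clarity, not part of the
# patch) -- with widths and heights normalized to the image, smaller boxes get a
# larger weight:
#     box_loss_scale = 2 - w * h
#     full-image box: w = h = 1.0 -> 2 - 1.00 = 1.0
#     small box:      w = h = 0.1 -> 2 - 0.01 = 1.99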
sum over (batch, gridx, gridy, anchors) => (batch, 1) - xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3)) - wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3)) - obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3)) - class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3)) - - return xy_loss + wh_loss + obj_loss + class_loss - - return yolo_loss - - -@tf.function -def transform_targets_for_output(y_true, grid_size, anchor_idxs): - # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor)) - N = tf.shape(y_true)[0] - - # y_true_out: (N, grid, grid, anchors, [x, y, w, h, obj, class]) - y_true_out = tf.zeros((N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6)) - - anchor_idxs = tf.cast(anchor_idxs, tf.int32) - - indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True) - updates = tf.TensorArray(tf.float32, 1, dynamic_size=True) - idx = 0 - for i in tf.range(N): - for j in tf.range(tf.shape(y_true)[1]): - if tf.equal(y_true[i][j][2], 0): - continue - anchor_eq = tf.equal(anchor_idxs, tf.cast(y_true[i][j][5], tf.int32)) - - if tf.reduce_any(anchor_eq): - box = y_true[i][j][0:4] - box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2 - - anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32) - grid_xy = tf.cast(box_xy // (1 / grid_size), tf.int32) - - # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class) - indexes = indexes.write( - idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]] - ) - updates = updates.write( - idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]] - ) - idx += 1 - - return tf.tensor_scatter_nd_update(y_true_out, indexes.stack(), updates.stack()) - - -def transform_targets(y_train, anchors, anchor_masks, size): - y_outs = [] - grid_size = size // 32 - - # calculate anchor index for true boxes - anchors = tf.cast(anchors, tf.float32) - anchor_area = anchors[..., 0] * anchors[..., 1] - box_wh = y_train[..., 2:4] - y_train[..., 0:2] - box_wh = tf.tile(tf.expand_dims(box_wh, -2), (1, 1, tf.shape(anchors)[0], 1)) - box_area = box_wh[..., 0] * box_wh[..., 1] - intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * tf.minimum( - box_wh[..., 1], anchors[..., 1] - ) - iou = intersection / (box_area + anchor_area - intersection) - anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32) - anchor_idx = tf.expand_dims(anchor_idx, axis=-1) - - y_train = tf.concat([y_train, anchor_idx], axis=-1) - - for anchor_idxs in anchor_masks: - y_outs.append(transform_targets_for_output(y_train, grid_size, anchor_idxs)) - grid_size *= 2 - - return tuple(y_outs) - - -def pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_number): - box_number = tf.shape(bounding_boxes)[0] - paddings = [[0, pad_number - box_number], [0, 0]] - - return tf.pad(bounding_boxes, paddings, constant_values=0.0) - - -def random_flip_right_with_bounding_boxes(images, bounding_boxes): - apply_flip = tf.random.uniform(shape=[]) > 0.5 - if apply_flip: - images = tf.image.flip_left_right(images) - bounding_boxes = tf.stack( - [ - 1.0 - bounding_boxes[..., 2], - bounding_boxes[..., 1], - 1.0 - bounding_boxes[..., 0], - bounding_boxes[..., 3], - bounding_boxes[..., 4], - ], - axis=-1, - ) - - return images, bounding_boxes - - -def augment_images(images, bounding_boxes): - # Image transformations that do not affect bounding boxes - images = tf.image.random_hue(images, 0.15) - images = tf.image.random_brightness(images, 0.15) - - # Transformations that affect bounding boxes - images, bounding_boxes = random_flip_right_with_bounding_boxes( - images, bounding_boxes - ) - - return images, bounding_boxes - - -def 
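# Worked example for random_flip_right_with_bounding_boxes above (illustration,
# not part of the patch), with boxes stored as normalized [x1, y1, x2, y2, class]:
#     original box:          [0.2, 0.3, 0.5, 0.8, 7.0]
#     after flip_left_right: x1' = 1 - x2 = 0.5, x2' = 1 - x1 = 0.8
#     flipped box:           [0.5, 0.3, 0.8, 0.8, 7.0]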
prepare_dataset( - dataset, - shuffle=True, - apply_data_augmentation=False, - transform_to_bbox_by_stage=True, -): - dataset = dataset.map(lambda el: (el["image"], el["objects"])) - dataset = dataset.map( - lambda image, object: ( - image, - tf.concat( - [ - tf.stack( - [ - object["bbox"][:, 1], - object["bbox"][:, 0], - object["bbox"][:, 3], - object["bbox"][:, 2], - ], - axis=-1, - ), - tf.expand_dims(tf.cast(object["label"], tf.float32), axis=-1), - ], - axis=-1, - ), - ), - num_parallel_calls=tf.data.experimental.AUTOTUNE, - ) - dataset = dataset.map( - lambda image, bounding_box: ( - tf.image.resize(image, INPUT_SHAPE[:2]) / 255.0, - bounding_box, - ), - num_parallel_calls=tf.data.experimental.AUTOTUNE, - ) - if apply_data_augmentation: - dataset = dataset.map( - augment_images, num_parallel_calls=tf.data.experimental.AUTOTUNE - ) - if shuffle: - dataset = dataset.shuffle(buffer_size=1000) - dataset = dataset.map( - lambda image, bounding_boxes: ( - image, - pad_bounding_boxes_to_fixed_number_of_bounding_boxes( - bounding_boxes, pad_number=BOUNDING_BOXES_FIXED_NUMBER - ), - ), - num_parallel_calls=tf.data.experimental.AUTOTUNE, - ) - dataset = dataset.batch(BATCH_SIZE) - - if transform_to_bbox_by_stage: - dataset = dataset.map( - lambda image, bounding_box_with_class: ( - image, - transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ - bounding_box_with_class, - np.concatenate( - list(YOLOV4_ANCHORS_NORMALIZED), axis=0 - ), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of anchors - YOLOV4_ANCHORS_MASKS, - INPUT_SHAPE[0], # Assumes square input - ), - ), - num_parallel_calls=tf.data.experimental.AUTOTUNE, - ) - dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) - return dataset.repeat() - - -if __name__ == "__main__": - voc_dataset, infos = tfds.load("voc/2012", with_info=True, shuffle_files=True) ds_train, ds_test = voc_dataset["train"], voc_dataset["validation"] ds_train = prepare_dataset( ds_train, + shape=INPUT_SHAPE, + batch_size=batch_size, shuffle=True, apply_data_augmentation=True, transform_to_bbox_by_stage=True, ) ds_test = prepare_dataset( ds_test, + shape=INPUT_SHAPE, + batch_size=batch_size, shuffle=False, apply_data_augmentation=False, transform_to_bbox_by_stage=True, ) - steps_per_epoch = infos.splits["train"].num_examples // BATCH_SIZE - validation_steps = infos.splits["validation"].num_examples // BATCH_SIZE + steps_per_epoch = infos.splits["train"].num_examples // batch_size + validation_steps = infos.splits["validation"].num_examples // batch_size + num_classes = infos.features["objects"]["label"].num_classes model = YOLOv4( input_shape=INPUT_SHAPE, anchors=YOLOV4_ANCHORS, - num_classes=PASCAL_VOC_NUM_CLASSES, + num_classes=num_classes, training=True, ) - darknet_weights = Path(__file__).parent / "yolov4.h5" - if darknet_weights.exists(): - model.load_weights(str(darknet_weights), by_name=True, skip_mismatch=True) + if weights_path is not None: + model.load_weights(str(weights_path), by_name=True, skip_mismatch=True) print("Darknet weights loaded.") optimizer = tf.keras.optimizers.Adam(1e-4) + normalized_anchors = compute_normalized_anchors(YOLOV4_ANCHORS, INPUT_SHAPE) loss = [ - YoloLoss(np.concatenate(list(YOLOV4_ANCHORS_NORMALIZED), axis=0)[mask]) + YoloV3Loss(np.concatenate(list(normalized_anchors), axis=0)[mask]) for mask in YOLOV4_ANCHORS_MASKS ] @@ -339,59 +71,63 @@ def prepare_dataset( ): layer.trainable = False model.compile(optimizer=optimizer, loss=loss) - history = model.fit( + model.fit( 
ds_train, steps_per_epoch=steps_per_epoch, validation_data=ds_test, validation_steps=validation_steps, - epochs=ALL_FROZEN_EPOCH_NUMBER, + epochs=all_frozen_epoch_number, callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( str(LOG_DIR / "yolov4_all_frozen.h5"), save_best_only=True, save_weights_only=True, + monitor="val_loss", ), ], ) + # Keep training: 10 epochs with backbone frozen -- unfreeze neck for layer in model.get_layer("YOLOv4_neck").layers: layer.trainable = True model.compile(optimizer=optimizer, loss=loss) - history = model.fit( + model.fit( ds_train, steps_per_epoch=steps_per_epoch, validation_data=ds_test, validation_steps=validation_steps, - epochs=BACKBONE_FROZEN_EPOCH_NUMBER + ALL_FROZEN_EPOCH_NUMBER, - initial_epoch=ALL_FROZEN_EPOCH_NUMBER, + epochs=backbone_frozen_epoch_number + all_frozen_epoch_number, + initial_epoch=all_frozen_epoch_number, callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( str(LOG_DIR / "yolov4_backbone_frozen.h5"), save_best_only=True, save_weights_only=True, - verbose=True, + monitor="val_loss", ), ], ) + # Final training for layer in model.get_layer("CSPDarknet53").layers: layer.trainable = True model.compile(optimizer=optimizer, loss=loss) - history = model.fit( + model.fit( ds_train, steps_per_epoch=steps_per_epoch, validation_data=ds_test, validation_steps=validation_steps, - epochs=TOTAL_NUMBER_OF_EPOCHS, - initial_epoch=ALL_FROZEN_EPOCH_NUMBER + BACKBONE_FROZEN_EPOCH_NUMBER, + epochs=num_epochs, + initial_epoch=all_frozen_epoch_number + backbone_frozen_epoch_number, callbacks=[ tf.keras.callbacks.TensorBoard(log_dir=LOG_DIR), tf.keras.callbacks.ModelCheckpoint( str(LOG_DIR / "yolov4_full.h5"), save_best_only=True, save_weights_only=True, + monitor="val_loss", ), tf.keras.callbacks.ModelCheckpoint( str(LOG_DIR / "yolov4_train_loss.h5"), @@ -401,3 +137,18 @@ def prepare_dataset( ), ], ) + + +@click.command() +@click.option("--batch_size", type=int, default=16, help="Size of mini-batch") +@click.option("--weights_path", type=click.Path(exists=True), default=None, help="Path to pretrained weights") +@click.option("--all_frozen_epoch_number", type=int, default=20, help="Number of epochs to perform with backbone and neck frozen") +@click.option("--backbone_frozen_epoch_number", type=int, default=10, help="Number of epochs to perform with backbone frozen") +@click.option("--num_epochs", type=int, default=50, help="Total number of epochs to perform") +@click.option("--dataset_name", type=str, default="voc", help="Dataset used during training. 
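# Example invocation of the click command defined here (illustrative; the weights
# path and dataset name are placeholders to adapt to your setup):
#     python scripts/train.py --batch_size 8 --weights_path ./yolov4.h5 \
#         --all_frozen_epoch_number 20 --backbone_frozen_epoch_number 10 \
#         --num_epochs 50 --dataset_name voc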
Refer to TensorFlow Datasets documentation for dataset names.") +def launch_training_command(batch_size, weights_path, all_frozen_epoch_number, backbone_frozen_epoch_number, num_epochs, dataset_name): + launch_training(batch_size, weights_path, all_frozen_epoch_number, backbone_frozen_epoch_number, num_epochs, dataset_name) + + +if __name__ == "__main__": + launch_training_command() diff --git a/tf2_yolov4/anchors.py b/tf2_yolov4/anchors.py index 8d2dcde..e3f4076 100644 --- a/tf2_yolov4/anchors.py +++ b/tf2_yolov4/anchors.py @@ -9,6 +9,8 @@ np.array([(142, 110), (192, 243), (459, 401)], np.float32), ] +YOLOV4_ANCHORS_MASKS = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] + YOLOV3_ANCHORS = [ np.array([(10, 13), (16, 30), (33, 23)], np.float32), np.array([(30, 61), (62, 45), (59, 119)], np.float32), diff --git a/tf2_yolov4/datasets.py b/tf2_yolov4/datasets.py new file mode 100644 index 0000000..a973ea1 --- /dev/null +++ b/tf2_yolov4/datasets.py @@ -0,0 +1,188 @@ +import numpy as np +import tensorflow as tf + +from tf2_yolov4.anchors import ( + YOLOV4_ANCHORS, + YOLOV4_ANCHORS_MASKS, + compute_normalized_anchors, +) + +BOUNDING_BOXES_FIXED_NUMBER = 60 + + +@tf.function +def transform_targets_for_output(y_true, grid_size, anchor_idxs): + # y_true: (N, boxes, (x1, y1, x2, y2, class, best_anchor)) + N = tf.shape(y_true)[0] + + # y_true_out: (N, grid, grid, anchors, [x, y, w, h, obj, class]) + y_true_out = tf.zeros((N, grid_size, grid_size, tf.shape(anchor_idxs)[0], 6)) + + anchor_idxs = tf.cast(anchor_idxs, tf.int32) + + indexes = tf.TensorArray(tf.int32, 1, dynamic_size=True) + updates = tf.TensorArray(tf.float32, 1, dynamic_size=True) + idx = 0 + for i in tf.range(N): + for j in tf.range(tf.shape(y_true)[1]): + if tf.equal(y_true[i][j][2], 0): + continue + anchor_eq = tf.equal(anchor_idxs, tf.cast(y_true[i][j][5], tf.int32)) + + if tf.reduce_any(anchor_eq): + box = y_true[i][j][0:4] + box_xy = (y_true[i][j][0:2] + y_true[i][j][2:4]) / 2 + + anchor_idx = tf.cast(tf.where(anchor_eq), tf.int32) + grid_xy = tf.cast(box_xy // (1 / grid_size), tf.int32) + + # grid[y][x][anchor] = (tx, ty, bw, bh, obj, class) + indexes = indexes.write( + idx, [i, grid_xy[1], grid_xy[0], anchor_idx[0][0]] + ) + updates = updates.write( + idx, [box[0], box[1], box[2], box[3], 1, y_true[i][j][4]] + ) + idx += 1 + + return tf.tensor_scatter_nd_update(y_true_out, indexes.stack(), updates.stack()) + + +def transform_targets(y_train, anchors, anchor_masks, size): + y_outs = [] + grid_size = size // 32 + + # calculate anchor index for true boxes + anchors = tf.cast(anchors, tf.float32) + anchor_area = anchors[..., 0] * anchors[..., 1] + box_wh = y_train[..., 2:4] - y_train[..., 0:2] + box_wh = tf.tile(tf.expand_dims(box_wh, -2), (1, 1, tf.shape(anchors)[0], 1)) + box_area = box_wh[..., 0] * box_wh[..., 1] + intersection = tf.minimum(box_wh[..., 0], anchors[..., 0]) * tf.minimum( + box_wh[..., 1], anchors[..., 1] + ) + iou = intersection / (box_area + anchor_area - intersection) + anchor_idx = tf.cast(tf.argmax(iou, axis=-1), tf.float32) + anchor_idx = tf.expand_dims(anchor_idx, axis=-1) + + y_train = tf.concat([y_train, anchor_idx], axis=-1) + + for anchor_idxs in anchor_masks: + y_outs.append(transform_targets_for_output(y_train, grid_size, anchor_idxs)) + grid_size *= 2 + + return tuple(y_outs) + + +def pad_bounding_boxes_to_fixed_number_of_bounding_boxes(bounding_boxes, pad_number): + box_number = tf.shape(bounding_boxes)[0] + paddings = [[0, pad_number - box_number], [0, 0]] + + return tf.pad(bounding_boxes, paddings, 
constant_values=0.0) + + +def random_flip_right_with_bounding_boxes(images, bounding_boxes): + apply_flip = tf.random.uniform(shape=[]) > 0.5 + if apply_flip: + images = tf.image.flip_left_right(images) + bounding_boxes = tf.stack( + [ + 1.0 - bounding_boxes[..., 2], + bounding_boxes[..., 1], + 1.0 - bounding_boxes[..., 0], + bounding_boxes[..., 3], + bounding_boxes[..., 4], + ], + axis=-1, + ) + + return images, bounding_boxes + + +def augment_images(images, bounding_boxes): + # Image transformations that do not affect bounding boxes + images = tf.image.random_hue(images, 0.15) + images = tf.image.random_brightness(images, 0.15) + + # Transformations that affect bounding boxes + images, bounding_boxes = random_flip_right_with_bounding_boxes( + images, bounding_boxes + ) + + return images, bounding_boxes + + +def prepare_dataset( + dataset, + shape, + batch_size, + shuffle=True, + apply_data_augmentation=False, + transform_to_bbox_by_stage=True, + pad_number_of_boxes=BOUNDING_BOXES_FIXED_NUMBER, + anchors=YOLOV4_ANCHORS, +): + normalized_anchors = compute_normalized_anchors(anchors, shape) + dataset = dataset.map(lambda el: (el["image"], el["objects"])) + dataset = dataset.map( + lambda image, object: ( + image, + tf.concat( + [ + tf.stack( + [ + object["bbox"][:, 1], + object["bbox"][:, 0], + object["bbox"][:, 3], + object["bbox"][:, 2], + ], + axis=-1, + ), + tf.expand_dims(tf.cast(object["label"], tf.float32), axis=-1), + ], + axis=-1, + ), + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, + ) + dataset = dataset.map( + lambda image, bounding_box: ( + tf.image.resize(image, shape[:2]) / 255.0, + bounding_box, + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, + ) + if apply_data_augmentation: + dataset = dataset.map( + augment_images, num_parallel_calls=tf.data.experimental.AUTOTUNE + ) + if shuffle: + dataset = dataset.shuffle(buffer_size=1000) + dataset = dataset.map( + lambda image, bounding_boxes: ( + image, + pad_bounding_boxes_to_fixed_number_of_bounding_boxes( + bounding_boxes, pad_number=pad_number_of_boxes + ), + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, + ) + dataset = dataset.batch(batch_size) + + if transform_to_bbox_by_stage: + dataset = dataset.map( + lambda image, bounding_box_with_class: ( + image, + transform_targets( # Comes straight from https://github.com/zzh8829/yolov3-tf2/ + bounding_box_with_class, + np.concatenate( + list(normalized_anchors), axis=0 + ), # Must concatenate because in zzh8829/yolov3-tf2, it's a list of anchors + YOLOV4_ANCHORS_MASKS, + shape[0], # Assumes square input + ), + ), + num_parallel_calls=tf.data.experimental.AUTOTUNE, + ) + dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE) + return dataset.repeat() diff --git a/tf2_yolov4/losses.py b/tf2_yolov4/losses.py new file mode 100644 index 0000000..7ca43a0 --- /dev/null +++ b/tf2_yolov4/losses.py @@ -0,0 +1,101 @@ +# TODO: Cite zzh/yolov3-tf2 +import tensorflow as tf + +from tf2_yolov4.heads.yolov3_head import yolov3_boxes_regression + + +def broadcast_iou(box_1, box_2): + # box_1: (..., (x1, y1, x2, y2)) + # box_2: (N, (x1, y1, x2, y2)) + + # broadcast boxes + box_1 = tf.expand_dims(box_1, -2) + box_2 = tf.expand_dims(box_2, 0) + # new_shape: (..., N, (x1, y1, x2, y2)) + new_shape = tf.broadcast_dynamic_shape(tf.shape(box_1), tf.shape(box_2)) + box_1 = tf.broadcast_to(box_1, new_shape) + box_2 = tf.broadcast_to(box_2, new_shape) + + int_w = tf.maximum( + tf.minimum(box_1[..., 2], box_2[..., 2]) + - tf.maximum(box_1[..., 0], box_2[..., 0]), + 0, + ) 
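# Minimal usage sketch for tf2_yolov4.datasets.prepare_dataset (illustration only;
# assumes the "voc" dataset is available through tensorflow_datasets):
import tensorflow_datasets as tfds

from tf2_yolov4.datasets import prepare_dataset

voc_dataset, infos = tfds.load("voc", with_info=True, shuffle_files=True)
ds_train = prepare_dataset(
    voc_dataset["train"],
    shape=(608, 608, 3),
    batch_size=8,
    shuffle=True,
    apply_data_augmentation=True,
)
# Each element is (images, (y_coarse, y_medium, y_fine)); per-stage targets have
# shape (batch, grid, grid, 3, 6): (8, 19, 19, 3, 6), (8, 38, 38, 3, 6), (8, 76, 76, 3, 6).
images, targets = next(iter(ds_train))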
+ int_h = tf.maximum( + tf.minimum(box_1[..., 3], box_2[..., 3]) + - tf.maximum(box_1[..., 1], box_2[..., 1]), + 0, + ) + int_area = int_w * int_h + box_1_area = (box_1[..., 2] - box_1[..., 0]) * (box_1[..., 3] - box_1[..., 1]) + box_2_area = (box_2[..., 2] - box_2[..., 0]) * (box_2[..., 3] - box_2[..., 1]) + return int_area / (box_1_area + box_2_area - int_area) + + +def YoloV3Loss(anchors, ignore_thresh=0.5): + def yolo_loss(y_true, y_pred): + # 1. transform all pred outputs + # y_pred: (batch_size, grid, grid, anchors, (x, y, w, h, obj, ...cls)) + pred_box, pred_obj, pred_class, pred_xywh = yolov3_boxes_regression( + y_pred, anchors + ) + pred_xy = pred_xywh[..., 0:2] + pred_wh = pred_xywh[..., 2:4] + + # 2. transform all true outputs + # y_true: (batch_size, grid, grid, anchors, (x1, y1, x2, y2, obj, cls)) + true_box, true_obj, true_class_idx = tf.split(y_true, (4, 1, 1), axis=-1) + true_xy = (true_box[..., 0:2] + true_box[..., 2:4]) / 2 + true_wh = true_box[..., 2:4] - true_box[..., 0:2] + + # give higher weights to small boxes + box_loss_scale = 2 - true_wh[..., 0] * true_wh[..., 1] + + # 3. inverting the pred box equations + grid_size = tf.shape(y_true)[1] + grid = tf.meshgrid(tf.range(grid_size), tf.range(grid_size)) + grid = tf.expand_dims(tf.stack(grid, axis=-1), axis=2) + true_xy = true_xy * tf.cast(grid_size, tf.float32) - tf.cast(grid, tf.float32) + true_wh = tf.math.log(true_wh / anchors) + true_wh = tf.where(tf.math.is_inf(true_wh), tf.zeros_like(true_wh), true_wh) + + # 4. calculate all masks + obj_mask = tf.squeeze(true_obj, -1) + # ignore false positive when iou is over threshold + best_iou = tf.map_fn( + lambda x: tf.reduce_max( + broadcast_iou(x[0], tf.boolean_mask(x[1], tf.cast(x[2], tf.bool))), + axis=-1, + ), + (pred_box, true_box, obj_mask), + tf.float32, + ) + ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32) + + # 5. calculate all losses + xy_loss = ( + obj_mask + * box_loss_scale + * tf.reduce_sum(tf.square(true_xy - pred_xy), axis=-1) + ) + wh_loss = ( + obj_mask + * box_loss_scale + * tf.reduce_sum(tf.square(true_wh - pred_wh), axis=-1) + ) + obj_loss = tf.keras.losses.binary_crossentropy(true_obj, pred_obj) + obj_loss = obj_mask * obj_loss + (1 - obj_mask) * ignore_mask * obj_loss + # TODO: use binary_crossentropy instead + class_loss = obj_mask * tf.keras.losses.sparse_categorical_crossentropy( + true_class_idx, pred_class + ) + + # 6. 
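# Small worked example for broadcast_iou and the ignore threshold above (added for
# clarity, not part of the patch):
#     box_1 = [0, 0, 2, 2], box_2 = [1, 0, 3, 2]
#     intersection = 1 * 2 = 2, union = 4 + 4 - 2 = 6, IoU = 2 / 6 ~ 0.33
# With ignore_thresh=0.5, an unmatched prediction whose best IoU against the
# ground-truth boxes is >= 0.5 is excluded from the no-object part of obj_loss.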
sum over (batch, gridx, gridy, anchors) => (batch, 1) + xy_loss = tf.reduce_sum(xy_loss, axis=(1, 2, 3)) + wh_loss = tf.reduce_sum(wh_loss, axis=(1, 2, 3)) + obj_loss = tf.reduce_sum(obj_loss, axis=(1, 2, 3)) + class_loss = tf.reduce_sum(class_loss, axis=(1, 2, 3)) + + return xy_loss + wh_loss + obj_loss + class_loss + + return yolo_loss From b639169704b6934c26d5231451af7c67891c4feb Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Fri, 24 Jul 2020 15:30:26 +0200 Subject: [PATCH 18/20] Reduce threshold --- scripts/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test.py b/scripts/test.py index 01d166d..b68ac2c 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -46,7 +46,7 @@ num_classes=len(CLASSES), training=False, yolo_max_boxes=100, - yolo_iou_threshold=0.5, + yolo_iou_threshold=0.3, yolo_score_threshold=0.15, ) model.load_weights("../yolov4_full.h5") From 085108ff992775f1e477ce71b8d942630464ceb8 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Fri, 24 Jul 2020 15:35:08 +0200 Subject: [PATCH 19/20] Last modifications --- scripts/train.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/train.py b/scripts/train.py index 29d5445..f8b059b 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -65,7 +65,6 @@ def launch_training(batch_size, weights_path, all_frozen_epoch_number, backbone_ for mask in YOLOV4_ANCHORS_MASKS ] - # Start training: 5 epochs with backbone + neck frozen for layer in ( model.get_layer("CSPDarknet53").layers + model.get_layer("YOLOv4_neck").layers ): @@ -88,7 +87,6 @@ def launch_training(batch_size, weights_path, all_frozen_epoch_number, backbone_ ], ) - # Keep training: 10 epochs with backbone frozen -- unfreeze neck for layer in model.get_layer("YOLOv4_neck").layers: layer.trainable = True model.compile(optimizer=optimizer, loss=loss) From 4cda74b98ebce74488793deff6d4deba9e87aa67 Mon Sep 17 00:00:00 2001 From: Raphael Meudec Date: Thu, 6 Aug 2020 08:53:24 +0200 Subject: [PATCH 20/20] Black + fix tests --- scripts/test.py | 125 ++++++++++++++++++++++++++++++++------ scripts/train.py | 63 ++++++++++++++++--- tests/conftest.py | 1 + tests/test_model.py | 4 +- tests/test_yolov3_head.py | 4 +- 5 files changed, 165 insertions(+), 32 deletions(-) diff --git a/scripts/test.py b/scripts/test.py index b68ac2c..f0ece6f 100644 --- a/scripts/test.py +++ b/scripts/test.py @@ -8,26 +8,109 @@ INPUT_SHAPE = (HEIGHT, WIDTH, 3) PASCAL_VOC_CLASSES = [ - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", - "car", "cat", "chair", "cow", "diningtable", "dog", "horse", - "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor", + "aeroplane", + "bicycle", + "bird", + "boat", + "bottle", + "bus", + "car", + "cat", + "chair", + "cow", + "diningtable", + "dog", + "horse", + "motorbike", + "person", + "pottedplant", + "sheep", + "sofa", + "train", + "tvmonitor", ] COCO_CLASSES = [ - 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', - 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', - 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', - 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', - 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', - 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', - 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', - 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', - 'chair', 'couch', 'potted 
plant', 'bed', 'dining table', - 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', - 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', - 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', - 'toothbrush' + "person", + "bicycle", + "car", + "motorcycle", + "airplane", + "bus", + "train", + "truck", + "boat", + "traffic light", + "fire hydrant", + "stop sign", + "parking meter", + "bench", + "bird", + "cat", + "dog", + "horse", + "sheep", + "cow", + "elephant", + "bear", + "zebra", + "giraffe", + "backpack", + "umbrella", + "handbag", + "tie", + "suitcase", + "frisbee", + "skis", + "snowboard", + "sports ball", + "kite", + "baseball bat", + "baseball glove", + "skateboard", + "surfboard", + "tennis racket", + "bottle", + "wine glass", + "cup", + "fork", + "knife", + "spoon", + "bowl", + "banana", + "apple", + "sandwich", + "orange", + "broccoli", + "carrot", + "hot dog", + "pizza", + "donut", + "cake", + "chair", + "couch", + "potted plant", + "bed", + "dining table", + "toilet", + "tv", + "laptop", + "mouse", + "remote", + "keyboard", + "cell phone", + "microwave", + "oven", + "toaster", + "sink", + "refrigerator", + "book", + "clock", + "vase", + "scissors", + "teddy bear", + "hair drier", + "toothbrush", ] # Switch this variable between PASCAL_VOC_CLASSES and COCO_CLASSES depending @@ -91,8 +174,12 @@ def plot_results(pil_img, boxes, scores, classes): ) text = f"{CLASSES[cl]}: {score:0.2f}" ax.text( - xmin, ymin, text, color="white", - fontsize=15, fontweight="bold", + xmin, + ymin, + text, + color="white", + fontsize=15, + fontweight="bold", bbox=dict(facecolor=color, alpha=0.7), ) plt.axis("off") diff --git a/scripts/train.py b/scripts/train.py index f8b059b..39f2e63 100644 --- a/scripts/train.py +++ b/scripts/train.py @@ -21,8 +21,17 @@ INPUT_SHAPE = (608, 608, 3) -def launch_training(batch_size, weights_path, all_frozen_epoch_number, backbone_frozen_epoch_number, num_epochs, dataset_name="voc"): - LOG_DIR = Path("./logs") / dataset_name / datetime.now().strftime("%m-%d-%Y %H:%M:%S") +def launch_training( + batch_size, + weights_path, + all_frozen_epoch_number, + backbone_frozen_epoch_number, + num_epochs, + dataset_name="voc", +): + LOG_DIR = ( + Path("./logs") / dataset_name / datetime.now().strftime("%m-%d-%Y %H:%M:%S") + ) voc_dataset, infos = tfds.load(dataset_name, with_info=True, shuffle_files=True) @@ -139,13 +148,49 @@ def launch_training(batch_size, weights_path, all_frozen_epoch_number, backbone_ @click.command() @click.option("--batch_size", type=int, default=16, help="Size of mini-batch") -@click.option("--weights_path", type=click.Path(exists=True), default=None, help="Path to pretrained weights") -@click.option("--all_frozen_epoch_number", type=int, default=20, help="Number of epochs to perform with backbone and neck frozen") -@click.option("--backbone_frozen_epoch_number", type=int, default=10, help="Number of epochs to perform with backbone frozen") -@click.option("--num_epochs", type=int, default=50, help="Total number of epochs to perform") -@click.option("--dataset_name", type=str, default="voc", help="Dataset used during training. 
Refer to TensorFlow Datasets documentation for dataset names.") -def launch_training_command(batch_size, weights_path, all_frozen_epoch_number, backbone_frozen_epoch_number, num_epochs, dataset_name): - launch_training(batch_size, weights_path, all_frozen_epoch_number, backbone_frozen_epoch_number, num_epochs, dataset_name) +@click.option( + "--weights_path", + type=click.Path(exists=True), + default=None, + help="Path to pretrained weights", +) +@click.option( + "--all_frozen_epoch_number", + type=int, + default=20, + help="Number of epochs to perform with backbone and neck frozen", +) +@click.option( + "--backbone_frozen_epoch_number", + type=int, + default=10, + help="Number of epochs to perform with backbone frozen", +) +@click.option( + "--num_epochs", type=int, default=50, help="Total number of epochs to perform" +) +@click.option( + "--dataset_name", + type=str, + default="voc", + help="Dataset used during training. Refer to TensorFlow Datasets documentation for dataset names.", +) +def launch_training_command( + batch_size, + weights_path, + all_frozen_epoch_number, + backbone_frozen_epoch_number, + num_epochs, + dataset_name, +): + launch_training( + batch_size, + weights_path, + all_frozen_epoch_number, + backbone_frozen_epoch_number, + num_epochs, + dataset_name, + ) if __name__ == "__main__": diff --git a/tests/conftest.py b/tests/conftest.py index c665729..bdc522e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,4 +1,5 @@ import pytest +import tensorflow as tf from tf2_yolov4.anchors import YOLOV4_ANCHORS from tf2_yolov4.backbones.csp_darknet53 import csp_darknet53 diff --git a/tests/test_model.py b/tests/test_model.py index 9e6be5c..2c19957 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -15,9 +15,9 @@ def test_model_should_predict_valid_shapes_at_training(yolov4_training, num_clas tf.random.uniform((n_images, 416, 416, 3)) ) - assert output_1.shape == (n_images, 52, 52, 3, expected_head_shape) + assert output_1.shape == (n_images, 13, 13, 3, expected_head_shape) assert output_2.shape == (n_images, 26, 26, 3, expected_head_shape) - assert output_3.shape == (n_images, 13, 13, 3, expected_head_shape) + assert output_3.shape == (n_images, 52, 52, 3, expected_head_shape) def test_model_should_predict_valid_shapes_at_inference( diff --git a/tests/test_yolov3_head.py b/tests/test_yolov3_head.py index 283867d..b24af98 100644 --- a/tests/test_yolov3_head.py +++ b/tests/test_yolov3_head.py @@ -6,9 +6,9 @@ def test_head_should_have_valid_output_shapes_training( expected_head_shape = (num_classes + objectness_score_shape) + bounding_box_shape output_1, output_2, output_3 = yolov3_head_416_training.outputs - assert output_1.shape.as_list() == [None, 52, 52, 3, expected_head_shape] + assert output_1.shape.as_list() == [None, 13, 13, 3, expected_head_shape] assert output_2.shape.as_list() == [None, 26, 26, 3, expected_head_shape] - assert output_3.shape.as_list() == [None, 13, 13, 3, expected_head_shape] + assert output_3.shape.as_list() == [None, 52, 52, 3, expected_head_shape] def test_head_should_have_valid_output_shapes_inference(