first commit

xuannianz · Nov 17, 2019 · b8f3831 · b8f3831
1 parent 5ca28e8
commit b8f3831
Show file tree

Hide file tree

Showing 35 changed files with 5,842 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -102,3 +102,7 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+logs/
+checkpoints/
+datasets/
+.idea/
diff --git a/README.md b/README.md
@@ -1 +1,25 @@
-# keras-GaussianYOLOv3
+# Gaussian YOLOv3
+This is an implementation of [Gaussian YOLOv3](https://arxiv.org/abs/1904.04620) on Keras and Tensorflow. The project is based on [qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3) and [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet). 
+Thanks for their hard work. 
+The official implementation is [jwchoi384/Gaussian_YOLOv3](https://github.com/jwchoi384/Gaussian_YOLOv3)
+
+As in the official implementation, there is a hyperparameter `sigma_const`. I got the best result on Pascal VOC when setting it to 0.3.
+The best result (0.8535) is a little better than the one (0.8500) I got with the original YOLOv3. I cannot determine whether the improvement comes from training randomness or from the Gaussian modeling.
+
+## Test
+1. I trained on Pascal VOC2012 trainval.txt + Pascal VOC2007 train.txt, and validated on Pascal VOC2007 val.txt. There are 14041 images for training and 2510 images for validation.
+2. The best evaluation result (score_threshold=0.01, mAP<sub>50</sub>, image_size=416, sigma_const=0.3) on VOC2007 test is 0.8535. 
+3. Pretrained official yolo weights on COCO and gaussian yolo weights on Pascal VOC are here. [baidu netdisk](https://pan.baidu.com/s/1ZgSPGt0UEWk3tDW16kbfPQ), extract code: qgnd
+4. `python3 yolo/inference.py` to test your image by specifying image path and model path there. 
+
+## Train
+### build dataset (Pascal VOC, other types please refer to [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet))
+* Download VOC2007 and VOC2012, copy all image files from VOC2007 to VOC2012.
+* Append VOC2007 train.txt to VOC2012 trainval.txt.
+* Overwrite VOC2012 val.txt by VOC2007 val.txt.
+### train
+* **STEP1**: `python3 train.py --freeze-body yolo --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-3, then stop when val mAP keeps dropping.
+* **STEP2**: `python3 train.py --snapshot <xxx> --freeze-body darknet --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-4, then stop when val mAP keeps dropping.
+* **STEP3**: `python3 train.py --snapshot <xxx> --freeze-body none --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-5, and then set lr=1e-6 when val mAP keeps dropping.
+## Evaluate
+* `python3 eval/common.py` to evaluate by specifying model path there.
diff --git a/__init__.py b/__init__.py
diff --git a/augmentor/__init__.py b/augmentor/__init__.py
diff --git a/augmentor/color.py b/augmentor/color.py
@@ -0,0 +1,178 @@
+import numpy as np
+from PIL import Image, ImageEnhance, ImageOps
+
+
+def autocontrast(image, prob=0.5):
+    """
+    Randomly apply PIL's autocontrast to an image array.
+
+    Args
+        image: Image array. Its last axis is reversed before it is handed to PIL and
+            reversed back afterwards (presumably BGR <-> RGB; confirm with callers).
+        prob: The image is returned untouched whenever a uniform sample from [0, 1)
+            is greater than this value.
+
+    Returns
+        The input image itself when the operation is skipped, otherwise the adjusted array.
+    """
+    if np.random.uniform() > prob:
+        return image
+    pil_image = Image.fromarray(image[..., ::-1])
+    adjusted = np.array(ImageOps.autocontrast(pil_image))
+    return adjusted[..., ::-1]
+
+
+def equalize(image, prob=0.5):
+    """
+    Randomly apply PIL's histogram equalization to an image array.
+
+    Args
+        image: Image array. Its last axis is reversed before it is handed to PIL and
+            reversed back afterwards (presumably BGR <-> RGB; confirm with callers).
+        prob: The image is returned untouched whenever a uniform sample from [0, 1)
+            is greater than this value.
+
+    Returns
+        The input image itself when the operation is skipped, otherwise the equalized array.
+    """
+    if np.random.uniform() > prob:
+        return image
+    pil_image = Image.fromarray(image[..., ::-1])
+    equalized = np.array(ImageOps.equalize(pil_image))
+    return equalized[..., ::-1]
+
+
+def solarize(image, prob=0.5, threshold=128.):
+    """
+    Randomly apply PIL's solarize operation to an image array.
+
+    Args
+        image: Image array. Its last axis is reversed before it is handed to PIL and
+            reversed back afterwards (presumably BGR <-> RGB; confirm with callers).
+        prob: The image is returned untouched whenever a uniform sample from [0, 1)
+            is greater than this value.
+        threshold: Value forwarded as `threshold` to ImageOps.solarize.
+
+    Returns
+        The input image itself when the operation is skipped, otherwise the solarized array.
+    """
+    if np.random.uniform() > prob:
+        return image
+    pil_image = Image.fromarray(image[..., ::-1])
+    solarized = np.array(ImageOps.solarize(pil_image, threshold=threshold))
+    return solarized[..., ::-1]
+
+
+def sharpness(image, prob=0.5, min=0, max=2, factor=None):
+    """
+    Randomly adjust the sharpness of an image array with PIL's ImageEnhance.Sharpness.
+
+    Args
+        image: Image array. Its last axis is reversed before it is handed to PIL and
+            reversed back afterwards (presumably BGR <-> RGB; confirm with callers).
+        prob: The image is returned untouched whenever a uniform sample from [0, 1)
+            is greater than this value.
+        min: Lower bound of the randomly drawn factor (used only when factor is None).
+        max: Upper bound of the randomly drawn factor (used only when factor is None).
+        factor: Fixed enhancement factor; when None one is drawn uniformly from [min, max).
+
+    Returns
+        The input image itself when the operation is skipped, otherwise the adjusted array.
+    """
+    if np.random.uniform() > prob:
+        return image
+    # 0 gives a slightly blurred image, 1 the original image, 2 a slightly sharper one.
+    strength = np.random.uniform(min, max) if factor is None else factor
+    pil_image = Image.fromarray(image[..., ::-1])
+    sharpened = ImageEnhance.Sharpness(pil_image).enhance(factor=strength)
+    return np.array(sharpened)[..., ::-1]
+
+
+def color(image, prob=0.5, min=0., max=1., factor=None):
+    """
+    Randomly adjust the color balance of an image array with PIL's ImageEnhance.Color.
+
+    Args
+        image: Image array. Its last axis is reversed before it is handed to PIL and
+            reversed back afterwards (presumably BGR <-> RGB; confirm with callers).
+        prob: The image is returned untouched whenever a uniform sample from [0, 1)
+            is greater than this value.
+        min: Lower bound of the randomly drawn factor (used only when factor is None).
+        max: Upper bound of the randomly drawn factor (used only when factor is None).
+        factor: Fixed enhancement factor; when None one is drawn uniformly from [min, max).
+
+    Returns
+        The input image itself when the operation is skipped, otherwise the adjusted array.
+    """
+    if np.random.uniform() > prob:
+        return image
+    # factor=0 yields a black-and-white image, factor=1 yields the original image.
+    strength = np.random.uniform(min, max) if factor is None else factor
+    pil_image = Image.fromarray(image[..., ::-1])
+    recolored = ImageEnhance.Color(pil_image).enhance(factor=strength)
+    return np.array(recolored)[..., ::-1]
+
+
+def contrast(image, prob=0.5, min=0.2, max=1., factor=None):
+    """
+    Randomly adjust the contrast of an image array with PIL's ImageEnhance.Contrast.
+
+    Args
+        image: Image array. Its last axis is reversed before it is handed to PIL and
+            reversed back afterwards (presumably BGR <-> RGB; confirm with callers).
+        prob: The image is returned untouched whenever a uniform sample from [0, 1)
+            is greater than this value.
+        min: Lower bound of the randomly drawn factor (used only when factor is None).
+        max: Upper bound of the randomly drawn factor (used only when factor is None).
+        factor: Fixed enhancement factor; when None one is drawn uniformly from [min, max).
+
+    Returns
+        The input image itself when the operation is skipped, otherwise the adjusted array.
+    """
+    if np.random.uniform() > prob:
+        return image
+    # factor=0 yields a solid gray image, factor=1 yields the original image.
+    strength = np.random.uniform(min, max) if factor is None else factor
+    pil_image = Image.fromarray(image[..., ::-1])
+    adjusted = ImageEnhance.Contrast(pil_image).enhance(factor=strength)
+    return np.array(adjusted)[..., ::-1]
+
+
+def brightness(image, prob=0.5, min=0.8, max=1., factor=None):
+    """
+    Randomly adjust the brightness of an image array with PIL's ImageEnhance.Brightness.
+
+    Args
+        image: Image array. Its last axis is reversed before it is handed to PIL and
+            reversed back afterwards (presumably BGR <-> RGB; confirm with callers).
+        prob: The image is returned untouched whenever a uniform sample from [0, 1)
+            is greater than this value.
+        min: Lower bound of the randomly drawn factor (used only when factor is None).
+        max: Upper bound of the randomly drawn factor (used only when factor is None).
+        factor: Fixed enhancement factor; when None one is drawn uniformly from [min, max).
+
+    Returns
+        The input image itself when the operation is skipped, otherwise the adjusted array.
+    """
+    if np.random.uniform() > prob:
+        return image
+    # factor=0 yields an all-black image, factor=1 yields the original image.
+    strength = np.random.uniform(min, max) if factor is None else factor
+    pil_image = Image.fromarray(image[..., ::-1])
+    adjusted = ImageEnhance.Brightness(pil_image).enhance(factor=strength)
+    return np.array(adjusted)[..., ::-1]
+
+
+class VisualEffect:
+    """
+    Struct holding parameters and applying image color transformation.
+
+    Every call runs two stages: one enhancement picked uniformly from color, contrast,
+    brightness and sharpness, then one operation picked uniformly from autocontrast,
+    equalize and solarize. The picked function still decides through its own
+    probability whether it actually modifies the image.
+
+    Args
+        color_factor: A factor for adjusting color (None: `color` draws one at random).
+        contrast_factor: A factor for adjusting contrast (None: `contrast` draws one at random).
+        brightness_factor: A factor for adjusting brightness (None: `brightness` draws one at random).
+        sharpness_factor: A factor for adjusting sharpness (None: `sharpness` draws one at random).
+        color_prob: Value forwarded as `prob` to `color`.
+        contrast_prob: Value forwarded as `prob` to `contrast`.
+        brightness_prob: Value forwarded as `prob` to `brightness`.
+        sharpness_prob: Value forwarded as `prob` to `sharpness`.
+        autocontrast_prob: Value forwarded as `prob` to `autocontrast`.
+        equalize_prob: Value forwarded as `prob` to `equalize`.
+        solarize_prob: Value forwarded as `prob` to `solarize`.
+        solarize_threshold: Value forwarded as `threshold` to `solarize`.
+    """
+
+    def __init__(
+            self,
+            color_factor=None,
+            contrast_factor=None,
+            brightness_factor=None,
+            sharpness_factor=None,
+            color_prob=0.5,
+            contrast_prob=0.5,
+            brightness_prob=0.5,
+            sharpness_prob=0.5,
+            autocontrast_prob=0.5,
+            equalize_prob=0.5,
+            solarize_prob=0.1,
+            solarize_threshold=128.,
+    ):
+        # Fixed factors (None means "draw at random inside the enhancement function").
+        self.color_factor = color_factor
+        self.contrast_factor = contrast_factor
+        self.brightness_factor = brightness_factor
+        self.sharpness_factor = sharpness_factor
+        # Per-function probabilities.
+        self.color_prob = color_prob
+        self.contrast_prob = contrast_prob
+        self.brightness_prob = brightness_prob
+        self.sharpness_prob = sharpness_prob
+        self.autocontrast_prob = autocontrast_prob
+        self.equalize_prob = equalize_prob
+        self.solarize_prob = solarize_prob
+        self.solarize_threshold = solarize_threshold
+
+    def __call__(self, image):
+        """
+        Apply a visual effect on the image.
+
+        Args
+            image: Image to adjust
+
+        Returns
+            The image after the selected enhancement and the selected operation.
+        """
+        # Stage 1: exactly one of the four factor-based enhancements.
+        enhancements = (
+            (color, self.color_prob, self.color_factor),
+            (contrast, self.contrast_prob, self.contrast_factor),
+            (brightness, self.brightness_prob, self.brightness_factor),
+            (sharpness, self.sharpness_prob, self.sharpness_factor),
+        )
+        enhance, enhance_prob, enhance_factor = enhancements[np.random.randint(0, 4)]
+        image = enhance(image, prob=enhance_prob, factor=enhance_factor)
+
+        # Stage 2: exactly one of the three ImageOps-based operations.
+        ops_id = np.random.randint(0, 3)
+        if ops_id == 0:
+            return autocontrast(image, prob=self.autocontrast_prob)
+        if ops_id == 1:
+            return equalize(image, prob=self.equalize_prob)
+        return solarize(image, prob=self.solarize_prob, threshold=self.solarize_threshold)
+
+
+if __name__ == '__main__':
+    # Visual smoke test: shows each training image (with its boxes drawn in red)
+    # side by side with the output of VisualEffect. Needs the project generators
+    # and a display.
+    from generators.pascal import PascalVocGenerator
+    import cv2
+
+    generator_options = dict(
+        skip_difficult=True,
+        anchors_path='voc_anchors_416.txt',
+        batch_size=1,
+    )
+    train_generator = PascalVocGenerator('datasets/VOC0712', 'trainval', **generator_options)
+    visual_effect = VisualEffect()
+    for index in range(train_generator.size()):
+        image = cv2.cvtColor(train_generator.load_image(index), cv2.COLOR_RGB2BGR)
+        annotations = train_generator.load_annotations(index)
+        for box in annotations['bboxes'].astype(np.int32):
+            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
+        # Keep an untouched copy so original and augmented versions can be compared.
+        src_image = image.copy()
+        image = visual_effect(image)
+        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
+        cv2.imshow('image', np.hstack([src_image, image]))
+        # Wait for a key press before moving on to the next image.
+        cv2.waitKey(0)
diff --git a/augmentor/misc.py b/augmentor/misc.py
@@ -0,0 +1,149 @@
+import cv2
+import numpy as np
+from augmentor.transform import translation_xy, change_transform_origin
+
+# Angles that rotate() picks from at random; the chosen value is passed as `angle`
+# to cv2.getRotationMatrix2D.
+ROTATE_DEGREE = [90, 180, 270]
+
+
+def rotate(image, boxes, prob=0.5):
+    """
+    Randomly rotate an image and its boxes by one of the angles in ROTATE_DEGREE.
+
+    Args
+        image: Image array; only its first two dimensions (height, width) are read here.
+        boxes: Iterable of (x1, y1, x2, y2) boxes in pixel coordinates of `image`.
+        prob: Probability of applying the rotation. The inputs are returned untouched
+            whenever a uniform sample from [0, 1) is greater than this value, the same
+            convention as the functions in augmentor/color.py.
+
+    Returns
+        A tuple (image, boxes). When the rotation is applied, the image is warped onto a
+        canvas with the rotated bounding dimensions (gray fill) and the boxes are returned
+        as an array of shape (N, 4) in the coordinates of the rotated image.
+    """
+    random_prob = np.random.uniform()
+    # NOTE: this comparison used to be `<`, which made `prob` the probability of
+    # *skipping* the rotation. Callers tuned for the old meaning should pass 1 - prob.
+    if random_prob > prob:
+        return image, boxes
+    rotate_degree = ROTATE_DEGREE[np.random.randint(0, len(ROTATE_DEGREE))]
+    h, w = image.shape[:2]
+    # Compute the rotation matrix.
+    M = cv2.getRotationMatrix2D(center=(w / 2, h / 2),
+                                angle=rotate_degree,
+                                scale=1)
+
+    # Get the sine and cosine from the rotation matrix.
+    abs_cos_angle = np.abs(M[0, 0])
+    abs_sin_angle = np.abs(M[0, 1])
+
+    # Compute the new bounding dimensions of the image.
+    # (Sketching the rotated rectangle shows why these formulas hold.)
+    new_w = int(h * abs_sin_angle + w * abs_cos_angle)
+    new_h = int(h * abs_cos_angle + w * abs_sin_angle)
+
+    # Adjust the rotation matrix to take into account the translation.
+    # The offset of the center point is used as the offset of every pixel.
+    M[0, 2] += new_w // 2 - w // 2
+    M[1, 2] += new_h // 2 - h // 2
+
+    # Rotate the image.
+    image = cv2.warpAffine(image, M=M, dsize=(new_w, new_h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT,
+                           borderValue=(128, 128, 128))
+
+    new_boxes = []
+    for box in boxes:
+        x1, y1, x2, y2 = box
+        # Map the four corners of the box through the affine matrix.
+        points = M.dot([
+            [x1, x2, x1, x2],
+            [y1, y2, y2, y1],
+            [1, 1, 1, 1],
+        ])
+
+        # Extract the min and max corners again. All angles in ROTATE_DEGREE are
+        # multiples of 90 degrees, so the corners share two x values and two y values;
+        # averaging the two smallest / two largest sorted coordinates recovers them.
+        sorted_points = np.sort(points, axis=1)
+        min_x, min_y = sorted_points[:, :2].mean(axis=1)
+        max_x, max_y = sorted_points[:, 2:].mean(axis=1)
+
+        new_boxes.append([min_x, min_y, max_x, max_y])
+    # reshape keeps the (N, 4) layout even when there are no boxes at all.
+    boxes = np.array(new_boxes).reshape(-1, 4)
+    return image, boxes
+
+
+def crop(image, boxes, prob=0.5):
+    """
+    Randomly crop an image to a window that still contains every box.
+
+    Each side of the window is drawn at random between the union of all boxes and,
+    at most, half of the margin separating that union from the image border.
+
+    Args
+        image: Image array; its first two dimensions are treated as (height, width).
+        boxes: Array of shape (N, 4) holding (x1, y1, x2, y2) boxes in pixel coordinates.
+            It is never modified in place.
+        prob: Probability of applying the crop. The inputs are returned untouched
+            whenever a uniform sample from [0, 1) is greater than this value, the same
+            convention as the functions in augmentor/color.py.
+
+    Returns
+        A tuple (image, boxes): the cropped image and a new array with the boxes shifted
+        into the coordinates of the crop. The inputs are returned as they are when the
+        crop is skipped or when there are no boxes.
+    """
+    random_prob = np.random.uniform()
+    # NOTE: this comparison used to be `<`, which made `prob` the probability of
+    # *skipping* the crop.
+    if random_prob > prob:
+        return image, boxes
+    boxes = np.asarray(boxes)
+    if boxes.size == 0:
+        # Without boxes there is no region to preserve, so leave the image alone.
+        return image, boxes
+    h, w = image.shape[:2]
+    # Union of all boxes, rounded outwards to whole pixels so that the values can be
+    # used as randint bounds and slice indices even when the boxes are floats.
+    min_x1, min_y1 = (int(np.floor(v)) for v in boxes[:, :2].min(axis=0))
+    max_x2, max_y2 = (int(np.ceil(v)) for v in boxes[:, 2:4].max(axis=0))
+    # The max(..., 1) / max(..., max + 1) guards keep every randint range non-empty.
+    random_x1 = np.random.randint(0, max(min_x1 // 2, 1))
+    random_y1 = np.random.randint(0, max(min_y1 // 2, 1))
+    random_x2 = np.random.randint(max_x2, max(min(w, max_x2 + (w - max_x2) // 2), max_x2 + 1))
+    random_y2 = np.random.randint(max_y2, max(min(h, max_y2 + (h - max_y2) // 2), max_y2 + 1))
+    image = image[random_y1:random_y2, random_x1:random_x2]
+    # Shift a copy so that the caller's array keeps its original coordinates.
+    boxes = boxes.copy()
+    boxes[:, [0, 2]] -= random_x1
+    boxes[:, [1, 3]] -= random_y1
+    return image, boxes
+
+
+def translate(image, boxes, prob=0.5):
+    """
+    Randomly translate an image and its boxes.
+
+    The bounds handed to translation_xy are derived from the union of all boxes: at most
+    half of the margin between that union and the image border in each direction.
+    translation_xy and change_transform_origin come from augmentor.transform and are not
+    visible here; presumably translation_xy samples a 3x3 homogeneous translation matrix
+    within [min, max] (TODO confirm there).
+
+    Args
+        image: Image array; its first two dimensions are treated as (height, width).
+        boxes: Array of shape (N, 4) holding (x1, y1, x2, y2) boxes in pixel coordinates.
+        prob: Probability of applying the translation. The inputs are returned untouched
+            whenever a uniform sample from [0, 1) is greater than this value, the same
+            convention as the functions in augmentor/color.py.
+
+    Returns
+        A tuple (image, boxes): the warped image (same size, gray fill) and a new array
+        with the transformed boxes. The inputs are returned as they are when the
+        translation is skipped or when there are no boxes.
+    """
+    random_prob = np.random.uniform()
+    # NOTE: this comparison used to be `<`, which made `prob` the probability of
+    # *skipping* the translation.
+    if random_prob > prob:
+        return image, boxes
+    if np.asarray(boxes).size == 0:
+        # The translation range is derived from the boxes; with none there is nothing
+        # to derive it from (np.min over an empty axis would raise).
+        return image, boxes
+    h, w = image.shape[:2]
+    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
+    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
+    translation_matrix = translation_xy(min=(min(-min_x1 // 2, 0), min(-min_y1 // 2, 0)),
+                                        max=(max((w - max_x2) // 2, 1), max((h - max_y2) // 2, 1)), prob=1.)
+    translation_matrix = change_transform_origin(translation_matrix, (w / 2, h / 2))
+    image = cv2.warpAffine(
+        image,
+        # warpAffine only needs the leading 2x3 part of the matrix.
+        translation_matrix[:2, :],
+        dsize=(w, h),
+        flags=cv2.INTER_CUBIC,
+        borderMode=cv2.BORDER_CONSTANT,
+        borderValue=(128, 128, 128),
+    )
+    new_boxes = []
+    for box in boxes:
+        x1, y1, x2, y2 = box
+        # Map the four corners of the box through the transform.
+        points = translation_matrix.dot([
+            [x1, x2, x1, x2],
+            [y1, y2, y2, y1],
+            [1, 1, 1, 1],
+        ])
+        # The first two rows hold the x and y coordinates of the mapped corners.
+        min_x, min_y = np.min(points, axis=1)[:2]
+        max_x, max_y = np.max(points, axis=1)[:2]
+        new_boxes.append([min_x, min_y, max_x, max_y])
+    boxes = np.array(new_boxes)
+    return image, boxes
+
+
+class MiscEffect:
+    """
+    Struct holding the probabilities of the geometric augmentations and applying them in order.
+
+    Args
+        rotate_prob: Value forwarded as `prob` to `rotate`.
+        crop_prob: Value forwarded as `prob` to `crop`.
+        translate_prob: Value forwarded as `prob` to `translate`.
+    """
+
+    def __init__(self, rotate_prob=0.9, crop_prob=0.5, translate_prob=0.5):
+        self.rotate_prob, self.crop_prob, self.translate_prob = rotate_prob, crop_prob, translate_prob
+
+    def __call__(self, image, boxes):
+        """
+        Run rotate, crop and translate, in that order, on an image and its boxes.
+
+        Args
+            image: Image to transform.
+            boxes: Boxes belonging to the image.
+
+        Returns
+            A tuple (image, boxes) as produced by the last step.
+        """
+        pipeline = (
+            (rotate, self.rotate_prob),
+            (crop, self.crop_prob),
+            (translate, self.translate_prob),
+        )
+        for step, step_prob in pipeline:
+            image, boxes = step(image, boxes, prob=step_prob)
+        return image, boxes
+
+
+if __name__ == '__main__':
+    # Visual smoke test: shows each training image with its original boxes (red) and,
+    # in a second window, the output of MiscEffect with the transformed boxes (green).
+    # Needs the project generators and a display.
+    from generators.pascal import PascalVocGenerator
+
+    generator_options = dict(
+        skip_difficult=True,
+        anchors_path='voc_anchors_416.txt',
+        batch_size=1,
+    )
+    train_generator = PascalVocGenerator('datasets/VOC0712', 'trainval', **generator_options)
+    misc_effect = MiscEffect()
+    for index in range(train_generator.size()):
+        image = cv2.cvtColor(train_generator.load_image(index), cv2.COLOR_RGB2BGR)
+        boxes = train_generator.load_annotations(index)['bboxes']
+        for box in boxes.astype(np.int32):
+            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
+        # Show an untouched copy of the annotated source image.
+        src_image = image.copy()
+        cv2.namedWindow('src_image', cv2.WINDOW_NORMAL)
+        cv2.imshow('src_image', src_image)
+        image, boxes = misc_effect(image, boxes)
+        for box in boxes.astype(np.int32):
+            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 1)
+        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
+        cv2.imshow('image', image)
+        # Wait for a key press before moving on to the next image.
+        cv2.waitKey(0)