Commit: first commit
xuannianz committed Nov 17, 2019
1 parent 5ca28e8 commit b8f3831
Showing 35 changed files with 5,842 additions and 1 deletion.
4 changes: 4 additions & 0 deletions .gitignore
@@ -102,3 +102,7 @@ venv.bak/

# mypy
.mypy_cache/
logs/
checkpoints/
datasets/
.idea/
26 changes: 25 additions & 1 deletion README.md
@@ -1 +1,25 @@
# keras-GaussianYOLOv3
# Gaussian YOLOv3
This is an implementation of [Gaussian YOLOv3](https://arxiv.org/abs/1904.04620) in Keras and TensorFlow. The project is based on [qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3) and [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet).
Thanks for their hard work.
The official implementation is [jwchoi384/Gaussian_YOLOv3](https://github.com/jwchoi384/Gaussian_YOLOv3).

As in the official implementation, there is a hyperparameter `sigma_const`. I got the best result on Pascal VOC when setting it to 0.3.
The best result (0.8535) is slightly better than the one (0.8500) I got with the original YOLOv3. I cannot tell whether the improvement comes from training randomness or from the Gaussian logic itself.
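
For intuition, here is a minimal NumPy sketch of the per-coordinate Gaussian negative log-likelihood that Gaussian YOLOv3 trains each box coordinate with. How exactly `sigma_const` enters the loss below is my assumption for illustration, not necessarily this repo's exact code:

```python
import numpy as np

def gaussian_nll(mu, sigma, gt, sigma_const=0.3, eps=1e-9):
    # Each box coordinate is predicted as a Gaussian (mu, sigma) and trained
    # with its negative log-likelihood. Adding sigma_const to the predicted
    # sigma is an assumption here; it acts as a floor on the uncertainty.
    sigma = sigma + sigma_const
    return 0.5 * np.log(2.0 * np.pi * sigma ** 2 + eps) + (gt - mu) ** 2 / (2.0 * sigma ** 2 + eps)
```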

## Test
1. I trained on Pascal VOC2012 trainval.txt plus Pascal VOC2007 train.txt, and validated on Pascal VOC2007 val.txt. That gives 14041 images for training and 2510 images for validation.
2. The best evaluation result (score_threshold=0.01, mAP<sub>50</sub>, image_size=416, sigma_const=0.3) on VOC2007 test is 0.8535.
3. Pretrained official YOLO weights on COCO and Gaussian YOLO weights on Pascal VOC are available at [baidu netdisk](https://pan.baidu.com/s/1ZgSPGt0UEWk3tDW16kbfPQ), extract code: qgnd.
4. Run `python3 yolo/inference.py` to test your own image, specifying the image path and model path in that script.

## Train
### build dataset (Pascal VOC; for other dataset types, please refer to [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet))
* Download VOC2007 and VOC2012, then copy all image files from VOC2007 into VOC2012.
* Append VOC2007 train.txt to VOC2012 trainval.txt.
* Overwrite VOC2012 val.txt with VOC2007 val.txt. A sketch of these steps follows the list.
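
A minimal Python sketch of the three steps above, assuming the standard Pascal VOC layout (`JPEGImages`, `ImageSets/Main`). If your generator also needs the XML files, copy the `Annotations` directory the same way:

```python
import shutil
from pathlib import Path

voc2007 = Path('datasets/VOC2007')
voc2012 = Path('datasets/VOC2012')

# 1. Copy all VOC2007 image files into VOC2012.
for jpg in (voc2007 / 'JPEGImages').glob('*.jpg'):
    shutil.copy(jpg, voc2012 / 'JPEGImages')

# 2. Append VOC2007 train.txt to VOC2012 trainval.txt
#    (assumes trainval.txt ends with a newline).
main_2007 = voc2007 / 'ImageSets' / 'Main'
main_2012 = voc2012 / 'ImageSets' / 'Main'
with open(main_2012 / 'trainval.txt', 'a') as trainval:
    trainval.write((main_2007 / 'train.txt').read_text())

# 3. Overwrite VOC2012 val.txt with VOC2007 val.txt.
shutil.copy(main_2007 / 'val.txt', main_2012 / 'val.txt')
```
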
### train
* **STEP1**: `python3 train.py --freeze-body yolo --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-3, then stop when val mAP keeps dropping.
* **STEP2**: `python3 train.py --snapshot <xxx> --freeze-body darknet --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to resume training with lr=1e-4, then stop when val mAP keeps dropping.
* **STEP3**: `python3 train.py --snapshot <xxx> --freeze-body none --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to resume training with lr=1e-5, then set lr=1e-6 when val mAP keeps dropping. The sketch below illustrates what the staged freezing amounts to.
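
The three steps progressively unfreeze the network while lowering the learning rate. Here is a Keras-style sketch of the idea; the layer indices and the compile pattern are placeholders in the spirit of qqwweee/keras-yolo3, not necessarily this repo's exact code:

```python
from tensorflow import keras

def set_stage(model, freeze_body, lr):
    # Hypothetical illustration of the --freeze-body stages.
    if freeze_body == 'yolo':
        first_trainable = len(model.layers) - 3  # placeholder: train only the output convs
    elif freeze_body == 'darknet':
        first_trainable = 185                    # placeholder: unfreeze everything after the backbone
    else:  # 'none'
        first_trainable = 0                      # train all layers
    for i, layer in enumerate(model.layers):
        layer.trainable = i >= first_trainable
    # keras-yolo3 style: the loss tensor is computed inside the model itself.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=lr),
                  loss=lambda y_true, y_pred: y_pred)
```
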
## Evaluate
* Run `python3 eval/common.py` to evaluate, specifying the model path in that script.
Empty file added __init__.py
Empty file.
Empty file added augmentor/__init__.py
Empty file.
178 changes: 178 additions & 0 deletions augmentor/color.py
@@ -0,0 +1,178 @@
import numpy as np
from PIL import Image, ImageEnhance, ImageOps


def autocontrast(image, prob=0.5):
    random_prob = np.random.uniform()
    if random_prob > prob:
        return image
    image = Image.fromarray(image[..., ::-1])
    image = ImageOps.autocontrast(image)
    image = np.array(image)[..., ::-1]
    return image


def equalize(image, prob=0.5):
    random_prob = np.random.uniform()
    if random_prob > prob:
        return image
    image = Image.fromarray(image[..., ::-1])
    image = ImageOps.equalize(image)
    image = np.array(image)[..., ::-1]
    return image


def solarize(image, prob=0.5, threshold=128.):
    random_prob = np.random.uniform()
    if random_prob > prob:
        return image
    image = Image.fromarray(image[..., ::-1])
    image = ImageOps.solarize(image, threshold=threshold)
    image = np.array(image)[..., ::-1]
    return image


def sharpness(image, prob=0.5, min=0, max=2, factor=None):
    random_prob = np.random.uniform()
    if random_prob > prob:
        return image
    if factor is None:
        # factor=0 blurs the image a little, factor=1 returns the original, factor=2 sharpens it a little
        factor = np.random.uniform(min, max)
    image = Image.fromarray(image[..., ::-1])
    enhancer = ImageEnhance.Sharpness(image)
    image = enhancer.enhance(factor=factor)
    return np.array(image)[..., ::-1]


def color(image, prob=0.5, min=0., max=1., factor=None):
    random_prob = np.random.uniform()
    if random_prob > prob:
        return image
    if factor is None:
        # factor=0 returns a black-and-white image, factor=1 returns the original
        factor = np.random.uniform(min, max)
    image = Image.fromarray(image[..., ::-1])
    enhancer = ImageEnhance.Color(image)
    image = enhancer.enhance(factor=factor)
    return np.array(image)[..., ::-1]


def contrast(image, prob=0.5, min=0.2, max=1., factor=None):
    random_prob = np.random.uniform()
    if random_prob > prob:
        return image
    if factor is None:
        # factor=0 returns a solid gray image, factor=1 returns the original
        factor = np.random.uniform(min, max)
    image = Image.fromarray(image[..., ::-1])
    enhancer = ImageEnhance.Contrast(image)
    image = enhancer.enhance(factor=factor)
    return np.array(image)[..., ::-1]


def brightness(image, prob=0.5, min=0.8, max=1., factor=None):
    random_prob = np.random.uniform()
    if random_prob > prob:
        return image
    if factor is None:
        # factor=0 returns an all-black image, factor=1 returns the original
        factor = np.random.uniform(min, max)
    image = Image.fromarray(image[..., ::-1])
    enhancer = ImageEnhance.Brightness(image)
    image = enhancer.enhance(factor=factor)
    return np.array(image)[..., ::-1]


class VisualEffect:
    """
    Struct holding parameters and applying image color transformation.

    Args
        solarize_threshold: Pixel values above this threshold are inverted by solarize.
        color_factor: A factor for adjusting color.
        contrast_factor: A factor for adjusting contrast.
        brightness_factor: A factor for adjusting brightness.
        sharpness_factor: A factor for adjusting sharpness.
    """

    def __init__(
            self,
            color_factor=None,
            contrast_factor=None,
            brightness_factor=None,
            sharpness_factor=None,
            color_prob=0.5,
            contrast_prob=0.5,
            brightness_prob=0.5,
            sharpness_prob=0.5,
            autocontrast_prob=0.5,
            equalize_prob=0.5,
            solarize_prob=0.1,
            solarize_threshold=128.,
    ):
        self.color_factor = color_factor
        self.contrast_factor = contrast_factor
        self.brightness_factor = brightness_factor
        self.sharpness_factor = sharpness_factor
        self.color_prob = color_prob
        self.contrast_prob = contrast_prob
        self.brightness_prob = brightness_prob
        self.sharpness_prob = sharpness_prob
        self.autocontrast_prob = autocontrast_prob
        self.equalize_prob = equalize_prob
        self.solarize_prob = solarize_prob
        self.solarize_threshold = solarize_threshold

    def __call__(self, image):
        """
        Apply a visual effect on the image.

        Args
            image: Image to adjust
        """
        # Apply at most one of the factor-based enhancements.
        random_enhance_id = np.random.randint(0, 4)
        if random_enhance_id == 0:
            image = color(image, prob=self.color_prob, factor=self.color_factor)
        elif random_enhance_id == 1:
            image = contrast(image, prob=self.contrast_prob, factor=self.contrast_factor)
        elif random_enhance_id == 2:
            image = brightness(image, prob=self.brightness_prob, factor=self.brightness_factor)
        else:
            image = sharpness(image, prob=self.sharpness_prob, factor=self.sharpness_factor)

        # Then apply at most one of the parameter-free PIL ops.
        random_ops_id = np.random.randint(0, 3)
        if random_ops_id == 0:
            image = autocontrast(image, prob=self.autocontrast_prob)
        elif random_ops_id == 1:
            image = equalize(image, prob=self.equalize_prob)
        else:
            image = solarize(image, prob=self.solarize_prob, threshold=self.solarize_threshold)
        return image


if __name__ == '__main__':
    from generators.pascal import PascalVocGenerator
    import cv2

    train_generator = PascalVocGenerator(
        'datasets/VOC0712',
        'trainval',
        skip_difficult=True,
        anchors_path='voc_anchors_416.txt',
        batch_size=1
    )
    visual_effect = VisualEffect()
    for i in range(train_generator.size()):
        image = train_generator.load_image(i)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        annotations = train_generator.load_annotations(i)
        boxes = annotations['bboxes']
        for box in boxes.astype(np.int32):
            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
        src_image = image.copy()
        image = visual_effect(image)
        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.imshow('image', np.concatenate([src_image, image], axis=1))
        cv2.waitKey(0)
149 changes: 149 additions & 0 deletions augmentor/misc.py
@@ -0,0 +1,149 @@
import cv2
import numpy as np
from augmentor.transform import translation_xy, change_transform_origin

ROTATE_DEGREE = [90, 180, 270]


def rotate(image, boxes, prob=0.5):
    # Note: `prob` here is the probability of returning the input unchanged
    # (the opposite convention from color.py), so with the default
    # rotate_prob=0.9 a rotation is applied only 10% of the time.
    random_prob = np.random.uniform()
    if random_prob < prob:
        return image, boxes
    rotate_degree = ROTATE_DEGREE[np.random.randint(0, 3)]
    h, w = image.shape[:2]
    # Compute the rotation matrix.
    M = cv2.getRotationMatrix2D(center=(w / 2, h / 2),
                                angle=rotate_degree,
                                scale=1)

    # Get the sine and cosine from the rotation matrix.
    abs_cos_angle = np.abs(M[0, 0])
    abs_sin_angle = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image; sketching the rotated
    # rectangle shows its axis-aligned bounding box has these dimensions.
    new_w = int(h * abs_sin_angle + w * abs_cos_angle)
    new_h = int(h * abs_cos_angle + w * abs_sin_angle)

    # Adjust the rotation matrix to take the translation into account:
    # the offset of the center point is applied to all pixels.
    M[0, 2] += new_w // 2 - w // 2
    M[1, 2] += new_h // 2 - h // 2

    # Rotate the image.
    image = cv2.warpAffine(image, M=M, dsize=(new_w, new_h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT,
                           borderValue=(128, 128, 128))

    new_boxes = []
    for box in boxes:
        x1, y1, x2, y2 = box
        points = M.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])

        # Extract the min and max corners again.
        min_xy = np.sort(points, axis=1)[:, :2]
        min_x = np.mean(min_xy[0])
        min_y = np.mean(min_xy[1])
        max_xy = np.sort(points, axis=1)[:, 2:]
        max_x = np.mean(max_xy[0])
        max_y = np.mean(max_xy[1])

        new_boxes.append([min_x, min_y, max_x, max_y])
    boxes = np.array(new_boxes)
    return image, boxes


def crop(image, boxes, prob=0.5):
    # `prob` is the probability of returning the input unchanged.
    random_prob = np.random.uniform()
    if random_prob < prob:
        return image, boxes
    h, w = image.shape[:2]
    # Crop outside the union of all boxes so that no annotation is cut off.
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
    random_x1 = np.random.randint(0, max(min_x1 // 2, 1))
    random_y1 = np.random.randint(0, max(min_y1 // 2, 1))
    random_x2 = np.random.randint(max_x2, max(min(w, max_x2 + (w - max_x2) // 2), max_x2 + 1))
    random_y2 = np.random.randint(max_y2, max(min(h, max_y2 + (h - max_y2) // 2), max_y2 + 1))
    image = image[random_y1:random_y2, random_x1:random_x2]
    boxes[:, [0, 2]] = boxes[:, [0, 2]] - random_x1
    boxes[:, [1, 3]] = boxes[:, [1, 3]] - random_y1
    return image, boxes


def translate(image, boxes, prob=0.5):
    # `prob` is the probability of returning the input unchanged.
    random_prob = np.random.uniform()
    if random_prob < prob:
        return image, boxes
    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
    # Limit the translation so that no box is shifted out of the image.
    translation_matrix = translation_xy(min=(min(-min_x1 // 2, 0), min(-min_y1 // 2, 0)),
                                        max=(max((w - max_x2) // 2, 1), max((h - max_y2) // 2, 1)), prob=1.)
    translation_matrix = change_transform_origin(translation_matrix, (w / 2, h / 2))
    image = cv2.warpAffine(
        image,
        # warpAffine only needs the top 2x3 part of the 3x3 matrix
        translation_matrix[:2, :],
        dsize=(w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(128, 128, 128),
    )
    new_boxes = []
    for box in boxes:
        x1, y1, x2, y2 = box
        points = translation_matrix.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])
        min_x, min_y = np.min(points, axis=1)[:2]
        max_x, max_y = np.max(points, axis=1)[:2]
        new_boxes.append([min_x, min_y, max_x, max_y])
    boxes = np.array(new_boxes)
    return image, boxes


class MiscEffect:
    def __init__(self, rotate_prob=0.9, crop_prob=0.5, translate_prob=0.5):
        self.rotate_prob = rotate_prob
        self.crop_prob = crop_prob
        self.translate_prob = translate_prob

    def __call__(self, image, boxes):
        image, boxes = rotate(image, boxes, prob=self.rotate_prob)
        image, boxes = crop(image, boxes, prob=self.crop_prob)
        image, boxes = translate(image, boxes, prob=self.translate_prob)
        return image, boxes


if __name__ == '__main__':
    from generators.pascal import PascalVocGenerator

    train_generator = PascalVocGenerator(
        'datasets/VOC0712',
        'trainval',
        skip_difficult=True,
        anchors_path='voc_anchors_416.txt',
        batch_size=1
    )
    misc_effect = MiscEffect()
    for i in range(train_generator.size()):
        image = train_generator.load_image(i)
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
        annotations = train_generator.load_annotations(i)
        boxes = annotations['bboxes']
        for box in boxes.astype(np.int32):
            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
        src_image = image.copy()
        cv2.namedWindow('src_image', cv2.WINDOW_NORMAL)
        cv2.imshow('src_image', src_image)
        image, boxes = misc_effect(image, boxes)
        for box in boxes.astype(np.int32):
            cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 255, 0), 1)
        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.imshow('image', image)
        cv2.waitKey(0)