-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
35 changed files
with
5,842 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -102,3 +102,7 @@ venv.bak/ | |
|
||
# mypy | ||
.mypy_cache/ | ||
logs/ | ||
checkpoints/ | ||
datasets/ | ||
.idea/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,25 @@ | ||
# keras-GaussianYOLOv3 | ||
# Gaussian YOLOv3 | ||
This is an implementation of [Gaussian YOLOv3](https://arxiv.org/abs/1904.04620) on Keras and Tensorflow. The project is based on [qqwweee/keras-yolo3](https://github.com/qqwweee/keras-yolo3) and [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet). | ||
Thanks for their hard work. | ||
The official implementation is [jwchoi384/Gaussian_YOLOv3](https://github.com/jwchoi384/Gaussian_YOLOv3) | ||
|
||
As in the official implementation, there is a hyperparameter `sigma_const`. I got the best result on Pascal VOC when setting it to 0.3. | ||
The best result (0.8535) is a little better than the one (0.8500) I got with the original YOLOv3. I cannot determine whether the improvement comes from randomness in training or from the Gaussian logic. | ||
|
||
## Test | ||
1. I trained on Pascal VOC2012 trainval.txt + Pascal VOC2007 train.txt, and validated on Pascal VOC2007 val.txt. There are 14041 images for training and 2510 images for validation. | ||
2. The best evaluation result (score_threshold=0.01, mAP<sub>50</sub>, image_size=416, sigma_const=0.3) on VOC2007 test is 0.8535. | ||
3. Pretrained official yolo weights on COCO and gaussian yolo weights on Pascal VOC are here. [baidu netdisk](https://pan.baidu.com/s/1ZgSPGt0UEWk3tDW16kbfPQ), extract code: qgnd | ||
4. `python3 yolo/inference.py` to test your image by specifying image path and model path there. | ||
|
||
## Train | ||
### build dataset (Pascal VOC, other types please refer to [fizyr/keras-retinanet](https://github.com/fizyr/keras-retinanet)) | ||
* Download VOC2007 and VOC2012, copy all image files from VOC2007 to VOC2012. | ||
* Append VOC2007 train.txt to VOC2012 trainval.txt. | ||
* Overwrite VOC2012 val.txt by VOC2007 val.txt. | ||
### train | ||
* **STEP1**: `python3 train.py --freeze-body yolo --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-3, then stop when val mAP keeps dropping. | ||
* **STEP2**: `python3 train.py --snapshot <xxx> --freeze-body darknet --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-4, then stop when val mAP keeps dropping. | ||
* **STEP3**: `python3 train.py --snapshot <xxx> --freeze-body none --gpu 0 --batch-size 32 --random-transform pascal datasets/VOC2012` to start training with lr=1e-5, and then set lr=1e-6 when val mAP keeps dropping. | ||
## Evaluate | ||
* `python3 eval/common.py` to evaluate by specifying model path there. |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,178 @@ | ||
import numpy as np | ||
from PIL import Image, ImageEnhance, ImageOps | ||
|
||
|
||
def autocontrast(image, prob=0.5):
    """
    Randomly apply PIL's `ImageOps.autocontrast` to an image array.

    One uniform sample is drawn per call; the input is returned untouched
    when that sample is greater than `prob`.

    Args
        image: Image array. Its last axis is reversed before it is handed to
            PIL and reversed again afterwards (presumably BGR <-> RGB;
            confirm against the caller).
        prob: Probability of applying the effect.

    Returns
        The very same `image` object when the effect is skipped, otherwise a
        new array holding the processed image.
    """
    if np.random.uniform() > prob:
        return image
    processed = ImageOps.autocontrast(Image.fromarray(image[..., ::-1]))
    return np.array(processed)[..., ::-1]
|
||
|
||
def equalize(image, prob=0.5):
    """
    Randomly apply PIL's `ImageOps.equalize` to an image array.

    One uniform sample is drawn per call; the input is returned untouched
    when that sample is greater than `prob`.

    Args
        image: Image array. Its last axis is reversed before it is handed to
            PIL and reversed again afterwards (presumably BGR <-> RGB;
            confirm against the caller).
        prob: Probability of applying the effect.

    Returns
        The very same `image` object when the effect is skipped, otherwise a
        new array holding the processed image.
    """
    if np.random.uniform() > prob:
        return image
    processed = ImageOps.equalize(Image.fromarray(image[..., ::-1]))
    return np.array(processed)[..., ::-1]
|
||
|
||
def solarize(image, prob=0.5, threshold=128.):
    """
    Randomly apply PIL's `ImageOps.solarize` to an image array.

    One uniform sample is drawn per call; the input is returned untouched
    when that sample is greater than `prob`.

    Args
        image: Image array. Its last axis is reversed before it is handed to
            PIL and reversed again afterwards (presumably BGR <-> RGB;
            confirm against the caller).
        prob: Probability of applying the effect.
        threshold: Forwarded unchanged as the `threshold` argument of
            `ImageOps.solarize`.

    Returns
        The very same `image` object when the effect is skipped, otherwise a
        new array holding the processed image.
    """
    if np.random.uniform() > prob:
        return image
    pil_image = Image.fromarray(image[..., ::-1])
    processed = ImageOps.solarize(pil_image, threshold=threshold)
    return np.array(processed)[..., ::-1]
|
||
|
||
def sharpness(image, prob=0.5, min=0, max=2, factor=None):
    """
    Randomly adjust the sharpness of an image array via `ImageEnhance.Sharpness`.

    One uniform sample decides whether the effect runs (skipped when the
    sample is greater than `prob`). Only if it runs and `factor` is None, a
    second uniform sample in [min, max) picks the factor.

    Args
        image: Image array. Its last axis is reversed before it is handed to
            PIL and reversed again afterwards (presumably BGR <-> RGB;
            confirm against the caller).
        prob: Probability of applying the effect.
        min: Lower bound of the random factor range.
        max: Upper bound of the random factor range.
        factor: Fixed enhancement factor; None means "draw one at random".

    Returns
        The very same `image` object when the effect is skipped, otherwise a
        new array holding the processed image.
    """
    if np.random.uniform() > prob:
        return image
    # 0: a bit blurrier, 1: the original image, 2: a bit sharper.
    strength = np.random.uniform(min, max) if factor is None else factor
    enhancer = ImageEnhance.Sharpness(Image.fromarray(image[..., ::-1]))
    return np.array(enhancer.enhance(factor=strength))[..., ::-1]
|
||
|
||
def color(image, prob=0.5, min=0., max=1., factor=None):
    """
    Randomly adjust the colour saturation of an image array via `ImageEnhance.Color`.

    One uniform sample decides whether the effect runs (skipped when the
    sample is greater than `prob`). Only if it runs and `factor` is None, a
    second uniform sample in [min, max) picks the factor.

    Args
        image: Image array. Its last axis is reversed before it is handed to
            PIL and reversed again afterwards (presumably BGR <-> RGB;
            confirm against the caller).
        prob: Probability of applying the effect.
        min: Lower bound of the random factor range.
        max: Upper bound of the random factor range.
        factor: Fixed enhancement factor; None means "draw one at random".

    Returns
        The very same `image` object when the effect is skipped, otherwise a
        new array holding the processed image.
    """
    if np.random.uniform() > prob:
        return image
    # factor=0 gives a black-and-white image, factor=1 gives the original image.
    strength = np.random.uniform(min, max) if factor is None else factor
    enhancer = ImageEnhance.Color(Image.fromarray(image[..., ::-1]))
    return np.array(enhancer.enhance(factor=strength))[..., ::-1]
|
||
|
||
def contrast(image, prob=0.5, min=0.2, max=1., factor=None):
    """
    Randomly adjust the contrast of an image array via `ImageEnhance.Contrast`.

    One uniform sample decides whether the effect runs (skipped when the
    sample is greater than `prob`). Only if it runs and `factor` is None, a
    second uniform sample in [min, max) picks the factor.

    Args
        image: Image array. Its last axis is reversed before it is handed to
            PIL and reversed again afterwards (presumably BGR <-> RGB;
            confirm against the caller).
        prob: Probability of applying the effect.
        min: Lower bound of the random factor range.
        max: Upper bound of the random factor range.
        factor: Fixed enhancement factor; None means "draw one at random".

    Returns
        The very same `image` object when the effect is skipped, otherwise a
        new array holding the processed image.
    """
    if np.random.uniform() > prob:
        return image
    # factor=0 gives a solid gray image, factor=1 gives the original image.
    strength = np.random.uniform(min, max) if factor is None else factor
    enhancer = ImageEnhance.Contrast(Image.fromarray(image[..., ::-1]))
    return np.array(enhancer.enhance(factor=strength))[..., ::-1]
|
||
|
||
def brightness(image, prob=0.5, min=0.8, max=1., factor=None):
    """
    Randomly adjust the brightness of an image array via `ImageEnhance.Brightness`.

    One uniform sample decides whether the effect runs (skipped when the
    sample is greater than `prob`). Only if it runs and `factor` is None, a
    second uniform sample in [min, max) picks the factor.

    Args
        image: Image array. Its last axis is reversed before it is handed to
            PIL and reversed again afterwards (presumably BGR <-> RGB;
            confirm against the caller).
        prob: Probability of applying the effect.
        min: Lower bound of the random factor range.
        max: Upper bound of the random factor range.
        factor: Fixed enhancement factor; None means "draw one at random".

    Returns
        The very same `image` object when the effect is skipped, otherwise a
        new array holding the processed image.
    """
    if np.random.uniform() > prob:
        return image
    # factor=0 gives an all-black image, factor=1 gives the original image.
    strength = np.random.uniform(min, max) if factor is None else factor
    enhancer = ImageEnhance.Brightness(Image.fromarray(image[..., ::-1]))
    return np.array(enhancer.enhance(factor=strength))[..., ::-1]
|
||
|
||
class VisualEffect:
    """
    Callable bundle of parameters for random image colour transformations.

    Every call attempts exactly one enhancement (color, contrast, brightness
    or sharpness) and then exactly one PIL op (autocontrast, equalize or
    solarize). Which one is attempted is picked uniformly at random; the
    picked function then applies or skips itself according to its own
    probability.

    Args
        color_factor: Factor forwarded to `color` (None: drawn at random there).
        contrast_factor: Factor forwarded to `contrast` (None: drawn at random there).
        brightness_factor: Factor forwarded to `brightness` (None: drawn at random there).
        sharpness_factor: Factor forwarded to `sharpness` (None: drawn at random there).
        color_prob: Probability forwarded to `color`.
        contrast_prob: Probability forwarded to `contrast`.
        brightness_prob: Probability forwarded to `brightness`.
        sharpness_prob: Probability forwarded to `sharpness`.
        autocontrast_prob: Probability forwarded to `autocontrast`.
        equalize_prob: Probability forwarded to `equalize`.
        solarize_prob: Probability forwarded to `solarize`.
        solarize_threshold: Threshold forwarded to `solarize`.
    """

    def __init__(
            self,
            color_factor=None,
            contrast_factor=None,
            brightness_factor=None,
            sharpness_factor=None,
            color_prob=0.5,
            contrast_prob=0.5,
            brightness_prob=0.5,
            sharpness_prob=0.5,
            autocontrast_prob=0.5,
            equalize_prob=0.5,
            solarize_prob=0.1,
            solarize_threshold=128.,

    ):
        # Enhancement factors (None means "let the function draw one").
        self.color_factor = color_factor
        self.contrast_factor = contrast_factor
        self.brightness_factor = brightness_factor
        self.sharpness_factor = sharpness_factor

        # Per-effect probabilities.
        self.color_prob = color_prob
        self.contrast_prob = contrast_prob
        self.brightness_prob = brightness_prob
        self.sharpness_prob = sharpness_prob
        self.autocontrast_prob = autocontrast_prob
        self.equalize_prob = equalize_prob
        self.solarize_prob = solarize_prob

        self.solarize_threshold = solarize_threshold

    def __call__(self, image):
        """
        Apply a randomly chosen visual effect chain to the image.

        Args
            image: Image to adjust.

        Returns
            The adjusted image (or the input itself if both picks were skipped).
        """
        # Stage 1: one of the four factor-based enhancements.
        enhancements = (
            (color, self.color_prob, self.color_factor),
            (contrast, self.contrast_prob, self.contrast_factor),
            (brightness, self.brightness_prob, self.brightness_factor),
            (sharpness, self.sharpness_prob, self.sharpness_factor),
        )
        enhance, enhance_prob, enhance_factor = enhancements[np.random.randint(0, 4)]
        image = enhance(image, prob=enhance_prob, factor=enhance_factor)

        # Stage 2: one of the three PIL ImageOps.
        op_id = np.random.randint(0, 3)
        if op_id == 0:
            return autocontrast(image, prob=self.autocontrast_prob)
        if op_id == 1:
            return equalize(image, prob=self.equalize_prob)
        return solarize(image, prob=self.solarize_prob, threshold=self.solarize_threshold)
|
||
|
||
if __name__ == '__main__':
    # Manual visual check: show each training image next to its colour-augmented version.
    from generators.pascal import PascalVocGenerator
    import cv2

    train_generator = PascalVocGenerator(
        'datasets/VOC0712',
        'trainval',
        skip_difficult=True,
        anchors_path='voc_anchors_416.txt',
        batch_size=1
    )
    visual_effect = VisualEffect()
    for index in range(train_generator.size()):
        image = cv2.cvtColor(train_generator.load_image(index), cv2.COLOR_RGB2BGR)
        boxes = train_generator.load_annotations(index)['bboxes']
        # Draw the ground-truth boxes in red before augmenting.
        for box in boxes.astype(np.int32):
            top_left, bottom_right = (box[0], box[1]), (box[2], box[3])
            cv2.rectangle(image, top_left, bottom_right, (0, 0, 255), 2)
        src_image = image.copy()
        image = visual_effect(image)
        # Original on the left, augmented on the right; any key advances.
        side_by_side = np.concatenate([src_image, image], axis=1)
        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.imshow('image', side_by_side)
        cv2.waitKey(0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
import cv2 | ||
import numpy as np | ||
from augmentor.transform import translation_xy, change_transform_origin | ||
|
||
ROTATE_DEGREE = [90, 180, 270]


def _rotate_box(M, box):
    """Map one [x1, y1, x2, y2] box through the 2x3 affine matrix M and re-box its corners."""
    x1, y1, x2, y2 = box
    corners = M.dot([
        [x1, x2, x1, x2],
        [y1, y2, y2, y1],
        [1, 1, 1, 1],
    ])
    # Sort each coordinate row; the mean of the two smallest (largest) values
    # is taken as the min (max) corner of the rotated box.
    ordered = np.sort(corners, axis=1)
    min_x, min_y = ordered[:, :2].mean(axis=1)
    max_x, max_y = ordered[:, 2:].mean(axis=1)
    return [min_x, min_y, max_x, max_y]


def rotate(image, boxes, prob=0.5):
    """
    Randomly rotate an image by one of the angles in ROTATE_DEGREE and move its boxes along.

    The output canvas is resized so that the whole rotated image fits; uncovered
    pixels are filled with (128, 128, 128).

    NOTE(review): the transform is skipped when a uniform draw is *below* `prob`,
    i.e. it is applied with probability `1 - prob`. This is kept as-is because
    existing callers' defaults rely on it.

    Args
        image: Image array of shape (h, w, ...).
        boxes: Iterable of [x1, y1, x2, y2] boxes.
        prob: Skip threshold, see the note above.

    Returns
        (image, boxes). When skipped, the inputs are returned unchanged.
        Otherwise the rotated image and a float array of shape (N, 4); for
        N == 0 the array has shape (0, 4).
    """
    if np.random.uniform() < prob:
        return image, boxes
    rotate_degree = ROTATE_DEGREE[np.random.randint(0, len(ROTATE_DEGREE))]
    h, w = image.shape[:2]
    # Compute the rotation matrix.
    M = cv2.getRotationMatrix2D(center=(w / 2, h / 2),
                                angle=rotate_degree,
                                scale=1)

    # Get the sine and cosine from the rotation matrix.
    abs_cos_angle = np.abs(M[0, 0])
    abs_sin_angle = np.abs(M[0, 1])

    # Compute the new bounding dimensions of the image
    # (this follows from drawing the rotated rectangle).
    new_w = int(h * abs_sin_angle + w * abs_cos_angle)
    new_h = int(h * abs_cos_angle + w * abs_sin_angle)

    # Adjust the rotation matrix to take into account the translation:
    # the offset of the centre point is used as the offset for all pixels.
    M[0, 2] += new_w // 2 - w // 2
    M[1, 2] += new_h // 2 - h // 2

    # Rotate the image.
    image = cv2.warpAffine(image, M=M, dsize=(new_w, new_h), flags=cv2.INTER_CUBIC, borderMode=cv2.BORDER_CONSTANT,
                           borderValue=(128, 128, 128))

    # reshape keeps the (N, 4) layout even when there are no boxes at all.
    boxes = np.array([_rotate_box(M, box) for box in boxes], dtype=np.float64).reshape(-1, 4)
    return image, boxes
|
||
|
||
def crop(image, boxes, prob=0.5):
    """
    Randomly crop an image without cutting into any of its boxes.

    The crop window starts somewhere between the image origin and half-way to
    the nearest box edge, and ends somewhere between the farthest box edge and
    half-way to the image border. Box coordinates are shifted into the cropped
    frame.

    NOTE(review): the transform is skipped when a uniform draw is *below* `prob`,
    i.e. it is applied with probability `1 - prob`.

    Args
        image: Image array of shape (h, w, ...).
        boxes: Array of shape (N, 4+) whose first four columns are x1, y1, x2, y2.
        prob: Skip threshold, see the note above.

    Returns
        (image, boxes). When skipped, or when there are no boxes, the inputs are
        returned unchanged. Otherwise a cropped view of the image and a new,
        shifted box array; the caller's `boxes` array is not modified.
    """
    if np.random.uniform() < prob:
        return image, boxes
    # Without boxes there is nothing to anchor the crop window to.
    if np.size(boxes) == 0:
        return image, boxes
    boxes = np.asarray(boxes)
    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
    # Make the bounds plain ints so they are valid randint/slice arguments for
    # any box dtype; floor the near edge and ceil the far edge so that
    # fractional boxes stay inside the crop.
    min_x1, min_y1 = int(np.floor(min_x1)), int(np.floor(min_y1))
    max_x2, max_y2 = int(np.ceil(max_x2)), int(np.ceil(max_y2))
    random_x1 = np.random.randint(0, max(min_x1 // 2, 1))
    random_y1 = np.random.randint(0, max(min_y1 // 2, 1))
    random_x2 = np.random.randint(max_x2, max(min(w, max_x2 + (w - max_x2) // 2), max_x2 + 1))
    random_y2 = np.random.randint(max_y2, max(min(h, max_y2 + (h - max_y2) // 2), max_y2 + 1))
    image = image[random_y1:random_y2, random_x1:random_x2]
    # Work on a copy so the caller's annotations are left intact.
    shifted = boxes.copy()
    shifted[:, [0, 2]] = shifted[:, [0, 2]] - random_x1
    shifted[:, [1, 3]] = shifted[:, [1, 3]] - random_y1
    return image, shifted
|
||
|
||
def translate(image, boxes, prob=0.5):
    """
    Randomly translate an image and move its boxes along.

    The translation range is derived from the boxes: at most half the distance
    from the outermost box edges to the image border in each direction. The
    canvas size is unchanged; uncovered pixels are filled with (128, 128, 128).

    NOTE(review): the transform is skipped when a uniform draw is *below* `prob`,
    i.e. it is applied with probability `1 - prob`.

    Args
        image: Image array of shape (h, w, ...).
        boxes: Array of shape (N, 4) holding x1, y1, x2, y2 per row.
        prob: Skip threshold, see the note above.

    Returns
        (image, boxes). When skipped, or when there are no boxes, the inputs are
        returned unchanged. Otherwise the translated image and a new (N, 4)
        array of translated boxes.
    """
    if np.random.uniform() < prob:
        return image, boxes
    # The translation range is computed from the boxes, so with none of them
    # there is nothing to do (np.min/np.max would raise on an empty array).
    if np.size(boxes) == 0:
        return image, boxes
    h, w = image.shape[:2]
    min_x1, min_y1 = np.min(boxes, axis=0)[:2]
    max_x2, max_y2 = np.max(boxes, axis=0)[2:]
    translation_matrix = translation_xy(min=(min(-min_x1 // 2, 0), min(-min_y1 // 2, 0)),
                                        max=(max((w - max_x2) // 2, 1), max((h - max_y2) // 2, 1)), prob=1.)
    translation_matrix = change_transform_origin(translation_matrix, (w / 2, h / 2))
    image = cv2.warpAffine(
        image,
        # warpAffine only needs the leading 2x3 part of the matrix.
        translation_matrix[:2, :],
        dsize=(w, h),
        flags=cv2.INTER_CUBIC,
        borderMode=cv2.BORDER_CONSTANT,
        borderValue=(128, 128, 128),
    )
    new_boxes = []
    for x1, y1, x2, y2 in boxes:
        # Push the four corners through the full matrix, then re-box them.
        points = translation_matrix.dot([
            [x1, x2, x1, x2],
            [y1, y2, y2, y1],
            [1, 1, 1, 1],
        ])
        min_x, min_y = np.min(points, axis=1)[:2]
        max_x, max_y = np.max(points, axis=1)[:2]
        new_boxes.append([min_x, min_y, max_x, max_y])
    boxes = np.array(new_boxes)
    return image, boxes
|
||
|
||
class MiscEffect:
    """
    Callable that chains the geometric augmentations rotate -> crop -> translate.

    Args
        rotate_prob: Value forwarded as `prob` to `rotate`.
        crop_prob: Value forwarded as `prob` to `crop`.
        translate_prob: Value forwarded as `prob` to `translate`.
    """

    def __init__(self, rotate_prob=0.9, crop_prob=0.5, translate_prob=0.5):
        self.rotate_prob, self.crop_prob, self.translate_prob = rotate_prob, crop_prob, translate_prob

    def __call__(self, image, boxes):
        """
        Run the three transforms in order on an image and its boxes.

        Args
            image: Image to transform.
            boxes: Boxes belonging to the image.

        Returns
            The (image, boxes) pair produced by the last transform.
        """
        pipeline = (
            (rotate, self.rotate_prob),
            (crop, self.crop_prob),
            (translate, self.translate_prob),
        )
        for transform, transform_prob in pipeline:
            image, boxes = transform(image, boxes, prob=transform_prob)
        return image, boxes
|
||
|
||
if __name__ == '__main__':
    # Manual visual check: show each training image before and after the geometric augmentations.
    from generators.pascal import PascalVocGenerator

    train_generator = PascalVocGenerator(
        'datasets/VOC0712',
        'trainval',
        skip_difficult=True,
        anchors_path='voc_anchors_416.txt',
        batch_size=1
    )
    misc_effect = MiscEffect()
    for index in range(train_generator.size()):
        image = cv2.cvtColor(train_generator.load_image(index), cv2.COLOR_RGB2BGR)
        boxes = train_generator.load_annotations(index)['bboxes']
        # Ground-truth boxes in red on the source image.
        for box in boxes.astype(np.int32):
            top_left, bottom_right = (box[0], box[1]), (box[2], box[3])
            cv2.rectangle(image, top_left, bottom_right, (0, 0, 255), 2)
        src_image = image.copy()
        cv2.namedWindow('src_image', cv2.WINDOW_NORMAL)
        cv2.imshow('src_image', src_image)
        image, boxes = misc_effect(image, boxes)
        # Transformed boxes in green on the augmented image.
        for box in boxes.astype(np.int32):
            top_left, bottom_right = (box[0], box[1]), (box[2], box[3])
            cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 1)
        cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        cv2.imshow('image', image)
        # Any key advances to the next image.
        cv2.waitKey(0)
Oops, something went wrong.