diff --git a/.github/workflows/greetings.yml b/.github/workflows/greetings.yml
index d62cf5c1600d..ee472297107e 100644
--- a/.github/workflows/greetings.yml
+++ b/.github/workflows/greetings.yml
@@ -11,7 +11,7 @@ jobs:
repo-token: ${{ secrets.GITHUB_TOKEN }}
pr-message: |
👋 Hello @${{ github.actor }}, thank you for submitting a 🚀 PR! To allow your work to be integrated as seamlessly as possible, we advise you to:
- - ✅ Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch:
+ - ✅ Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master, an automatic [GitHub Actions](https://github.com/ultralytics/yolov5/blob/master/.github/workflows/rebase.yml) rebase may be attempted by including the /rebase command in a comment body, or by running the following code, replacing 'feature' with the name of your local branch:
```bash
git remote add upstream https://github.com/ultralytics/yolov5.git
git fetch upstream
diff --git a/Dockerfile b/Dockerfile
index 98dfee204770..fe64d6da29f9 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@
FROM nvcr.io/nvidia/pytorch:20.12-py3
# Install linux packages
-RUN apt update && apt install -y screen libgl1-mesa-glx
+RUN apt update && apt install -y zip screen libgl1-mesa-glx
# Install python dependencies
RUN python -m pip install --upgrade pip
diff --git a/README.md b/README.md
index 3c14071698c5..b7129e80adfe 100755
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
-![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)
+
This repository represents Ultralytics open-source research into future object detection methods, and incorporates lessons learned and best practices evolved over thousands of hours of training and evolution on anonymized client datasets. **All code and models are under active development, and are subject to modification or deletion without notice.** Use at your own risk.
@@ -89,17 +89,15 @@ To run inference on example images in `data/images`:
```bash
$ python detect.py --source data/images --weights yolov5s.pt --conf 0.25
-Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', img_size=640, iou_thres=0.45, save_conf=False, save_dir='runs/detect', save_txt=False, source='data/images/', update=False, view_img=False, weights=['yolov5s.pt'])
-Using torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130MB)
-
-Downloading https://github.com/ultralytics/yolov5/releases/download/v3.1/yolov5s.pt to yolov5s.pt... 100%|██████████████| 14.5M/14.5M [00:00<00:00, 21.3MB/s]
+Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', exist_ok=False, img_size=640, iou_thres=0.45, name='exp', project='runs/detect', save_conf=False, save_txt=False, source='data/images/', update=False, view_img=False, weights=['yolov5s.pt'])
+YOLOv5 v4.0-96-g83dc1b4 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)
Fusing layers...
-Model Summary: 232 layers, 7459581 parameters, 0 gradients
-image 1/2 data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.012s)
-image 2/2 data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.012s)
-Results saved to runs/detect/exp
-Done. (0.113s)
+Model Summary: 224 layers, 7266973 parameters, 0 gradients, 17.0 GFLOPS
+image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 bus, Done. (0.010s)
+image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 1 tie, Done. (0.011s)
+Results saved to runs/detect/exp2
+Done. (0.103s)
```
@@ -108,18 +106,17 @@ Done. (0.113s)
To run **batched inference** with YOLOv5 and [PyTorch Hub](https://github.com/ultralytics/yolov5/issues/36):
```python
import torch
-from PIL import Image
# Model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
# Images
-img1 = Image.open('zidane.jpg')
-img2 = Image.open('bus.jpg')
-imgs = [img1, img2] # batched list of images
+dir = 'https://github.com/ultralytics/yolov5/raw/master/data/images/'
+imgs = [dir + f for f in ('zidane.jpg', 'bus.jpg')] # batched list of images
# Inference
-result = model(imgs)
+results = model(imgs)
+results.print() # or .show(), .save()
```
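For reference, a minimal sketch of inspecting the returned `Detections` object, using the `xyxy` attribute and the `save(save_dir=...)` signature that this PR adds in `models/common.py` below; the URL and printed fields are illustrative only:
```python
# Hedged sketch (not part of the diff): per-image predictions are (n, 6) tensors
# of (x1, y1, x2, y2, conf, cls), per the Detections class in models/common.py.
import torch

model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)
results = model(['https://github.com/ultralytics/yolov5/raw/master/data/images/zidane.jpg'])

for *xyxy, conf, cls in results.xyxy[0].tolist():  # detections for image 0
    print(int(cls), round(conf, 2), [round(v, 1) for v in xyxy])

results.save(save_dir='results/')  # save_dir argument added by this PR
```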
diff --git a/data/scripts/get_coco.sh b/data/scripts/get_coco.sh
index b0df905c8525..bbb1e9291d5b 100755
--- a/data/scripts/get_coco.sh
+++ b/data/scripts/get_coco.sh
@@ -10,8 +10,9 @@
# Download/unzip labels
d='../' # unzip directory
url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
-f='coco2017labels.zip' # 68 MB
-echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
+f='coco2017labels.zip' # or 'coco2017labels-segments.zip', 68 MB
+echo 'Downloading' $url$f ' ...'
+curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
# Download/unzip images
d='../coco/images' # unzip directory
@@ -20,7 +21,7 @@ f1='train2017.zip' # 19G, 118k images
f2='val2017.zip' # 1G, 5k images
f3='test2017.zip' # 7G, 41k images (optional)
for f in $f1 $f2; do
- echo 'Downloading' $url$f '...' && curl -L $url$f -o $f # download, (unzip, remove in background)
- unzip -q $f -d $d && rm $f &
+ echo 'Downloading' $url$f '...'
+ curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
done
wait # finish background tasks
diff --git a/data/scripts/get_voc.sh b/data/scripts/get_voc.sh
index 06414b085095..13b83c28d706 100644
--- a/data/scripts/get_voc.sh
+++ b/data/scripts/get_voc.sh
@@ -18,8 +18,8 @@ f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
for f in $f3 $f2 $f1; do
- echo 'Downloading' $url$f '...' && curl -L $url$f -o $f # download, (unzip, remove in background)
- unzip -q $f -d $d && rm $f &
+ echo 'Downloading' $url$f '...'
+ curl -L $url$f -o $f && unzip -q $f -d $d && rm $f & # download, unzip, remove in background
done
wait # finish background tasks
diff --git a/detect.py b/detect.py
index f9085e670916..22bf21b4c825 100644
--- a/detect.py
+++ b/detect.py
@@ -9,8 +9,8 @@
from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
-from utils.general import check_img_size, check_requirements, non_max_suppression, apply_classifier, scale_coords, \
- xyxy2xywh, strip_optimizer, set_logging, increment_path
+from utils.general import check_img_size, check_requirements, check_imshow, non_max_suppression, apply_classifier, \
+ scale_coords, xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized
@@ -45,7 +45,7 @@ def detect(save_img=False):
# Set Dataloader
vid_path, vid_writer = None, None
if webcam:
- view_img = True
+ view_img = check_imshow()
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
@@ -118,6 +118,7 @@ def detect(save_img=False):
# Stream results
if view_img:
cv2.imshow(str(p), im0)
+ cv2.waitKey(1) # 1 millisecond
# Save results (image with detections)
if save_img:
diff --git a/hubconf.py b/hubconf.py
index 2a34813310e8..47eee4477725 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -133,9 +133,14 @@ def custom(path_or_model='path/to/model.pt', autoshape=True):
# model = custom(path_or_model='path/to/model.pt') # custom example
# Verify inference
+ import numpy as np
from PIL import Image
- imgs = [Image.open(x) for x in Path('data/images').glob('*.jpg')]
- results = model(imgs)
+ imgs = [Image.open('data/images/bus.jpg'), # PIL
+ 'data/images/zidane.jpg', # filename
+ 'https://github.com/ultralytics/yolov5/raw/master/data/images/bus.jpg', # URI
+ np.zeros((640, 480, 3))] # numpy
+
+ results = model(imgs) # batched inference
results.print()
results.save()
diff --git a/models/common.py b/models/common.py
index e8adb66293d5..ad35f908d865 100644
--- a/models/common.py
+++ b/models/common.py
@@ -1,16 +1,17 @@
# This file contains modules common to various models
import math
+from pathlib import Path
import numpy as np
import requests
import torch
import torch.nn as nn
-from PIL import Image, ImageDraw
+from PIL import Image
from utils.datasets import letterbox
from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
-from utils.plots import color_list
+from utils.plots import color_list, plot_one_box
def autopad(k, p=None): # kernel, padding
@@ -195,10 +196,12 @@ def forward(self, imgs, size=640, augment=False, profile=False):
# Pre-process
n, imgs = (len(imgs), imgs) if isinstance(imgs, list) else (1, [imgs]) # number of images, list of images
- shape0, shape1 = [], [] # image and inference shapes
+ shape0, shape1, files = [], [], [] # image and inference shapes, filenames
for i, im in enumerate(imgs):
if isinstance(im, str): # filename or uri
- im = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im) # open
+ im, f = Image.open(requests.get(im, stream=True).raw if im.startswith('http') else im), im # open
+ im.filename = f # for uri
+ files.append(Path(im.filename).with_suffix('.jpg').name if isinstance(im, Image.Image) else f'image{i}.jpg')
im = np.array(im) # to numpy
if im.shape[0] < 5: # image in CHW
im = im.transpose((1, 2, 0)) # reverse dataloader .transpose(2, 0, 1)
@@ -223,25 +226,26 @@ def forward(self, imgs, size=640, augment=False, profile=False):
for i in range(n):
scale_coords(shape1, y[i][:, :4], shape0[i])
- return Detections(imgs, y, self.names)
+ return Detections(imgs, y, files, self.names)
class Detections:
# detections class for YOLOv5 inference results
- def __init__(self, imgs, pred, names=None):
+ def __init__(self, imgs, pred, files, names=None):
super(Detections, self).__init__()
d = pred[0].device # device
gn = [torch.tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.], device=d) for im in imgs] # normalizations
self.imgs = imgs # list of images as numpy arrays
self.pred = pred # list of tensors pred[0] = (xyxy, conf, cls)
self.names = names # class names
+ self.files = files # image filenames
self.xyxy = pred # xyxy pixels
self.xywh = [xyxy2xywh(x) for x in pred] # xywh pixels
self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)] # xyxy normalized
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred)
- def display(self, pprint=False, show=False, save=False, render=False):
+ def display(self, pprint=False, show=False, save=False, render=False, save_dir=''):
colors = color_list()
for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
@@ -250,16 +254,16 @@ def display(self, pprint=False, show=False, save=False, render=False):
n = (pred[:, -1] == c).sum() # detections per class
str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render:
- img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
for *box, conf, cls in pred: # xyxy, confidence, class
- # str += '%s %.2f, ' % (names[int(cls)], conf) # label
- ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10]) # plot
+ label = f'{self.names[int(cls)]} {conf:.2f}'
+ plot_one_box(box, img, label=label, color=colors[int(cls) % 10])
+ img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
if pprint:
print(str.rstrip(', '))
if show:
- img.show(f'image {i}') # show
+ img.show(self.files[i]) # show
if save:
- f = f'results{i}.jpg'
+ f = Path(save_dir) / self.files[i]
img.save(f) # save
print(f"{'Saving' * (i == 0)} {f},", end='' if i < self.n - 1 else ' done.\n')
if render:
@@ -271,8 +275,9 @@ def print(self):
def show(self):
self.display(show=True) # show results
- def save(self):
- self.display(save=True) # save results
+ def save(self, save_dir='results/'):
+ Path(save_dir).mkdir(exist_ok=True)
+ self.display(save=True, save_dir=save_dir) # save results
def render(self):
self.display(render=True) # render results
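`display()` now draws boxes with `plot_one_box()` from `utils/plots.py` rather than `ImageDraw`. A minimal sketch of that call on a bare numpy image, assuming the `plot_one_box(box, img, color=..., label=...)` signature at this revision:
```python
# Hedged sketch: plot_one_box() signature assumed from utils/plots.py at this revision.
import numpy as np
from utils.plots import color_list, plot_one_box

img = np.zeros((640, 640, 3), dtype=np.uint8)  # blank HWC uint8 canvas
box = [100, 100, 300, 400]                     # xyxy pixels
plot_one_box(box, img, label='person 0.92', color=color_list()[0])  # draws in place
```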
diff --git a/models/export.py b/models/export.py
index 057658af53dc..cc817871f218 100644
--- a/models/export.py
+++ b/models/export.py
@@ -22,6 +22,7 @@
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path') # from yolov5/models/
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size') # height, width
+ parser.add_argument('--dynamic', action='store_true', help='dynamic ONNX axes')
parser.add_argument('--batch-size', type=int, default=1, help='batch size')
opt = parser.parse_args()
opt.img_size *= 2 if len(opt.img_size) == 1 else 1 # expand
@@ -70,7 +71,9 @@
print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
f = opt.weights.replace('.pt', '.onnx') # filename
torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
- output_names=['classes', 'boxes'] if y is None else ['output'])
+ output_names=['classes', 'boxes'] if y is None else ['output'],
+ dynamic_axes={'images': {0: 'batch', 2: 'height', 3: 'width'}, # size(1,3,640,640)
+ 'output': {0: 'batch', 2: 'y', 3: 'x'}} if opt.dynamic else None)
# Checks
onnx_model = onnx.load(f) # load onnx model
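The new `--dynamic` flag marks the batch, height and width axes of the exported ONNX graph as dynamic. A rough way to confirm that variable input shapes are accepted, assuming `onnxruntime` is installed and `yolov5s.onnx` was produced with `--dynamic` (not part of this PR):
```python
# Hedged sketch: feed two different stride-32-multiple shapes to a --dynamic export.
import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('yolov5s.onnx')
name = sess.get_inputs()[0].name  # 'images'
for shape in [(1, 3, 640, 640), (2, 3, 384, 640)]:
    out = sess.run(None, {name: np.zeros(shape, dtype=np.float32)})[0]
    print(shape, '->', out.shape)
```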
diff --git a/models/yolo.py b/models/yolo.py
index 11e6a65921a4..85043f2b0205 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -2,7 +2,6 @@
import logging
import sys
from copy import deepcopy
-from pathlib import Path
sys.path.append('./') # to run '$ python *.py' files in subdirectories
logger = logging.getLogger(__name__)
@@ -50,7 +49,7 @@ def forward(self, x):
self.grid[i] = self._make_grid(nx, ny).to(x[i].device)
y = x[i].sigmoid()
- y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i].to(x[i].device)) * self.stride[i] # xy
+ y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] # xy
y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] # wh
z.append(y.view(bs, -1, self.no))
@@ -110,9 +109,9 @@ def forward(self, x, augment=False, profile=False):
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi[..., :4] /= si # de-scale
if fi == 2:
- yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud
+ yi[..., 1] = img_size[0] - 1 - yi[..., 1] # de-flip ud
elif fi == 3:
- yi[..., 0] = img_size[1] - yi[..., 0] # de-flip lr
+ yi[..., 0] = img_size[1] - 1 - yi[..., 0] # de-flip lr
y.append(yi)
return torch.cat(y, 1), None # augmented inference, train
else:
@@ -213,43 +212,27 @@ def parse_model(d, ch): # model_dict, input_channels(3)
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
C3]:
c1, c2 = ch[f], args[0]
-
- # Normal
- # if i > 0 and args[0] != no: # channel expansion factor
- # ex = 1.75 # exponential (default 2.0)
- # e = math.log(c2 / ch[1]) / math.log(2)
- # c2 = int(ch[1] * ex ** e)
- # if m != Focus:
-
- c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
-
- # Experimental
- # if i > 0 and args[0] != no: # channel expansion factor
- # ex = 1 + gw # exponential (default 2.0)
- # ch1 = 32 # ch[1]
- # e = math.log(c2 / ch1) / math.log(2) # level 1-n
- # c2 = int(ch1 * ex ** e)
- # if m != Focus:
- # c2 = make_divisible(c2, 8) if c2 != no else c2
+ if c2 != no: # if not output
+ c2 = make_divisible(c2 * gw, 8)
args = [c1, c2, *args[1:]]
if m in [BottleneckCSP, C3]:
- args.insert(2, n)
+ args.insert(2, n) # number of repeats
n = 1
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
- c2 = sum([ch[x if x < 0 else x + 1] for x in f])
+ c2 = sum([ch[x] for x in f])
elif m is Detect:
- args.append([ch[x + 1] for x in f])
+ args.append([ch[x] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
- c2 = ch[f if f < 0 else f + 1] * args[0] ** 2
+ c2 = ch[f] * args[0] ** 2
elif m is Expand:
- c2 = ch[f if f < 0 else f + 1] // args[0] ** 2
+ c2 = ch[f] // args[0] ** 2
else:
- c2 = ch[f if f < 0 else f + 1]
+ c2 = ch[f]
m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
@@ -258,6 +241,8 @@ def parse_model(d, ch): # model_dict, input_channels(3)
logger.info('%3s%18s%3s%10.0f %-40s%-30s' % (i, f, n, np, t, args)) # print
save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist
layers.append(m_)
+ if i == 0:
+ ch = []
ch.append(c2)
return nn.Sequential(*layers), sorted(save)
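In `parse_model()` above, non-output channels are scaled by the width multiple `gw` and rounded up to a multiple of 8. A small sketch of that arithmetic; the `make_divisible()` body is assumed from `utils/general.py`:
```python
# Hedged sketch: channel scaling as used by parse_model(); make_divisible() body assumed.
import math

def make_divisible(x, divisor):
    return math.ceil(x / divisor) * divisor  # round x up to a multiple of divisor

gw = 0.50  # width_multiple for yolov5s
for c2 in (64, 128, 100, 1024):
    print(c2, '->', make_divisible(c2 * gw, 8))  # e.g. 100 * 0.5 = 50 -> 56
```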
diff --git a/requirements.txt b/requirements.txt
index d22b42f5d786..cb50cf8f32e1 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -21,8 +21,8 @@ seaborn>=0.11.0
pandas
# export --------------------------------------
-# coremltools==4.0
-# onnx>=1.8.0
+# coremltools>=4.1
+# onnx>=1.8.1
# scikit-learn==0.19.2 # for coreml quantization
# extras --------------------------------------
diff --git a/test.py b/test.py
index 738764f15601..91176eca01db 100644
--- a/test.py
+++ b/test.py
@@ -52,7 +52,8 @@ def test(data,
# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
- imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
+ gs = max(int(model.stride.max()), 32) # grid size (max stride)
+ imgsz = check_img_size(imgsz, s=gs) # check img_size
# Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
# if device.type != 'cpu' and torch.cuda.device_count() > 1:
@@ -85,7 +86,7 @@ def test(data,
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
- dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True,
+ dataloader = create_dataloader(path, imgsz, batch_size, gs, opt, pad=0.5, rect=True,
prefix=colorstr('test: ' if opt.task == 'test' else 'val: '))[0]
seen = 0
@@ -106,7 +107,7 @@ def test(data,
with torch.no_grad():
# Run model
t = time_synchronized()
- inf_out, train_out = model(img, augment=augment) # inference and training outputs
+ out, train_out = model(img, augment=augment) # inference and training outputs
t0 += time_synchronized() - t
# Compute loss
@@ -117,11 +118,11 @@ def test(data,
targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling
t = time_synchronized()
- output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb)
+ out = non_max_suppression(out, conf_thres=conf_thres, iou_thres=iou_thres, labels=lb, multi_label=True)
t1 += time_synchronized() - t
# Statistics per image
- for si, pred in enumerate(output):
+ for si, pred in enumerate(out):
labels = targets[targets[:, 0] == si, 1:]
nl = len(labels)
tcls = labels[:, 0].tolist() if nl else [] # target class
@@ -209,7 +210,7 @@ def test(data,
f = save_dir / f'test_batch{batch_i}_labels.jpg' # labels
Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
f = save_dir / f'test_batch{batch_i}_pred.jpg' # predictions
- Thread(target=plot_images, args=(img, output_to_target(output), paths, f, names), daemon=True).start()
+ Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()
# Compute statistics
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
@@ -268,10 +269,10 @@ def test(data,
print(f'pycocotools unable to run: {e}')
# Return results
+ model.float() # for training
if not training:
s = f"\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}" if save_txt else ''
print(f"Results saved to {save_dir}{s}")
- model.float() # for training
maps = np.zeros(nc) + map
for i, c in enumerate(ap_class):
maps[c] = ap[i]
diff --git a/train.py b/train.py
index 4ec97ae71e16..bbf879f3af5f 100644
--- a/train.py
+++ b/train.py
@@ -4,6 +4,7 @@
import os
import random
import time
+from copy import deepcopy
from pathlib import Path
from threading import Thread
@@ -31,7 +32,7 @@
from utils.google_utils import attempt_download
from utils.loss import ComputeLoss
from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
-from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first
+from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first, is_parallel
logger = logging.getLogger(__name__)
@@ -120,7 +121,10 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
# Scheduler https://arxiv.org/pdf/1812.01187.pdf
# https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
- lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf']
+ if opt.linear_lr:
+ lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear
+ else:
+ lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf']
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
# plot_lr_scheduler(optimizer, scheduler, epochs)
@@ -130,9 +134,13 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
wandb_run = wandb.init(config=opt, resume="allow",
project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
name=save_dir.stem,
+ entity=opt.entity,
id=ckpt.get('wandb_id') if 'ckpt' in locals() else None)
loggers = {'wandb': wandb} # loggers dict
+ # EMA
+ ema = ModelEMA(model) if rank in [-1, 0] else None
+
# Resume
start_epoch, best_fitness = 0, 0.0
if pretrained:
@@ -141,10 +149,14 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']
+ # EMA
+ if ema and ckpt.get('ema'):
+ ema.ema.load_state_dict(ckpt['ema'][0].float().state_dict())
+ ema.updates = ckpt['ema'][1]
+
# Results
if ckpt.get('training_results') is not None:
- with open(results_file, 'w') as file:
- file.write(ckpt['training_results']) # write results.txt
+ results_file.write_text(ckpt['training_results']) # write results.txt
# Epochs
start_epoch = ckpt['epoch'] + 1
@@ -158,7 +170,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
del ckpt, state_dict
# Image sizes
- gs = int(model.stride.max()) # grid size (max stride)
+ gs = max(int(model.stride.max()), 32) # grid size (max stride)
nl = model.model[-1].nl # number of detection layers (used for scaling hyp['obj'])
imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples
@@ -171,9 +183,6 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
logger.info('Using SyncBatchNorm()')
- # EMA
- ema = ModelEMA(model) if rank in [-1, 0] else None
-
# DDP mode
if cuda and rank != -1:
model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)
@@ -189,8 +198,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
# Process 0
if rank in [-1, 0]:
- ema.updates = start_epoch * nb // accumulate # set EMA updates
- testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, # testloader
+ testloader = create_dataloader(test_path, imgsz_test, batch_size * 2, gs, opt, # testloader
hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
world_size=opt.world_size, workers=opt.workers,
pad=0.5, prefix=colorstr('val: '))[0]
@@ -333,12 +341,11 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
# DDP process 0 or single-GPU
if rank in [-1, 0]:
# mAP
- if ema:
- ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
+ ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
final_epoch = epoch + 1 == epochs
if not opt.notest or final_epoch: # Calculate mAP
results, maps, times = test.test(opt.data,
- batch_size=total_batch_size,
+ batch_size=batch_size * 2,
imgsz=imgsz_test,
model=ema.ema,
single_cls=opt.single_cls,
@@ -351,7 +358,7 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
# Write
with open(results_file, 'a') as f:
- f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
+ f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss
if len(opt.name) and opt.bucket:
os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))
@@ -372,30 +379,30 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
best_fitness = fi
# Save model
- save = (not opt.nosave) or (final_epoch and not opt.evolve)
- if save:
- with open(results_file, 'r') as f: # create checkpoint
- ckpt = {'epoch': epoch,
- 'best_fitness': best_fitness,
- 'training_results': f.read(),
- 'model': ema.ema,
- 'optimizer': None if final_epoch else optimizer.state_dict(),
- 'wandb_id': wandb_run.id if wandb else None}
+ if (not opt.nosave) or (final_epoch and not opt.evolve): # if save
+ ckpt = {'epoch': epoch,
+ 'best_fitness': best_fitness,
+ 'training_results': results_file.read_text(),
+ 'model': deepcopy(model.module if is_parallel(model) else model).half(),
+ 'ema': (deepcopy(ema.ema).half(), ema.updates),
+ 'optimizer': optimizer.state_dict(),
+ 'wandb_id': wandb_run.id if wandb else None}
# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
del ckpt
+
# end epoch ----------------------------------------------------------------------------------------------------
# end training
if rank in [-1, 0]:
# Strip optimizers
final = best if best.exists() else last # final model
- for f in [last, best]:
+ for f in last, best:
if f.exists():
- strip_optimizer(f) # strip optimizers
+ strip_optimizer(f)
if opt.bucket:
os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload
@@ -412,17 +419,17 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
# Test best.pt
logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
if opt.data.endswith('coco.yaml') and nc == 80: # if COCO
- for conf, iou, save_json in ([0.25, 0.45, False], [0.001, 0.65, True]): # speed, mAP tests
+ for m in (last, best) if best.exists() else (last,): # speed, mAP tests
results, _, _ = test.test(opt.data,
- batch_size=total_batch_size,
+ batch_size=batch_size * 2,
imgsz=imgsz_test,
- conf_thres=conf,
- iou_thres=iou,
- model=attempt_load(final, device).half(),
+ conf_thres=0.001,
+ iou_thres=0.7,
+ model=attempt_load(m, device).half(),
single_cls=opt.single_cls,
dataloader=testloader,
save_dir=save_dir,
- save_json=save_json,
+ save_json=True,
plots=False)
else:
@@ -461,9 +468,11 @@ def train(hyp, opt, device, tb_writer=None, wandb=None):
parser.add_argument('--log-artifacts', action='store_true', help='log artifacts, i.e. final trained model')
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
parser.add_argument('--project', default='runs/train', help='save to project/name')
+ parser.add_argument('--entity', default=None, help='W&B entity')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
parser.add_argument('--quad', action='store_true', help='quad dataloader')
+ parser.add_argument('--linear-lr', action='store_true', help='linear LR')
opt = parser.parse_args()
# Set DDP variables
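The new `--linear-lr` flag replaces the default one-cycle cosine schedule with the linear lambda added above. A quick comparison of the two schedules; the cosine form here is an assumption based on `one_cycle()` in `utils/general.py`:
```python
# Hedged sketch: both lambdas decay the LR factor from 1.0 to lrf over `epochs`.
import math

epochs, lrf = 300, 0.2
linear = lambda x: (1 - x / (epochs - 1)) * (1.0 - lrf) + lrf                  # as added in train.py
cosine = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (lrf - 1) + 1  # one_cycle(1, lrf, epochs), assumed

for e in (0, 150, 299):
    print(e, round(linear(e), 3), round(cosine(e), 3))
```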
diff --git a/tutorial.ipynb b/tutorial.ipynb
index 3f7133f4f7d7..7fce40c3824e 100644
--- a/tutorial.ipynb
+++ b/tutorial.ipynb
@@ -16,7 +16,7 @@
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
- "811fd52fef65422c8267bafcde8a2c3d": {
+ "1f8e9b8ebded4175b2eaa9f75c3ceb00": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
@@ -28,15 +28,15 @@
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
- "layout": "IPY_MODEL_8f41b90117224eef9133a9c3a103dbba",
+ "layout": "IPY_MODEL_0a1246a73077468ab80e979cc0576cd2",
"_model_module": "@jupyter-widgets/controls",
"children": [
- "IPY_MODEL_ca2fb37af6ed43d4a74cdc9f2ac5c4a5",
- "IPY_MODEL_29419ae5ebb9403ea73f7e5a68037bdd"
+ "IPY_MODEL_d327cde5a85a4a51bb8b1b3e9cf06c97",
+ "IPY_MODEL_d5ef1cb2cbed4b87b3c5d292ff2b0da6"
]
}
},
- "8f41b90117224eef9133a9c3a103dbba": {
+ "0a1246a73077468ab80e979cc0576cd2": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -87,12 +87,12 @@
"left": null
}
},
- "ca2fb37af6ed43d4a74cdc9f2ac5c4a5": {
+ "d327cde5a85a4a51bb8b1b3e9cf06c97": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
- "style": "IPY_MODEL_6511b4dfb10b48d1bc98bcfb3987bfa0",
+ "style": "IPY_MODEL_8d5dff8bca14435a88fa1814533acd85",
"_dom_classes": [],
"description": "100%",
"_model_name": "FloatProgressModel",
@@ -107,30 +107,30 @@
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
- "layout": "IPY_MODEL_64f0badf1a8f489885aa984dd62d37dc"
+ "layout": "IPY_MODEL_3d5136c19e7645ca9bc8f51ceffb2be1"
}
},
- "29419ae5ebb9403ea73f7e5a68037bdd": {
+ "d5ef1cb2cbed4b87b3c5d292ff2b0da6": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
- "style": "IPY_MODEL_f569911c5cfc4d81bb1bdfa83447afc8",
+ "style": "IPY_MODEL_2919396dbd4b4c8e821d12bd28665d8a",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "value": " 781M/781M [00:23<00:00, 34.2MB/s]",
+ "value": " 781M/781M [00:12<00:00, 65.5MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
- "layout": "IPY_MODEL_84943ade566440aaa2dcf3b3b27e7074"
+ "layout": "IPY_MODEL_6feb16f2b2fa4021b1a271e1dd442d04"
}
},
- "6511b4dfb10b48d1bc98bcfb3987bfa0": {
+ "8d5dff8bca14435a88fa1814533acd85": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
@@ -145,7 +145,7 @@
"_model_module": "@jupyter-widgets/controls"
}
},
- "64f0badf1a8f489885aa984dd62d37dc": {
+ "3d5136c19e7645ca9bc8f51ceffb2be1": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -196,7 +196,7 @@
"left": null
}
},
- "f569911c5cfc4d81bb1bdfa83447afc8": {
+ "2919396dbd4b4c8e821d12bd28665d8a": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
@@ -210,7 +210,7 @@
"_model_module": "@jupyter-widgets/controls"
}
},
- "84943ade566440aaa2dcf3b3b27e7074": {
+ "6feb16f2b2fa4021b1a271e1dd442d04": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -261,7 +261,7 @@
"left": null
}
},
- "8501ed1563e4452eac9df6b7a66e8f8c": {
+ "e6459e0bcee449b090fc9807672725bc": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
@@ -273,15 +273,15 @@
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
- "layout": "IPY_MODEL_d2bb96801e1f46f4a58e02534f7026ff",
+ "layout": "IPY_MODEL_c341e1d3bf3b40d1821ce392eb966c68",
"_model_module": "@jupyter-widgets/controls",
"children": [
- "IPY_MODEL_468a796ef06b4a24bcba6fbd4a0a8db5",
- "IPY_MODEL_42ad5c1ea7be4835bffebf90642178f1"
+ "IPY_MODEL_660afee173694231a6dce3cd94df6cae",
+ "IPY_MODEL_261218485cef48df961519dde5edfcbe"
]
}
},
- "d2bb96801e1f46f4a58e02534f7026ff": {
+ "c341e1d3bf3b40d1821ce392eb966c68": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -332,12 +332,12 @@
"left": null
}
},
- "468a796ef06b4a24bcba6fbd4a0a8db5": {
+ "660afee173694231a6dce3cd94df6cae": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
- "style": "IPY_MODEL_c58b5536d98f4814831934e9c30c4d78",
+ "style": "IPY_MODEL_32736d503c06497abfae8c0421918255",
"_dom_classes": [],
"description": "100%",
"_model_name": "FloatProgressModel",
@@ -352,30 +352,30 @@
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
- "layout": "IPY_MODEL_505597101151486ea29e9ab754544d27"
+ "layout": "IPY_MODEL_e257738711f54d5280c8393d9d3dce1c"
}
},
- "42ad5c1ea7be4835bffebf90642178f1": {
+ "261218485cef48df961519dde5edfcbe": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
- "style": "IPY_MODEL_de6e7b4b4a1c408c9f89d89b07a13bcd",
+ "style": "IPY_MODEL_beb7a6fe34b840899bb79c062681696f",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
- "value": " 21.1M/21.1M [00:01<00:00, 18.2MB/s]",
+ "value": " 21.1M/21.1M [00:00<00:00, 33.5MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
- "layout": "IPY_MODEL_f5cc9c7d4c274b2d81327ba3163c43fd"
+ "layout": "IPY_MODEL_e639132395d64d70b99d8b72c32f8fbb"
}
},
- "c58b5536d98f4814831934e9c30c4d78": {
+ "32736d503c06497abfae8c0421918255": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
@@ -390,7 +390,7 @@
"_model_module": "@jupyter-widgets/controls"
}
},
- "505597101151486ea29e9ab754544d27": {
+ "e257738711f54d5280c8393d9d3dce1c": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -441,7 +441,7 @@
"left": null
}
},
- "de6e7b4b4a1c408c9f89d89b07a13bcd": {
+ "beb7a6fe34b840899bb79c062681696f": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
@@ -455,7 +455,7 @@
"_model_module": "@jupyter-widgets/controls"
}
},
- "f5cc9c7d4c274b2d81327ba3163c43fd": {
+ "e639132395d64d70b99d8b72c32f8fbb": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -550,7 +550,7 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "c6ad57c2-40b7-4764-b07d-19ee2ceaabaf"
+ "outputId": "ae8805a9-ce15-4e1c-f6b4-baa1c1033f56"
},
"source": [
"!git clone https://github.com/ultralytics/yolov5 # clone repo\n",
@@ -568,7 +568,7 @@
{
"output_type": "stream",
"text": [
- "Setup complete. Using torch 1.7.0+cu101 _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16130MB, multi_processor_count=80)\n"
+ "Setup complete. Using torch 1.7.0+cu101 _CudaDeviceProperties(name='Tesla V100-SXM2-16GB', major=7, minor=0, total_memory=16160MB, multi_processor_count=80)\n"
],
"name": "stdout"
}
@@ -672,17 +672,17 @@
"base_uri": "https://localhost:8080/",
"height": 65,
"referenced_widgets": [
- "811fd52fef65422c8267bafcde8a2c3d",
- "8f41b90117224eef9133a9c3a103dbba",
- "ca2fb37af6ed43d4a74cdc9f2ac5c4a5",
- "29419ae5ebb9403ea73f7e5a68037bdd",
- "6511b4dfb10b48d1bc98bcfb3987bfa0",
- "64f0badf1a8f489885aa984dd62d37dc",
- "f569911c5cfc4d81bb1bdfa83447afc8",
- "84943ade566440aaa2dcf3b3b27e7074"
+ "1f8e9b8ebded4175b2eaa9f75c3ceb00",
+ "0a1246a73077468ab80e979cc0576cd2",
+ "d327cde5a85a4a51bb8b1b3e9cf06c97",
+ "d5ef1cb2cbed4b87b3c5d292ff2b0da6",
+ "8d5dff8bca14435a88fa1814533acd85",
+ "3d5136c19e7645ca9bc8f51ceffb2be1",
+ "2919396dbd4b4c8e821d12bd28665d8a",
+ "6feb16f2b2fa4021b1a271e1dd442d04"
]
},
- "outputId": "59a7a546-8492-492e-861d-70a2c85a6794"
+ "outputId": "d6ace7c6-1be5-41ff-d607-1c716b88d298"
},
"source": [
"# Download COCO val2017\n",
@@ -695,7 +695,7 @@
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "811fd52fef65422c8267bafcde8a2c3d",
+ "model_id": "1f8e9b8ebded4175b2eaa9f75c3ceb00",
"version_minor": 0,
"version_major": 2
},
@@ -723,7 +723,7 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "427c211e-e283-4e87-f7b3-7b8dfb11a4a5"
+ "outputId": "cc25f70c-0a11-44f6-cc44-e92c5083488c"
},
"source": [
"# Run YOLOv5x on COCO val2017\n",
@@ -735,34 +735,33 @@
"output_type": "stream",
"text": [
"Namespace(augment=False, batch_size=32, conf_thres=0.001, data='./data/coco.yaml', device='', exist_ok=False, img_size=640, iou_thres=0.65, name='exp', project='runs/test', save_conf=False, save_hybrid=False, save_json=True, save_txt=False, single_cls=False, task='val', verbose=False, weights=['yolov5x.pt'])\n",
- "YOLOv5 v4.0-21-gb26a2f6 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130.5MB)\n",
+ "YOLOv5 v4.0-75-gbdd88e1 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)\n",
"\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5x.pt to yolov5x.pt...\n",
- "100% 168M/168M [00:05<00:00, 31.9MB/s]\n",
+ "100% 168M/168M [00:04<00:00, 39.7MB/s]\n",
"\n",
"Fusing layers... \n",
"Model Summary: 476 layers, 87730285 parameters, 0 gradients, 218.8 GFLOPS\n",
- "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco/labels/val2017' for images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2791.81it/s]\n",
- "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: ../coco/labels/val2017.cache\n",
- "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco/labels/val2017.cache' for images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:00<00:00, 13332180.55it/s]\n",
- " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:30<00:00, 1.73it/s]\n",
- " all 5e+03 3.63e+04 0.419 0.765 0.68 0.486\n",
- "Speed: 5.2/2.0/7.2 ms inference/NMS/total per 640x640 image at batch-size 32\n",
+ "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco/val2017' for images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2824.78it/s]\n",
+ "\u001b[34m\u001b[1mval: \u001b[0mNew cache created: ../coco/val2017.cache\n",
+ " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:33<00:00, 1.68it/s]\n",
+ " all 5e+03 3.63e+04 0.749 0.619 0.68 0.486\n",
+ "Speed: 5.2/2.0/7.3 ms inference/NMS/total per 640x640 image at batch-size 32\n",
"\n",
"Evaluating pycocotools mAP... saving runs/test/exp/yolov5x_predictions.json...\n",
"loading annotations into memory...\n",
- "Done (t=0.41s)\n",
+ "Done (t=0.44s)\n",
"creating index...\n",
"index created!\n",
"Loading and preparing results...\n",
- "DONE (t=5.26s)\n",
+ "DONE (t=4.47s)\n",
"creating index...\n",
"index created!\n",
"Running per image evaluation...\n",
"Evaluate annotation type *bbox*\n",
- "DONE (t=93.97s).\n",
+ "DONE (t=94.87s).\n",
"Accumulating evaluation results...\n",
- "DONE (t=15.06s).\n",
+ "DONE (t=15.96s).\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.501\n",
" Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.687\n",
" Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.544\n",
@@ -837,17 +836,17 @@
"base_uri": "https://localhost:8080/",
"height": 65,
"referenced_widgets": [
- "8501ed1563e4452eac9df6b7a66e8f8c",
- "d2bb96801e1f46f4a58e02534f7026ff",
- "468a796ef06b4a24bcba6fbd4a0a8db5",
- "42ad5c1ea7be4835bffebf90642178f1",
- "c58b5536d98f4814831934e9c30c4d78",
- "505597101151486ea29e9ab754544d27",
- "de6e7b4b4a1c408c9f89d89b07a13bcd",
- "f5cc9c7d4c274b2d81327ba3163c43fd"
+ "e6459e0bcee449b090fc9807672725bc",
+ "c341e1d3bf3b40d1821ce392eb966c68",
+ "660afee173694231a6dce3cd94df6cae",
+ "261218485cef48df961519dde5edfcbe",
+ "32736d503c06497abfae8c0421918255",
+ "e257738711f54d5280c8393d9d3dce1c",
+ "beb7a6fe34b840899bb79c062681696f",
+ "e639132395d64d70b99d8b72c32f8fbb"
]
},
- "outputId": "c68a3db4-1314-46b4-9e52-83532eb65749"
+ "outputId": "e8b7d5b3-a71e-4446-eec2-ad13419cf700"
},
"source": [
"# Download COCO128\n",
@@ -860,7 +859,7 @@
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
- "model_id": "8501ed1563e4452eac9df6b7a66e8f8c",
+ "model_id": "e6459e0bcee449b090fc9807672725bc",
"version_minor": 0,
"version_major": 2
},
@@ -925,7 +924,7 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
- "outputId": "6af7116a-01ab-4b94-e5d7-b37c17dc95de"
+ "outputId": "38e51b29-2df4-4f00-cde8-5f6e4a34da9e"
},
"source": [
"# Train YOLOv5s on COCO128 for 3 epochs\n",
@@ -937,15 +936,15 @@
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n",
- "YOLOv5 v4.0-21-gb26a2f6 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130.5MB)\n",
+ "YOLOv5 v4.0-75-gbdd88e1 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)\n",
"\n",
- "Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='', data='./data/coco128.yaml', device='', epochs=3, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[640, 640], local_rank=-1, log_artifacts=False, log_imgs=16, multi_scale=False, name='exp', noautoanchor=False, nosave=True, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/exp', single_cls=False, sync_bn=False, total_batch_size=16, weights='yolov5s.pt', workers=8, world_size=1)\n",
+ "Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='', data='./data/coco128.yaml', device='', epochs=3, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[640, 640], linear_lr=False, local_rank=-1, log_artifacts=False, log_imgs=16, multi_scale=False, name='exp', noautoanchor=False, nosave=True, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/exp', single_cls=False, sync_bn=False, total_batch_size=16, weights='yolov5s.pt', workers=8, world_size=1)\n",
"\u001b[34m\u001b[1mwandb: \u001b[0mInstall Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)\n",
"Start Tensorboard with \"tensorboard --logdir runs/train\", view at http://localhost:6006/\n",
- "2021-01-17 19:56:03.945851: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n",
+ "2021-02-12 06:38:28.027271: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n",
"\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5s.pt to yolov5s.pt...\n",
- "100% 14.1M/14.1M [00:00<00:00, 15.8MB/s]\n",
+ "100% 14.1M/14.1M [00:01<00:00, 13.2MB/s]\n",
"\n",
"\n",
" from n params module arguments \n",
@@ -979,12 +978,11 @@
"Transferred 362/362 items from yolov5s.pt\n",
"Scaled weight_decay = 0.0005\n",
"Optimizer groups: 62 .bias, 62 conv.weight, 59 other\n",
- "\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../coco128/labels/train2017' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 2647.74it/s]\n",
+ "\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../coco128/labels/train2017' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 2566.00it/s]\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: ../coco128/labels/train2017.cache\n",
- "\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../coco128/labels/train2017.cache' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 1503840.09it/s]\n",
- "\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:00<00:00, 176.03it/s]\n",
- "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco128/labels/train2017.cache' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 24200.82it/s]\n",
- "\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:01<00:00, 123.25it/s]\n",
+ "\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:00<00:00, 175.07it/s]\n",
+ "\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco128/labels/train2017.cache' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 764773.38it/s]\n",
+ "\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:00<00:00, 128.17it/s]\n",
"Plotting labels... \n",
"\n",
"\u001b[34m\u001b[1mautoanchor: \u001b[0mAnalyzing anchors... anchors/target = 4.26, Best Possible Recall (BPR) = 0.9946\n",
@@ -994,19 +992,19 @@
"Starting training for 3 epochs...\n",
"\n",
" Epoch gpu_mem box obj cls total targets img_size\n",
- " 0/2 3.27G 0.04357 0.06779 0.01869 0.1301 207 640: 100% 8/8 [00:04<00:00, 1.95it/s]\n",
- " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:05<00:00, 1.36it/s]\n",
- " all 128 929 0.392 0.732 0.657 0.428\n",
+ " 0/2 3.27G 0.04357 0.06781 0.01869 0.1301 207 640: 100% 8/8 [00:03<00:00, 2.03it/s]\n",
+ " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:04<00:00, 1.14s/it]\n",
+ " all 128 929 0.646 0.627 0.659 0.431\n",
"\n",
" Epoch gpu_mem box obj cls total targets img_size\n",
- " 1/2 7.47G 0.04308 0.06636 0.02083 0.1303 227 640: 100% 8/8 [00:02<00:00, 3.88it/s]\n",
- " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:01<00:00, 5.07it/s]\n",
- " all 128 929 0.387 0.737 0.657 0.432\n",
+ " 1/2 7.75G 0.04308 0.06654 0.02083 0.1304 227 640: 100% 8/8 [00:01<00:00, 4.11it/s]\n",
+ " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:01<00:00, 2.94it/s]\n",
+ " all 128 929 0.681 0.607 0.663 0.434\n",
"\n",
" Epoch gpu_mem box obj cls total targets img_size\n",
- " 2/2 7.48G 0.04461 0.06864 0.01866 0.1319 191 640: 100% 8/8 [00:02<00:00, 3.57it/s]\n",
- " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:02<00:00, 2.82it/s]\n",
- " all 128 929 0.385 0.742 0.658 0.431\n",
+ " 2/2 7.75G 0.04461 0.06896 0.01866 0.1322 191 640: 100% 8/8 [00:02<00:00, 3.94it/s]\n",
+ " Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 4/4 [00:03<00:00, 1.22it/s]\n",
+ " all 128 929 0.642 0.632 0.662 0.432\n",
"Optimizer stripped from runs/train/exp/weights/last.pt, 14.8MB\n",
"3 epochs completed in 0.007 hours.\n",
"\n"
@@ -1224,6 +1222,19 @@
"execution_count": null,
"outputs": []
},
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "RVRSOhEvUdb5"
+ },
+ "source": [
+ "# Evolve\n",
+ "!python train.py --img 640 --batch 64 --epochs 100 --data coco128.yaml --weights yolov5s.pt --cache --noautoanchor --evolve\n",
+ "!d=runs/train/evolve && cp evolve.* $d && zip -r evolve.zip $d && gsutil mv evolve.zip gs://bucket # upload results (optional)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
{
"cell_type": "code",
"metadata": {
@@ -1238,4 +1249,4 @@
"outputs": []
}
]
-}
+}
\ No newline at end of file
diff --git a/utils/aws/__init__.py b/utils/aws/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/utils/aws/mime.sh b/utils/aws/mime.sh
new file mode 100644
index 000000000000..c319a83cfbdf
--- /dev/null
+++ b/utils/aws/mime.sh
@@ -0,0 +1,26 @@
+# AWS EC2 instance startup 'MIME' script https://aws.amazon.com/premiumsupport/knowledge-center/execute-user-data-ec2/
+# This script will run on every instance restart, not only on first start
+# --- DO NOT COPY ABOVE COMMENTS WHEN PASTING INTO USERDATA ---
+
+Content-Type: multipart/mixed; boundary="//"
+MIME-Version: 1.0
+
+--//
+Content-Type: text/cloud-config; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Content-Disposition: attachment; filename="cloud-config.txt"
+
+#cloud-config
+cloud_final_modules:
+- [scripts-user, always]
+
+--//
+Content-Type: text/x-shellscript; charset="us-ascii"
+MIME-Version: 1.0
+Content-Transfer-Encoding: 7bit
+Content-Disposition: attachment; filename="userdata.txt"
+
+#!/bin/bash
+# --- paste contents of userdata.sh here ---
+--//
diff --git a/utils/aws/resume.py b/utils/aws/resume.py
new file mode 100644
index 000000000000..563f22be20dc
--- /dev/null
+++ b/utils/aws/resume.py
@@ -0,0 +1,37 @@
+# Resume all interrupted trainings in yolov5/ dir including DDP trainings
+# Usage: $ python utils/aws/resume.py
+
+import os
+import sys
+from pathlib import Path
+
+import torch
+import yaml
+
+sys.path.append('./') # to run '$ python *.py' files in subdirectories
+
+port = 0 # --master_port
+path = Path('').resolve()
+for last in path.rglob('*/**/last.pt'):
+ ckpt = torch.load(last)
+ if ckpt['optimizer'] is None:
+ continue
+
+ # Load opt.yaml
+ with open(last.parent.parent / 'opt.yaml') as f:
+ opt = yaml.load(f, Loader=yaml.SafeLoader)
+
+ # Get device count
+ d = opt['device'].split(',') # devices
+ nd = len(d) # number of devices
+ ddp = nd > 1 or (nd == 0 and torch.cuda.device_count() > 1) # distributed data parallel
+
+ if ddp: # multi-GPU
+ port += 1
+ cmd = f'python -m torch.distributed.launch --nproc_per_node {nd} --master_port {port} train.py --resume {last}'
+ else: # single-GPU
+ cmd = f'python train.py --resume {last}'
+
+ cmd += ' > /dev/null 2>&1 &' # redirect output to /dev/null and run in background
+ print(cmd)
+ os.system(cmd)
diff --git a/utils/aws/userdata.sh b/utils/aws/userdata.sh
new file mode 100644
index 000000000000..36405d1a1565
--- /dev/null
+++ b/utils/aws/userdata.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# AWS EC2 instance startup script https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/user-data.html
+# This script will run only once on first instance start (for a re-start script see mime.sh)
+# /home/ubuntu (ubuntu) or /home/ec2-user (amazon-linux) is working dir
+# Use >300 GB SSD
+
+cd home/ubuntu
+if [ ! -d yolov5 ]; then
+ echo "Running first-time script." # install dependencies, download COCO, pull Docker
+ git clone https://github.com/ultralytics/yolov5 && sudo chmod -R 777 yolov5
+ cd yolov5
+ bash data/scripts/get_coco.sh && echo "Data done." &
+ sudo docker pull ultralytics/yolov5:latest && echo "Docker done." &
+ # python -m pip install --upgrade pip && pip install -r requirements.txt && python detect.py && echo "Requirements done." &
+else
+ echo "Running re-start script." # resume interrupted runs
+ i=0
+ list=$(docker ps -qa) # container list i.e. $'one\ntwo\nthree\nfour'
+ while IFS= read -r id; do
+ ((i++))
+ echo "restarting container $i: $id"
+ docker start $id
+ # docker exec -it $id python train.py --resume # single-GPU
+ docker exec -d $id python utils/aws/resume.py
+ done <<<"$list"
+fi
diff --git a/utils/datasets.py b/utils/datasets.py
index 1e23934b63cc..d6ab16518034 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -20,12 +20,13 @@
from torch.utils.data import Dataset
from tqdm import tqdm
-from utils.general import xyxy2xywh, xywh2xyxy, xywhn2xyxy, clean_str
+from utils.general import xyxy2xywh, xywh2xyxy, xywhn2xyxy, xyn2xy, segment2box, segments2boxes, resample_segments, \
+ clean_str
from utils.torch_utils import torch_distributed_zero_first
# Parameters
help_url = 'https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data'
-img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng'] # acceptable image suffixes
+img_formats = ['bmp', 'jpg', 'jpeg', 'png', 'tif', 'tiff', 'dng', 'webp'] # acceptable image suffixes
vid_formats = ['mov', 'avi', 'mp4', 'mpg', 'mpeg', 'm4v', 'wmv', 'mkv'] # acceptable video suffixes
logger = logging.getLogger(__name__)
@@ -120,8 +121,7 @@ def __iter__(self):
class LoadImages: # for inference
def __init__(self, path, img_size=640, stride=32):
- p = str(Path(path)) # os-agnostic
- p = os.path.abspath(p) # absolute path
+ p = str(Path(path).absolute()) # os-agnostic absolute path
if '*' in p:
files = sorted(glob.glob(p, recursive=True)) # glob
elif os.path.isdir(p):
@@ -300,7 +300,8 @@ def update(self, index, cap):
# _, self.imgs[index] = cap.read()
cap.grab()
if n == 4: # read every 4th frame
- _, self.imgs[index] = cap.retrieve()
+ success, im = cap.retrieve()
+ self.imgs[index] = im if success else self.imgs[index] * 0
n = 0
time.sleep(0.01) # wait time
@@ -334,7 +335,7 @@ def __len__(self):
def img2label_paths(img_paths):
# Define label paths as a function of image paths
sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep # /images/, /labels/ substrings
- return [x.replace(sa, sb, 1).replace('.' + x.split('.')[-1], '.txt') for x in img_paths]
+ return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]
class LoadImagesAndLabels(Dataset): # for training/testing
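The `img2label_paths()` rewrite above replaces only the last occurrence of the image suffix when deriving the label path, so a suffix string appearing earlier in the path no longer gets swapped. A small check, with the helper reproduced from the line above for illustration (POSIX-style paths assumed):
```python
# Hedged sketch: new mapping reproduced from the diff; POSIX paths assumed.
import os

def img2label_paths(img_paths):
    sa, sb = os.sep + 'images' + os.sep, os.sep + 'labels' + os.sep  # /images/ -> /labels/
    return ['txt'.join(x.replace(sa, sb, 1).rsplit(x.split('.')[-1], 1)) for x in img_paths]

print(img2label_paths(['coco/images/train2017/000000000009.jpg']))
# -> ['coco/labels/train2017/000000000009.txt']
```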
@@ -349,44 +350,49 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
self.mosaic_border = [-img_size // 2, -img_size // 2]
self.stride = stride
self.path = path
-
+
try:
f = [] # image files
for p in path if isinstance(path, list) else [path]:
p = Path(p) # os-agnostic
if p.is_dir(): # dir
f += glob.glob(str(p / '**' / '*.*'), recursive=True)
+ # f = list(p.rglob('**/*.*')) # pathlib
elif p.is_file(): # file
with open(p, 'r') as t:
t = t.read().strip().splitlines()
parent = str(p.parent) + os.sep
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
+ # f += [p.parent / x.lstrip(os.sep) for x in t] # local to global path (pathlib)
else:
raise Exception(f'{prefix}{p} does not exist')
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
+ # self.img_files = sorted([x for x in f if x.suffix[1:].lower() in img_formats]) # pathlib
assert self.img_files, f'{prefix}No images found'
except Exception as e:
raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')
# Check cache
self.label_files = img2label_paths(self.img_files) # labels
- cache_path = Path(self.label_files[0]).parent.with_suffix('.cache') # cached labels
+ cache_path = (p if p.is_file() else Path(self.label_files[0]).parent).with_suffix('.cache') # cached labels
if cache_path.is_file():
- cache = torch.load(cache_path) # load
- if cache['hash'] != get_hash(self.label_files + self.img_files) or 'results' not in cache: # changed
- cache = self.cache_labels(cache_path, prefix) # re-cache
+ cache, exists = torch.load(cache_path), True # load
+ if cache['hash'] != get_hash(self.label_files + self.img_files) or 'version' not in cache: # changed
+ cache, exists = self.cache_labels(cache_path, prefix), False # re-cache
else:
- cache = self.cache_labels(cache_path, prefix) # cache
+ cache, exists = self.cache_labels(cache_path, prefix), False # cache
# Display cache
- [nf, nm, ne, nc, n] = cache.pop('results') # found, missing, empty, corrupted, total
- desc = f"Scanning '{cache_path}' for images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
- tqdm(None, desc=prefix + desc, total=n, initial=n)
+ nf, nm, ne, nc, n = cache.pop('results') # found, missing, empty, corrupted, total
+ if exists:
+ d = f"Scanning '{cache_path}' for images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
+ tqdm(None, desc=prefix + d, total=n, initial=n) # display cache results
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'
# Read cache
cache.pop('hash') # remove hash
- labels, shapes = zip(*cache.values())
+ cache.pop('version') # remove version
+ labels, shapes, self.segments = zip(*cache.values())
self.labels = list(labels)
self.shapes = np.array(shapes, dtype=np.float64)
self.img_files = list(cache.keys()) # update
@@ -449,6 +455,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
im = Image.open(im_file)
im.verify() # PIL verify
shape = exif_size(im) # image size
+ segments = [] # instance segments
assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
assert im.format.lower() in img_formats, f'invalid image format {im.format}'
@@ -456,7 +463,12 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
if os.path.isfile(lb_file):
nf += 1 # label found
with open(lb_file, 'r') as f:
- l = np.array([x.split() for x in f.read().strip().splitlines()], dtype=np.float32) # labels
+ l = [x.split() for x in f.read().strip().splitlines()]
+ if any([len(x) > 8 for x in l]): # is segment
+ classes = np.array([x[0] for x in l], dtype=np.float32)
+ segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in l] # (cls, xy1...)
+ l = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1) # (cls, xywh)
+ l = np.array(l, dtype=np.float32)
if len(l):
assert l.shape[1] == 5, 'labels require 5 columns each'
assert (l >= 0).all(), 'negative labels'
@@ -468,7 +480,7 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
else:
nm += 1 # label missing
l = np.zeros((0, 5), dtype=np.float32)
- x[im_file] = [l, shape]
+ x[im_file] = [l, shape, segments]
except Exception as e:
nc += 1
print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')
@@ -480,7 +492,8 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')
x['hash'] = get_hash(self.label_files + self.img_files)
- x['results'] = [nf, nm, ne, nc, i + 1]
+ x['results'] = nf, nm, ne, nc, i + 1
+ x['version'] = 0.1 # cache version
torch.save(x, path) # save for next time
logging.info(f'{prefix}New cache created: {path}')
return x
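
The new 'version' key lets a future release invalidate caches written with an older format, in addition to the existing content hash. A minimal sketch of the reuse logic (the `load_or_rebuild` helper and `CACHE_VERSION` constant are illustrative, not part of the patch):

```python
import torch

CACHE_VERSION = 0.1  # must match the 'version' written by cache_labels()

def load_or_rebuild(cache_path, expected_hash, rebuild):
    # Reuse the cache only if it is readable and both the content hash and format version match
    try:
        cache = torch.load(cache_path)
        if cache.get('hash') == expected_hash and cache.get('version') == CACHE_VERSION:
            return cache, True  # exists: caller prints the cached scan results
    except Exception:
        pass  # missing or unreadable cache
    return rebuild(cache_path), False  # stale or absent -> re-scan images and labels
```
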
@@ -650,7 +663,7 @@ def hist_equalize(img, clahe=True, bgr=False):
def load_mosaic(self, index):
# loads images in a 4-mosaic
- labels4 = []
+ labels4, segments4 = [], []
s = self.img_size
yc, xc = [int(random.uniform(-x, 2 * s + x)) for x in self.mosaic_border] # mosaic center x, y
indices = [index] + [self.indices[random.randint(0, self.n - 1)] for _ in range(3)] # 3 additional image indices
@@ -678,19 +691,21 @@ def load_mosaic(self, index):
padh = y1a - y1b
# Labels
- labels = self.labels[index].copy()
+ labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
+ segments = [xyn2xy(x, w, h, padw, padh) for x in segments]
labels4.append(labels)
+ segments4.extend(segments)
# Concat/clip labels
- if len(labels4):
- labels4 = np.concatenate(labels4, 0)
- np.clip(labels4[:, 1:], 0, 2 * s, out=labels4[:, 1:]) # use with random_perspective
- # img4, labels4 = replicate(img4, labels4) # replicate
+ labels4 = np.concatenate(labels4, 0)
+ for x in (labels4[:, 1:], *segments4):
+ np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
+ # img4, labels4 = replicate(img4, labels4) # replicate
# Augment
- img4, labels4 = random_perspective(img4, labels4,
+ img4, labels4 = random_perspective(img4, labels4, segments4,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
@@ -704,7 +719,7 @@ def load_mosaic(self, index):
def load_mosaic9(self, index):
# loads images in a 9-mosaic
- labels9 = []
+ labels9, segments9 = [], []
s = self.img_size
indices = [index] + [self.indices[random.randint(0, self.n - 1)] for _ in range(8)] # 8 additional image indices
for i, index in enumerate(indices):
@@ -737,30 +752,34 @@ def load_mosaic9(self, index):
x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords
# Labels
- labels = self.labels[index].copy()
+ labels, segments = self.labels[index].copy(), self.segments[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format
+ segments = [xyn2xy(x, w, h, padx, pady) for x in segments]
labels9.append(labels)
+ segments9.extend(segments)
# Image
img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
hp, wp = h, w # height, width previous
# Offset
- yc, xc = [int(random.uniform(0, s)) for x in self.mosaic_border] # mosaic center x, y
+ yc, xc = [int(random.uniform(0, s)) for _ in self.mosaic_border] # mosaic center x, y
img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]
# Concat/clip labels
- if len(labels9):
- labels9 = np.concatenate(labels9, 0)
- labels9[:, [1, 3]] -= xc
- labels9[:, [2, 4]] -= yc
+ labels9 = np.concatenate(labels9, 0)
+ labels9[:, [1, 3]] -= xc
+ labels9[:, [2, 4]] -= yc
+ c = np.array([xc, yc]) # centers
+ segments9 = [x - c for x in segments9]
- np.clip(labels9[:, 1:], 0, 2 * s, out=labels9[:, 1:]) # use with random_perspective
- # img9, labels9 = replicate(img9, labels9) # replicate
+ for x in (labels9[:, 1:], *segments9):
+ np.clip(x, 0, 2 * s, out=x) # clip when using random_perspective()
+ # img9, labels9 = replicate(img9, labels9) # replicate
# Augment
- img9, labels9 = random_perspective(img9, labels9,
+ img9, labels9 = random_perspective(img9, labels9, segments9,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
@@ -821,7 +840,8 @@ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scale
return img, ratio, (dw, dh)
-def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0, border=(0, 0)):
+def random_perspective(img, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
+ border=(0, 0)):
# torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
# targets = [cls, xyxy]
@@ -873,37 +893,38 @@ def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shea
# Transform label coordinates
n = len(targets)
if n:
- # warp points
- xy = np.ones((n * 4, 3))
- xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
- xy = xy @ M.T # transform
- if perspective:
- xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale
- else: # affine
- xy = xy[:, :2].reshape(n, 8)
-
- # create new boxes
- x = xy[:, [0, 2, 4, 6]]
- y = xy[:, [1, 3, 5, 7]]
- xy = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
-
- # # apply angle-based reduction of bounding boxes
- # radians = a * math.pi / 180
- # reduction = max(abs(math.sin(radians)), abs(math.cos(radians))) ** 0.5
- # x = (xy[:, 2] + xy[:, 0]) / 2
- # y = (xy[:, 3] + xy[:, 1]) / 2
- # w = (xy[:, 2] - xy[:, 0]) * reduction
- # h = (xy[:, 3] - xy[:, 1]) * reduction
- # xy = np.concatenate((x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T
-
- # clip boxes
- xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width)
- xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height)
+ use_segments = any(x.any() for x in segments)
+ new = np.zeros((n, 4))
+ if use_segments: # warp segments
+ segments = resample_segments(segments) # upsample
+ for i, segment in enumerate(segments):
+ xy = np.ones((len(segment), 3))
+ xy[:, :2] = segment
+ xy = xy @ M.T # transform
+ xy = xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2] # perspective rescale or affine
+
+ # clip
+ new[i] = segment2box(xy, width, height)
+
+ else: # warp boxes
+ xy = np.ones((n * 4, 3))
+ xy[:, :2] = targets[:, [1, 2, 3, 4, 1, 4, 3, 2]].reshape(n * 4, 2) # x1y1, x2y2, x1y2, x2y1
+ xy = xy @ M.T # transform
+ xy = (xy[:, :2] / xy[:, 2:3] if perspective else xy[:, :2]).reshape(n, 8) # perspective rescale or affine
+
+ # create new boxes
+ x = xy[:, [0, 2, 4, 6]]
+ y = xy[:, [1, 3, 5, 7]]
+ new = np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T
+
+ # clip
+ new[:, [0, 2]] = new[:, [0, 2]].clip(0, width)
+ new[:, [1, 3]] = new[:, [1, 3]].clip(0, height)
# filter candidates
- i = box_candidates(box1=targets[:, 1:5].T * s, box2=xy.T)
+ i = box_candidates(box1=targets[:, 1:5].T * s, box2=new.T, area_thr=0.01 if use_segments else 0.10)
targets = targets[i]
- targets[:, 1:5] = xy[i]
+ targets[:, 1:5] = new[i]
return img, targets
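
Warping the resampled segment points instead of the four box corners produces tighter boxes after rotation or shear, which is why the candidate filter can use the stricter `area_thr=0.01` in the segment case. A numpy-only illustration of the effect, independent of the repository code:

```python
import numpy as np

def bbox(points):
    # Axis-aligned [x1, y1, x2, y2] around an (n,2) point set
    return np.array([points[:, 0].min(), points[:, 1].min(), points[:, 0].max(), points[:, 1].max()])

theta = np.radians(45)
R = np.array([[np.cos(theta), -np.sin(theta)], [np.sin(theta), np.cos(theta)]])  # 45-degree rotation

poly = np.array([[0., 0.], [4., 0.], [0., 4.]])               # triangular instance segment
corners = np.array([[0., 0.], [4., 0.], [4., 4.], [0., 4.]])  # corners of its axis-aligned box

tight = bbox(poly @ R.T)     # warp the segment, then box it   -> ~5.7 x 2.8 (area ~16)
loose = bbox(corners @ R.T)  # warp the box corners, then box  -> ~5.7 x 5.7 (area ~32)
```
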
diff --git a/utils/general.py b/utils/general.py
index bbc0f32b8425..e5bbc50c6177 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -47,11 +47,16 @@ def get_latest_run(search_dir='.'):
return max(last_list, key=os.path.getctime) if last_list else ''
+def isdocker():
+ # Is environment a Docker container
+ return Path('/workspace').exists() # or Path('/.dockerenv').exists()
+
+
def check_online():
# Check internet connectivity
import socket
try:
- socket.create_connection(("1.1.1.1", 53)) # check host accesability
+ socket.create_connection(("1.1.1.1", 443), 5) # check host accessibility
return True
except OSError:
return False
@@ -62,7 +67,7 @@ def check_git_status():
print(colorstr('github: '), end='')
try:
assert Path('.git').exists(), 'skipping check (not a git repository)'
- assert not Path('/workspace').exists(), 'skipping check (Docker image)' # not Path('/.dockerenv').exists()
+ assert not isdocker(), 'skipping check (Docker image)'
assert check_online(), 'skipping check (offline)'
cmd = 'git fetch && git config --get remote.origin.url'
@@ -95,6 +100,20 @@ def check_img_size(img_size, s=32):
return new_size
+def check_imshow():
+ # Check if environment supports image displays
+ try:
+ assert not isdocker(), 'cv2.imshow() is disabled in Docker environments'
+ cv2.imshow('test', np.zeros((1, 1, 3)))
+ cv2.waitKey(1)
+ cv2.destroyAllWindows()
+ cv2.waitKey(1)
+ return True
+ except Exception as e:
+ print(f'WARNING: Environment does not support cv2.imshow() or PIL Image.show() image displays\n{e}')
+ return False
+
+
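
A sketch of how the new check can gate display calls in an inference loop, assuming the repository root is on `PYTHONPATH` (the surrounding loop and variable names are illustrative):

```python
from utils.general import check_imshow  # added by this patch

view_img = check_imshow()  # False inside Docker or other headless environments

# Inside the per-frame loop, display only when supported, e.g.:
# if view_img:
#     cv2.imshow(str(path), im0)
#     cv2.waitKey(1)
```
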
def check_file(file):
# Search for file if not found
if os.path.isfile(file) or file == '':
@@ -225,7 +244,7 @@ def xywh2xyxy(x):
return y
-def xywhn2xyxy(x, w=640, h=640, padw=32, padh=32):
+def xywhn2xyxy(x, w=640, h=640, padw=0, padh=0):
# Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x
@@ -235,6 +254,40 @@ def xywhn2xyxy(x, w=640, h=640, padw=32, padh=32):
return y
+def xyn2xy(x, w=640, h=640, padw=0, padh=0):
+ # Convert normalized segments into pixel segments, shape (n,2)
+ y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
+ y[:, 0] = w * x[:, 0] + padw # top left x
+ y[:, 1] = h * x[:, 1] + padh # top left y
+ return y
+
+
+def segment2box(segment, width=640, height=640):
+ # Convert 1 segment label to 1 box label, applying inside-image constraint, i.e. (xy1, xy2, ...) to (xyxy)
+ x, y = segment.T # segment xy
+ inside = (x >= 0) & (y >= 0) & (x <= width) & (y <= height)
+ x, y = x[inside], y[inside]
+ return np.array([x.min(), y.min(), x.max(), y.max()]) if any(x) else np.zeros((1, 4)) # xyxy
+
+
+def segments2boxes(segments):
+ # Convert segment labels to box labels, i.e. (cls, xy1, xy2, ...) to (cls, xywh)
+ boxes = []
+ for s in segments:
+ x, y = s.T # segment xy
+ boxes.append([x.min(), y.min(), x.max(), y.max()]) # xyxy
+ return xyxy2xywh(np.array(boxes)) # xywh
+
+
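
With this patch a label row may be either a 5-column box (`cls x y w h`, normalized) or a longer polygon (`cls x1 y1 x2 y2 ...`), detected by `len(x) > 8` and reduced to a box through `segments2boxes`. A small sketch, assuming `utils.general` from this patch is importable:

```python
import numpy as np
from utils.general import segments2boxes  # added by this patch

# One normalized polygon, e.g. parsed from a 'cls x1 y1 x2 y2 ...' row with the class stripped
segments = [np.array([[0.2, 0.2], [0.8, 0.2], [0.8, 0.6], [0.2, 0.6]], dtype=np.float32)]

boxes = segments2boxes(segments)  # -> [[0.5, 0.4, 0.6, 0.4]] xywh of the polygon extents
```
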
+def resample_segments(segments, n=1000):
+ # Up-sample each (m,2) segment to n points via linear interpolation
+ for i, s in enumerate(segments):
+ x = np.linspace(0, len(s) - 1, n)
+ xp = np.arange(len(s))
+ segments[i] = np.concatenate([np.interp(x, xp, s[:, j]) for j in range(2)]).reshape(2, -1).T # segment xy
+ return segments
+
+
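
`resample_segments` densifies each polygon to a fixed point count so it survives the perspective warp; a quick illustrative check under the same import assumption:

```python
import numpy as np
from utils.general import resample_segments  # added by this patch

square = [np.array([[0, 0], [10, 0], [10, 10], [0, 10]], dtype=np.float32)]  # 4-point outline
dense = resample_segments(square, n=1000)
assert dense[0].shape == (1000, 2)  # 1000 points interpolated along the original vertices
```
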
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
@@ -337,11 +390,12 @@ def wh_iou(wh1, wh2):
return inter / (wh1.prod(2) + wh2.prod(2) - inter) # iou = inter / (area1 + area2 - inter)
-def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, labels=()):
- """Performs Non-Maximum Suppression (NMS) on inference results
+def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
+ labels=()):
+ """Runs Non-Maximum Suppression (NMS) on inference results
Returns:
- detections with shape: nx6 (x1, y1, x2, y2, conf, cls)
+ list of detections, one (n,6) tensor per image [xyxy, conf, cls]
"""
nc = prediction.shape[2] - 5 # number of classes
@@ -353,7 +407,7 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 10.0 # seconds to quit after
redundant = True # require redundant detections
- multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
+ multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
t = time.time()
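
Multi-label output is now opt-in rather than implied by `nc > 1`, so detection keeps the cheaper single-label path while evaluation can still request several class labels per box. Illustrative calls on a dummy prediction tensor (the shapes and thresholds are assumptions, not taken from the patch):

```python
import torch
from utils.general import non_max_suppression

pred = torch.rand(1, 100, 85)  # dummy raw output: 1 image, 100 boxes, 80 classes (assumption)

out = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)  # detection: one label per box (default)
out = non_max_suppression(pred, conf_thres=0.001, iou_thres=0.6, multi_label=True)  # evaluation-style call
```
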
@@ -430,8 +484,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
def strip_optimizer(f='weights/best.pt', s=''): # from utils.general import *; strip_optimizer()
# Strip optimizer from 'f' to finalize training, optionally save as 's'
x = torch.load(f, map_location=torch.device('cpu'))
- for key in 'optimizer', 'training_results', 'wandb_id':
- x[key] = None
+ for k in 'optimizer', 'training_results', 'wandb_id', 'ema': # keys
+ x[k] = None
x['epoch'] = -1
x['model'].half() # to FP16
for p in x['model'].parameters():
diff --git a/utils/loss.py b/utils/loss.py
index 889ddf7295da..2302d18de87d 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -105,9 +105,8 @@ def __init__(self, model, autobalance=False):
BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module
- self.balance = {3: [3.67, 1.0, 0.43], 4: [3.78, 1.0, 0.39, 0.22], 5: [3.88, 1.0, 0.37, 0.17, 0.10]}[det.nl]
- # self.balance = [1.0] * det.nl
- self.ssi = (det.stride == 16).nonzero(as_tuple=False).item() # stride 16 index
+ self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02]) # P3-P7
+ self.ssi = list(det.stride).index(16) if autobalance else 0 # stride 16 index
self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance
for k in 'na', 'nc', 'nl', 'anchors':
setattr(self, k, getattr(det, k))
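
The per-layer objectness weights now come from a lookup with a P3-P7 fallback, and the stride-16 index is only computed when autobalance is active. A small worked sketch using the values from the patch (the example stride tensor is an assumption):

```python
import torch

stride = torch.tensor([8., 16., 32.])  # strides of a P3-P5 Detect() head (assumption)
nl, autobalance = len(stride), True

balance = {3: [4.0, 1.0, 0.4]}.get(nl, [4.0, 1.0, 0.25, 0.06, .02])  # [4.0, 1.0, 0.4] here; 5 weights for P3-P7 heads
ssi = list(stride).index(16) if autobalance else 0                   # stride-16 (P4) layer index -> 1
```
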
diff --git a/utils/plots.py b/utils/plots.py
index 3ec793528fe5..aa9a1cab81f0 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -15,7 +15,7 @@
import seaborn as sns
import torch
import yaml
-from PIL import Image, ImageDraw
+from PIL import Image, ImageDraw, ImageFont
from scipy.signal import butter, filtfilt
from utils.general import xywh2xyxy, xyxy2xywh
@@ -54,7 +54,7 @@ def butter_lowpass(cutoff, fs, order):
return filtfilt(b, a, data) # forward-backward filter
-def plot_one_box(x, img, color=None, label=None, line_thickness=None):
+def plot_one_box(x, img, color=None, label=None, line_thickness=3):
# Plots one bounding box on image img
tl = line_thickness or round(0.002 * (img.shape[0] + img.shape[1]) / 2) + 1 # line/font thickness
color = color or [random.randint(0, 255) for _ in range(3)]
@@ -68,6 +68,20 @@ def plot_one_box(x, img, color=None, label=None, line_thickness=None):
cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
+def plot_one_box_PIL(box, img, color=None, label=None, line_thickness=None):
+ img = Image.fromarray(img)
+ draw = ImageDraw.Draw(img)
+ line_thickness = line_thickness or max(int(min(img.size) / 200), 2)
+ draw.rectangle(box, width=line_thickness, outline=tuple(color)) # plot
+ if label:
+ fontsize = max(round(max(img.size) / 40), 12)
+ font = ImageFont.truetype("Arial.ttf", fontsize)
+ txt_width, txt_height = font.getsize(label)
+ draw.rectangle([box[0], box[1] - txt_height + 4, box[0] + txt_width, box[1]], fill=tuple(color))
+ draw.text((box[0], box[1] - txt_height + 1), label, fill=(255, 255, 255), font=font)
+ return np.asarray(img)
+
+
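
`plot_one_box_PIL` mirrors the cv2 `plot_one_box` but renders with Pillow; it loads `"Arial.ttf"` by name, so the font must be resolvable on the system or `ImageFont.truetype` will raise. A minimal usage sketch on a blank canvas (box, color and label values are illustrative):

```python
import numpy as np
from utils.plots import plot_one_box_PIL  # added by this patch

img = np.zeros((480, 640, 3), dtype=np.uint8)  # HxWx3 canvas
img = plot_one_box_PIL([50, 60, 300, 400], img, color=(255, 0, 0), label='person 0.92')
# Note: raises OSError if "Arial.ttf" cannot be found by PIL on this system
```
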
def plot_wh_methods(): # from utils.plots import *; plot_wh_methods()
# Compares the two methods for width-height anchor multiplication
# https://github.com/ultralytics/yolov3/issues/168
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 2cb09e71ce71..1b1cc2038c55 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -205,7 +205,7 @@ def model_info(model, verbose=False, img_size=640):
try: # FLOPS
from thop import profile
- stride = int(model.stride.max()) if hasattr(model, 'stride') else 32
+ stride = max(int(model.stride.max()), 32) if hasattr(model, 'stride') else 32
img = torch.zeros((1, model.yaml.get('ch', 3), stride, stride), device=next(model.parameters()).device) # input
flops = profile(deepcopy(model), inputs=(img,), verbose=False)[0] / 1E9 * 2 # stride GFLOPS
img_size = img_size if isinstance(img_size, list) else [img_size, img_size] # expand if int/float