Merge pull request #147 from tryolabs/yolov7
Yolov7
Showing 5 changed files with 446 additions and 0 deletions.
`Dockerfile.local` (new file, +14 lines):

```dockerfile
FROM python:3.8-slim-buster

WORKDIR /

RUN apt update && \
    apt install -y libgl1 sudo gcc libglib2.0-0 git-all wget

RUN pip3 install --upgrade pip && \
    pip3 install -r https://raw.githubusercontent.com/WongKinYiu/yolov7/main/requirements.txt

RUN git clone https://github.com/tryolabs/norfair.git ./norfair/ && \
    pip3 install ./norfair/

WORKDIR /demo/src/
```
`README.md` (new file, +23 lines):

````md
# YOLOv7 Example

Simplest possible example of tracking. Based on [YOLOv7](https://github.com/WongKinYiu/yolov7).

## Instructions

1. Build and run the Docker container with:

   ```bash
   ./run_docker.sh
   ```

2. In the container, display the demo instructions:

   ```bash
   python demo.py --help
   ```

   Bonus: use the additional arguments `--detector-path`, `--img-size`, `--iou-threshold`, `--conf-threshold`, `--classes`, `--track-points` as you wish.

## Explanation

This example tracks objects using a single point per detection: the centroid of the bounding boxes around cars returned by YOLOv7.

![Norfair YOLOv7 demo](../../docs/yolov7_cars.gif)
````
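
A minimal sketch of that centroid-tracking loop, using the same Norfair calls as `demo.py` below; the `detect` helper and the video path are placeholders, not part of this PR:

```python
import numpy as np

import norfair
from norfair import Detection, Tracker, Video


def detect(frame):
    # Placeholder detector: swap in YOLOv7 or anything returning (x, y, score) rows
    return []


def euclidean_distance(detection, tracked_object):
    return np.linalg.norm(detection.points - tracked_object.estimate)


tracker = Tracker(distance_function=euclidean_distance, distance_threshold=30)
video = Video(input_path="cars.mp4")  # placeholder path

for frame in video:
    detections = [
        Detection(points=np.array([x, y]), scores=np.array([score]))
        for x, y, score in detect(frame)
    ]
    tracked_objects = tracker.update(detections=detections)
    norfair.draw_points(frame, detections)
    norfair.draw_tracked_objects(frame, tracked_objects)
    video.write(frame)
```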
`run_docker.sh` (new file, +3 lines):

```bash
#!/usr/bin/env bash
docker build . -f Dockerfile.local -t norfair-yolov7
docker run --gpus all -it --shm-size=1gb --rm -v "$(realpath .)":/demo norfair-yolov7 bash
```
`demo.py` (new file, +191 lines):

```python
import argparse
import os
from typing import List, Optional, Union

import numpy as np
import torch
import torchvision.ops.boxes as bops

import norfair
from norfair import Detection, Paths, Tracker, Video

DISTANCE_THRESHOLD_BBOX: float = 3.33
DISTANCE_THRESHOLD_CENTROID: int = 30
MAX_DISTANCE: int = 10000
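
# Note on the thresholds: "bbox" mode uses 1 / IoU as its distance (see iou()
# below), so DISTANCE_THRESHOLD_BBOX = 3.33 accepts matches with IoU >= ~0.3;
# DISTANCE_THRESHOLD_CENTROID is in pixels.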


class YOLO:
    def __init__(self, model_path: str, device: Optional[str] = None):
        if device is not None and "cuda" in device and not torch.cuda.is_available():
            raise Exception(
                "Selected device='cuda', but cuda is not available to Pytorch."
            )
        # automatically set device if it's None
        elif device is None:
            device = "cuda:0" if torch.cuda.is_available() else "cpu"

        # download the weights if they are not present locally
        if not os.path.exists(model_path):
            os.system(
                f"wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/{os.path.basename(model_path)} -O {model_path}"
            )

        # load the model through the YOLOv7 hub entry point
        try:
            self.model = torch.hub.load("WongKinYiu/yolov7", "custom", model_path)
        except Exception as e:
            raise Exception(f"Failed to load model from {model_path}") from e

    def __call__(
        self,
        img: Union[str, np.ndarray],
        conf_threshold: float = 0.25,
        iou_threshold: float = 0.45,
        image_size: int = 720,
        classes: Optional[List[int]] = None,
    ) -> torch.Tensor:
        self.model.conf = conf_threshold
        self.model.iou = iou_threshold
        if classes is not None:
            self.model.classes = classes
        detections = self.model(img, size=image_size)
        return detections
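
# Standalone usage of the wrapper (hypothetical paths; class 2 is "car" in COCO):
#   model = YOLO("/yolov7.pt")
#   results = model("frame.jpg", conf_threshold=0.3, classes=[2])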


def euclidean_distance(detection, tracked_object):
    return np.linalg.norm(detection.points - tracked_object.estimate)
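
# e.g. a detection centroid at [10., 20.] and a tracked estimate at [13., 24.]
# are 5.0 pixels apart (a 3-4-5 right triangle).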


def center(points):
    return [np.mean(np.array(points), axis=0)]


def iou_pytorch(detection, tracked_object):
    # Slower but simpler version of iou
    detection_points = np.concatenate([detection.points[0], detection.points[1]])
    tracked_object_points = np.concatenate(
        [tracked_object.estimate[0], tracked_object.estimate[1]]
    )

    box_a = torch.tensor([detection_points], dtype=torch.float)
    box_b = torch.tensor([tracked_object_points], dtype=torch.float)
    iou = bops.box_iou(box_a, box_b)

    # Since 0 <= IoU <= 1, we define 1/IoU as a distance.
    # Distance values will be in [1, inf)
    return float(1 / iou if iou else MAX_DISTANCE)


def iou(detection, tracked_object):
    # Detection points will be box A.
    # The tracked object's points will be box B.
    box_a = np.concatenate([detection.points[0], detection.points[1]])
    box_b = np.concatenate([tracked_object.estimate[0], tracked_object.estimate[1]])

    x_a = max(box_a[0], box_b[0])
    y_a = max(box_a[1], box_b[1])
    x_b = min(box_a[2], box_b[2])
    y_b = min(box_a[3], box_b[3])

    # Compute the area of the intersection rectangle
    inter_area = max(0, x_b - x_a + 1) * max(0, y_b - y_a + 1)

    # Compute the area of both the prediction and tracker rectangles
    box_a_area = (box_a[2] - box_a[0] + 1) * (box_a[3] - box_a[1] + 1)
    box_b_area = (box_b[2] - box_b[0] + 1) * (box_b[3] - box_b[1] + 1)

    # Compute the intersection over union: the intersection area divided by
    # the sum of the prediction and tracker areas minus the intersection area
    iou = inter_area / float(box_a_area + box_b_area - inter_area)

    # Since 0 <= IoU <= 1, we define 1/IoU as a distance.
    # Distance values will be in [1, inf)
    return 1 / iou if iou else MAX_DISTANCE
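
# Worked example (hypothetical boxes): [0, 0, 10, 10] vs [5, 0, 15, 10] gives a
# 6 * 11 = 66 pixel intersection under the +1-inclusive convention above, 121
# pixels per box, IoU = 66 / 176 = 0.375, and a distance of 1 / 0.375 ≈ 2.67,
# which is below DISTANCE_THRESHOLD_BBOX = 3.33.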


def yolo_detections_to_norfair_detections(
    yolo_detections: torch.Tensor,
    track_points: str = "centroid",  # bbox or centroid
) -> List[Detection]:
    """Convert YOLO detections (xywh or xyxy) to Norfair detections."""
    norfair_detections: List[Detection] = []

    if track_points == "centroid":
        detections_as_xywh = yolo_detections.xywh[0]
        for detection_as_xywh in detections_as_xywh:
            centroid = np.array(
                [detection_as_xywh[0].item(), detection_as_xywh[1].item()]
            )
            scores = np.array([detection_as_xywh[4].item()])
            norfair_detections.append(Detection(points=centroid, scores=scores))
    elif track_points == "bbox":
        detections_as_xyxy = yolo_detections.xyxy[0]
        for detection_as_xyxy in detections_as_xyxy:
            bbox = np.array(
                [
                    [detection_as_xyxy[0].item(), detection_as_xyxy[1].item()],
                    [detection_as_xyxy[2].item(), detection_as_xyxy[3].item()],
                ]
            )
            scores = np.array(
                [detection_as_xyxy[4].item(), detection_as_xyxy[4].item()]
            )
            norfair_detections.append(Detection(points=bbox, scores=scores))

    return norfair_detections
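
# Example (hypothetical values): in "bbox" mode, a YOLO xyxy row
# (10.0, 20.0, 50.0, 80.0, 0.9) becomes a Detection with points
# [[10.0, 20.0], [50.0, 80.0]] and scores [0.9, 0.9].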


parser = argparse.ArgumentParser(description="Track objects in a video.")
parser.add_argument("files", type=str, nargs="+", help="Video files to process")
parser.add_argument("--detector-path", type=str, default="/yolov7.pt", help="YOLOv7 model path")
parser.add_argument("--img-size", type=int, default=720, help="YOLOv7 inference size (pixels)")
parser.add_argument("--conf-threshold", type=float, default=0.25, help="YOLOv7 object confidence threshold")
parser.add_argument("--iou-threshold", type=float, default=0.45, help="YOLOv7 IOU threshold for NMS")
parser.add_argument("--classes", nargs="+", type=int, help="Filter by class: --classes 0, or --classes 0 2 3")
parser.add_argument("--device", type=str, default=None, help="Inference device: 'cpu' or 'cuda'")
parser.add_argument("--track-points", type=str, default="centroid", help="Track points: 'centroid' or 'bbox'")
args = parser.parse_args()
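
# Example invocation (hypothetical video path):
#   python demo.py traffic.mp4 --track-points bbox --classes 2 --img-size 640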

model = YOLO(args.detector_path, device=args.device)

for input_path in args.files:
    video = Video(input_path=input_path)

    # Pick the distance function and threshold that match the tracked points
    distance_function = iou if args.track_points == "bbox" else euclidean_distance
    distance_threshold = (
        DISTANCE_THRESHOLD_BBOX
        if args.track_points == "bbox"
        else DISTANCE_THRESHOLD_CENTROID
    )

    tracker = Tracker(
        distance_function=distance_function,
        distance_threshold=distance_threshold,
    )
    paths_drawer = Paths(center, attenuation=0.01)

    for frame in video:
        yolo_detections = model(
            frame,
            conf_threshold=args.conf_threshold,
            iou_threshold=args.iou_threshold,
            image_size=args.img_size,
            classes=args.classes,
        )
        detections = yolo_detections_to_norfair_detections(
            yolo_detections, track_points=args.track_points
        )
        tracked_objects = tracker.update(detections=detections)
        if args.track_points == "centroid":
            norfair.draw_points(frame, detections)
            norfair.draw_tracked_objects(frame, tracked_objects)
        elif args.track_points == "bbox":
            norfair.draw_boxes(frame, detections)
            norfair.draw_tracked_boxes(frame, tracked_objects)
        frame = paths_drawer.draw(frame, tracked_objects)
        video.write(frame)
```