###################################################################################################
#
# Copyright (C) 2022-2023 Maxim Integrated Products, Inc. All Rights Reserved.
#
# Maxim Integrated Products, Inc. Default Copyright Notice:
# https://www.maximintegrated.com/en/aboutus/legal/copyrights.html
###################################################################################################
#
# GitHub repo for the following helper methods:
# https://github.com/sgrvinod/a-PyTorch-Tutorial-to-Object-Detection
# MIT License
# Copyright (c) 2019 Sagar Vinodababu
# Code slightly modified
""" Utility functions for Object Detection Tasks """
import torch


def collate_fn(batch):
    """
    Since each image may have a different number of objects, we need a collate function
    (to be passed to the DataLoader).
    This describes how to combine these tensors of different sizes. We use lists.
    :param batch: an iterable of N sets from __getitem__()
    :return: a tensor of images, lists of varying-size tensors of bounding boxes and labels
    """
    images = []
    boxes_and_labels = []
    for b in batch:
        images.append(b[0])
        boxes_and_labels.append(b[1])
    images = torch.stack(images, dim=0)
    return images, boxes_and_labels
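

# A minimal usage sketch (not part of the original file): pass collate_fn to a DataLoader so
# that images can be stacked while the variable-length box/label targets stay in a plain list.
# 'detection_dataset' is a hypothetical dataset whose __getitem__ returns (image, (boxes, labels)).
#
#     from torch.utils.data import DataLoader
#     loader = DataLoader(detection_dataset, batch_size=8, shuffle=True, collate_fn=collate_fn)
#     images, boxes_and_labels = next(iter(loader))
#     # images: (8, C, H, W) tensor; boxes_and_labels: list of 8 (boxes, labels) pairs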


def check_target_exists(target_list):
    """
    Checks whether any object exists in the given target list.
    Object detection data loaders return each target as
        target[0]: boxes list
        target[1]: labels list
    For images without any objects, these lists are both empty.
    target_list is a list of targets, e.g. the targets in a given batch.
    """
    for target in target_list:
        if target[0].numel() > 0:
            return True
    return False
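

# Usage sketch (an assumption, not from the original file): guard loss terms that require at
# least one ground-truth box when a batch may contain only empty images.
# 'compute_detection_loss' is a hypothetical helper.
#
#     if check_target_exists(boxes_and_labels):
#         loss = compute_detection_loss(predictions, boxes_and_labels)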


def xy_to_cxcy(xy):
    """
    Convert bounding boxes from boundary coordinates (x_min, y_min, x_max, y_max) to center-size
    coordinates (c_x, c_y, w, h).
    :param xy: bounding boxes in boundary coordinates, a tensor of size (n_boxes, 4)
    :return: bounding boxes in center-size coordinates, a tensor of size (n_boxes, 4)
    """
    return torch.cat([(xy[:, 2:] + xy[:, :2]) / 2,  # c_x, c_y
                      xy[:, 2:] - xy[:, :2]], 1)  # w, h


def cxcy_to_xy(cxcy):
    """
    Convert bounding boxes from center-size coordinates (c_x, c_y, w, h) to boundary coordinates
    (x_min, y_min, x_max, y_max).
    :param cxcy: bounding boxes in center-size coordinates, a tensor of size (n_boxes, 4)
    :return: bounding boxes in boundary coordinates, a tensor of size (n_boxes, 4)
    """
    return torch.cat([cxcy[:, :2] - (cxcy[:, 2:] / 2),  # x_min, y_min
                      cxcy[:, :2] + (cxcy[:, 2:] / 2)], 1)  # x_max, y_max
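

# Worked example (illustrative only): a 100x100 box with its top-left corner at (10, 20)
# converts to center-size form and back without loss.
#
#     xy = torch.tensor([[10., 20., 110., 120.]])
#     cxcy = xy_to_cxcy(xy)              # tensor([[60., 70., 100., 100.]])
#     assert torch.allclose(cxcy_to_xy(cxcy), xy)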


def cxcy_to_gcxgcy(cxcy, priors_cxcy):
    """
    Encode bounding boxes (that are in center-size form) w.r.t. the corresponding prior boxes
    (that are in center-size form).
    For the center coordinates, find the offset with respect to the prior box, and scale by the
    size of the prior box.
    For the size coordinates, scale by the size of the prior box, and convert to the log-space.
    In the model, we are predicting bounding box coordinates in this encoded form.
    :param cxcy: bounding boxes in center-size coordinates, a tensor of size (n_priors, 4)
    :param priors_cxcy: prior boxes with respect to which the encoding must be performed, a tensor
                        of size (n_priors, 4)
    :return: encoded bounding boxes, a tensor of size (n_priors, 4)
    """
    eps = 1e-7
    # The 10 and 5 below are referred to as 'variances' in the original Caffe repo,
    # completely empirical
    # They are for some sort of numerical conditioning, for 'scaling the localization gradient'
    # See https://github.com/weiliu89/caffe/issues/155
    return torch.cat([(cxcy[:, :2] - priors_cxcy[:, :2]) / (priors_cxcy[:, 2:] / 10),
                      torch.log((cxcy[:, 2:] / priors_cxcy[:, 2:]) + eps) * 5], 1)


def gcxgcy_to_cxcy(gcxgcy, priors_cxcy):
    """
    Decode bounding box coordinates predicted by the model, since they are encoded in the form
    mentioned above.
    They are decoded into center-size coordinates.
    This is the inverse of the function above.
    :param gcxgcy: encoded bounding boxes, i.e. output of the model, a tensor of size (n_priors, 4)
    :param priors_cxcy: prior boxes with respect to which the encoding is defined, a tensor of size
                        (n_priors, 4)
    :return: decoded bounding boxes in center-size form, a tensor of size (n_priors, 4)
    """
    return torch.cat([gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2],  # c_x, c_y
                      torch.exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]], 1)  # w, h
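

# Roundtrip sketch (illustrative only, values chosen arbitrarily): encoding a box against a
# prior and decoding it again recovers the original center-size box, up to the small eps added
# inside the log.
#
#     prior = torch.tensor([[0.50, 0.50, 0.20, 0.20]])
#     box = torch.tensor([[0.55, 0.48, 0.25, 0.10]])
#     g = cxcy_to_gcxgcy(box, prior)     # tensor([[ 2.5000, -1.0000,  1.1157, -3.4657]])
#     assert torch.allclose(gcxgcy_to_cxcy(g, prior), box, atol=1e-5)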


def find_intersection(set_1, set_2):
    """
    Find the intersection of every box combination between two sets of boxes that are in boundary
    coordinates.
    :param set_1: set 1, a tensor of dimensions (n1, 4)
    :param set_2: set 2, a tensor of dimensions (n2, 4)
    :return: intersection of each of the boxes in set 1 with respect to each of the boxes in set 2,
             a tensor of dimensions (n1, n2)
    """
    # PyTorch auto-broadcasts singleton dimensions
    lower_bounds = torch.max(set_1[:, :2].unsqueeze(1), set_2[:, :2].unsqueeze(0))  # (n1, n2, 2)
    upper_bounds = torch.min(set_1[:, 2:].unsqueeze(1), set_2[:, 2:].unsqueeze(0))  # (n1, n2, 2)
    intersection_dims = torch.clamp(upper_bounds - lower_bounds, min=0)  # (n1, n2, 2)
    return intersection_dims[:, :, 0] * intersection_dims[:, :, 1]  # (n1, n2)


def find_jaccard_overlap(set_1, set_2):
    """
    Find the Jaccard Overlap (IoU) of every box combination between two sets of boxes that are in
    boundary coordinates.
    :param set_1: set 1, a tensor of dimensions (n1, 4)
    :param set_2: set 2, a tensor of dimensions (n2, 4)
    :return: Jaccard Overlap of each of the boxes in set 1 with respect to each of the boxes in
             set 2, a tensor of dimensions (n1, n2)
    """
    # Find intersections
    intersection = find_intersection(set_1, set_2)  # (n1, n2)
    # Find areas of each box in both sets
    areas_set_1 = (set_1[:, 2] - set_1[:, 0]) * (set_1[:, 3] - set_1[:, 1])  # (n1)
    areas_set_2 = (set_2[:, 2] - set_2[:, 0]) * (set_2[:, 3] - set_2[:, 1])  # (n2)
    # Find the union
    # PyTorch auto-broadcasts singleton dimensions
    union = areas_set_1.unsqueeze(1) + areas_set_2.unsqueeze(0) - intersection  # (n1, n2)
    return intersection / union  # (n1, n2)
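

# Worked example (illustrative only): two unit squares overlapping over half their area have
# IoU = 0.5 / (1 + 1 - 0.5) = 1/3.
#
#     a = torch.tensor([[0.0, 0.0, 1.0, 1.0]])
#     b = torch.tensor([[0.5, 0.0, 1.5, 1.0]])
#     find_jaccard_overlap(a, b)         # tensor([[0.3333]])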