From 3f437a661cdcee85cd3f37bb222b12ba199108b8 Mon Sep 17 00:00:00 2001 From: Antoine Simoulin Date: Thu, 2 Jan 2025 07:48:52 -0800 Subject: [PATCH] Add rotated bounding box formats Test Plan: Run unit tests: `pytest test/test_ops.py -vvv -k TestBoxConvert` --- test/test_ops.py | 182 ++++++++++++++- torchvision/ops/_box_convert.py | 262 ++++++++++++++++++++++ torchvision/ops/boxes.py | 121 +++++++--- torchvision/tv_tensors/_bounding_boxes.py | 12 +- 4 files changed, 549 insertions(+), 28 deletions(-) diff --git a/test/test_ops.py b/test/test_ops.py index 1ba7a2c9efa..e8224bc62d0 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -1288,6 +1288,38 @@ def test_bbox_same(self): assert_equal(ops.box_convert(box_tensor, in_fmt="xywh", out_fmt="xywh"), exp_xyxy) assert_equal(ops.box_convert(box_tensor, in_fmt="cxcywh", out_fmt="cxcywh"), exp_xyxy) + def test_rotated_bbox_same(self): + box_tensor = torch.tensor( + [ + [0, 0, 100, 100, 0], + [0, 0, 0, 0, 0], + [10, 15, 30, 35, 0], + [23, 35, 93, 95, 0], + ], + dtype=torch.float, + ) + + exp_xyxyr = torch.tensor( + [ + [0, 0, 100, 100, 0], + [0, 0, 0, 0, 0], + [10, 15, 30, 35, 0], + [23, 35, 93, 95, 0], + ], + dtype=torch.float, + ) + + assert exp_xyxyr.size() == torch.Size([4, 5]) + assert_equal( + ops.box_convert(box_tensor, in_fmt="xyxyr", out_fmt="xyxyr"), exp_xyxyr + ) + assert_equal( + ops.box_convert(box_tensor, in_fmt="xywhr", out_fmt="xywhr"), exp_xyxyr + ) + assert_equal( + ops.box_convert(box_tensor, in_fmt="cxcywhr", out_fmt="cxcywhr"), exp_xyxyr + ) + def test_bbox_xyxy_xywh(self): # Simple test convert boxes to xywh and back. Make sure they are same. # box_tensor is in x1 y1 x2 y2 format. 
@@ -1339,8 +1371,154 @@ def test_bbox_xywh_cxcywh(self): box_xywh = ops.box_convert(box_cxcywh, in_fmt="cxcywh", out_fmt="xywh") assert_equal(box_xywh, box_tensor) - @pytest.mark.parametrize("inv_infmt", ["xwyh", "cxwyh"]) - @pytest.mark.parametrize("inv_outfmt", ["xwcx", "xhwcy"]) + def test_bbox_xyxy_to_cxcywhr(self): + box_tensor = torch.tensor( + [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 30, 35], [23, 35, 93, 95]], + dtype=torch.float, + ) + exp_cxcywhr = torch.tensor( + [ + [50, 50, 100, 100, 0], + [0, 0, 0, 0, 0], + [20, 25, 20, 20, 0], + [58, 65, 70, 60, 0], + ], + dtype=torch.float, + ) + + assert exp_cxcywhr.size() == torch.Size([4, 5]) + box_cxcywhr = ops.box_convert(box_tensor, in_fmt="xyxy", out_fmt="cxcywhr") + assert_equal(box_cxcywhr, exp_cxcywhr) + + def test_bbox_xyxyr_xywhr(self): + # Simple test convert boxes to xywh and back. Make sure they are same. + # box_tensor is in x1 y1 x2 y2 format. + box_tensor = torch.tensor( + [ + [0, 0, 100, 100, 0], + [0, 0, 0, 0, 0], + [10, 15, 30, 35, 0], + [23, 35, 93, 95, 0], + [3, 2, 7, 4, 0], + [3, 2, 5, -2, 90], + ], + dtype=torch.float, + ) + exp_xywhr = torch.tensor( + [ + [0, 0, 100, 100, 0], + [0, 0, 0, 0, 0], + [10, 15, 20, 20, 0], + [23, 35, 70, 60, 0], + [3, 2, 4, 2, 0], + [3, 2, 4, 2, 90], + ], + dtype=torch.float, + ) + + assert exp_xywhr.size() == torch.Size([6, 5]) + box_xywhr = ops.box_convert(box_tensor, in_fmt="xyxyr", out_fmt="xywhr") + assert torch.allclose(box_xywhr, exp_xywhr) + + # Reverse conversion + box_xyxyr = ops.box_convert(box_xywhr, in_fmt="xywhr", out_fmt="xyxyr") + assert torch.allclose(box_xyxyr, box_tensor) + + def test_bbox_xyxyr_cxcywhr(self): + # Simple test convert boxes to cxcywh and back. Make sure they are same. + # box_tensor is in x1 y1 x2 y2 format. 
+ box_tensor = torch.tensor( + [ + [0, 0, 100, 100, 0], + [0, 0, 0, 0, 0], + [10, 15, 30, 35, 0], + [23, 35, 93, 95, 0], + [3, 2, 7, 4, 0], + ], + dtype=torch.float, + ) + exp_cxcywhr = torch.tensor( + [ + [50, 50, 100, 100, 0], + [0, 0, 0, 0, 0], + [20, 25, 20, 20, 0], + [58, 65, 70, 60, 0], + [5, 3, 4, 2, 0], + ], + dtype=torch.float, + ) + + assert exp_cxcywhr.size() == torch.Size([5, 5]) + box_cxcywhr = ops.box_convert(box_tensor, in_fmt="xyxyr", out_fmt="cxcywhr") + assert torch.allclose(box_cxcywhr, exp_cxcywhr) + + # Reverse conversion + box_xyxyr = ops.box_convert(box_cxcywhr, in_fmt="cxcywhr", out_fmt="xyxyr") + assert torch.allclose(box_xyxyr, box_tensor) + + def test_bbox_xywhr_cxcywhr(self): + box_tensor = torch.tensor( + [ + [0, 0, 100, 100, 0], + [0, 0, 0, 0, 0], + [10, 15, 20, 20, 0], + [23, 35, 70, 60, 0], + [4.0, 2.0, 4.0, 2.0, 0.0], + [5.0, 5.0, 4.0, 2.0, 90.0], + [8.0, 4.0, 4.0, 2.0, 180.0], + [7.0, 1.0, 4.0, 2.0, -90.0], + ], + dtype=torch.float, + ) + + exp_cxcywhr = torch.tensor( + [ + [50, 50, 100, 100, 0], + [0, 0, 0, 0, 0], + [20, 25, 20, 20, 0], + [58, 65, 70, 60, 0], + [6, 3, 4, 2, 0], + [6, 3, 4, 2, 90], + [6, 3, 4, 2, 180], + [6, 3, 4, 2, -90], + ], + dtype=torch.float, + ) + + assert exp_cxcywhr.size() == torch.Size([8, 5]) + box_cxcywhr = ops.box_convert(box_tensor, in_fmt="xywhr", out_fmt="cxcywhr") + assert torch.allclose(box_cxcywhr, exp_cxcywhr) + + # Reverse conversion + box_xywhr = ops.box_convert(box_cxcywhr, in_fmt="cxcywhr", out_fmt="xywhr") + assert torch.allclose(box_xywhr, box_tensor) + + def test_bbox_xyxyr_to_xyxyxyxy(self): + box_tensor = torch.tensor([[4, 5, 6, 1, 90]], dtype=torch.float) + exp_xyxyxyxy = torch.tensor([[4, 5, 4, 1, 6, 1, 6, 5]], dtype=torch.float) + + assert exp_xyxyxyxy.size() == torch.Size([1, 8]) + box_xyxyxyxy = ops.box_convert(box_tensor, in_fmt="xyxyr", out_fmt="xyxyxyxy") + assert_equal(box_xyxyxyxy, exp_xyxyxyxy) + + def test_bbox_cxcywhr_to_xyxyxyxy(self): + box_tensor = torch.tensor([[5, 3, 
4, 2, 90]], dtype=torch.float) + exp_xyxyxyxy = torch.tensor([[4, 5, 4, 1, 6, 1, 6, 5]], dtype=torch.float) + + assert exp_xyxyxyxy.size() == torch.Size([1, 8]) + box_xyxyxyxy = ops.box_convert(box_tensor, in_fmt="cxcywhr", out_fmt="xyxyxyxy") + assert_equal(box_xyxyxyxy, exp_xyxyxyxy) + + def test_bbox_xywhr_to_xyxyxyxy(self): + box_tensor = torch.tensor([[4, 5, 4, 2, 90]], dtype=torch.float) + exp_xyxyxyxy = torch.tensor([[4, 5, 4, 1, 6, 1, 6, 5]], dtype=torch.float) + + assert exp_xyxyxyxy.size() == torch.Size([1, 8]) + box_xyxyxyxy = ops.box_convert(box_tensor, in_fmt="xywhr", out_fmt="xyxyxyxy") + assert_equal(box_xyxyxyxy, exp_xyxyxyxy) + + @pytest.mark.parametrize("inv_infmt", ["xwyh", "cxwyh", "xwyhr", "cxwyhr", "xxxxyyyy"]) + @pytest.mark.parametrize("inv_outfmt", ["xwcx", "xhwcy", "xwcxr", "xhwcyr", "xyxyxxyy"]) def test_bbox_invalid(self, inv_infmt, inv_outfmt): box_tensor = torch.tensor( [[0, 0, 100, 100], [0, 0, 0, 0], [10, 15, 20, 20], [23, 35, 70, 60]], dtype=torch.float diff --git a/torchvision/ops/_box_convert.py b/torchvision/ops/_box_convert.py index 124bdd0bcc6..afdd6efbb98 100644 --- a/torchvision/ops/_box_convert.py +++ b/torchvision/ops/_box_convert.py @@ -79,3 +79,265 @@ def _box_xyxy_to_xywh(boxes: Tensor) -> Tensor: h = y2 - y1 # y2 - y1 boxes = torch.stack((x1, y1, w, h), dim=-1) return boxes + + +def _box_xyxy_to_cxcywhr(boxes: Tensor) -> Tensor: + """ + Converts bounding boxes from (x1, y1, x2, y2) format to (cx, cy, w, h, r) format. + (x1, y1) refer to top left of rotated bounding box + (x2, y2) refer to bottom right of rotated bounding box + (cx, cy) refers to the center of the box. + (w, h) are the width and height of the box. + r is rotation angle in degrees (in this case 0) + Args: + boxes (Tensor[N, 4]): boxes in (x1, y1, x2, y2) format. + + Returns: + boxes (Tensor[N, 5]): boxes in (cx, cy, w, h, r) format. 
+ """ + n = boxes.size(dim=0) + x1, y1, x2, y2 = boxes.unbind(-1) + w = x2 - x1 + h = y2 - y1 + cx = (x1 + x2) / 2 + cy = (y1 + y2) / 2 + r = torch.zeros((n,), device=x1.device) + + boxes = torch.stack((cx, cy, w, h, r), dim=-1) + + return boxes + + +def _box_cxcywhr_to_xyxyr(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (cx, cy, w, h, r) format to (x1, y1, x2, y2, r) format. + (cx, cy) refers to center of bounding box + (w, h) are width and height of bounding box + (x1, y1) refer to top left of rotated bounding box + (x2, y2) refer to bottom right of rotated bounding box + r is rotation angle w.r.t to the box center by :math:`|r|` degrees counter clock wise in the image plan + Args: + boxes (Tensor[N, 5]): boxes in (cx, cy, w, h, r) format which will be converted. + + Returns: + boxes (Tensor(N, 5)): rotated boxes in (x1, y1, x2, y2, r) format. + """ + cx, cy, w, h, r = boxes.unbind(-1) + r_rad = r * torch.pi / 180.0 + cos, sin = torch.cos(r_rad), torch.sin(r_rad) + + x1 = cx - w / 2 * cos - h / 2 * sin + y1 = cy - h / 2 * cos + w / 2 * sin + x2 = cx + w / 2 * cos + h / 2 * sin + y2 = cy + h / 2 * cos - w / 2 * sin + + boxes = torch.stack((x1, y1, x2, y2, r), dim=-1) + + return boxes + + +def _box_cxcywhr_to_xywhr(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (cx, cy, w, h, r) format to (x1, y1, w, h, r) format. + (cx, cy) refers to center of bounding box + (w, h) refers to width and height of rotated bounding box + (x1, y1) refers to top left of rotated bounding box + r is rotation angle w.r.t to the box center by :math:`|r|` degrees counter clock wise in the image plan + Args: + boxes (Tensor[N, 5]): boxes in (cx, cy, w, h, r) format which will be converted. + + Returns: + boxes (Tensor(N, 5)): rotated boxes in (x1, y1, w, h, r) format. 
+ """ + cx, cy, w, h, r = boxes.unbind(-1) + r_rad = r * torch.pi / 180.0 + cos, sin = torch.cos(r_rad), torch.sin(r_rad) + + x1 = cx - w / 2 * cos - h / 2 * sin + y1 = cy - h / 2 * cos + w / 2 * sin + boxes = torch.stack((x1, y1, w, h, r), dim=-1) + + return boxes + + +def _box_cxcywhr_to_xyxyxyxy(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (cx, cy, w, h, r) format to (x1, y1, x3, y3, x2, y2, x4, y4) format. + (cx, cy) refers to center of the rotated bounding box + (w, h) are width and height of the rotated bounding box + r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + + (x1, y1) refer to top left of rotated bounding box + (x3, y3) refer to top right of rotated bounding box + (x2, y2) refer to bottom right of rotated bounding box + (x4, y4) refer to bottom left of rotated bounding box + Args: + boxes (Tensor[N, 5]): rotated boxes in (cx, cy, w, h, r) format which will be converted. + + Returns: + boxes (Tensor(N, 8)): rotated boxes in (x1, y1, x3, y3, x2, y2, x4, y4) format. + """ + cx, cy, w, h, r = boxes.unbind(-1) + r_rad = r * torch.pi / 180.0 + cos, sin = torch.cos(r_rad), torch.sin(r_rad) + + x1 = cx - w / 2 * cos - h / 2 * sin + y1 = cy - h / 2 * cos + w / 2 * sin + x3 = cx + w / 2 * cos - h / 2 * sin + y3 = cy - h / 2 * cos - w / 2 * sin + x2 = cx + w / 2 * cos + h / 2 * sin + y2 = cy + h / 2 * cos - w / 2 * sin + x4 = cx - w / 2 * cos + h / 2 * sin + y4 = cy + h / 2 * cos + w / 2 * sin + + return torch.stack((x1, y1, x3, y3, x2, y2, x4, y4), dim=-1) + + +def _box_xyxyxyxy_to_xyxyr(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (x1, y1, x3, y3, x2, y2, x4, y4) format to (x1, y1, x2, y2, r) format. 
+ (x1, y1) refer to top left of the rotated bounding box + (x3, y3) refer to top right of the rotated bounding box + (x2, y2) refer to bottom right of the rotated bounding box + (x4, y4) refer to bottom left of the rotated bounding box + r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + + Args: + boxes (Tensor(N, 8)): rotated boxes in (x1, y1, x3, y3, x2, y2, x4, y4) format which will be converted. + + Returns: + boxes (Tensor[N, 5]): rotated boxes in (x1, y1, x2, y2, r) format. + """ + x1, y1, x3, y3, x2, y2, x4, y4 = boxes.unbind(-1) + r_rad = torch.atan2(y1 - y3, x3 - x1) + r = r_rad * 180 / torch.pi + + boxes = torch.stack((x1, y1, x2, y2, r), dim=-1) + + return boxes + + +def _box_xyxyr_to_cxcywhr(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (x1, y1, x2, y2, r) format to (cx, cy, w, h, r) format. + (x1, y1) refer to top left of rotated bounding box + (x2, y2) refer to bottom right of rotated bounding box + (cx, cy) refers to center of bounding box + (w, h) refers to width and height of rotated bounding box + r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + Args: + boxes (Tensor[N, 5]): rotated boxes in (x1, y1, x2, y2, r) format which will be converted. + + Returns: + boxes (Tensor(N, 5)): rotated boxes in (cx, cy, w, h, r) format. + """ + x1, y1, x2, y2, r = boxes.unbind(-1) + r_rad = r * torch.pi / 180.0 + cos, sin = torch.cos(r_rad), torch.sin(r_rad) + + cx = (x1 + x2) / 2 + cy = (y1 + y2) / 2 + w = (x2 - x1) * cos + (y1 - y2) * sin + h = (x2 - x1) * sin + (y2 - y1) * cos + + boxes = torch.stack((cx, cy, w, h, r), dim=-1) + + return boxes + + +def _box_xywhr_to_xyxyr(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (x1, y1, w, h, r) format to (x1, y1, x2, y2, r) format. 
+ (x1, y1) refer to top left of rotated bounding box + (x2, y2) refer to bottom right of rotated bounding box + (w, h) refers to width and height of rotated bounding box + r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + Args: + boxes (Tensor[N, 5]): rotated boxes in (x1, y1, w, h, r) format which will be converted. + + Returns: + boxes (Tensor[N, 5]): rotated boxes in (x1, y1, x2, y2, r) format. + """ + x1, y1, w, h, r = boxes.unbind(-1) + r_rad = r * torch.pi / 180.0 + cos, sin = torch.cos(r_rad), torch.sin(r_rad) + + x2 = x1 + w * cos + h * sin + y2 = y1 - w * sin + h * cos + boxes = torch.stack([x1, y1, x2, y2, r], dim=-1) + return boxes + + +def _box_xywhr_to_cxcywhr(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (x1, y1, w, h, r) format to (cx, cy, w, h, r) format. + (x1, y1) refers to top left of rotated bounding box + (w, h) refers to width and height of rotated bounding box + r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + Args: + boxes (Tensor[N, 5]): rotated boxes in (x1, y1, w, h, r) format which will be converted. + + Returns: + boxes (Tensor[N, 5]): rotated boxes in (cx, cy, w, h, r) format. + """ + x1, y1, w, h, r = boxes.unbind(-1) + r_rad = r * torch.pi / 180.0 + cos, sin = torch.cos(r_rad), torch.sin(r_rad) + + cx = x1 + w / 2 * cos + h / 2 * sin + cy = y1 - w / 2 * sin + h / 2 * cos + + boxes = torch.stack([cx, cy, w, h, r], dim=-1) + return boxes + + +def _box_xyxyr_to_xywhr(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (x1, y1, x2, y2, r) format to (x, y, w, h, r) format. + (x1, y1) refer to top left of rotated bounding box + (x2, y2) refer to bottom right of rotated bounding box + r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + Args: + boxes (Tensor[N, 5]): rotated boxes in (x1, y1, x2, y2, r) format which will be converted. 
+ + Returns: + boxes (Tensor[N, 5]): rotated boxes in (x, y, w, h, r) format. + """ + x1, y1, x2, y2, r = boxes.unbind(-1) + r_rad = r * torch.pi / 180.0 + cos, sin = torch.cos(r_rad), torch.sin(r_rad) + + w = (x2 - x1) * cos + (y1 - y2) * sin + h = (x2 - x1) * sin + (y2 - y1) * cos + boxes = torch.stack((x1, y1, w, h, r), dim=-1) + return boxes + + +def _box_cxcywhr_to_xyxy(boxes: Tensor) -> Tensor: + """ + Converts rotated bounding boxes from (cx, cy, w, h, r) format to rectangular bounding boxes (x1, y1, x2, y2) format. + (cx, cy) refers to center of bounding box + (w, h) are width and height of bounding box + (x1, y1) refer to top left of rotated bounding box + (x2, y2) refer to bottom right of rotated bounding box + r is rotation angle w.r.t to the box center by :math:`|r|` degrees counter clock wise in the image plan + Args: + boxes (Tensor[N, 5]): rotated boxes in (cx, cy, w, h, r) format which will be converted. + + + Returns: + boxes (Tensor(N, 4)): rectangular boxes in (x1, y1, x2, y2) format. 
+ """ + + cx, cy, w, h, r = boxes.unbind(-1) + r_rad = r * torch.pi / 180.0 + cos, sin = torch.cos(r_rad), torch.sin(r_rad) + + x1 = cx - w / 2 * cos - h / 2 * sin + y1 = cy - h / 2 * cos + w / 2 * sin + x2 = cx + w / 2 * cos + h / 2 * sin + y2 = cy + h / 2 * cos - w / 2 * sin + + boxes = torch.stack((x1, y1, x2, y2), dim=-1) + + return boxes diff --git a/torchvision/ops/boxes.py b/torchvision/ops/boxes.py index 309990ea03a..b992a66cb5d 100644 --- a/torchvision/ops/boxes.py +++ b/torchvision/ops/boxes.py @@ -6,7 +6,22 @@ from torchvision.extension import _assert_has_ops from ..utils import _log_api_usage_once -from ._box_convert import _box_cxcywh_to_xyxy, _box_xywh_to_xyxy, _box_xyxy_to_cxcywh, _box_xyxy_to_xywh +from ._box_convert import ( + _box_cxcywh_to_xyxy, + _box_cxcywhr_to_xywhr, + _box_cxcywhr_to_xyxy, + _box_cxcywhr_to_xyxyr, + _box_cxcywhr_to_xyxyxyxy, + _box_xywh_to_xyxy, + _box_xywhr_to_cxcywhr, + _box_xywhr_to_xyxyr, + _box_xyxy_to_cxcywh, + _box_xyxy_to_cxcywhr, + _box_xyxy_to_xywh, + _box_xyxyr_to_cxcywhr, + _box_xyxyr_to_xywhr, + _box_xyxyxyxy_to_xyxyr, +) from ._utils import _upcast @@ -194,41 +209,97 @@ def box_convert(boxes: Tensor, in_fmt: str, out_fmt: str) -> Tensor: ``'cxcywh'``: boxes are represented via centre, width and height, cx, cy being center of box, w, h being width and height. + ``'xyxyr'``: boxes are represented via corners, x1, y1 being top left and x2, y2 being bottom right. + r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + + ``'xywhr'``: boxes are represented via corner, width and height, x1, y1 being top left, w, h being width and height. + r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + + ``'cxcywhr'``: boxes are represented via centre, width and height, cx, cy being center of box, w, h + being width and height. 
+ r is rotation angle w.r.t. the box center by :math:`|r|` degrees counter-clockwise in the image plane + + ``'xyxyxyxy'``: boxes are represented via their four corners, stored as (x1, y1, x3, y3, x2, y2, x4, y4), + x1, y1 being top left, x3, y3 top right, x2, y2 bottom right, and x4, y4 bottom left. + Args: - boxes (Tensor[N, 4]): boxes which will be converted. - in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh']. - out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh'] + boxes (Tensor[N, K]): boxes which will be converted. K is the number of coordinates (4 for unrotated bounding boxes, 5 for rotated bounding boxes with an angle, or 8 for the 'xyxyxyxy' corner format) + in_fmt (str): Input format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh', 'xyxyr', 'xywhr', 'cxcywhr', 'xyxyxyxy']. + out_fmt (str): Output format of given boxes. Supported formats are ['xyxy', 'xywh', 'cxcywh', 'xyxyr', 'xywhr', 'cxcywhr', 'xyxyxyxy'] Returns: - Tensor[N, 4]: Boxes into converted format. + Tensor[N, K]: Boxes into converted format. 
""" if not torch.jit.is_scripting() and not torch.jit.is_tracing(): _log_api_usage_once(box_convert) - allowed_fmts = ("xyxy", "xywh", "cxcywh") + allowed_fmts = ( + "xyxy", + "xywh", + "cxcywh", + "xyxyr", + "xywhr", + "cxcywhr", + "xyxyxyxy", + ) if in_fmt not in allowed_fmts or out_fmt not in allowed_fmts: - raise ValueError("Unsupported Bounding Box Conversions for given in_fmt and out_fmt") + raise ValueError( + f"Unsupported Bounding Box Conversions for given in_fmt {in_fmt} and out_fmt {out_fmt}" + ) if in_fmt == out_fmt: return boxes.clone() + e = (in_fmt, out_fmt) + if e == ("xywh", "xyxy"): + boxes = _box_xywh_to_xyxy(boxes) + elif e == ("cxcywh", "xyxy"): + boxes = _box_cxcywh_to_xyxy(boxes) + elif e == ("xyxy", "xywh"): + boxes = _box_xyxy_to_xywh(boxes) + elif e == ("xyxy", "cxcywh"): + boxes = _box_xyxy_to_cxcywh(boxes) + elif e == ("xywh", "cxcywh"): + boxes = _box_xywh_to_xyxy(boxes) + boxes = _box_xyxy_to_cxcywh(boxes) + elif e == ("cxcywh", "xywh"): + boxes = _box_cxcywh_to_xyxy(boxes) + boxes = _box_xyxy_to_xywh(boxes) + elif e == ("xyxy", "cxcywhr"): + boxes = _box_xyxy_to_cxcywhr(boxes) + elif e == ("cxcywhr", "xyxyr"): + boxes = _box_cxcywhr_to_xyxyr(boxes) + elif e == ("xywhr", "xyxyr"): + boxes = _box_xywhr_to_xyxyr(boxes) + elif e == ("xyxyr", "cxcywhr"): + boxes = _box_xyxyr_to_cxcywhr(boxes) + elif e == ("xyxyr", "xywhr"): + boxes = _box_xyxyr_to_xywhr(boxes) + elif e == ("cxcywhr", "xyxy"): + boxes = _box_cxcywhr_to_xyxy(boxes) + elif e == ("cxcywhr", "xywhr"): + boxes = _box_cxcywhr_to_xywhr(boxes) + elif e == ("xywhr", "cxcywhr"): + boxes = _box_xywhr_to_cxcywhr(boxes) + elif e == ("cxcywhr", "xyxyxyxy"): + boxes = _box_cxcywhr_to_xyxyxyxy(boxes) + elif e == ("xyxyxyxy", "xyxyr"): + boxes = _box_xyxyxyxy_to_xyxyr(boxes) + elif e == ("xywhr", "xyxyxyxy"): + boxes = _box_xywhr_to_cxcywhr(boxes) + boxes = _box_cxcywhr_to_xyxyxyxy(boxes) + elif e == ("xyxyr", "xyxyxyxy"): + boxes = _box_xyxyr_to_cxcywhr(boxes) + boxes = 
_box_cxcywhr_to_xyxyxyxy(boxes) + elif e == ("xyxyxyxy", "cxcywhr"): + boxes = _box_xyxyxyxy_to_xyxyr(boxes) + boxes = _box_xyxyr_to_cxcywhr(boxes) + elif e == ("xyxyxyxy", "xywhr"): + boxes = _box_xyxyxyxy_to_xyxyr(boxes) + boxes = _box_xyxyr_to_xywhr(boxes) + else: + raise NotImplementedError( + f"Unsupported Bounding Box Conversions for given in_fmt {e[0]} and out_fmt {e[1]}" + ) - if in_fmt != "xyxy" and out_fmt != "xyxy": - # convert to xyxy and change in_fmt xyxy - if in_fmt == "xywh": - boxes = _box_xywh_to_xyxy(boxes) - elif in_fmt == "cxcywh": - boxes = _box_cxcywh_to_xyxy(boxes) - in_fmt = "xyxy" - - if in_fmt == "xyxy": - if out_fmt == "xywh": - boxes = _box_xyxy_to_xywh(boxes) - elif out_fmt == "cxcywh": - boxes = _box_xyxy_to_cxcywh(boxes) - elif out_fmt == "xyxy": - if in_fmt == "xywh": - boxes = _box_xywh_to_xyxy(boxes) - elif in_fmt == "cxcywh": - boxes = _box_cxcywh_to_xyxy(boxes) return boxes diff --git a/torchvision/tv_tensors/_bounding_boxes.py b/torchvision/tv_tensors/_bounding_boxes.py index ea02fa3dc7b..b4400923ef9 100644 --- a/torchvision/tv_tensors/_bounding_boxes.py +++ b/torchvision/tv_tensors/_bounding_boxes.py @@ -17,15 +17,25 @@ class BoundingBoxFormat(Enum): * ``XYXY`` * ``XYWH`` * ``CXCYWH`` + * ``XYXYR``: rotated boxes represented via corners, x1, y1 being top left and x2, y2 being bottom right. r is rotation angle in degrees. + * ``XYWHR``: rotated boxes represented via corner, width and height, x1, y1 being top left, w, h being width and height. r is rotation angle in degrees. + * ``CXCYWHR``: rotated boxes represented via centre, width and height, cx, cy being center of box, w, h being width and height. r is rotation angle in degrees. + * ``XYXYXYXY``: rotated boxes represented via corners, x1, y1 being top left, x3, y3 being top right, x2, y2 being bottom right, x4, y4 being bottom left. 
""" XYXY = "XYXY" XYWH = "XYWH" CXCYWH = "CXCYWH" + XYXYR = "XYXYR" + XYWHR = "XYWHR" + CXCYWHR = "CXCYWHR" + XYXYXYXY = "XYXYXYXY" class BoundingBoxes(TVTensor): - """:class:`torch.Tensor` subclass for bounding boxes with shape ``[N, 4]``. + """:class:`torch.Tensor` subclass for bounding boxes with shape ``[N, K]``. + Where ``N`` is the number of bounding boxes + and ``K`` is either 4 for unrotated boxes or 5 for rotated boxes. .. note:: There should be only one :class:`~torchvision.tv_tensors.BoundingBoxes`