From 23e46fe8ad7c04a86fa44b7a7f954b73b94029b3 Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Mon, 26 Dec 2022 09:09:09 +0000
Subject: [PATCH 01/12] Replace numpy transpose with torch permute to speed-up

---
 mmdet/datasets/pipelines/formatting.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index 45ca69cfc6f..d4a6d88b0fe 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -1,9 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from collections.abc import Sequence
 
-import mmcv
 import numpy as np
 import torch
+
+import mmcv
 from mmcv.parallel import DataContainer as DC
 
 from ..builder import PIPELINES
@@ -80,20 +81,20 @@ def __init__(self, keys):
 
     def __call__(self, results):
         """Call function to convert image in results to :obj:`torch.Tensor` and
-        transpose the channel order.
+        permute the channel order.
 
         Args:
             results (dict): Result dict contains the image data to convert.
 
         Returns:
             dict: The result dict contains the image converted
-                to :obj:`torch.Tensor` and transposed to (C, H, W) order.
+                to :obj:`torch.Tensor` and permuted to (C, H, W) order.
         """
         for key in self.keys:
             img = results[key]
             if len(img.shape) < 3:
                 img = np.expand_dims(img, -1)
-            results[key] = (to_tensor(img.transpose(2, 0, 1))).contiguous()
+            results[key] = to_tensor(img).permute(2, 0, 1).contiguous()
         return results
 
     def __repr__(self):
@@ -179,7 +180,7 @@ class DefaultFormatBundle:
     "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
     These fields are formatted as follows.
 
-    - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
+    - img: (1)to tensor, (2)permute, (3)to DataContainer (stack=True)
     - proposals: (1)to tensor, (2)to DataContainer
     - gt_bboxes: (1)to tensor, (2)to DataContainer
     - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
@@ -226,9 +227,10 @@ def __call__(self, results):
             results = self._add_default_meta_keys(results)
             if len(img.shape) < 3:
                 img = np.expand_dims(img, -1)
-            img = np.ascontiguousarray(img.transpose(2, 0, 1))
             results['img'] = DC(
-                to_tensor(img), padding_value=self.pad_val['img'], stack=True)
+                to_tensor(img).permute(2, 0, 1).contiguous(),
+                padding_value=self.pad_val['img'],
+                stack=True)
         for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
             if key not in results:
                 continue

From 51d45de8b2ffffd0b5dae760ecf16dd7bc5ef703 Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Mon, 26 Dec 2022 09:56:04 +0000
Subject: [PATCH 02/12] Lint

---
 mmdet/datasets/pipelines/formatting.py | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index d4a6d88b0fe..b1f94edbcb5 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -1,10 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from collections.abc import Sequence
 
+import mmcv
 import numpy as np
 import torch
-
-import mmcv
 from mmcv.parallel import DataContainer as DC
 
 from ..builder import PIPELINES
@@ -79,21 +78,21 @@ class ImageToTensor:
     def __init__(self, keys):
         self.keys = keys
 
-    def __call__(self, results):
-        """Call function to convert image in results to :obj:`torch.Tensor` and
+        permute the channel order.
+        permute the channel order.
         permute the channel order.
 
         Args:
             results (dict): Result dict contains the image data to convert.
 
-        Returns:
-            dict: The result dict contains the image converted
+                to :obj:`torch.Tensor` and permuted to (C, H, W) order.
+                to :obj:`torch.Tensor` and permuted to (C, H, W) order.
                 to :obj:`torch.Tensor` and permuted to (C, H, W) order.
         """
         for key in self.keys:
             img = results[key]
-            if len(img.shape) < 3:
-                img = np.expand_dims(img, -1)
+            results[key] = to_tensor(img).permute(2, 0, 1).contiguous()
+            results[key] = to_tensor(img).permute(2, 0, 1).contiguous()
             results[key] = to_tensor(img).permute(2, 0, 1).contiguous()
         return results
 
@@ -178,8 +177,8 @@ class DefaultFormatBundle:
 
     It simplifies the pipeline of formatting common fields, including "img",
     "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
-    These fields are formatted as follows.
-
+    - img: (1)to tensor, (2)permute, (3)to DataContainer (stack=True)
+    - img: (1)to tensor, (2)permute, (3)to DataContainer (stack=True)
     - img: (1)to tensor, (2)permute, (3)to DataContainer (stack=True)
     - proposals: (1)to tensor, (2)to DataContainer
     - gt_bboxes: (1)to tensor, (2)to DataContainer
@@ -225,12 +224,14 @@ def __call__(self, results):
                 img = img.astype(np.float32)
             # add default meta keys
             results = self._add_default_meta_keys(results)
-            if len(img.shape) < 3:
-                img = np.expand_dims(img, -1)
             results['img'] = DC(
                 to_tensor(img).permute(2, 0, 1).contiguous(),
                 padding_value=self.pad_val['img'],
                 stack=True)
+                padding_value=self.pad_val['img'],
+                stack=True)
+                padding_value=self.pad_val['img'],
+                stack=True)
         for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
             if key not in results:
                 continue

From 51932a4c74cea0bc289908156fc4c65aa2d6c156 Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Mon, 26 Dec 2022 09:59:29 +0000
Subject: [PATCH 03/12] Lint

---
 mmdet/datasets/pipelines/formatting.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index b1f94edbcb5..3bac19c3ca8 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -78,21 +78,21 @@ class ImageToTensor:
     def __init__(self, keys):
         self.keys = keys
 
-        permute the channel order.
-        permute the channel order.
+    def __call__(self, results):
+        """Call function to convert image in results to :obj:`torch.Tensor` and
         permute the channel order.
 
         Args:
             results (dict): Result dict contains the image data to convert.
 
-                to :obj:`torch.Tensor` and permuted to (C, H, W) order.
-                to :obj:`torch.Tensor` and permuted to (C, H, W) order.
+        Returns:
+            dict: The result dict contains the image converted
                 to :obj:`torch.Tensor` and permuted to (C, H, W) order.
         """
         for key in self.keys:
             img = results[key]
-            results[key] = to_tensor(img).permute(2, 0, 1).contiguous()
-            results[key] = to_tensor(img).permute(2, 0, 1).contiguous()
+            if len(img.shape) < 3:
+                img = np.expand_dims(img, -1)
             results[key] = to_tensor(img).permute(2, 0, 1).contiguous()
         return results
 
@@ -177,8 +177,8 @@ class DefaultFormatBundle:
 
     It simplifies the pipeline of formatting common fields, including "img",
     "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
-    - img: (1)to tensor, (2)permute, (3)to DataContainer (stack=True)
-    - img: (1)to tensor, (2)permute, (3)to DataContainer (stack=True)
+    These fields are formatted as follows.
+
     - img: (1)to tensor, (2)permute, (3)to DataContainer (stack=True)
     - proposals: (1)to tensor, (2)to DataContainer
     - gt_bboxes: (1)to tensor, (2)to DataContainer
@@ -224,14 +224,12 @@ def __call__(self, results):
                 img = img.astype(np.float32)
             # add default meta keys
             results = self._add_default_meta_keys(results)
+            if len(img.shape) < 3:
+                img = np.expand_dims(img, -1)
             results['img'] = DC(
                 to_tensor(img).permute(2, 0, 1).contiguous(),
                 padding_value=self.pad_val['img'],
                 stack=True)
-                padding_value=self.pad_val['img'],
-                stack=True)
-                padding_value=self.pad_val['img'],
-                stack=True)
         for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
             if key not in results:
                 continue

From 5c21cfe6037baf06403d74e0d4d586cbbdeabf2b Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Mon, 26 Dec 2022 10:08:41 +0000
Subject: [PATCH 04/12] Fix scikit-learn install name

---
 requirements/optional.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements/optional.txt b/requirements/optional.txt
index 150b3146f1f..4f0065a9b4d 100644
--- a/requirements/optional.txt
+++ b/requirements/optional.txt
@@ -1,3 +1,3 @@
 cityscapesscripts
 imagecorruptions
-sklearn
+scikit-learn

From d6a5370de5a078aeb13758abc2e60601e0130b84 Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Tue, 27 Dec 2022 04:48:00 +0000
Subject: [PATCH 05/12] Fix non-contiguous numpy array

---
 mmdet/datasets/pipelines/formatting.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index 3bac19c3ca8..19ff7985b3b 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -226,6 +226,7 @@ def __call__(self, results):
             results = self._add_default_meta_keys(results)
             if len(img.shape) < 3:
                 img = np.expand_dims(img, -1)
+            img = np.ascontiguousarray(img)
             results['img'] = DC(
                 to_tensor(img).permute(2, 0, 1).contiguous(),
                 padding_value=self.pad_val['img'],

From 219f7970bc2639b03bbbf9e6634419731e93d272 Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Tue, 27 Dec 2022 05:14:29 +0000
Subject: [PATCH 06/12] Switch the order of transpose and to_tensor according
 to array contiguity

---
 mmdet/datasets/pipelines/formatting.py | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index 19ff7985b3b..947db8ab990 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -179,7 +179,7 @@ class DefaultFormatBundle:
     "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
     These fields are formatted as follows.
 
-    - img: (1)to tensor, (2)permute, (3)to DataContainer (stack=True)
+    - img: (1)transpose (or permute) & to tensor, (2)to DataContainer (stack=True)
     - proposals: (1)to tensor, (2)to DataContainer
     - gt_bboxes: (1)to tensor, (2)to DataContainer
     - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
@@ -226,11 +226,18 @@ def __call__(self, results):
             results = self._add_default_meta_keys(results)
             if len(img.shape) < 3:
                 img = np.expand_dims(img, -1)
-            img = np.ascontiguousarray(img)
-            results['img'] = DC(
-                to_tensor(img).permute(2, 0, 1).contiguous(),
-                padding_value=self.pad_val['img'],
-                stack=True)
+            # To lower the computational time, if image is not contiguous,
+            # use `numpy.transpose()`` before `numpy.ascontiguousarray()`,
+            # otherwise, use `torch.permute()` before `torch.contiguous()`.
+            if not img.flags.c_contiguous:
+                img = np.ascontiguousarray(img.transpose(2, 0, 1))
+                results['img'] = DC(
+                    to_tensor(img), padding_value=self.pad_val['img'], stack=True)
+            else:
+                results['img'] = DC(
+                    to_tensor(img).permute(2, 0, 1).contiguous(),
+                    padding_value=self.pad_val['img'],
+                    stack=True)
         for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
             if key not in results:
                 continue

From 6b8a1b0cabdbcf42b4a8d3f337ef2ad7888af997 Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Tue, 27 Dec 2022 05:21:31 +0000
Subject: [PATCH 07/12] Lint

---
 mmdet/datasets/pipelines/formatting.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index 947db8ab990..4eb3d37e18a 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -179,7 +179,7 @@ class DefaultFormatBundle:
     "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
     These fields are formatted as follows.
 
-    - img: (1)transpose (or permute) & to tensor, (2)to DataContainer (stack=True)
+    - img: (1)transpose & to tensor, (2)to DataContainer (stack=True)
     - proposals: (1)to tensor, (2)to DataContainer
     - gt_bboxes: (1)to tensor, (2)to DataContainer
     - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
@@ -232,7 +232,9 @@ def __call__(self, results):
             if not img.flags.c_contiguous:
                 img = np.ascontiguousarray(img.transpose(2, 0, 1))
                 results['img'] = DC(
-                    to_tensor(img), padding_value=self.pad_val['img'], stack=True)
+                    to_tensor(img),
+                    padding_value=self.pad_val['img'],
+                    stack=True)
             else:
                 results['img'] = DC(
                     to_tensor(img).permute(2, 0, 1).contiguous(),

From e4009ec89ac9424c26e081a70587f12f546353a8 Mon Sep 17 00:00:00 2001
From: Min Sheng Wu <30727252+Min-Sheng@users.noreply.github.com>
Date: Wed, 28 Dec 2022 10:23:39 +0800
Subject: [PATCH 08/12] Update mmdet/datasets/pipelines/formatting.py

Co-authored-by: Wenwei Zhang <40779233+ZwwWayne@users.noreply.github.com>
---
 mmdet/datasets/pipelines/formatting.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index 4eb3d37e18a..d5a885fe733 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -231,13 +231,13 @@ def __call__(self, results):
             # otherwise, use `torch.permute()` before `torch.contiguous()`.
             if not img.flags.c_contiguous:
                 img = np.ascontiguousarray(img.transpose(2, 0, 1))
-                results['img'] = DC(
-                    to_tensor(img),
-                    padding_value=self.pad_val['img'],
-                    stack=True)
+                img = to_tensor(img),
+            
             else:
-                results['img'] = DC(
-                    to_tensor(img).permute(2, 0, 1).contiguous(),
+               img =  to_tensor(img).permute(2, 0, 1).contiguous()
+                   
+            results['img'] = DC(
+                    img,
                     padding_value=self.pad_val['img'],
                     stack=True)
         for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:

From 455f666227aa155c5589f85175e000b936ace19d Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Wed, 28 Dec 2022 02:27:14 +0000
Subject: [PATCH 09/12] Fix Indentation

---
 mmdet/datasets/pipelines/formatting.py | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index d5a885fe733..ce77c89fe50 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -231,15 +231,13 @@ def __call__(self, results):
             # otherwise, use `torch.permute()` before `torch.contiguous()`.
             if not img.flags.c_contiguous:
                 img = np.ascontiguousarray(img.transpose(2, 0, 1))
-                img = to_tensor(img),
-            
+                img = to_tensor(img)
             else:
-               img =  to_tensor(img).permute(2, 0, 1).contiguous()
-                   
+                img = to_tensor(img).permute(2, 0, 1).contiguous()
             results['img'] = DC(
-                    img,
-                    padding_value=self.pad_val['img'],
-                    stack=True)
+                img,
+                padding_value=self.pad_val['img'],
+                stack=True)
         for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
             if key not in results:
                 continue

From f1a16c441db559664bc0ad41d75aadbde146e1bb Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Wed, 28 Dec 2022 02:40:10 +0000
Subject: [PATCH 10/12] Add acceleration ratio to the comment

---
 mmdet/datasets/pipelines/formatting.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index ce77c89fe50..c4bd0ed2c06 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -226,9 +226,11 @@ def __call__(self, results):
             results = self._add_default_meta_keys(results)
             if len(img.shape) < 3:
                 img = np.expand_dims(img, -1)
-            # To lower the computational time, if image is not contiguous,
-            # use `numpy.transpose()`` before `numpy.ascontiguousarray()`,
-            # otherwise, use `torch.permute()` before `torch.contiguous()`.
+            # To improve the computational speed by 3-5 times, apply:
+            # If image is not contiguous, use
+            # `numpy.transpose()` followed by `numpy.ascontiguousarray()`
+            # If image is already contiguous, use
+            # `torch.permute()` followed by `torch.contiguous()`
             if not img.flags.c_contiguous:
                 img = np.ascontiguousarray(img.transpose(2, 0, 1))
                 img = to_tensor(img)

From 679284ea880925591e0c124f3be7910977dbd92b Mon Sep 17 00:00:00 2001
From: Min-Sheng <mason840929@gmail.com>
Date: Wed, 28 Dec 2022 02:51:19 +0000
Subject: [PATCH 11/12] Yapf

---
 mmdet/datasets/pipelines/formatting.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index c4bd0ed2c06..d4596e4cf69 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -237,9 +237,7 @@ def __call__(self, results):
             else:
                 img = to_tensor(img).permute(2, 0, 1).contiguous()
             results['img'] = DC(
-                img,
-                padding_value=self.pad_val['img'],
-                stack=True)
+                img, padding_value=self.pad_val['img'], stack=True)
         for key in ['proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels']:
             if key not in results:
                 continue

From 25c6efac36f31489ac88d484a32869b75b433a07 Mon Sep 17 00:00:00 2001
From: vincentwu1 <vincentwu@aetherai.com>
Date: Wed, 4 Jan 2023 02:14:45 +0000
Subject: [PATCH 12/12] Add PR reference in comment

---
 mmdet/datasets/pipelines/formatting.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mmdet/datasets/pipelines/formatting.py b/mmdet/datasets/pipelines/formatting.py
index d4596e4cf69..2e07f3894f0 100644
--- a/mmdet/datasets/pipelines/formatting.py
+++ b/mmdet/datasets/pipelines/formatting.py
@@ -231,6 +231,8 @@ def __call__(self, results):
             # `numpy.transpose()` followed by `numpy.ascontiguousarray()`
             # If image is already contiguous, use
             # `torch.permute()` followed by `torch.contiguous()`
+            # Refer to https://github.com/open-mmlab/mmdetection/pull/9533
+            # for more details
             if not img.flags.c_contiguous:
                 img = np.ascontiguousarray(img.transpose(2, 0, 1))
                 img = to_tensor(img)