open-mmlab · ZwwWayne · Dec 8, 2021 · Nov 16, 2021 · Nov 18, 2021 · Dec 2, 2021
diff --git a/mmdet/datasets/dataset_wrappers.py b/mmdet/datasets/dataset_wrappers.py
@@ -68,6 +68,30 @@ def get_cat_ids(self, idx):
             sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
         return self.datasets[dataset_idx].get_cat_ids(sample_idx)
 
+    def get_ann_info(self, idx):
+        """Get annotation of concatenated dataset by index.
+
+        This is needed by MixUp. A negative ``idx`` counts from the end.
+
+        Args:
+            idx (int): Index of data in the concatenated dataset.
+
+        Returns:
+            dict: Annotation info of specified index.
+        """
+        # Negative idx wraps around; ValueError when it overshoots len(self).
+        if idx < 0:
+            if -idx > len(self):
+                raise ValueError(
+                    'absolute value of index should not exceed dataset length')
+            idx = len(self) + idx
+        dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
+        if dataset_idx == 0:
+            sample_idx = idx
+        else:
+            sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
+        return self.datasets[dataset_idx].get_ann_info(sample_idx)
+
     def evaluate(self, results, logger=None, **kwargs):
         """Evaluate the results.
 
@@ -165,6 +189,18 @@ def get_cat_ids(self, idx):
 
         return self.dataset.get_cat_ids(idx % self._ori_len)
 
+    def get_ann_info(self, idx):
+        """Get annotation of repeat dataset by index.
+
+        Args:
+            idx (int): Index of data in the repeated dataset.
+
+        Returns:
+            dict: Annotation info of specified index.
+        """
+        # Fold idx back into [0, self._ori_len) before delegating.
+        return self.dataset.get_ann_info(idx % self._ori_len)
+
     def __len__(self):
         """Length after repetition."""
         return self.times * self._ori_len
@@ -280,6 +316,18 @@ def __getitem__(self, idx):
         ori_index = self.repeat_indices[idx]
         return self.dataset[ori_index]
 
+    def get_ann_info(self, idx):
+        """Get annotation of the wrapped dataset by repeated index.
+
+        Args:
+            idx (int): Position in ``self.repeat_indices``.
+
+        Returns:
+            dict: Annotation info of specified index.
+        """
+        ori_index = self.repeat_indices[idx]  # index in the wrapped dataset
+        return self.dataset.get_ann_info(ori_index)
+
     def __len__(self):
         """Length after repetition."""
         return len(self.repeat_indices)

diff --git a/tests/test_data/test_datasets/test_dataset_wrapper.py b/tests/test_data/test_datasets/test_dataset_wrapper.py
@@ -20,27 +20,59 @@ def test_dataset_wrapper():
         np.random.randint(0, 80, num).tolist()
         for num in np.random.randint(1, 20, len_a)
     ]
+    ann_info_list_a = []
+    for _ in range(len_a):
+        height = np.random.randint(10, 30)
+        weight = np.random.randint(10, 30)
+        img = np.ones((height, weight, 3))
+        gt_bbox = np.concatenate([
+            np.random.randint(1, 5, (2, 2)),
+            np.random.randint(1, 5, (2, 2)) + 5
+        ],
+                                 axis=1)
+        gt_labels = np.random.randint(0, 80, 2)
+        ann_info_list_a.append(
+            dict(gt_bboxes=gt_bbox, gt_labels=gt_labels, img=img))
     dataset_a.data_infos = MagicMock()
     dataset_a.data_infos.__len__.return_value = len_a
     dataset_a.get_cat_ids = MagicMock(
         side_effect=lambda idx: cat_ids_list_a[idx])
+    dataset_a.get_ann_info = MagicMock(
+        side_effect=lambda idx: ann_info_list_a[idx])
     dataset_b = CustomDataset(
         ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')
     len_b = 20
     cat_ids_list_b = [
         np.random.randint(0, 80, num).tolist()
         for num in np.random.randint(1, 20, len_b)
     ]
+    ann_info_list_b = []
+    for _ in range(len_b):
+        height = np.random.randint(10, 30)
+        weight = np.random.randint(10, 30)
+        img = np.ones((height, weight, 3))
+        gt_bbox = np.concatenate([
+            np.random.randint(1, 5, (2, 2)),
+            np.random.randint(1, 5, (2, 2)) + 5
+        ],
+                                 axis=1)
+        gt_labels = np.random.randint(0, 80, 2)
+        ann_info_list_b.append(
+            dict(gt_bboxes=gt_bbox, gt_labels=gt_labels, img=img))
     dataset_b.data_infos = MagicMock()
     dataset_b.data_infos.__len__.return_value = len_b
     dataset_b.get_cat_ids = MagicMock(
         side_effect=lambda idx: cat_ids_list_b[idx])
+    dataset_b.get_ann_info = MagicMock(
+        side_effect=lambda idx: ann_info_list_b[idx])
 
     concat_dataset = ConcatDataset([dataset_a, dataset_b])
     assert concat_dataset[5] == 5
     assert concat_dataset[25] == 15
     assert concat_dataset.get_cat_ids(5) == cat_ids_list_a[5]
     assert concat_dataset.get_cat_ids(25) == cat_ids_list_b[15]
+    assert concat_dataset.get_ann_info(5) == ann_info_list_a[5]
+    assert concat_dataset.get_ann_info(25) == ann_info_list_b[15]
     assert len(concat_dataset) == len(dataset_a) + len(dataset_b)
 
     repeat_dataset = RepeatDataset(dataset_a, 10)
@@ -50,6 +82,9 @@ def test_dataset_wrapper():
     assert repeat_dataset.get_cat_ids(5) == cat_ids_list_a[5]
     assert repeat_dataset.get_cat_ids(15) == cat_ids_list_a[5]
     assert repeat_dataset.get_cat_ids(27) == cat_ids_list_a[7]
+    assert repeat_dataset.get_ann_info(5) == ann_info_list_a[5]
+    assert repeat_dataset.get_ann_info(15) == ann_info_list_a[5]
+    assert repeat_dataset.get_ann_info(27) == ann_info_list_a[7]
     assert len(repeat_dataset) == 10 * len(dataset_a)
 
     category_freq = defaultdict(int)
@@ -79,6 +114,8 @@ def test_dataset_wrapper():
     for idx in np.random.randint(0, len(repeat_factor_dataset), 3):
         assert repeat_factor_dataset[idx] == bisect.bisect_right(
             repeat_factors_cumsum, idx)
+        assert repeat_factor_dataset.get_ann_info(idx) == ann_info_list_a[
+            bisect.bisect_right(repeat_factors_cumsum, idx)]
 
     img_scale = (60, 60)
     dynamic_scale = (80, 80)