Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add 'get_ann_info' to dataset_wrappers #6526

Merged
merged 3 commits into from
Dec 8, 2021
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions mmdet/datasets/dataset_wrappers.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,30 @@ def get_cat_ids(self, idx):
sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
return self.datasets[dataset_idx].get_cat_ids(sample_idx)

def get_ann_info(self, idx):
"""Get annotation of concatenated dataset by index.

This is needed by MixUp.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line is not necessary.


Args:
idx (int): Index of data.

Returns:
dict: Annotation info of specified index.
"""

if idx < 0:
if -idx > len(self):
raise ValueError(
'absolute value of index should not exceed dataset length')
idx = len(self) + idx
dataset_idx = bisect.bisect_right(self.cumulative_sizes, idx)
if dataset_idx == 0:
sample_idx = idx
else:
sample_idx = idx - self.cumulative_sizes[dataset_idx - 1]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lines 83-92 can be encapsulated into a helper function such as `get_sample_idx`, which we could then reuse in many places.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function is only used in two places. Where should this function be placed?

return self.datasets[dataset_idx].get_ann_info(sample_idx)

def evaluate(self, results, logger=None, **kwargs):
"""Evaluate the results.

Expand Down Expand Up @@ -165,6 +189,18 @@ def get_cat_ids(self, idx):

return self.dataset.get_cat_ids(idx % self._ori_len)

def get_ann_info(self, idx):
    """Fetch the annotation for an index of the repeated dataset.

    The index is reduced modulo ``self._ori_len`` and the lookup is
    delegated to the ``get_ann_info`` method of ``self.dataset``.

    Args:
        idx (int): Index of data.

    Returns:
        dict: Annotation info of the specified index, as returned by
            the wrapped dataset.
    """
    lookup = self.dataset.get_ann_info
    # Map the index in the repeated range back onto the wrapped dataset.
    wrapped_idx = idx % self._ori_len
    return lookup(wrapped_idx)

def __len__(self):
    """Return ``self.times * self._ori_len``, the length after repetition."""
    repeat_count = self.times
    return repeat_count * self._ori_len
Expand Down Expand Up @@ -280,6 +316,18 @@ def __getitem__(self, idx):
ori_index = self.repeat_indices[idx]
return self.dataset[ori_index]

def get_ann_info(self, idx):
    """Fetch the annotation for an index of the wrapper dataset.

    ``idx`` is first translated through ``self.repeat_indices`` and the
    resulting index is passed to ``self.dataset.get_ann_info``.

    Args:
        idx (int): Index of data.

    Returns:
        dict: Annotation info of the specified index, as returned by
            the wrapped dataset.
    """
    source_idx = self.repeat_indices[idx]
    wrapped = self.dataset
    return wrapped.get_ann_info(source_idx)

def __len__(self):
    """Return the number of entries in ``self.repeat_indices``."""
    indices = self.repeat_indices
    return len(indices)
Expand Down
37 changes: 37 additions & 0 deletions tests/test_data/test_datasets/test_dataset_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,27 +20,59 @@ def test_dataset_wrapper():
np.random.randint(0, 80, num).tolist()
for num in np.random.randint(1, 20, len_a)
]
ann_info_list_a = []
for _ in range(len_a):
height = np.random.randint(10, 30)
weight = np.random.randint(10, 30)
img = np.ones((height, weight, 3))
gt_bbox = np.concatenate([
np.random.randint(1, 5, (2, 2)),
np.random.randint(1, 5, (2, 2)) + 5
],
axis=1)
gt_labels = np.random.randint(0, 80, 2)
ann_info_list_a.append(
dict(gt_bboxes=gt_bbox, gt_labels=gt_labels, img=img))
dataset_a.data_infos = MagicMock()
dataset_a.data_infos.__len__.return_value = len_a
dataset_a.get_cat_ids = MagicMock(
side_effect=lambda idx: cat_ids_list_a[idx])
dataset_a.get_ann_info = MagicMock(
side_effect=lambda idx: ann_info_list_a[idx])
dataset_b = CustomDataset(
ann_file=MagicMock(), pipeline=[], test_mode=True, img_prefix='')
len_b = 20
cat_ids_list_b = [
np.random.randint(0, 80, num).tolist()
for num in np.random.randint(1, 20, len_b)
]
ann_info_list_b = []
for _ in range(len_b):
height = np.random.randint(10, 30)
weight = np.random.randint(10, 30)
img = np.ones((height, weight, 3))
gt_bbox = np.concatenate([
np.random.randint(1, 5, (2, 2)),
np.random.randint(1, 5, (2, 2)) + 5
],
axis=1)
gt_labels = np.random.randint(0, 80, 2)
ann_info_list_b.append(
dict(gt_bboxes=gt_bbox, gt_labels=gt_labels, img=img))
dataset_b.data_infos = MagicMock()
dataset_b.data_infos.__len__.return_value = len_b
dataset_b.get_cat_ids = MagicMock(
side_effect=lambda idx: cat_ids_list_b[idx])
dataset_b.get_ann_info = MagicMock(
side_effect=lambda idx: ann_info_list_b[idx])

concat_dataset = ConcatDataset([dataset_a, dataset_b])
assert concat_dataset[5] == 5
assert concat_dataset[25] == 15
assert concat_dataset.get_cat_ids(5) == cat_ids_list_a[5]
assert concat_dataset.get_cat_ids(25) == cat_ids_list_b[15]
assert concat_dataset.get_ann_info(5) == ann_info_list_a[5]
assert concat_dataset.get_ann_info(25) == ann_info_list_b[15]
assert len(concat_dataset) == len(dataset_a) + len(dataset_b)

repeat_dataset = RepeatDataset(dataset_a, 10)
Expand All @@ -50,6 +82,9 @@ def test_dataset_wrapper():
assert repeat_dataset.get_cat_ids(5) == cat_ids_list_a[5]
assert repeat_dataset.get_cat_ids(15) == cat_ids_list_a[5]
assert repeat_dataset.get_cat_ids(27) == cat_ids_list_a[7]
assert repeat_dataset.get_ann_info(5) == ann_info_list_a[5]
assert repeat_dataset.get_ann_info(15) == ann_info_list_a[5]
assert repeat_dataset.get_ann_info(27) == ann_info_list_a[7]
assert len(repeat_dataset) == 10 * len(dataset_a)

category_freq = defaultdict(int)
Expand Down Expand Up @@ -79,6 +114,8 @@ def test_dataset_wrapper():
for idx in np.random.randint(0, len(repeat_factor_dataset), 3):
assert repeat_factor_dataset[idx] == bisect.bisect_right(
repeat_factors_cumsum, idx)
assert repeat_factor_dataset.get_ann_info(idx) == ann_info_list_a[
bisect.bisect_right(repeat_factors_cumsum, idx)]

img_scale = (60, 60)
dynamic_scale = (80, 80)
Expand Down