[Enhancement] Refactor VOT, UAV123, OTB100 and LaSOT dataset class based on BaseSOTDataset #401

Merged: 18 commits, Jan 29, 2022
12 changes: 4 additions & 8 deletions configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py
@@ -71,7 +71,7 @@
data_root = 'data/'
train_pipeline = [
dict(type='LoadMultiImagesFromFile', to_float32=True),
dict(type='SeqLoadAnnotations', with_bbox=True),
dict(type='SeqLoadAnnotations', with_bbox=True, with_label=False),
dict(
type='SeqCropLikeSiamFC',
context_amount=0.5,
@@ -90,7 +90,7 @@
]
test_pipeline = [
dict(type='LoadImageFromFile', to_float32=True),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='LoadAnnotations', with_bbox=True, with_label=False),
dict(
type='MultiScaleFlipAug',
scale_factor=1,
@@ -146,19 +146,15 @@
],
val=dict(
type='LaSOTDataset',
test_load_ann=True,
ann_file=data_root + 'lasot/annotations/lasot_test.json',
img_prefix=data_root + 'lasot/LaSOTBenchmark',
pipeline=test_pipeline,
ref_img_sampler=None,
split='test',
test_mode=True),
test=dict(
type='LaSOTDataset',
test_load_ann=True,
ann_file=data_root + 'lasot/annotations/lasot_test.json',
img_prefix=data_root + 'lasot/LaSOTBenchmark',
pipeline=test_pipeline,
ref_img_sampler=None,
split='test',
test_mode=True))
# optimizer
optimizer = dict(
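As a sanity check of the refactored configs, here is a minimal sketch of building the new-style LaSOT test dataset directly. It assumes mmtrack's standard `build_dataset` entry point; the `ann_file` path and the empty pipeline are placeholders, not values taken from this PR:

from mmtrack.datasets import build_dataset

# Placeholder ann_file and pipeline; real configs pass test_pipeline and
# the converter-generated annotation file.
lasot_test = build_dataset(
    dict(
        type='LaSOTDataset',
        ann_file='tools/convert_datasets/lasot/testing_set.txt',  # hypothetical
        img_prefix='data/lasot/LaSOTBenchmark',
        pipeline=[],
        ref_img_sampler=None,
        split='test',
        test_mode=True))
print(len(lasot_test))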
6 changes: 3 additions & 3 deletions configs/sot/siamese_rpn/siamese_rpn_r50_1x_otb100.py
@@ -11,7 +11,7 @@
data_root = 'data/'
train_pipeline = [
dict(type='LoadMultiImagesFromFile', to_float32=True),
dict(type='SeqLoadAnnotations', with_bbox=True),
dict(type='SeqLoadAnnotations', with_bbox=True, with_label=False),
dict(
type='SeqCropLikeSiamFC',
context_amount=0.5,
@@ -74,9 +74,9 @@
],
val=dict(
type='OTB100Dataset',
ann_file=data_root + 'otb100/annotations/otb100.json',
ann_file='tools/convert_datasets/otb100/otb100_infos.txt',
img_prefix=data_root + 'otb100/data'),
test=dict(
type='OTB100Dataset',
ann_file=data_root + 'otb100/annotations/otb100.json',
ann_file='tools/convert_datasets/otb100/otb100_infos.txt',
img_prefix=data_root + 'otb100/data'))
8 changes: 4 additions & 4 deletions configs/sot/siamese_rpn/siamese_rpn_r50_1x_uav123.py
@@ -9,9 +9,9 @@
data = dict(
val=dict(
type='UAV123Dataset',
ann_file=data_root + 'UAV123/annotations/uav123.json',
img_prefix=data_root + 'UAV123/data_seq/UAV123'),
ann_file='tools/convert_datasets/uav123/uav123_infos.txt',
img_prefix=data_root + 'UAV123'),
test=dict(
type='UAV123Dataset',
ann_file=data_root + 'UAV123/annotations/uav123.json',
img_prefix=data_root + 'UAV123/data_seq/UAV123'))
ann_file='tools/convert_datasets/uav123/uav123_infos.txt',
img_prefix=data_root + 'UAV123'))
4 changes: 2 additions & 2 deletions configs/sot/siamese_rpn/siamese_rpn_r50_1x_vot2018.py
@@ -11,11 +11,11 @@
data = dict(
val=dict(
type='VOTDataset',
ann_file=data_root + 'vot2018/annotations/vot2018.json',
challenge_year=2018,
img_prefix=data_root + 'vot2018/data'),
test=dict(
type='VOTDataset',
ann_file=data_root + 'vot2018/annotations/vot2018.json',
challenge_year=2018,
img_prefix=data_root + 'vot2018/data'))
evaluation = dict(
metric=['track'], interval=1, start=10, rule='greater', save_best='eao')
19 changes: 10 additions & 9 deletions mmtrack/core/evaluation/eval_sot_ope.py
@@ -63,10 +63,10 @@ def eval_sot_ope(results, annotations):
results of each video. The second list contains the tracking
results of each frame in one video. The ndarray denotes the
tracking box in [tl_x, tl_y, br_x, br_y] format.
annotations (list[list[dict]]): The first list contains the annotations
of each video. The second list contains the annotations of each
frame in one video. The dict contains the annotation information
of one frame.
annotations (list[dict]): The list contains the annotations
of each video. Each dict holds the annotation information
of one video, in the format {'bboxes': ndarray in (N, 4) shape,
'visible': ndarray, ...}. The bboxes are in (x1, y1, x2, y2) format.

Returns:
dict[str, float]: OPE style evaluation metric (i.e. success,
@@ -76,14 +76,15 @@ def eval_sot_ope(results, annotations):
precision_results = []
norm_precision_results = []
for single_video_results, single_video_anns in zip(results, annotations):
gt_bboxes = np.stack([ann['bboxes'] for ann in single_video_anns])
gt_bboxes = single_video_anns['bboxes']
pred_bboxes = np.stack(single_video_results)
assert len(pred_bboxes) == len(gt_bboxes)
video_length = len(single_video_results)

if 'ignore' in single_video_anns[0]:
gt_ignore = np.stack([ann['ignore'] for ann in single_video_anns])
gt_bboxes = gt_bboxes[gt_ignore == 0]
pred_bboxes = pred_bboxes[gt_ignore == 0]
if 'visible' in single_video_anns:
gt_valid = single_video_anns['visible']
gt_bboxes = gt_bboxes[gt_valid]
pred_bboxes = pred_bboxes[gt_valid]

# eval success based on iou
iou_th = np.arange(0, 1.05, 0.05)
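To make the new per-video annotation format concrete, here is a toy example under the format stated in the docstring (all values invented; `annotations` and `results` mirror the argument names above):

import numpy as np

# One video, two frames; 'visible' masks out frames that are fully
# occluded or out of view, matching the new filtering logic above.
annotations = [dict(
    bboxes=np.array([[10., 10., 50., 50.],
                     [12., 12., 52., 52.]]),
    visible=np.array([True, False]))]
results = [[np.array([11., 11., 51., 51.]),
            np.array([0., 0., 1., 1.])]]

gt_bboxes = annotations[0]['bboxes']
pred_bboxes = np.stack(results[0])
gt_valid = annotations[0]['visible']
print(gt_bboxes[gt_valid], pred_bboxes[gt_valid])  # only frame 0 is kept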
12 changes: 6 additions & 6 deletions mmtrack/core/evaluation/eval_sot_vot.py
@@ -152,7 +152,7 @@ def eval_sot_accuracy_robustness(results,
- special tracking state: [0] denotes the unknown state,
namely the skipping frame after failure, [1] denotes the
initialized state, and [2] denotes the failed state.
annotations (list[list[dict]]): The first list contains the
annotations (list[list[ndarray]]): The first list contains the
gt_bboxes of each video. The second list contains the
gt_bbox of each frame in one video. The ndarray denotes the
gt_bbox of one frame.
@@ -176,7 +176,6 @@
num_fails = 0
weight = 0
for i, (gt_traj, pred_traj) in enumerate(zip(annotations, results)):
gt_traj = np.stack([ann['bboxes'] for ann in gt_traj])
assert len(gt_traj) == len(pred_traj)
assert len(pred_traj[0]) == 1 and pred_traj[0][0] == 1
num_fails += count_failures(pred_traj)
@@ -249,7 +248,7 @@ def eval_sot_eao(results, annotations, interval=[100, 356], videos_wh=None):
- special tracking state: [0] denotes the unknown state,
namely the skipping frame after failure, [1] denotes the
initialized state, and [2] denotes the failed state.
annotations (list[list[dict]]): The first list contains the
annotations (list[list[ndarray]]): The first list contains the
gt_bboxes of each video. The second list contains the
gt_bbox of each frame in one video. The ndarray denotes the
gt_bbox of one frame.
@@ -275,10 +274,11 @@ def eval_sot_eao(results, annotations, interval=[100, 356], videos_wh=None):
all_successes = []

for i, (gt_traj, pred_traj) in enumerate(zip(annotations, results)):
gt_traj = np.stack([ann['bboxes'] for ann in gt_traj])
assert len(gt_traj) == len(pred_traj)
assert len(gt_traj) == len(
pred_traj), f'{len(gt_traj)} == {len(pred_traj)}'
# initialized bbox annotation is [1]
assert len(pred_traj[0]) == 1 and pred_traj[0][0] == 1
assert len(pred_traj[0]) == 1 and pred_traj[0][
0] == 1, f'{len(pred_traj[0])} == 1 and {pred_traj[0][0]} == 1'
fail_inds, init_inds = locate_failures_inits(pred_traj)

pred_traj = trajectory2region(pred_traj)
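For readers unfamiliar with the VOT protocol, here is an illustration of the special tracking states described in the docstrings above (values invented). It is consistent with the assertion that the first frame must be the length-1 list [1]:

# Per the docstring: a length-1 per-frame result encodes a special state,
# anything else is a predicted region.
pred_traj = [
    [1],                       # frame 0: tracker initialized
    [15.0, 20.0, 40.0, 60.0],  # frame 1: a normal tracking bbox
    [2],                       # frame 2: tracking failure detected
    [0],                       # frame 3: unknown state, skipped after failure
    [1],                       # frame 4: tracker re-initialized
]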
18 changes: 10 additions & 8 deletions mmtrack/datasets/base_sot_dataset.py
@@ -117,9 +117,9 @@ def get_bboxes_from_video(self, video_ind):
start_frame_id = self.data_infos[video_ind]['start_frame_id']

if not self.test_mode:
assert len(bboxes) == (end_frame_id - start_frame_id +
1), f'{len(bboxes)} is not equal to'
'{end_frame_id}-{start_frame_id}+1'
assert len(bboxes) == (
end_frame_id - start_frame_id + 1
), f'{len(bboxes)} is not equal to {end_frame_id}-{start_frame_id}+1' # noqa
return bboxes

def get_len_per_video(self, video_ind):
@@ -263,21 +263,23 @@ def evaluate(self, results, metric=['track'], logger=None):
# get all test annotations
annotations = []
for video_ind in range(len(self.data_infos)):
bboxes = self.get_ann_infos_from_video(video_ind)['bboxes']
annotations.append(bboxes)
video_anns = self.get_ann_infos_from_video(video_ind)
annotations.append(video_anns)

# tracking_bboxes converting code
eval_results = dict()
if 'track' in metrics:
assert len(self) == len(results['track_bboxes'])
assert len(self) == len(
results['track_bboxes']
), f"{len(self)} == {len(results['track_bboxes'])}"
print_log('Evaluate OPE Benchmark...', logger=logger)
track_bboxes = []
start_ind = end_ind = 0
for num in self.num_frames_per_video:
end_ind += num
track_bboxes.append(
list(
map(lambda x: x[:4],
map(lambda x: x[:-1],
results['track_bboxes'][start_ind:end_ind])))
start_ind += num

@@ -289,4 +291,4 @@ def evaluate(self, results, metric=['track'], logger=None):
for k, v in eval_results.items():
if isinstance(v, float):
eval_results[k] = float(f'{(v):.3f}')
print_log(eval_results, logger=logger)
return eval_results
100 changes: 81 additions & 19 deletions mmtrack/datasets/lasot_dataset.py
@@ -1,34 +1,96 @@
# Copyright (c) OpenMMLab. All rights reserved.
import glob
import os.path as osp
import time

import numpy as np
from mmdet.datasets import DATASETS

from .sot_test_dataset import SOTTestDataset
from .base_sot_dataset import BaseSOTDataset


@DATASETS.register_module()
class LaSOTDataset(SOTTestDataset):
"""LaSOT dataset for the testing of single object tracking.
class LaSOTDataset(BaseSOTDataset):
"""LaSOT dataset of single object tracking.

The dataset doesn't support training mode.
The dataset supports both training and testing modes.
"""

def _parse_ann_info(self, img_info, ann_info):
"""Parse bbox annotations.
def __init__(self, ann_file, *args, **kwargs):
"""Initialization of SOT dataset class.

Args:
ann_file (str): The file containing testing video names. It will
be loaded in the `self.load_data_infos` function.
"""
self.ann_file = ann_file
super(LaSOTDataset, self).__init__(*args, **kwargs)

def load_data_infos(self, split='test'):
"""Load dataset information.

Args:
img_info (dict): image information.
ann_info (list[dict]): Annotation information of an image. Each
image only has one bbox annotation.
split (str, optional): Dataset split. Defaults to 'test'.

Returns:
dict: A dict containing the following keys: bboxes, labels,
ignore. labels are not useful in SOT.
list[dict]: The length of the list is the number of videos. The
inner dict is in the following format:
{
'video_path': the video path
'ann_path': the annotation path
'start_frame_id': the starting frame number contained
in the image name
'end_frame_id': the ending frame number contained in
the image name
'framename_template': the template of image name
}
"""
gt_bboxes = np.array(ann_info[0]['bbox'], dtype=np.float32)
# convert [x1, y1, w, h] to [x1, y1, x2, y2]
gt_bboxes[2] += gt_bboxes[0]
gt_bboxes[3] += gt_bboxes[1]
gt_labels = np.array(self.cat2label[ann_info[0]['category_id']])
ignore = ann_info[0]['full_occlusion'] or ann_info[0]['out_of_view']
ann = dict(bboxes=gt_bboxes, labels=gt_labels, ignore=ignore)
return ann
print('Loading LaSOT dataset...')
start_time = time.time()
assert split in ['train', 'test']
data_infos = []

test_videos_list = np.loadtxt(self.ann_file, dtype=np.str_)
if self.test_mode:
videos_list = test_videos_list.tolist()
else:
# match video directories like 'airplane-1' ... 'airplane-20';
# the character class '[1-20]' would only match a single 0, 1 or 2
all_videos_list = glob.glob(self.img_prefix + '/*/*-[0-9]*')
test_videos = set(test_videos_list)
videos_list = []
for x in all_videos_list:
x = osp.basename(x)
if x not in test_videos:
videos_list.append(x)

videos_list = sorted(videos_list)
for video_name in videos_list:
video_name = osp.join(video_name.split('-')[0], video_name)
video_path = osp.join(video_name, 'img')
ann_path = osp.join(video_name, 'groundtruth.txt')
img_names = glob.glob(
osp.join(self.img_prefix, video_name, 'img', '*.jpg'))
end_frame_name = max(
img_names, key=lambda x: int(osp.basename(x).split('.')[0]))
end_frame_id = int(osp.basename(end_frame_name).split('.')[0])
data_infos.append(
dict(
video_path=video_path,
ann_path=ann_path,
start_frame_id=1,
end_frame_id=end_frame_id,
framename_template='%08d.jpg'))
print(f'LaSOT dataset loaded! ({time.time()-start_time:.2f} s)')
return data_infos

def get_visibility_from_video(self, video_ind):
"""Get the visible information of instance in a video."""
video_path = osp.dirname(self.data_infos[video_ind]['video_path'])
full_occlusion_file = osp.join(self.img_prefix, video_path,
'full_occlusion.txt')
out_of_view_file = osp.join(self.img_prefix, video_path,
'out_of_view.txt')
full_occlusion = np.loadtxt(
full_occlusion_file, dtype=bool, delimiter=',')
out_of_view = np.loadtxt(out_of_view_file, dtype=bool, delimiter=',')
visible = ~(full_occlusion | out_of_view)
return dict(visible=visible)
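A toy check of the visibility logic above; LaSOT's `full_occlusion.txt` and `out_of_view.txt` hold comma-separated 0/1 flags, one per frame (sample values invented):

import numpy as np

full_occlusion = np.array([0, 1, 0, 0], dtype=bool)
out_of_view = np.array([0, 0, 1, 0], dtype=bool)
visible = ~(full_occlusion | out_of_view)
print(visible)  # [ True False False  True]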