diff --git a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py index 655ee52cc..edc2b723e 100644 --- a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py +++ b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_lasot.py @@ -71,7 +71,7 @@ data_root = 'data/' train_pipeline = [ dict(type='LoadMultiImagesFromFile', to_float32=True), - dict(type='SeqLoadAnnotations', with_bbox=True), + dict(type='SeqLoadAnnotations', with_bbox=True, with_label=False), dict( type='SeqCropLikeSiamFC', context_amount=0.5, @@ -146,20 +146,20 @@ ], val=dict( type='LaSOTDataset', - test_load_ann=True, - ann_file=data_root + 'lasot/annotations/lasot_test.json', + ann_file='tools/convert_datasets/lasot/testing_set.txt', img_prefix=data_root + 'lasot/LaSOTBenchmark', pipeline=test_pipeline, - ref_img_sampler=None, - test_mode=True), + split='test', + test_mode=True, + only_eval_visible=True), test=dict( type='LaSOTDataset', - test_load_ann=True, - ann_file=data_root + 'lasot/annotations/lasot_test.json', + ann_file='tools/convert_datasets/lasot/testing_set.txt', img_prefix=data_root + 'lasot/LaSOTBenchmark', pipeline=test_pipeline, - ref_img_sampler=None, - test_mode=True)) + split='test', + test_mode=True, + only_eval_visible=True)) # optimizer optimizer = dict( type='SGD', diff --git a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_otb100.py b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_otb100.py index c41f4b81f..2deb64268 100644 --- a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_otb100.py +++ b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_otb100.py @@ -11,7 +11,7 @@ data_root = 'data/' train_pipeline = [ dict(type='LoadMultiImagesFromFile', to_float32=True), - dict(type='SeqLoadAnnotations', with_bbox=True), + dict(type='SeqLoadAnnotations', with_bbox=True, with_label=False), dict( type='SeqCropLikeSiamFC', context_amount=0.5, @@ -74,9 +74,11 @@ ], val=dict( type='OTB100Dataset', - ann_file=data_root + 'otb100/annotations/otb100.json', - img_prefix=data_root + 'otb100/data'), + ann_file='tools/convert_datasets/otb100/otb100_infos.txt', + img_prefix=data_root + 'otb100/data', + only_eval_visible=False), test=dict( type='OTB100Dataset', - ann_file=data_root + 'otb100/annotations/otb100.json', - img_prefix=data_root + 'otb100/data')) + ann_file='tools/convert_datasets/otb100/otb100_infos.txt', + img_prefix=data_root + 'otb100/data', + only_eval_visible=False)) diff --git a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_trackingnet.py b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_trackingnet.py index e9b50157a..66d77b121 100644 --- a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_trackingnet.py +++ b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_trackingnet.py @@ -3,7 +3,4 @@ data_root = 'data/' # dataset settings data = dict( - test=dict( - type='TrackingNetDataset', - img_prefix=data_root + 'trackingnet', - split='test')) + test=dict(type='TrackingNetDataset', img_prefix=data_root + 'trackingnet')) diff --git a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_uav123.py b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_uav123.py index a2b3857a1..c7bfed7f9 100644 --- a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_uav123.py +++ b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_uav123.py @@ -9,9 +9,11 @@ data = dict( val=dict( type='UAV123Dataset', - ann_file=data_root + 'UAV123/annotations/uav123.json', - img_prefix=data_root + 'UAV123/data_seq/UAV123'), + ann_file='tools/convert_datasets/uav123/uav123_infos.txt', + img_prefix=data_root + 'UAV123', + only_eval_visible=False), test=dict( type='UAV123Dataset', - ann_file=data_root + 'UAV123/annotations/uav123.json', - img_prefix=data_root + 'UAV123/data_seq/UAV123')) + ann_file='tools/convert_datasets/uav123/uav123_infos.txt', + img_prefix=data_root + 'UAV123', + only_eval_visible=False)) diff --git a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_vot2018.py b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_vot2018.py index 4d8768f1b..6e5989dbd 100644 --- a/configs/sot/siamese_rpn/siamese_rpn_r50_1x_vot2018.py +++ b/configs/sot/siamese_rpn/siamese_rpn_r50_1x_vot2018.py @@ -11,11 +11,11 @@ data = dict( val=dict( type='VOTDataset', - ann_file=data_root + 'vot2018/annotations/vot2018.json', + dataset_type='vot2018', img_prefix=data_root + 'vot2018/data'), test=dict( type='VOTDataset', - ann_file=data_root + 'vot2018/annotations/vot2018.json', + dataset_type='vot2018', img_prefix=data_root + 'vot2018/data')) evaluation = dict( metric=['track'], interval=1, start=10, rule='greater', save_best='eao') diff --git a/mmtrack/apis/train.py b/mmtrack/apis/train.py index b112546cd..abb15ca42 100644 --- a/mmtrack/apis/train.py +++ b/mmtrack/apis/train.py @@ -108,8 +108,7 @@ def train_model(model, broadcast_buffers=False, find_unused_parameters=find_unused_parameters) else: - model = MMDataParallel( - model, device_ids=cfg.gpu_ids) + model = MMDataParallel(model, device_ids=cfg.gpu_ids) # build runner optimizer = build_optimizer(model, cfg.optimizer) diff --git a/mmtrack/core/evaluation/eval_sot_ope.py b/mmtrack/core/evaluation/eval_sot_ope.py index 2071fa070..b379daa36 100644 --- a/mmtrack/core/evaluation/eval_sot_ope.py +++ b/mmtrack/core/evaluation/eval_sot_ope.py @@ -55,7 +55,7 @@ def success_error(gt_bboxes_center, pred_bboxes_center, pixel_offset_th, return success -def eval_sot_ope(results, annotations): +def eval_sot_ope(results, annotations, visible_infos=None): """Evaluation in OPE protocol. Args: @@ -63,10 +63,13 @@ def eval_sot_ope(results, annotations): results of each video. The second list contains the tracking results of each frame in one video. The ndarray denotes the tracking box in [tl_x, tl_y, br_x, br_y] format. - annotations (list[list[dict]]): The first list contains the annotations - of each video. The second list contains the annotations of each - frame in one video. The dict contains the annotation information - of one frame. + annotations (list[ndarray]): The list contains the bbox + annotations of each video. The ndarray is gt_bboxes of one video. + It's in (N, 4) shape. Each bbox is in (x1, y1, x2, y2) format. + visible_infos (list[ndarray] | None): If not None, the list + contains the visible information of each video. The ndarray is + visibility (with bool type) of object in one video. It's in (N,) + shape. Default to None. Returns: dict[str, float]: OPE style evaluation metric (i.e. success, @@ -75,15 +78,16 @@ def eval_sot_ope(results, annotations): success_results = [] precision_results = [] norm_precision_results = [] - for single_video_results, single_video_anns in zip(results, annotations): - gt_bboxes = np.stack([ann['bboxes'] for ann in single_video_anns]) + if visible_infos is None: + visible_infos = [np.array([True] * len(_)) for _ in annotations] + for single_video_results, single_video_gt_bboxes, single_video_visible in zip( # noqa + results, annotations, visible_infos): pred_bboxes = np.stack(single_video_results) + assert len(pred_bboxes) == len(single_video_gt_bboxes) video_length = len(single_video_results) - if 'ignore' in single_video_anns[0]: - gt_ignore = np.stack([ann['ignore'] for ann in single_video_anns]) - gt_bboxes = gt_bboxes[gt_ignore == 0] - pred_bboxes = pred_bboxes[gt_ignore == 0] + gt_bboxes = single_video_gt_bboxes[single_video_visible] + pred_bboxes = pred_bboxes[single_video_visible] # eval success based on iou iou_th = np.arange(0, 1.05, 0.05) diff --git a/mmtrack/core/evaluation/eval_sot_vot.py b/mmtrack/core/evaluation/eval_sot_vot.py index c89a76b13..850e8e9a1 100644 --- a/mmtrack/core/evaluation/eval_sot_vot.py +++ b/mmtrack/core/evaluation/eval_sot_vot.py @@ -152,10 +152,9 @@ def eval_sot_accuracy_robustness(results, - special tracking state: [0] denotes the unknown state, namely the skipping frame after failure, [1] denotes the initialized state, and [2] denotes the failed state. - annotations (list[list[dict]]): The first list contains the - gt_bboxes of each video. The second list contains the - gt_bbox of each frame in one video. The dict contains the - annotation information of one frame. + annotations (list[ndarray]): The list contains the gt_bboxes of each + video. The ndarray is gt_bboxes of one video. It's in (N, 4) shape. + Each bbox is in (x1, y1, w, h) format. burnin: number of frames that have to be ignored after the re-initialization when calculating accuracy. Default is 10. ignore_unknown (bool): whether ignore the skipping frames after @@ -176,7 +175,6 @@ def eval_sot_accuracy_robustness(results, num_fails = 0 weight = 0 for i, (gt_traj, pred_traj) in enumerate(zip(annotations, results)): - gt_traj = np.stack([ann['bboxes'] for ann in gt_traj]) assert len(gt_traj) == len(pred_traj) assert len(pred_traj[0]) == 1 and pred_traj[0][0] == 1 num_fails += count_failures(pred_traj) @@ -249,10 +247,9 @@ def eval_sot_eao(results, annotations, interval=[100, 356], videos_wh=None): - special tracking state: [0] denotes the unknown state, namely the skipping frame after failure, [1] denotes the initialized state, and [2] denotes the failed state. - annotations (list[list[dict]]): The first list contains the - gt_bboxes of each video. The second list contains the - gt_bbox of each frame in one video. The dict contains the - annotation information of one frame. + annotations (list[ndarray]): The list contains the gt_bboxes of each + video. The ndarray is gt_bboxes of one video. It's in (N, 4) shape. + Each bbox is in (x1, y1, w, h) format. interval: an specified interval in EAO curve used to calculate the EAO score. There are different settings in different VOT challenge. Default is VOT2018 setting: [100, 356]. @@ -275,10 +272,11 @@ def eval_sot_eao(results, annotations, interval=[100, 356], videos_wh=None): all_successes = [] for i, (gt_traj, pred_traj) in enumerate(zip(annotations, results)): - gt_traj = np.stack([ann['bboxes'] for ann in gt_traj]) - assert len(gt_traj) == len(pred_traj) + assert len(gt_traj) == len( + pred_traj), f'{len(gt_traj)} == {len(pred_traj)}' # initialized bbox annotation is [1] - assert len(pred_traj[0]) == 1 and pred_traj[0][0] == 1 + assert len(pred_traj[0]) == 1 and pred_traj[0][ + 0] == 1, f'{len(pred_traj[0])} == 1 and {pred_traj[0][0]} == 1' fail_inds, init_inds = locate_failures_inits(pred_traj) pred_traj = trajectory2region(pred_traj) diff --git a/mmtrack/datasets/base_sot_dataset.py b/mmtrack/datasets/base_sot_dataset.py index 1bec84fcf..1970a3ff8 100644 --- a/mmtrack/datasets/base_sot_dataset.py +++ b/mmtrack/datasets/base_sot_dataset.py @@ -23,6 +23,8 @@ class BaseSOTDataset(Dataset, metaclass=ABCMeta): test_mode (bool, optional): Default to False. bbox_min_size (int, optional): Only bounding boxes whose sizes are larger than `bbox_min_size` can be regarded as valid. Default to 0. + only_eval_visible (bool, optional): Whether to only evaluate frames + where object are visible. Default to False. """ # Compatible with MOT and VID Dataset class. The 'CLASSES' attribute will @@ -35,12 +37,14 @@ def __init__(self, split, test_mode=False, bbox_min_size=0, + only_eval_visible=False, **kwargs): self.img_prefix = img_prefix self.split = split self.pipeline = Compose(pipeline) self.test_mode = test_mode self.bbox_min_size = bbox_min_size + self.only_eval_visible = only_eval_visible # 'self.load_as_video' must be set to True in order to using # distributed video sampler to load dataset when testing. self.load_as_video = True @@ -98,9 +102,9 @@ def get_bboxes_from_video(self, video_ind): start_frame_id = self.data_infos[video_ind]['start_frame_id'] if not self.test_mode: - assert len(bboxes) == (end_frame_id - start_frame_id + - 1), f'{len(bboxes)} is not equal to' - '{end_frame_id}-{start_frame_id}+1' + assert len(bboxes) == ( + end_frame_id - start_frame_id + 1 + ), f'{len(bboxes)} is not equal to {end_frame_id}-{start_frame_id}+1' # noqa return bboxes def get_len_per_video(self, video_ind): @@ -249,15 +253,19 @@ def evaluate(self, results, metric=['track'], logger=None): raise KeyError(f'metric {metric} is not supported.') # get all test annotations - annotations = [] + gt_bboxes = [] + visible_infos = [] for video_ind in range(len(self.data_infos)): - bboxes = self.get_ann_infos_from_video(video_ind)['bboxes'] - annotations.append(bboxes) + video_anns = self.get_ann_infos_from_video(video_ind) + gt_bboxes.append(video_anns['bboxes']) + visible_infos.append(video_anns['visible']) # tracking_bboxes converting code eval_results = dict() if 'track' in metrics: - assert len(self) == len(results['track_bboxes']) + assert len(self) == len( + results['track_bboxes'] + ), f"{len(self)} == {len(results['track_bboxes'])}" print_log('Evaluate OPE Benchmark...', logger=logger) track_bboxes = [] start_ind = end_ind = 0 @@ -265,16 +273,21 @@ def evaluate(self, results, metric=['track'], logger=None): end_ind += num track_bboxes.append( list( - map(lambda x: x[:4], + map(lambda x: x[:-1], results['track_bboxes'][start_ind:end_ind]))) start_ind += num + if not self.only_eval_visible: + visible_infos = None # evaluation track_eval_results = eval_sot_ope( - results=track_bboxes, annotations=annotations) + results=track_bboxes, + annotations=gt_bboxes, + visible_infos=visible_infos) eval_results.update(track_eval_results) for k, v in eval_results.items(): if isinstance(v, float): eval_results[k] = float(f'{(v):.3f}') print_log(eval_results, logger=logger) + return eval_results diff --git a/mmtrack/datasets/lasot_dataset.py b/mmtrack/datasets/lasot_dataset.py index 6547d87ce..de57b3d4f 100644 --- a/mmtrack/datasets/lasot_dataset.py +++ b/mmtrack/datasets/lasot_dataset.py @@ -1,34 +1,96 @@ # Copyright (c) OpenMMLab. All rights reserved. +import glob +import os.path as osp +import time + import numpy as np from mmdet.datasets import DATASETS -from .sot_test_dataset import SOTTestDataset +from .base_sot_dataset import BaseSOTDataset @DATASETS.register_module() -class LaSOTDataset(SOTTestDataset): - """LaSOT dataset for the testing of single object tracking. +class LaSOTDataset(BaseSOTDataset): + """LaSOT dataset of single object tracking. - The dataset doesn't support training mode. + The dataset can both support training and testing mode. """ - def _parse_ann_info(self, img_info, ann_info): - """Parse bbox annotations. + def __init__(self, ann_file, *args, **kwargs): + """Initialization of SOT dataset class. + + Args: + ann_file (str): The file contains testing video names. It will be + loaded in the `self.load_data_infos` function. + """ + self.ann_file = ann_file + super(LaSOTDataset, self).__init__(*args, **kwargs) + + def load_data_infos(self, split='test'): + """Load dataset information. Args: - img_info (dict): image information. - ann_info (list[dict]): Annotation information of an image. Each - image only has one bbox annotation. + split (str, optional): Dataset split. Defaults to 'test'. Returns: - dict: A dict containing the following keys: bboxes, labels, - ignore. labels are not useful in SOT. + list[dict]: The length of the list is the number of videos. The + inner dict is in the following format: + { + 'video_path': the video path + 'ann_path': the annotation path + 'start_frame_id': the starting frame number contained + in the image name + 'end_frame_id': the ending frame number contained in + the image name + 'framename_template': the template of image name + } """ - gt_bboxes = np.array(ann_info[0]['bbox'], dtype=np.float32) - # convert [x1, y1, w, h] to [x1, y1, x2, y2] - gt_bboxes[2] += gt_bboxes[0] - gt_bboxes[3] += gt_bboxes[1] - gt_labels = np.array(self.cat2label[ann_info[0]['category_id']]) - ignore = ann_info[0]['full_occlusion'] or ann_info[0]['out_of_view'] - ann = dict(bboxes=gt_bboxes, labels=gt_labels, ignore=ignore) - return ann + print('Loading LaSOT dataset...') + start_time = time.time() + assert split in ['train', 'test'] + data_infos = [] + + test_videos_list = np.loadtxt(self.ann_file, dtype=np.str_) + if self.test_mode: + videos_list = test_videos_list.tolist() + else: + all_videos_list = glob.glob(self.img_prefix + '/*/*-[1-20]') + test_videos = set(test_videos_list) + videos_list = [] + for x in all_videos_list: + x = osp.basename(x) + if x not in test_videos: + videos_list.append(x) + + videos_list = sorted(videos_list) + for video_name in videos_list: + video_name = osp.join(video_name.split('-')[0], video_name) + video_path = osp.join(video_name, 'img') + ann_path = osp.join(video_name, 'groundtruth.txt') + img_names = glob.glob( + osp.join(self.img_prefix, video_name, 'img', '*.jpg')) + end_frame_name = max( + img_names, key=lambda x: int(osp.basename(x).split('.')[0])) + end_frame_id = int(osp.basename(end_frame_name).split('.')[0]) + data_infos.append( + dict( + video_path=video_path, + ann_path=ann_path, + start_frame_id=1, + end_frame_id=end_frame_id, + framename_template='%08d.jpg')) + print(f'LaSOT dataset loaded! ({time.time()-start_time:.2f} s)') + return data_infos + + def get_visibility_from_video(self, video_ind): + """Get the visible information of instance in a video.""" + video_path = osp.dirname(self.data_infos[video_ind]['video_path']) + full_occlusion_file = osp.join(self.img_prefix, video_path, + 'full_occlusion.txt') + out_of_view_file = osp.join(self.img_prefix, video_path, + 'out_of_view.txt') + full_occlusion = np.loadtxt( + full_occlusion_file, dtype=bool, delimiter=',') + out_of_view = np.loadtxt(out_of_view_file, dtype=bool, delimiter=',') + visible = ~(full_occlusion | out_of_view) + return dict(visible=visible) diff --git a/mmtrack/datasets/otb_dataset.py b/mmtrack/datasets/otb_dataset.py index babfe2194..0a7f5f932 100644 --- a/mmtrack/datasets/otb_dataset.py +++ b/mmtrack/datasets/otb_dataset.py @@ -1,11 +1,106 @@ -from mmtrack.datasets import DATASETS -from .sot_test_dataset import SOTTestDataset +# Copyright (c) OpenMMLab. All rights reserved. +import os.path as osp +import re +import time + +import numpy as np +from mmdet.datasets import DATASETS + +from .base_sot_dataset import BaseSOTDataset @DATASETS.register_module() -class OTB100Dataset(SOTTestDataset): - """OTB100 dataset for the testing of single object tracking. +class OTB100Dataset(BaseSOTDataset): + """OTB100 dataset of single object tracking. - The dataset doesn't support training mode. + The dataset is only used to test. """ - pass + + def __init__(self, ann_file, *args, **kwargs): + """Initialization of SOT dataset class. + + Args: + ann_file (str): The file contains data information. It will be + loaded and parsed in the `self.load_data_infos` function. + """ + self.ann_file = ann_file + super().__init__(*args, **kwargs) + + def load_data_infos(self, split='test'): + """Load dataset information. + + Args: + split (str, optional): Dataset split. Defaults to 'test'. + + Returns: + list[dict]: The length of the list is the number of videos. The + inner dict is in the following format: + { + 'video_path': the video path + 'ann_path': the annotation path + 'start_frame_id': the starting frame number contained + in the image name + 'end_frame_id': the ending frame number contained in + the image name + 'framename_template': the template of image name + 'init_skip_num': (optional) the number of skipped + frames when initializing tracker + } + """ + print('Loading OTB100 dataset...') + start_time = time.time() + data_infos = [] + with open(self.ann_file, 'r') as f: + # the first line of annotation file is dataset comment. + for line in f.readlines()[1:]: + line = line.strip().split(',') + if line[0].split('/')[0] == 'Board': + framename_template = '%05d.jpg' + else: + framename_template = '%04d.jpg' + data_info = dict( + video_path=line[0], + ann_path=line[1], + start_frame_id=int(line[2]), + end_frame_id=int(line[3]), + framename_template=framename_template) + # Tracker initializatioin in `Tiger1` video will skip the first + # 5 frames. Details can be seen in the official file + # `tracker_benchmark_v1.0/initOmit/tiger1.txt`. + # Annotation loading will refer to this information. + if line[0].split('/')[0] == 'Tiger1': + data_info['init_skip_num'] = 5 + data_infos.append(data_info) + print(f'OTB100 dataset loaded! ({time.time()-start_time:.2f} s)') + return data_infos + + def get_bboxes_from_video(self, video_ind): + """Get bboxes annotation about the instance in a video. + + Args: + video_ind (int): video index + + Returns: + ndarray: in [N, 4] shape. The N is the bbox number and the bbox + is in (x, y, w, h) format. + """ + bboxes_file = osp.join(self.img_prefix, + self.data_infos[video_ind]['ann_path']) + bboxes = [] + with open(bboxes_file, 'r') as f: + for bbox in f.readlines(): + bbox = list(map(int, re.findall(r'-?\d+', bbox))) + bboxes.append(bbox) + bboxes = np.array(bboxes, dtype=float) + + if 'init_skip_num' in self.data_infos[video_ind]: + init_skip_num = self.data_infos[video_ind]['init_skip_num'] + bboxes = bboxes[init_skip_num:] + + end_frame_id = self.data_infos[video_ind]['end_frame_id'] + start_frame_id = self.data_infos[video_ind]['start_frame_id'] + assert len(bboxes) == ( + end_frame_id - start_frame_id + 1 + ), f'{len(bboxes)} is not equal to {end_frame_id}-{start_frame_id}+1' + assert bboxes.shape[1] == 4 + return bboxes diff --git a/mmtrack/datasets/uav123_dataset.py b/mmtrack/datasets/uav123_dataset.py index 8e41cac4a..e622173f1 100644 --- a/mmtrack/datasets/uav123_dataset.py +++ b/mmtrack/datasets/uav123_dataset.py @@ -1,13 +1,60 @@ # Copyright (c) OpenMMLab. All rights reserved. +import time + from mmdet.datasets import DATASETS -from .sot_test_dataset import SOTTestDataset +from .base_sot_dataset import BaseSOTDataset @DATASETS.register_module() -class UAV123Dataset(SOTTestDataset): - """UAV123 dataset for the testing of single object tracking. +class UAV123Dataset(BaseSOTDataset): + """UAV123 dataset of single object tracking. - The dataset doesn't support training mode. + The dataset is only used to test. """ - pass + + def __init__(self, ann_file, *args, **kwargs): + """Initialization of SOT dataset class. + + Args: + ann_file (str): The file contains data information. It will be + loaded and parsed in the `self.load_data_infos` function. + """ + self.ann_file = ann_file + super().__init__(*args, **kwargs) + + def load_data_infos(self, split='test'): + """Load dataset information. + + Args: + split (str, optional): Dataset split. Defaults to 'test'. + + Returns: + list[dict]: The length of the list is the number of videos. The + inner dict is in the following format: + { + 'video_path': the video path + 'ann_path': the annotation path + 'start_frame_id': the starting frame number contained + in the image name + 'end_frame_id': the ending frame number contained in + the image name + 'framename_template': the template of image name + } + """ + print('Loading UAV123 dataset...') + start_time = time.time() + data_infos = [] + with open(self.ann_file, 'r') as f: + # the first line of annotation file is dataset comment. + for line in f.readlines()[1:]: + line = line.strip().split(',') + data_info = dict( + video_path=line[0], + ann_path=line[1], + start_frame_id=int(line[2]), + end_frame_id=int(line[3]), + framename_template='%06d.jpg') + data_infos.append(data_info) + print(f'UAV123 dataset loaded! ({time.time()-start_time:.2f} s)') + return data_infos diff --git a/mmtrack/datasets/vot_dataset.py b/mmtrack/datasets/vot_dataset.py index a3aaa8330..548f79f6c 100644 --- a/mmtrack/datasets/vot_dataset.py +++ b/mmtrack/datasets/vot_dataset.py @@ -1,27 +1,37 @@ +import glob import os.path as osp +import time +import mmcv import numpy as np from mmcv.utils import print_log from mmdet.datasets import DATASETS from mmtrack.core.evaluation import eval_sot_accuracy_robustness, eval_sot_eao -from .sot_test_dataset import SOTTestDataset +from .base_sot_dataset import BaseSOTDataset @DATASETS.register_module() -class VOTDataset(SOTTestDataset): - """VOT dataset for the testing of single object tracking. +class VOTDataset(BaseSOTDataset): + """VOT dataset of single object tracking. - The dataset doesn't support training mode. - - Note: The vot datasets using the mask annotation, such as VOT2020, is not - supported now. + The dataset is only used to test. """ - CLASSES = (0, ) - def __init__(self, *args, **kwargs): + def __init__(self, dataset_type='vot2018', *args, **kwargs): + """Initialization of SOT dataset class. + + Args: + dataset_type (str, optional): The type of VOT challenge. The + optional values are in ['vot2018', 'vot2018_lt', + 'vot2019', 'vot2019_lt', 'vot2020', 'vot2021'] + """ + assert dataset_type in [ + 'vot2018', 'vot2018_lt', 'vot2019', 'vot2019_lt', 'vot2020', + 'vot2021' + ] + self.dataset_type = dataset_type super().__init__(*args, **kwargs) - self.dataset_name = osp.basename(self.ann_file).rstrip('.json') # parameter, used for EAO evaluation, may vary by different vot # challenges. self.INTERVAL = dict( @@ -30,30 +40,79 @@ def __init__(self, *args, **kwargs): vot2020=[115, 755], vot2021=[115, 755]) - def _parse_ann_info(self, img_info, ann_info): - """Parse bbox annotations. + def load_data_infos(self, split='test'): + """Load dataset information. + + Args: + split (str, optional): Dataset split. Defaults to 'test'. + + Returns: + list[dict]: The length of the list is the number of videos. The + inner dict is in the following format: + { + 'video_path': the video path + 'ann_path': the annotation path + 'start_frame_id': the starting frame number contained + in the image name + 'end_frame_id': the ending frame number contained in + the image name + 'framename_template': the template of image name + } + """ + print('Loading VOT dataset...') + start_time = time.time() + data_infos = [] + ann_file = osp.join(self.img_prefix, 'list.txt') + videos_list = np.loadtxt(ann_file, dtype=np.str_) + for video_name in videos_list: + video_path = osp.join(video_name, 'color') + ann_path = osp.join(video_name, 'groundtruth.txt') + img_names = glob.glob( + osp.join(self.img_prefix, video_path + '/*.jpg')) + end_frame_id = max( + img_names, key=lambda x: int(osp.basename(x).split('.')[0])) + data_info = dict( + video_path=video_path, + ann_path=ann_path, + start_frame_id=1, + end_frame_id=int(osp.basename(end_frame_id).split('.')[0]), + framename_template='%08d.jpg') + data_infos.append(data_info) + print(f'VOT dataset loaded! ({time.time()-start_time:.2f} s)') + return data_infos + + def get_ann_infos_from_video(self, video_ind): + """Get bboxes annotation about the instance in a video. Args: - img_info (dict): image information. - ann_info (list[dict]): Annotation information of an image. Each - image only has one bbox annotation. + video_ind (int): video index + Returns: - dict: A dict containing the following keys: bboxes, labels. - labels are not useful in SOT. + ndarray: in [N, 8] shape. The N is the bbox number and the bbox + is in (x1, y1, x2, y2, x3, y3, x4, y4) format. """ - # The shape of gt_bboxes is (8, ), in [x1, y1, x2, y2, x3, y3, x4, y4] - # format - gt_bboxes = np.array(ann_info[0]['bbox'], dtype=np.float32) - gt_labels = np.array(self.cat2label[ann_info[0]['category_id']]) - ann = dict(bboxes=gt_bboxes, labels=gt_labels) - return ann + bboxes = self.get_bboxes_from_video(video_ind) + if bboxes.shape[1] == 4: + x1, y1 = bboxes[:, 0], bboxes[:, 1], + x2, y2 = bboxes[:, 0] + bboxes[:, 2], bboxes[:, 1], + x3, y3 = bboxes[:, 0] + bboxes[:, 2], bboxes[:, 1] + bboxes[:, 3] + x4, y4 = bboxes[:, 0], bboxes[:, 1] + bboxes[:, 3], + bboxes = np.stack((x1, y1, x2, y2, x3, y3, x4, y4), axis=-1) + + visible_info = self.get_visibility_from_video(video_ind) + # bboxes in VOT datasets are all valid + bboxes_isvalid = np.array([True] * len(bboxes), dtype=np.bool_) + ann_infos = dict( + bboxes=bboxes, bboxes_isvalid=bboxes_isvalid, **visible_info) + return ann_infos # TODO support multirun test def evaluate(self, results, metric=['track'], logger=None, interval=None): """Evaluation in VOT protocol. Args: - results (dict): Testing results of the dataset. + results (dict): Testing results of the dataset. The tracking bboxes + are in (tl_x, tl_y, br_x, br_y) format. metric (str | list[str]): Metrics to be evaluated. Options are 'track'. logger (logging.Logger | str | None): Logger used for printing @@ -75,40 +134,53 @@ def evaluate(self, results, metric=['track'], logger=None, interval=None): if metric not in allowed_metrics: raise KeyError(f'metric {metric} is not supported.') + # get all test annotations + # annotations are in list[ndarray] format + annotations = [] + for video_ind in range(len(self.data_infos)): + bboxes = self.get_ann_infos_from_video(video_ind)['bboxes'] + annotations.append(bboxes) + + # tracking_bboxes converting code eval_results = dict() if 'track' in metrics: - assert len(self.data_infos) == len(results['track_bboxes']) + assert len(self) == len( + results['track_bboxes'] + ), f"{len(self)} == {len(results['track_bboxes'])}" print_log('Evaluate VOT Benchmark...', logger=logger) - inds = [] + track_bboxes = [] + start_ind = end_ind = 0 videos_wh = [] - ann_infos = [] - for i, info in enumerate(self.data_infos): - if info['frame_id'] == 0: - inds.append(i) - videos_wh.append((info['width'], info['height'])) + for data_info in self.data_infos: + num = data_info['end_frame_id'] - data_info[ + 'start_frame_id'] + 1 + end_ind += num - ann_infos.append(self.get_ann_info(info)) - - num_vids = len(inds) - inds.append(len(self.data_infos)) - track_bboxes = [] - annotations = [] - for i in range(num_vids): bboxes_per_video = [] - for bbox in results['track_bboxes'][inds[i]:inds[i + 1]]: + # results are in dict(track_bboxes=list[ndarray]) format + # track_bboxes are in list[list[ndarray]] format + for bbox in results['track_bboxes'][start_ind:end_ind]: # the last element of `bbox` is score. if len(bbox) != 2: # convert bbox format from (tl_x, tl_y, br_x, br_y) to # (x1, y1, w, h) bbox[2] -= bbox[0] bbox[3] -= bbox[1] + bboxes_per_video.append(bbox[:-1]) + track_bboxes.append(bboxes_per_video) - annotations.append(ann_infos[inds[i]:inds[i + 1]]) + start_ind += num - interval = self.INTERVAL[self.dataset_name] if interval is None \ + # read one image in the video to get video width and height + filename = osp.join(self.img_prefix, data_info['video_path'], + data_info['framename_template'] % 1) + img = mmcv.imread(filename) + videos_wh.append((img.shape[1], img.shape[0])) + + interval = self.INTERVAL[self.dataset_type] if interval is None \ else interval - # anno_info is list[list[dict]] + eao_score = eval_sot_eao( results=track_bboxes, annotations=annotations, diff --git a/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/full_occlusion.txt b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/full_occlusion.txt new file mode 100755 index 000000000..15794e007 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/full_occlusion.txt @@ -0,0 +1 @@ +0,0 diff --git a/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/groundtruth.txt b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/groundtruth.txt new file mode 100644 index 000000000..bf06a6169 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/groundtruth.txt @@ -0,0 +1,2 @@ +1,100,1,100 +1,100,1,100 diff --git a/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/gt_for_eval.txt b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/gt_for_eval.txt new file mode 100644 index 000000000..496a7042e --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/gt_for_eval.txt @@ -0,0 +1,25 @@ +367,101,41,16 +366,103,45,16 +364,107,45,15 +362,109,46,16 +362,111,46,18 +362,113,46,18 +364,116,46,17 +366,118,45,17 +362,119,48,17 +359,119,45,17 +358,119,46,17 +360,121,46,17 +360,124,46,17 +359,124,47,17 +360,126,46,17 +356,127,46,18 +354,127,46,17 +352,127,46,17 +352,126,44,17 +349,126,46,17 +347,126,46,17 +346,125,46,17 +345,124,47,17 +345,124,46,17 +344,124,47,17 diff --git a/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/img/00000001.jpg b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/img/00000001.jpg new file mode 100644 index 000000000..81e94785b Binary files /dev/null and b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/img/00000001.jpg differ diff --git a/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/img/00000002.jpg b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/img/00000002.jpg new file mode 100644 index 000000000..81e94785b Binary files /dev/null and b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/img/00000002.jpg differ diff --git a/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/out_of_view.txt b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/out_of_view.txt new file mode 100755 index 000000000..15794e007 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/out_of_view.txt @@ -0,0 +1 @@ +0,0 diff --git a/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/track_results.txt b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/track_results.txt new file mode 100644 index 000000000..7648a02f5 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/airplane/airplane-1/track_results.txt @@ -0,0 +1,25 @@ +367,101,408,117 +367,102,410,118 +363,105,406,121 +362,109,407,124 +361,112,407,128 +362,114,408,130 +364,116,410,132 +364,118,411,134 +360,120,408,136 +356,119,404,135 +356,119,404,135 +359,121,407,137 +359,124,407,141 +359,125,407,141 +358,126,406,143 +354,127,402,144 +351,127,400,144 +350,127,398,143 +349,127,397,143 +346,126,394,142 +344,126,392,143 +343,125,392,142 +343,123,392,140 +343,124,392,141 +341,124,392,141 diff --git a/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/full_occlusion.txt b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/full_occlusion.txt new file mode 100755 index 000000000..15794e007 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/full_occlusion.txt @@ -0,0 +1 @@ +0,0 diff --git a/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/groundtruth.txt b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/groundtruth.txt new file mode 100644 index 000000000..bf06a6169 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/groundtruth.txt @@ -0,0 +1,2 @@ +1,100,1,100 +1,100,1,100 diff --git a/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/gt_for_eval.txt b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/gt_for_eval.txt new file mode 100644 index 000000000..2580b2402 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/gt_for_eval.txt @@ -0,0 +1,25 @@ +76,74,367,151 +75,76,369,150 +78,76,368,150 +81,77,366,149 +82,76,367,150 +81,74,370,151 +81,74,370,152 +84,77,370,151 +89,79,371,149 +88,78,372,149 +88,78,372,150 +90,79,374,149 +90,80,374,149 +89,81,374,150 +92,81,375,150 +94,80,378,150 +95,80,379,150 +96,79,376,151 +96,79,375,152 +100,81,377,150 +102,81,377,150 +99,79,376,152 +99,82,379,150 +104,82,375,150 +100,81,379,152 diff --git a/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/img/00000001.jpg b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/img/00000001.jpg new file mode 100644 index 000000000..81e94785b Binary files /dev/null and b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/img/00000001.jpg differ diff --git a/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/img/00000002.jpg b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/img/00000002.jpg new file mode 100644 index 000000000..81e94785b Binary files /dev/null and b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/img/00000002.jpg differ diff --git a/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/out_of_view.txt b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/out_of_view.txt new file mode 100755 index 000000000..15794e007 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/out_of_view.txt @@ -0,0 +1 @@ +0,0 diff --git a/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/track_results.txt b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/track_results.txt new file mode 100644 index 000000000..2716acd33 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/basketball/basketball-2/track_results.txt @@ -0,0 +1,25 @@ +15,123,544,267 +18,130,545,274 +23,106,553,252 +20,117,547,264 +17,122,545,267 +13,129,540,273 +24,104,551,249 +29,110,559,255 +34,113,566,258 +31,122,557,266 +32,127,552,271 +30,135,548,276 +37,110,554,254 +31,112,558,258 +31,119,560,264 +21,124,547,268 +48,132,578,277 +22,102,553,249 +11,105,544,253 +19,110,551,257 +22,113,557,257 +32,112,567,255 +30,115,566,258 +34,116,570,261 +28,120,556,265 diff --git a/tests/data/demo_sot_data/lasot_full/testing_set.txt b/tests/data/demo_sot_data/lasot_full/testing_set.txt new file mode 100644 index 000000000..b9db8f6f9 --- /dev/null +++ b/tests/data/demo_sot_data/lasot_full/testing_set.txt @@ -0,0 +1,2 @@ +airplane-1 +airplane-1 diff --git a/tests/data/demo_sot_data/vot2018/ants1/color/00000001.jpg b/tests/data/demo_sot_data/vot2018/ants1/color/00000001.jpg new file mode 100644 index 000000000..81e94785b Binary files /dev/null and b/tests/data/demo_sot_data/vot2018/ants1/color/00000001.jpg differ diff --git a/tests/data/demo_sot_data/vot2018/ants1/color/00000002.jpg b/tests/data/demo_sot_data/vot2018/ants1/color/00000002.jpg new file mode 100644 index 000000000..81e94785b Binary files /dev/null and b/tests/data/demo_sot_data/vot2018/ants1/color/00000002.jpg differ diff --git a/tests/data/demo_sot_data/vot2018/ants1/groundtruth.txt b/tests/data/demo_sot_data/vot2018/ants1/groundtruth.txt new file mode 100644 index 000000000..7e32c84af --- /dev/null +++ b/tests/data/demo_sot_data/vot2018/ants1/groundtruth.txt @@ -0,0 +1,2 @@ +1,1,100,1,100,100,1,100 +1,1,100,1,100,100,1,100 diff --git a/tests/data/demo_sot_data/vot2018/ants1/gt_for_eval.txt b/tests/data/demo_sot_data/vot2018/ants1/gt_for_eval.txt new file mode 100644 index 000000000..68f7c1d1b --- /dev/null +++ b/tests/data/demo_sot_data/vot2018/ants1/gt_for_eval.txt @@ -0,0 +1,25 @@ +367,101,408,101,408,117,367,117 +366,103,411,103,411,119,366,119 +364,107,409,107,409,122,364,122 +362,109,408,109,408,125,362,125 +362,111,408,111,408,129,362,129 +362,113,408,113,408,131,362,131 +364,116,410,116,410,133,364,133 +366,118,411,118,411,135,366,135 +362,119,410,119,410,136,362,136 +359,119,404,119,404,136,359,136 +358,119,404,119,404,136,358,136 +360,121,406,121,406,138,360,138 +360,124,406,124,406,141,360,141 +359,124,406,124,406,141,359,141 +360,126,406,126,406,143,360,143 +356,127,402,127,402,145,356,145 +354,127,400,127,400,144,354,144 +352,127,398,127,398,144,352,144 +352,126,396,126,396,143,352,143 +349,126,395,126,395,143,349,143 +347,126,393,126,393,143,347,143 +346,125,392,125,392,142,346,142 +345,124,392,124,392,141,345,141 +345,124,391,124,391,141,345,141 +344,124,391,124,391,141,344,141 diff --git a/tests/data/demo_sot_data/lasot/airplane-1/vot_track_results.txt b/tests/data/demo_sot_data/vot2018/ants1/track_results.txt similarity index 100% rename from tests/data/demo_sot_data/lasot/airplane-1/vot_track_results.txt rename to tests/data/demo_sot_data/vot2018/ants1/track_results.txt diff --git a/tests/data/demo_sot_data/vot2018/ants3/color/00000001.jpg b/tests/data/demo_sot_data/vot2018/ants3/color/00000001.jpg new file mode 100644 index 000000000..81e94785b Binary files /dev/null and b/tests/data/demo_sot_data/vot2018/ants3/color/00000001.jpg differ diff --git a/tests/data/demo_sot_data/vot2018/ants3/color/00000002.jpg b/tests/data/demo_sot_data/vot2018/ants3/color/00000002.jpg new file mode 100644 index 000000000..81e94785b Binary files /dev/null and b/tests/data/demo_sot_data/vot2018/ants3/color/00000002.jpg differ diff --git a/tests/data/demo_sot_data/vot2018/ants3/groundtruth.txt b/tests/data/demo_sot_data/vot2018/ants3/groundtruth.txt new file mode 100644 index 000000000..7e32c84af --- /dev/null +++ b/tests/data/demo_sot_data/vot2018/ants3/groundtruth.txt @@ -0,0 +1,2 @@ +1,1,100,1,100,100,1,100 +1,1,100,1,100,100,1,100 diff --git a/tests/data/demo_sot_data/vot2018/ants3/gt_for_eval.txt b/tests/data/demo_sot_data/vot2018/ants3/gt_for_eval.txt new file mode 100644 index 000000000..8814fef9e --- /dev/null +++ b/tests/data/demo_sot_data/vot2018/ants3/gt_for_eval.txt @@ -0,0 +1,25 @@ +76,74,443,74,443,225,76,225 +75,76,444,76,444,226,75,226 +78,76,446,76,446,226,78,226 +81,77,447,77,447,226,81,226 +82,76,449,76,449,226,82,226 +81,74,451,74,451,225,81,225 +81,74,451,74,451,226,81,226 +84,77,454,77,454,228,84,228 +89,79,460,79,460,228,89,228 +88,78,460,78,460,227,88,227 +88,78,460,78,460,228,88,228 +90,79,464,79,464,228,90,228 +90,80,464,80,464,229,90,229 +89,81,463,81,463,231,89,231 +92,81,467,81,467,231,92,231 +94,80,472,80,472,230,94,230 +95,80,474,80,474,230,95,230 +96,79,472,79,472,230,96,230 +96,79,471,79,471,231,96,231 +100,81,477,81,477,231,100,231 +102,81,479,81,479,231,102,231 +99,79,475,79,475,231,99,231 +99,82,478,82,478,232,99,232 +104,82,479,82,479,232,104,232 +100,81,479,81,479,233,100,233 diff --git a/tests/data/demo_sot_data/lasot/airplane-2/vot_track_results.txt b/tests/data/demo_sot_data/vot2018/ants3/track_results.txt similarity index 100% rename from tests/data/demo_sot_data/lasot/airplane-2/vot_track_results.txt rename to tests/data/demo_sot_data/vot2018/ants3/track_results.txt diff --git a/tests/data/demo_sot_data/vot2018/list.txt b/tests/data/demo_sot_data/vot2018/list.txt new file mode 100644 index 000000000..9c6f04f85 --- /dev/null +++ b/tests/data/demo_sot_data/vot2018/list.txt @@ -0,0 +1,2 @@ +ants1 +ants3 diff --git a/tests/test_data/test_datasets/test_sot_dataset.py b/tests/test_data/test_datasets/test_sot_dataset.py index 9adeb4ee6..b87c485b3 100644 --- a/tests/test_data/test_datasets/test_sot_dataset.py +++ b/tests/test_data/test_datasets/test_sot_dataset.py @@ -12,6 +12,18 @@ SOT_DATA_PREFIX = f'{PREFIX}/demo_sot_data' DATASET_INFOS = dict( GOT10kDataset=dict(img_prefix=osp.join(SOT_DATA_PREFIX, 'got10k')), + VOTDataset=dict( + dataset_type='vot2018', + img_prefix=osp.join(SOT_DATA_PREFIX, 'vot2018')), + OTB100Dataset=dict( + ann_file='tools/convert_datasets/otb100/otb100_infos.txt', + img_prefix=osp.join(SOT_DATA_PREFIX, 'otb100')), + UAV123Dataset=dict( + ann_file='tools/convert_datasets/uav123/uav123_infos.txt', + img_prefix=osp.join(SOT_DATA_PREFIX, 'uav123')), + LaSOTDataset=dict( + ann_file=osp.join(SOT_DATA_PREFIX, 'lasot_full', 'testing_set.txt'), + img_prefix=osp.join(SOT_DATA_PREFIX, 'lasot_full')), TrackingNetDataset=dict( chunks_list=[0], img_prefix=osp.join(SOT_DATA_PREFIX, 'trackingnet')), SOTCocoDataset=dict( @@ -23,7 +35,8 @@ @pytest.mark.parametrize('dataset', [ - 'GOT10kDataset', 'TrackingNetDataset', 'SOTImageNetVIDDataset', + 'GOT10kDataset', 'VOTDataset', 'OTB100Dataset', 'UAV123Dataset', + 'LaSOTDataset', 'TrackingNetDataset', 'SOTImageNetVIDDataset', 'SOTCocoDataset' ]) def test_load_data_infos(dataset): @@ -34,8 +47,8 @@ def test_load_data_infos(dataset): @pytest.mark.parametrize('dataset', [ - 'GOT10kDataset', 'TrackingNetDataset', 'SOTImageNetVIDDataset', - 'SOTCocoDataset' + 'GOT10kDataset', 'VOTDataset', 'LaSOTDataset', 'TrackingNetDataset', + 'SOTImageNetVIDDataset', 'SOTCocoDataset' ]) def test_get_bboxes_from_video(dataset): dataset_class = DATASETS.get(dataset) @@ -52,8 +65,8 @@ def test_get_bboxes_from_video(dataset): @pytest.mark.parametrize('dataset', [ - 'GOT10kDataset', 'TrackingNetDataset', 'SOTImageNetVIDDataset', - 'SOTCocoDataset' + 'GOT10kDataset', 'VOTDataset', 'LaSOTDataset', 'TrackingNetDataset', + 'SOTImageNetVIDDataset', 'SOTCocoDataset' ]) def test_get_visibility_from_video(dataset): dataset_class = DATASETS.get(dataset) @@ -66,7 +79,7 @@ def test_get_visibility_from_video(dataset): @pytest.mark.parametrize('dataset', [ 'GOT10kDataset', 'TrackingNetDataset', 'SOTImageNetVIDDataset', - 'SOTCocoDataset' + 'SOTCocoDataset', 'VOTDataset', 'LaSOTDataset' ]) def test_get_ann_infos_from_video(dataset): dataset_class = DATASETS.get(dataset) @@ -78,7 +91,7 @@ def test_get_ann_infos_from_video(dataset): @pytest.mark.parametrize('dataset', [ 'GOT10kDataset', 'TrackingNetDataset', 'SOTImageNetVIDDataset', - 'SOTCocoDataset' + 'SOTCocoDataset', 'VOTDataset', 'LaSOTDataset' ]) def test_get_img_infos_from_video(dataset): dataset_class = DATASETS.get(dataset) @@ -88,7 +101,9 @@ def test_get_img_infos_from_video(dataset): dataset_object.get_img_infos_from_video(0) -@pytest.mark.parametrize('dataset', ['GOT10kDataset', 'TrackingNetDataset']) +@pytest.mark.parametrize( + 'dataset', + ['GOT10kDataset', 'VOTDataset', 'LaSOTDataset', 'TrackingNetDataset']) def test_prepare_test_data(dataset): dataset_class = DATASETS.get(dataset) @@ -99,7 +114,7 @@ def test_prepare_test_data(dataset): @pytest.mark.parametrize('dataset', [ 'GOT10kDataset', 'TrackingNetDataset', 'SOTImageNetVIDDataset', - 'SOTCocoDataset' + 'SOTCocoDataset', 'LaSOTDataset' ]) def test_prepare_train_data(dataset): dataset_class = DATASETS.get(dataset) @@ -138,3 +153,85 @@ def test_format_results(dataset): dataset_object.format_results(track_results, resfile_path=tmp_dir.name) if osp.isdir(tmp_dir.name): tmp_dir.cleanup() + + +def test_sot_ope_evaluation(): + dataset_class = DATASETS.get('UAV123Dataset') + dataset_object = dataset_class( + **DATASET_INFOS['UAV123Dataset'], + pipeline=[], + split='test', + test_mode=True) + + dataset_object.num_frames_per_video = [25, 25] + results = [] + data_infos = [] + lasot_root = osp.join(SOT_DATA_PREFIX, 'lasot_full') + for video_name in ['airplane/airplane-1', 'basketball/basketball-2']: + bboxes = np.loadtxt( + osp.join(lasot_root, video_name, 'track_results.txt'), + delimiter=',') + scores = np.zeros((len(bboxes), 1)) + bboxes = np.concatenate((bboxes, scores), axis=-1) + results.extend(bboxes) + data_infos.append( + dict( + video_path=osp.join(lasot_root, video_name, 'img'), + ann_path=osp.join(lasot_root, video_name, 'gt_for_eval.txt'), + start_frame_id=1, + end_frame_id=25, + framename_template='%06d.jpg')) + + dataset_object.data_infos = data_infos + track_results = dict(track_bboxes=results) + eval_results = dataset_object.evaluate(track_results, metric=['track']) + assert eval_results['success'] == 67.524 + assert eval_results['norm_precision'] == 70.0 + assert eval_results['precision'] == 50.0 + + +def test_sot_vot_evaluation(): + dataset_class = DATASETS.get('VOTDataset') + dataset_object = dataset_class( + **DATASET_INFOS['VOTDataset'], + pipeline=[], + split='test', + test_mode=True) + + dataset_object.num_frames_per_video = [25, 25] + data_infos = [] + results = [] + vot_root = osp.join(SOT_DATA_PREFIX, 'vot2018') + for video_name in ['ants1', 'ants3']: + results.extend( + mmcv.list_from_file( + osp.join(vot_root, video_name, 'track_results.txt'))) + data_infos.append( + dict( + video_path=osp.join(vot_root, video_name, 'color'), + ann_path=osp.join(vot_root, video_name, 'gt_for_eval.txt'), + start_frame_id=1, + end_frame_id=25, + framename_template='%08d.jpg')) + dataset_object.data_infos = data_infos + + track_bboxes = [] + for result in results: + result = result.split(',') + if len(result) == 1: + track_bboxes.append(np.array([float(result[0]), 0.])) + else: + track_bboxes.append( + np.array([ + float(result[0]), + float(result[1]), + float(result[2]), + float(result[3]), 0. + ])) + + track_bboxes = dict(track_bboxes=track_bboxes) + eval_results = dataset_object.evaluate( + track_bboxes, interval=[1, 3], metric=['track']) + assert abs(eval_results['eao'] - 0.6661) < 0.0001 + assert round(eval_results['accuracy'], 4) == 0.5826 + assert round(eval_results['robustness'], 4) == 6.0 diff --git a/tests/test_data/test_datasets/test_sot_test_dataset.py b/tests/test_data/test_datasets/test_sot_test_dataset.py deleted file mode 100644 index 72a302c3c..000000000 --- a/tests/test_data/test_datasets/test_sot_test_dataset.py +++ /dev/null @@ -1,99 +0,0 @@ -# Copyright (c) OpenMMLab. All rights reserved. -import os.path as osp - -import mmcv -import numpy as np -import pytest - -from mmtrack.datasets import DATASETS as DATASETS - -PREFIX = osp.join(osp.dirname(__file__), '../../data') -LASOT_ANN_PATH = f'{PREFIX}/demo_sot_data/lasot' - - -@pytest.mark.parametrize('dataset', - ['SOTTestDataset', 'LaSOTDataset', 'VOTDataset']) -def test_parse_ann_info(dataset): - dataset_class = DATASETS.get(dataset) - - ann_file = osp.join(LASOT_ANN_PATH, 'lasot_test_dummy.json') - dataset_object = dataset_class(ann_file=ann_file, pipeline=[]) - - if dataset == 'VOTDataset': - for _, img_ann in dataset_object.coco.anns.items(): - x, y, w, h = img_ann['bbox'] - img_ann['bbox'] = [x, y, x + w, y, x + w, y + h, x, y + h] - - # image 5 has 1 objects - img_id = 5 - img_info = dataset_object.coco.load_imgs([img_id])[0] - ann_ids = dataset_object.coco.get_ann_ids([img_id]) - ann_info = dataset_object.coco.loadAnns(ann_ids) - ann = dataset_object._parse_ann_info(img_info, ann_info) - assert ann['bboxes'].shape == ( - 4, ) if dataset != 'VOTDataset' else ann['bboxes'].shape == (8, ) - assert ann['labels'] == 0 - - -def test_sot_ope_evaluation(): - dataset_class = DATASETS.get('SOTTestDataset') - dataset = dataset_class( - ann_file=osp.join(LASOT_ANN_PATH, 'lasot_test_dummy.json'), - pipeline=[]) - - results = [] - for video_name in ['airplane-1', 'airplane-2']: - results.extend( - mmcv.list_from_file( - osp.join(LASOT_ANN_PATH, video_name, 'track_results.txt'))) - track_bboxes = [] - for result in results: - x1, y1, x2, y2 = result.split(',') - track_bboxes.append( - np.array([float(x1), - float(y1), - float(x2), - float(y2), 0.])) - - track_results = dict(track_bboxes=track_bboxes) - eval_results = dataset.evaluate(track_results, metric=['track']) - assert eval_results['success'] == 67.524 - assert eval_results['norm_precision'] == 70.0 - assert eval_results['precision'] == 50.0 - - -def test_sot_vot_evaluation(): - dataset_class = DATASETS.get('VOTDataset') - dataset = dataset_class( - ann_file=osp.join(LASOT_ANN_PATH, 'lasot_test_dummy.json'), - pipeline=[]) - - for _, img_ann in dataset.coco.anns.items(): - x, y, w, h = img_ann['bbox'] - img_ann['bbox'] = [x, y, x + w, y, x + w, y + h, x, y + h] - - results = [] - for video_name in ['airplane-1', 'airplane-2']: - results.extend( - mmcv.list_from_file( - osp.join(LASOT_ANN_PATH, video_name, 'vot_track_results.txt'))) - track_bboxes = [] - for result in results: - result = result.split(',') - if len(result) == 1: - track_bboxes.append(np.array([float(result[0]), 0.])) - else: - track_bboxes.append( - np.array([ - float(result[0]), - float(result[1]), - float(result[2]), - float(result[3]), 0. - ])) - - track_bboxes = dict(track_bboxes=track_bboxes) - eval_results = dataset.evaluate( - track_bboxes, interval=[1, 3], metric=['track']) - assert abs(eval_results['eao'] - 0.6394) < 0.0001 - assert round(eval_results['accuracy'], 4) == 0.5431 - assert round(eval_results['robustness'], 4) == 6.0 diff --git a/tools/convert_datasets/otb100/otb100_infos.txt b/tools/convert_datasets/otb100/otb100_infos.txt new file mode 100644 index 000000000..bfd81686e --- /dev/null +++ b/tools/convert_datasets/otb100/otb100_infos.txt @@ -0,0 +1,101 @@ +The format of each line in this txt is (video_path,annotation_path,start_frame_id,end_frame_id) +Basketball/img,Basketball/groundtruth_rect.txt,1,725 +Biker/img,Biker/groundtruth_rect.txt,1,142 +Bird1/img,Bird1/groundtruth_rect.txt,1,408 +Bird2/img,Bird2/groundtruth_rect.txt,1,99 +BlurBody/img,BlurBody/groundtruth_rect.txt,1,334 +BlurCar1/img,BlurCar1/groundtruth_rect.txt,247,988 +BlurCar2/img,BlurCar2/groundtruth_rect.txt,1,585 +BlurCar3/img,BlurCar3/groundtruth_rect.txt,3,359 +BlurCar4/img,BlurCar4/groundtruth_rect.txt,18,397 +BlurFace/img,BlurFace/groundtruth_rect.txt,1,493 +BlurOwl/img,BlurOwl/groundtruth_rect.txt,1,631 +Board/img,Board/groundtruth_rect.txt,1,698 +Bolt/img,Bolt/groundtruth_rect.txt,1,350 +Bolt2/img,Bolt2/groundtruth_rect.txt,1,293 +Box/img,Box/groundtruth_rect.txt,1,1161 +Boy/img,Boy/groundtruth_rect.txt,1,602 +Car1/img,Car1/groundtruth_rect.txt,1,1020 +Car2/img,Car2/groundtruth_rect.txt,1,913 +Car24/img,Car24/groundtruth_rect.txt,1,3059 +Car4/img,Car4/groundtruth_rect.txt,1,659 +CarDark/img,CarDark/groundtruth_rect.txt,1,393 +CarScale/img,CarScale/groundtruth_rect.txt,1,252 +ClifBar/img,ClifBar/groundtruth_rect.txt,1,472 +Coke/img,Coke/groundtruth_rect.txt,1,291 +Couple/img,Couple/groundtruth_rect.txt,1,140 +Coupon/img,Coupon/groundtruth_rect.txt,1,327 +Crossing/img,Crossing/groundtruth_rect.txt,1,120 +Crowds/img,Crowds/groundtruth_rect.txt,1,347 +Dancer/img,Dancer/groundtruth_rect.txt,1,225 +Dancer2/img,Dancer2/groundtruth_rect.txt,1,150 +David/img,David/groundtruth_rect.txt,300,770 +David2/img,David2/groundtruth_rect.txt,1,537 +David3/img,David3/groundtruth_rect.txt,1,252 +Deer/img,Deer/groundtruth_rect.txt,1,71 +Diving/img,Diving/groundtruth_rect.txt,1,215 +Dog/img,Dog/groundtruth_rect.txt,1,127 +Dog1/img,Dog1/groundtruth_rect.txt,1,1350 +Doll/img,Doll/groundtruth_rect.txt,1,3872 +DragonBaby/img,DragonBaby/groundtruth_rect.txt,1,113 +Dudek/img,Dudek/groundtruth_rect.txt,1,1145 +FaceOcc1/img,FaceOcc1/groundtruth_rect.txt,1,892 +FaceOcc2/img,FaceOcc2/groundtruth_rect.txt,1,812 +Fish/img,Fish/groundtruth_rect.txt,1,476 +FleetFace/img,FleetFace/groundtruth_rect.txt,1,707 +Football/img,Football/groundtruth_rect.txt,1,362 +Football1/img,Football1/groundtruth_rect.txt,1,74 +Freeman1/img,Freeman1/groundtruth_rect.txt,1,326 +Freeman3/img,Freeman3/groundtruth_rect.txt,1,460 +Freeman4/img,Freeman4/groundtruth_rect.txt,1,283 +Girl/img,Girl/groundtruth_rect.txt,1,500 +Girl2/img,Girl2/groundtruth_rect.txt,1,1500 +Gym/img,Gym/groundtruth_rect.txt,1,767 +Human2/img,Human2/groundtruth_rect.txt,1,1128 +Human3/img,Human3/groundtruth_rect.txt,1,1698 +Human4/img,Human4/groundtruth_rect.2.txt,1,667 +Human5/img,Human5/groundtruth_rect.txt,1,713 +Human6/img,Human6/groundtruth_rect.txt,1,792 +Human7/img,Human7/groundtruth_rect.txt,1,250 +Human8/img,Human8/groundtruth_rect.txt,1,128 +Human9/img,Human9/groundtruth_rect.txt,1,305 +Ironman/img,Ironman/groundtruth_rect.txt,1,166 +Jogging/img,Jogging/groundtruth_rect.1.txt,1,307 +Jogging/img,Jogging/groundtruth_rect.2.txt,1,307 +Jump/img,Jump/groundtruth_rect.txt,1,122 +Jumping/img,Jumping/groundtruth_rect.txt,1,313 +KiteSurf/img,KiteSurf/groundtruth_rect.txt,1,84 +Lemming/img,Lemming/groundtruth_rect.txt,1,1336 +Liquor/img,Liquor/groundtruth_rect.txt,1,1741 +Man/img,Man/groundtruth_rect.txt,1,134 +Matrix/img,Matrix/groundtruth_rect.txt,1,100 +Mhyang/img,Mhyang/groundtruth_rect.txt,1,1490 +MotorRolling/img,MotorRolling/groundtruth_rect.txt,1,164 +MountainBike/img,MountainBike/groundtruth_rect.txt,1,228 +Panda/img,Panda/groundtruth_rect.txt,1,1000 +RedTeam/img,RedTeam/groundtruth_rect.txt,1,1918 +Rubik/img,Rubik/groundtruth_rect.txt,1,1997 +Shaking/img,Shaking/groundtruth_rect.txt,1,365 +Singer1/img,Singer1/groundtruth_rect.txt,1,351 +Singer2/img,Singer2/groundtruth_rect.txt,1,366 +Skater/img,Skater/groundtruth_rect.txt,1,160 +Skater2/img,Skater2/groundtruth_rect.txt,1,435 +Skating1/img,Skating1/groundtruth_rect.txt,1,400 +Skating2/img,Skating2/groundtruth_rect.1.txt,1,473 +Skating2/img,Skating2/groundtruth_rect.2.txt,1,473 +Skiing/img,Skiing/groundtruth_rect.txt,1,81 +Soccer/img,Soccer/groundtruth_rect.txt,1,392 +Subway/img,Subway/groundtruth_rect.txt,1,175 +Surfer/img,Surfer/groundtruth_rect.txt,1,376 +Suv/img,Suv/groundtruth_rect.txt,1,945 +Sylvester/img,Sylvester/groundtruth_rect.txt,1,1345 +Tiger1/img,Tiger1/groundtruth_rect.txt,6,354 +Tiger2/img,Tiger2/groundtruth_rect.txt,1,365 +Toy/img,Toy/groundtruth_rect.txt,1,271 +Trans/img,Trans/groundtruth_rect.txt,1,124 +Trellis/img,Trellis/groundtruth_rect.txt,1,569 +Twinnings/img,Twinnings/groundtruth_rect.txt,1,472 +Vase/img,Vase/groundtruth_rect.txt,1,271 +Walking/img,Walking/groundtruth_rect.txt,1,412 +Walking2/img,Walking2/groundtruth_rect.txt,1,500 +Woman/img,Woman/groundtruth_rect.txt,1,597 diff --git a/tools/convert_datasets/uav123/uav123_info.txt b/tools/convert_datasets/uav123/uav123_info_deprecated.txt similarity index 100% rename from tools/convert_datasets/uav123/uav123_info.txt rename to tools/convert_datasets/uav123/uav123_info_deprecated.txt diff --git a/tools/convert_datasets/uav123/uav123_infos.txt b/tools/convert_datasets/uav123/uav123_infos.txt new file mode 100644 index 000000000..af958d42c --- /dev/null +++ b/tools/convert_datasets/uav123/uav123_infos.txt @@ -0,0 +1,124 @@ +The format of each line in this txt is (video_path,annotation_path,start_frame_id,end_frame_id) +data_seq/UAV123/bike1,anno/UAV123/bike1.txt,1,3085 +data_seq/UAV123/bike2,anno/UAV123/bike2.txt,1,553 +data_seq/UAV123/bike3,anno/UAV123/bike3.txt,1,433 +data_seq/UAV123/bird1,anno/UAV123/bird1_1.txt,1,253 +data_seq/UAV123/bird1,anno/UAV123/bird1_2.txt,775,1477 +data_seq/UAV123/bird1,anno/UAV123/bird1_3.txt,1573,2437 +data_seq/UAV123/boat1,anno/UAV123/boat1.txt,1,901 +data_seq/UAV123/boat2,anno/UAV123/boat2.txt,1,799 +data_seq/UAV123/boat3,anno/UAV123/boat3.txt,1,901 +data_seq/UAV123/boat4,anno/UAV123/boat4.txt,1,553 +data_seq/UAV123/boat5,anno/UAV123/boat5.txt,1,505 +data_seq/UAV123/boat6,anno/UAV123/boat6.txt,1,805 +data_seq/UAV123/boat7,anno/UAV123/boat7.txt,1,535 +data_seq/UAV123/boat8,anno/UAV123/boat8.txt,1,685 +data_seq/UAV123/boat9,anno/UAV123/boat9.txt,1,1399 +data_seq/UAV123/building1,anno/UAV123/building1.txt,1,469 +data_seq/UAV123/building2,anno/UAV123/building2.txt,1,577 +data_seq/UAV123/building3,anno/UAV123/building3.txt,1,829 +data_seq/UAV123/building4,anno/UAV123/building4.txt,1,787 +data_seq/UAV123/building5,anno/UAV123/building5.txt,1,481 +data_seq/UAV123/car10,anno/UAV123/car10.txt,1,1405 +data_seq/UAV123/car11,anno/UAV123/car11.txt,1,337 +data_seq/UAV123/car12,anno/UAV123/car12.txt,1,499 +data_seq/UAV123/car13,anno/UAV123/car13.txt,1,415 +data_seq/UAV123/car14,anno/UAV123/car14.txt,1,1327 +data_seq/UAV123/car15,anno/UAV123/car15.txt,1,469 +data_seq/UAV123/car16,anno/UAV123/car16_1.txt,1,415 +data_seq/UAV123/car16,anno/UAV123/car16_2.txt,415,1993 +data_seq/UAV123/car17,anno/UAV123/car17.txt,1,1057 +data_seq/UAV123/car18,anno/UAV123/car18.txt,1,1207 +data_seq/UAV123/car1,anno/UAV123/car1_1.txt,1,751 +data_seq/UAV123/car1,anno/UAV123/car1_2.txt,751,1627 +data_seq/UAV123/car1,anno/UAV123/car1_3.txt,1627,2629 +data_seq/UAV123/car1_s,anno/UAV123/car1_s.txt,1,1475 +data_seq/UAV123/car2,anno/UAV123/car2.txt,1,1321 +data_seq/UAV123/car2_s,anno/UAV123/car2_s.txt,1,320 +data_seq/UAV123/car3,anno/UAV123/car3.txt,1,1717 +data_seq/UAV123/car3_s,anno/UAV123/car3_s.txt,1,1300 +data_seq/UAV123/car4,anno/UAV123/car4.txt,1,1345 +data_seq/UAV123/car4_s,anno/UAV123/car4_s.txt,1,830 +data_seq/UAV123/car5,anno/UAV123/car5.txt,1,745 +data_seq/UAV123/car6,anno/UAV123/car6_1.txt,1,487 +data_seq/UAV123/car6,anno/UAV123/car6_2.txt,487,1807 +data_seq/UAV123/car6,anno/UAV123/car6_3.txt,1807,2953 +data_seq/UAV123/car6,anno/UAV123/car6_4.txt,2953,3925 +data_seq/UAV123/car6,anno/UAV123/car6_5.txt,3925,4861 +data_seq/UAV123/car7,anno/UAV123/car7.txt,1,1033 +data_seq/UAV123/car8,anno/UAV123/car8_1.txt,1,1357 +data_seq/UAV123/car8,anno/UAV123/car8_2.txt,1357,2575 +data_seq/UAV123/car9,anno/UAV123/car9.txt,1,1879 +data_seq/UAV123/group1,anno/UAV123/group1_1.txt,1,1333 +data_seq/UAV123/group1,anno/UAV123/group1_2.txt,1333,2515 +data_seq/UAV123/group1,anno/UAV123/group1_3.txt,2515,3925 +data_seq/UAV123/group1,anno/UAV123/group1_4.txt,3925,4873 +data_seq/UAV123/group2,anno/UAV123/group2_1.txt,1,907 +data_seq/UAV123/group2,anno/UAV123/group2_2.txt,907,1771 +data_seq/UAV123/group2,anno/UAV123/group2_3.txt,1771,2683 +data_seq/UAV123/group3,anno/UAV123/group3_1.txt,1,1567 +data_seq/UAV123/group3,anno/UAV123/group3_2.txt,1567,2827 +data_seq/UAV123/group3,anno/UAV123/group3_3.txt,2827,4369 +data_seq/UAV123/group3,anno/UAV123/group3_4.txt,4369,5527 +data_seq/UAV123/person1,anno/UAV123/person1.txt,1,799 +data_seq/UAV123/person10,anno/UAV123/person10.txt,1,1021 +data_seq/UAV123/person11,anno/UAV123/person11.txt,1,721 +data_seq/UAV123/person12,anno/UAV123/person12_1.txt,1,601 +data_seq/UAV123/person12,anno/UAV123/person12_2.txt,601,1621 +data_seq/UAV123/person13,anno/UAV123/person13.txt,1,883 +data_seq/UAV123/person14,anno/UAV123/person14_1.txt,1,847 +data_seq/UAV123/person14,anno/UAV123/person14_2.txt,847,1813 +data_seq/UAV123/person14,anno/UAV123/person14_3.txt,1813,2923 +data_seq/UAV123/person15,anno/UAV123/person15.txt,1,1339 +data_seq/UAV123/person16,anno/UAV123/person16.txt,1,1147 +data_seq/UAV123/person17,anno/UAV123/person17_1.txt,1,1501 +data_seq/UAV123/person17,anno/UAV123/person17_2.txt,1501,2347 +data_seq/UAV123/person18,anno/UAV123/person18.txt,1,1393 +data_seq/UAV123/person19,anno/UAV123/person19_1.txt,1,1243 +data_seq/UAV123/person19,anno/UAV123/person19_2.txt,1243,2791 +data_seq/UAV123/person19,anno/UAV123/person19_3.txt,2791,4357 +data_seq/UAV123/person1_s,anno/UAV123/person1_s.txt,1,1600 +data_seq/UAV123/person20,anno/UAV123/person20.txt,1,1783 +data_seq/UAV123/person21,anno/UAV123/person21.txt,1,487 +data_seq/UAV123/person22,anno/UAV123/person22.txt,1,199 +data_seq/UAV123/person23,anno/UAV123/person23.txt,1,397 +data_seq/UAV123/person2,anno/UAV123/person2_1.txt,1,1189 +data_seq/UAV123/person2,anno/UAV123/person2_2.txt,1189,2623 +data_seq/UAV123/person2_s,anno/UAV123/person2_s.txt,1,250 +data_seq/UAV123/person3,anno/UAV123/person3.txt,1,643 +data_seq/UAV123/person3_s,anno/UAV123/person3_s.txt,1,505 +data_seq/UAV123/person4,anno/UAV123/person4_1.txt,1,1501 +data_seq/UAV123/person4,anno/UAV123/person4_2.txt,1501,2743 +data_seq/UAV123/person5,anno/UAV123/person5_1.txt,1,877 +data_seq/UAV123/person5,anno/UAV123/person5_2.txt,877,2101 +data_seq/UAV123/person6,anno/UAV123/person6.txt,1,901 +data_seq/UAV123/person7,anno/UAV123/person7_1.txt,1,1249 +data_seq/UAV123/person7,anno/UAV123/person7_2.txt,1249,2065 +data_seq/UAV123/person8,anno/UAV123/person8_1.txt,1,1075 +data_seq/UAV123/person8,anno/UAV123/person8_2.txt,1075,1525 +data_seq/UAV123/person9,anno/UAV123/person9.txt,1,661 +data_seq/UAV123/truck1,anno/UAV123/truck1.txt,1,463 +data_seq/UAV123/truck2,anno/UAV123/truck2.txt,1,385 +data_seq/UAV123/truck3,anno/UAV123/truck3.txt,1,535 +data_seq/UAV123/truck4,anno/UAV123/truck4_1.txt,1,577 +data_seq/UAV123/truck4,anno/UAV123/truck4_2.txt,577,1261 +data_seq/UAV123/uav1,anno/UAV123/uav1_1.txt,1,1555 +data_seq/UAV123/uav1,anno/UAV123/uav1_2.txt,1555,2377 +data_seq/UAV123/uav1,anno/UAV123/uav1_3.txt,2473,3469 +data_seq/UAV123/uav2,anno/UAV123/uav2.txt,1,133 +data_seq/UAV123/uav3,anno/UAV123/uav3.txt,1,265 +data_seq/UAV123/uav4,anno/UAV123/uav4.txt,1,157 +data_seq/UAV123/uav5,anno/UAV123/uav5.txt,1,139 +data_seq/UAV123/uav6,anno/UAV123/uav6.txt,1,109 +data_seq/UAV123/uav7,anno/UAV123/uav7.txt,1,373 +data_seq/UAV123/uav8,anno/UAV123/uav8.txt,1,301 +data_seq/UAV123/wakeboard1,anno/UAV123/wakeboard1.txt,1,421 +data_seq/UAV123/wakeboard10,anno/UAV123/wakeboard10.txt,1,469 +data_seq/UAV123/wakeboard2,anno/UAV123/wakeboard2.txt,1,733 +data_seq/UAV123/wakeboard3,anno/UAV123/wakeboard3.txt,1,823 +data_seq/UAV123/wakeboard4,anno/UAV123/wakeboard4.txt,1,697 +data_seq/UAV123/wakeboard5,anno/UAV123/wakeboard5.txt,1,1675 +data_seq/UAV123/wakeboard6,anno/UAV123/wakeboard6.txt,1,1165 +data_seq/UAV123/wakeboard7,anno/UAV123/wakeboard7.txt,1,199 +data_seq/UAV123/wakeboard8,anno/UAV123/wakeboard8.txt,1,1543 +data_seq/UAV123/wakeboard9,anno/UAV123/wakeboard9.txt,1,355 diff --git a/tools/convert_datasets/uav123/uav2coco.py b/tools/convert_datasets/uav123/uav2coco.py index 6e489a53b..286040e14 100644 --- a/tools/convert_datasets/uav123/uav2coco.py +++ b/tools/convert_datasets/uav123/uav2coco.py @@ -34,7 +34,8 @@ def convert_uav123(uav123, ann_dir, save_dir): """ # The format of each line in "uav_info123.txt" is # "anno_name,anno_path,video_path,start_frame,end_frame" - info_path = osp.join(os.path.dirname(__file__), 'uav123_info.txt') + info_path = osp.join( + os.path.dirname(__file__), 'uav123_info_deprecated.txt') uav_info = mmcv.list_from_file(info_path)[1:] records = dict(vid_id=1, img_id=1, ann_id=1, global_instance_id=1)