diff --git a/.github/workflows/test_packages.yml b/.github/workflows/test_packages.yml index 7fabc5b512..29c2511dc6 100644 --- a/.github/workflows/test_packages.yml +++ b/.github/workflows/test_packages.yml @@ -45,6 +45,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -93,6 +94,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition - perception/object_detection_3d - control/mobile_manipulation diff --git a/.github/workflows/tests_suite.yml b/.github/workflows/tests_suite.yml index f084aac5a2..1a4e252b04 100644 --- a/.github/workflows/tests_suite.yml +++ b/.github/workflows/tests_suite.yml @@ -78,6 +78,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - simulation/human_model_generation - perception/facial_expression_recognition - control/single_demo_grasp @@ -185,6 +186,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -255,6 +257,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -331,6 +334,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition - perception/object_detection_3d - control/mobile_manipulation diff --git a/.github/workflows/tests_suite_develop.yml b/.github/workflows/tests_suite_develop.yml index 6da62e4f47..38f8113974 100644 --- a/.github/workflows/tests_suite_develop.yml +++ b/.github/workflows/tests_suite_develop.yml @@ -78,6 +78,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - simulation/human_model_generation - perception/facial_expression_recognition - control/single_demo_grasp @@ -190,6 +191,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -260,6 +262,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -336,6 +339,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition - perception/object_detection_3d - 
control/mobile_manipulation
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4cf9710ba..85847af40a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ Released on XX, XXth, 2022.
   - New Features:
     - Added end-to-end planning tool ([#223](https://github.com/opendr-eu/opendr/pull/223)).
+    - Added seq2seq-nms module, along with other custom NMS implementations for 2D object detection ([#232](https://github.com/opendr-eu/opendr/pull/232)).
   - Enhancements:
     - Added support for modular pip packages allowing tools to be installed separately ([#201](https://github.com/opendr-eu/opendr/pull/201)).
     - Simplified the installation process for pip by including the appropriate post-installation scripts ([#201](https://github.com/opendr-eu/opendr/pull/201)).
diff --git a/docs/reference/index.md b/docs/reference/index.md
index 728f90a959..8d9a7d7202 100644
--- a/docs/reference/index.md
+++ b/docs/reference/index.md
@@ -43,6 +43,7 @@ Neither the copyright holder nor any applicable licensor will be liable for any
     - [centernet Module](object-detection-2d-centernet.md)
     - [ssd Module](object-detection-2d-ssd.md)
     - [yolov3 Module](object-detection-2d-yolov3.md)
+    - [seq2seq-nms Module](object-detection-2d-nms-seq2seq_nms.md)
   - object detection 3d:
     - [voxel Module](voxel-object-detection-3d.md)
   - object tracking 2d:
@@ -113,6 +114,7 @@ Neither the copyright holder nor any applicable licensor will be liable for any
     - [centernet Demo](/projects/perception/object_detection_2d/centernet)
     - [ssd Demo](/projects/perception/object_detection_2d/ssd)
     - [yolov3 Demo](/projects/perception/object_detection_2d/yolov3)
+    - [seq2seq-nms Demo](/projects/perception/object_detection_2d/nms/seq2seq-nms)
   - object detection 3d:
     - [voxel Demo](/projects/perception/object_detection_3d/demos/voxel_object_detection_3d)
   - object tracking 2d:
diff --git a/docs/reference/object-detection-2d-nms-seq2seq_nms.md b/docs/reference/object-detection-2d-nms-seq2seq_nms.md
new file mode 100644
index 0000000000..513233c833
--- /dev/null
+++ b/docs/reference/object-detection-2d-nms-seq2seq_nms.md
@@ -0,0 +1,305 @@
+## Seq2Seq-NMS module
+
+The *seq2seq-nms* module contains the *Seq2SeqNMSLearner* class, which inherits from the abstract class *Learner*.
+
+### Class Seq2SeqNMSLearner
+Bases: `engine.learners.Learner`
+
+It can be used to perform single-class non-maximum suppression (NMS) on images (inference), as well as to train new seq2seq-nms models. The implementation is based on [[1]](#seq2seq_nms-1). The method is set up for performing NMS on the person-detection task, using the implementation of the [SSD](/docs/reference/object-detection-2d-ssd.md) detector. The Seq2Seq-NMS method can also be employed for performing single-class NMS on any class other than the human/pedestrian class. In that case, the method needs to be trained from scratch. Finally, a pretrained model can be employed for evaluation or inference on the same class that it was trained on, using RoIs from a different detector than the one used in training. In that case, we advise fine-tuning the pretrained Seq2Seq-NMS model using RoIs from the detector deployed in the inference/evaluation of the method, in order to achieve the highest possible performance.
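+
+As a quick orientation (a minimal sketch, not part of the reference below), a pretrained model can be applied to raw RoIs produced by any detector through `run_nms`, which also computes the FMoD maps internally. The model name and file paths follow the examples at the end of this document and are placeholders:
+
+```python
+from opendr.perception.object_detection_2d.nms import Seq2SeqNMSLearner
+from opendr.engine.data import Image
+import numpy as np
+
+learner = Seq2SeqNMSLearner(app_feats='fmod', fmod_map_type='EDGEMAP', device='cpu', temp_path='./temp')
+learner.download(model_name='seq2seq_pets_jpd_fmod', path='./temp')   # pretrained person-detection model
+learner.load('./temp/seq2seq_pets_jpd_fmod', verbose=True)
+
+img = Image.open('frame_0000.jpg')                                    # any image wrapped as engine.data.Image
+boxes = np.array([[10., 20., 110., 220.], [12., 25., 115., 225.]])    # Nx4 RoIs: (x_min, y_min, x_max, y_max)
+scores = np.array([[0.9], [0.6]])                                     # Nx1 detector confidences
+detections = learner.run_nms(boxes=boxes, scores=scores, img=img, threshold=0.1)
+```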
+
+The [Seq2SeqNMSLearner](/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py) class has the following
+public methods:
+
+#### `Seq2SeqNMSLearner` constructor
+```python
+Seq2SeqNMSLearner(self, lr, epochs, device, temp_path, checkpoint_after_iter, checkpoint_load_iter, log_after, variant,
+                  iou_filtering, dropout, app_feats, fmod_map_type, fmod_map_bin, app_input_dim)
+```
+
+Constructor parameters:
+
+- **lr**: *float, default=0.0001*\
+  Specifies the initial learning rate to be used during training.
+- **epochs**: *int, default=8*\
+  Specifies the number of epochs to be used during training.
+- **device**: *{'cuda', 'cpu'}, default='cuda'*\
+  Specifies the device to be used.
+- **temp_path**: *str, default='./temp'*\
+  Specifies a path to be used for storage of checkpoints during training.
+- **checkpoint_after_iter**: *int, default=0*\
+  Specifies the epoch interval between checkpoints during training.
+  If set to 0 no checkpoint will be saved.
+- **checkpoint_load_iter**: *int, default=0*\
+  Specifies the epoch to load a saved checkpoint from.
+  If set to 0 no checkpoint will be loaded.
+- **log_after**: *int, default=500*\
+  Specifies the interval (in iterations/batches) between information logging on *stdout*.
+- **variant**: *{'light', 'medium', 'full'}, default='medium'*\
+  Specifies the variant of the seq2seq-nms model.
+- **iou_filtering**: *float, default=0.8*\
+  Specifies the IoU threshold used for filtering RoIs before they are provided to the seq2seq-nms model.
+  If set to values <0 or >1, no filtering is applied.
+- **dropout**: *float, default=0.025*\
+  Specifies the dropout rate.
+- **app_feats**: *{'fmod', 'zeros', 'custom'}, default='fmod'*\
+  Specifies the type of the appearance-based features of RoIs used in the model.
+- **fmod_map_type**: *{'EDGEMAP', 'FAST', 'AKAZE', 'BRISK', 'ORB'}, default='EDGEMAP'*\
+  Specifies the type of maps used by FMoD, in the case where *app_feats*='fmod'.
+- **fmod_map_bin**: *bool, default=True*\
+  Specifies whether FMoD maps are binary or not, in the case where *app_feats*='fmod'.
+- **app_input_dim**: *int, default=None*\
+  Specifies the dimension of appearance-based RoI features.
+  In the case where *app_feats*='fmod', the corresponding dimension is automatically computed.
+
+
+#### `Seq2SeqNMSLearner.fit`
+```python
+Seq2SeqNMSLearner.fit(self, dataset, logging_path, logging_flush_secs, silent, verbose, nms_gt_iou, max_dt_boxes, datasets_folder, use_ssd)
+```
+
+This method is used to train the algorithm on a `Dataset_NMS` dataset.
+Returns a dictionary containing stats regarding the training process.
+
+Parameters:
+
+- **dataset**: *{'PETS', 'COCO'}*\
+  Specifies the name of the dataset among those available for training.
+- **logging_path**: *str, default=None*\
+  Path to save log files.
+  If set to None, only the console will be used for logging.
+- **logging_flush_secs**: *int, default=30*\
+  How often, in seconds, to flush the TensorBoard data to disk.
+- **silent**: *bool, default=False*\
+  If set to True, disables all printing of training progress reports and other information to STDOUT.
+- **verbose**: *bool, default=True*\
+  If True, enables maximum verbosity.
+- **nms_gt_iou**: *float, default=0.5*\
+  Specifies the threshold used to determine whether a detection RoI must be suppressed or not, based on its IoU with the image's ground-truth RoIs.
+- **max_dt_boxes**: *int, default=500*\
+  Specifies the maximum number of RoIs provided to the seq2seq-nms model as input.
+- **datasets_folder**: *str, default='./datasets'*\
+  Specifies the path to the folder where the datasets are stored.
+- **use_ssd**: *bool, default=False*\
+  If set to True, RoIs from SSD are fed to the seq2seq-nms model.
+  Otherwise, RoIs from the default detector of the specified dataset are used as input.
+
+#### `Seq2SeqNMSLearner.eval`
+```python
+Seq2SeqNMSLearner.eval(self, dataset, split, verbose, max_dt_boxes, threshold, datasets_folder, use_ssd)
+```
+
+Performs evaluation on a split of the specified dataset.
+
+Parameters:
+
+- **dataset**: *{'PETS', 'COCO'}*\
+  Specifies the name of the dataset among those available for training.
+- **split**: *{'train', 'val', 'test'}, default='test'*\
+  Specifies the split of the corresponding dataset on which the evaluation will be performed.
+- **verbose**: *bool, default=True*\
+  If True, enables maximum verbosity.
+- **max_dt_boxes**: *int, default=500*\
+  Specifies the maximum number of RoIs provided to the seq2seq-nms model as input.
+- **threshold**: *float, default=0.0*\
+  Specifies the confidence threshold used for RoI selection after seq2seq-nms rescoring.
+- **datasets_folder**: *str, default='./datasets'*\
+  Specifies the path to the folder where the datasets are stored.
+- **use_ssd**: *bool, default=False*\
+  If set to True, RoIs from SSD are fed to the seq2seq-nms model.
+  Otherwise, RoIs from the default detector of the specified dataset are used as input.
+
+#### `Seq2SeqNMSLearner.infer`
+```python
+Seq2SeqNMSLearner.infer(self, boxes, scores, boxes_sorted, max_dt_boxes, img_res, threshold)
+```
+
+Performs non-maximum suppression, using seq2seq-nms.
+In the case where FMoD is selected for appearance-based RoI feature computation, FMoD maps are not computed.
+
+Parameters:
+
+- **boxes**: *torch.tensor, default=None*\
+  Image coordinates of candidate detection RoIs, expressed as the coordinates of their upper-left and bottom-right corners (x_min, y_min, x_max, y_max).
+  For N candidate detection RoIs, the size of the *torch.tensor* is Nx4.
+- **scores**: *torch.tensor, default=None*\
+  Specifies the scores of the candidate detection RoIs, assigned previously by a detector.
+  For N candidate detection RoIs, the size of the *torch.tensor* is Nx1.
+- **boxes_sorted**: *bool, default=False*\
+  Specifies whether *boxes* and *scores* are sorted based on *scores* in descending order.
+- **max_dt_boxes**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+- **img_res**: *[int, int], default=None*\
+  Specifies the image resolution expressed as [width, height].
+- **threshold**: *float, default=0.1*\
+  Specifies the score threshold that determines which RoIs are kept after seq2seq-nms rescoring.
+
+#### `Seq2SeqNMSLearner.run_nms`
+```python
+Seq2SeqNMSLearner.run_nms(self, boxes, scores, img, threshold, boxes_sorted, top_k)
+```
+
+Performs non-maximum suppression, using seq2seq-nms.
+It incorporates the full pipeline needed for inference, including FMoD's edge/interest-point map computation step.
+
+Parameters:
+
+- **boxes**: *numpy.ndarray, default=None*\
+  Image coordinates of candidate detection RoIs, expressed as the coordinates of their upper-left and bottom-right corners (x_min, y_min, x_max, y_max).
+  For N candidate detection RoIs, the size of the array is Nx4.
+- **scores**: *numpy.ndarray, default=None*\
+  Specifies the scores of the candidate detection RoIs, assigned previously by a detector.
+  For N candidate detection RoIs, the size of the array is Nx1.
+- **boxes_sorted**: *bool, default=False*\
+  Specifies whether *boxes* and *scores* are sorted based on *scores* in descending order.
+- **top_k**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+- **img**: *object*\
+  Object of type engine.data.Image.
+- **threshold**: *float, default=0.1*\
+  Specifies the score threshold that determines which RoIs are kept after seq2seq-nms rescoring.
+
+#### `Seq2SeqNMSLearner.save`
+```python
+Seq2SeqNMSLearner.save(self, path, verbose, optimizer, scheduler, current_epoch, max_dt_boxes)
+```
+
+Saves a model in OpenDR format at the specified path.
+
+Parameters:
+
+- **path**: *str*\
+  Specifies the folder where the model will be saved.
+- **verbose**: *bool, default=False*\
+  If True, enables maximum verbosity.
+- **optimizer**: *torch.optim.Optimizer, default=None*\
+  Specifies the optimizer used for training.
+- **scheduler**: *torch.optim.lr_scheduler, default=None*\
+  Specifies the learning rate scheduler used for training.
+- **current_epoch**: *int, default=None*\
+  Specifies the number of epochs for which the model has been trained.
+- **max_dt_boxes**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+
+
+#### `Seq2SeqNMSLearner.load`
+```python
+Seq2SeqNMSLearner.load(self, path, verbose)
+```
+
+Loads a model which was previously saved in OpenDR format at the specified path.
+
+Parameters:
+
+- **path**: *str*\
+  Specifies the folder where the model will be loaded from.
+- **verbose**: *bool, default=False*\
+  If True, enables maximum verbosity.
+
+
+#### `Seq2SeqNMSLearner.download`
+```python
+Seq2SeqNMSLearner.download(self, path, model_name, verbose, url)
+```
+
+Downloads data needed by the learner, i.e., pretrained seq2seq-nms models as well as test data.
+
+Parameters:
+
+- **path**: *str, default=None*\
+  Specifies the folder where data will be downloaded.
+  If *None*, the *self.temp_path* directory is used instead.
+- **model_name**: *{'seq2seq_medium_pets_jpd_fmod_3', 'seq2seq_medium_pets_ssd_fmod_3', 'seq2seq_medium_coco_frcn_fmod_3', 'seq2seq_medium_pets_ssd_fmod_3'}, default='seq2seq_medium_pets_jpd_fmod_3'*\
+  Specifies the name of the pretrained seq2seq-nms model to be downloaded.
+- **verbose**: *bool, default=True*\
+  If True, enables maximum verbosity.
+- **url**: *str, default=OpenDR FTP URL*\
+  URL of the FTP server.
+
+#### Examples
+
+* **Training example.**
+  For proper training of seq2seq-nms, the PETS and COCO datasets are supported as `Dataset_NMS` types.
+
+  ```python
+  from opendr.perception.object_detection_2d.nms import Seq2SeqNMSLearner
+  import os
+  OPENDR_HOME = os.environ['OPENDR_HOME']
+
+  temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/tmp'
+  datasets_folder = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/datasets'
+
+  seq2SeqNMSLearner = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering=0.8,
+                                        app_feats='fmod', checkpoint_after_iter=1,
+                                        temp_path=temp_path, epochs=8)
+  seq2SeqNMSLearner.fit(dataset='PETS', use_ssd=False, datasets_folder=datasets_folder,
+                        logging_path=os.path.join(temp_path, 'logs'), silent=False,
+                        verbose=True, nms_gt_iou=0.50, max_dt_boxes=500)
+  ```
+
+* **Inference and result drawing example on a test .jpg image using OpenCV.**
+
+  ```python
+  from opendr.perception.object_detection_2d.nms import Seq2SeqNMSLearner
+  from opendr.engine.data import Image
+  from opendr.perception.object_detection_2d import SingleShotDetectorLearner
+  from opendr.perception.object_detection_2d import draw_bounding_boxes
+  import os
+  OPENDR_HOME = os.environ['OPENDR_HOME']
+  temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/tmp'
+
+  seq2SeqNMSLearner = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering=0.8,
+                                        app_feats='fmod', device='cpu',
+                                        temp_path=temp_path)
+  seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=temp_path)
+  seq2SeqNMSLearner.load(os.path.join(temp_path, 'seq2seq_pets_jpd_fmod'), verbose=True)
+  ssd = SingleShotDetectorLearner(device='cuda')
+  ssd.download(".", mode="pretrained")
+  ssd.load("./ssd_default_person", verbose=True)
+  img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg')
+  if not isinstance(img, Image):
+      img = Image(img)
+  boxes = ssd.infer(img, threshold=0.25, custom_nms=seq2SeqNMSLearner)
+  draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True)
+  ```
+
+* **Evaluation of pretrained model on PETS dataset.**
+
+  ```python
+  from opendr.perception.object_detection_2d import Seq2SeqNMSLearner
+  import os
+  OPENDR_HOME = os.environ['OPENDR_HOME']
+
+  datasets_folder = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/datasets'
+  temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/tmp'
+
+  seq2SeqNMSLearner = Seq2SeqNMSLearner(iou_filtering=0.8, app_feats='fmod',
+                                        temp_path=temp_path, device='cuda')
+  seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=temp_path)
+  seq2SeqNMSLearner.load(os.path.join(temp_path, 'seq2seq_pets_jpd_fmod'), verbose=True)
+  seq2SeqNMSLearner.eval(dataset='PETS', split='test', max_dt_boxes=800,
+                         datasets_folder=datasets_folder, use_ssd=False, threshold=0.0)
+  ```
+
+#### Performance Evaluation
+
+TABLE-1: Average Precision (AP) achieved by pretrained models on the person detection task, on the validation and test sets. The maximum number of RoIs employed for the performance evaluation was set to 800.
+
+| **Pretrained Model** | **Dataset** | **Detector** | **Type of Appearance-based Features** | **Pre-processing IoU Threshold** | **AP@0.5 on validation set** | **AP@0.5 on test set** |
+|:----------------------:|:-----------:|:------------:|:-------------------------------------:|:--------------------------------:|:----------------------------:|:----------------------:|
+| seq2seq_pets_jpd_fmod | PETS | JPD | FMoD | 0.8 | 80.2% | 84.3% |
+| seq2seq_pets_ssd_fmod | PETS | SSD | FMoD | 0.8 | 77.4% | 79.1% |
+| seq2seq_coco_frcn_fmod | COCO | FRCN | FMoD | - | 68.1% \* | 67.5% \*\* |
+| seq2seq_coco_ssd_fmod | COCO | SSD | FMoD | - | 41.8% \* | 42.4% \*\* |
+
+\* The minival set was used as validation set.
+\*\* The minitest set was used as test set. + + +#### References +[1] Neural Attention-driven Non-Maximum Suppression for Person Detection, [TechRxiv](https://www.techrxiv.org/articles/preprint/Neural_Attention-driven_Non-Maximum_Suppression_for_Person_Detection/16940275). diff --git a/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py b/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py index 6f643e61cf..f0dd7ca1d3 100755 --- a/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py +++ b/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py @@ -22,11 +22,12 @@ from opendr.engine.data import Image from opendr.perception.object_detection_2d import SingleShotDetectorLearner from opendr.perception.object_detection_2d import draw_bounding_boxes +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner, SoftNMS, FastNMS, ClusterNMS class ObjectDetectionSSDNode: def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/opendr/image_boxes_annotated", - detections_topic="/opendr/objects", device="cuda", backbone="vgg16_atrous"): + detections_topic="/opendr/objects", device="cuda", backbone="vgg16_atrous", nms_type='default'): """ Creates a ROS Node for face detection :param input_image_topic: Topic from which we are reading the input image @@ -41,6 +42,8 @@ def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/ :type device: str :param backbone: backbone network :type backbone: str + :param ms_type: type of NMS method + :type nms_type: str """ # Initialize the face detector @@ -48,6 +51,20 @@ def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/ self.object_detector.download(path=".", verbose=True) self.object_detector.load("ssd_default_person") self.class_names = self.object_detector.classes + self.custom_nms = None + + # Initialize Seq2Seq-NMS if selected + if nms_type == 'seq2seq-nms': + self.custom_nms = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering=0.8, + app_feats='fmod', device=self.device) + self.custom_nms.download(model_name='seq2seq_pets_jpd', path='.') + self.custom_nms.load('./seq2seq_pets_jpd/', verbose=True) + elif nms_type == 'soft-nms': + self.custom_nms = SoftNMS(nms_thres=0.45, device=self.device) + elif nms_type == 'fast-nms': + self.custom_nms = FastNMS(nms_thres=0.45, device=self.device) + elif nms_type == 'cluster-nms': + self.custom_nms = ClusterNMS(nms_thres=0.45, device=self.device) # Initialize OpenDR ROSBridge object self.bridge = ROSBridge() @@ -76,7 +93,7 @@ def callback(self, data): image = self.bridge.from_ros_image(data, encoding='bgr8') # Run pose estimation - boxes = self.object_detector.infer(image, threshold=0.45, keep_size=False) + boxes = self.object_detector.infer(image, threshold=0.45, keep_size=False, custom_nms=self.custom_nms) # Get an OpenCV image back image = np.float32(image.opencv()) diff --git a/projects/perception/object_detection_2d/nms/cluster_nms/README.md b/projects/perception/object_detection_2d/nms/cluster_nms/README.md new file mode 100644 index 0000000000..0ff5c5fd9c --- /dev/null +++ b/projects/perception/object_detection_2d/nms/cluster_nms/README.md @@ -0,0 +1,7 @@ +# Cluster-NMS Demos + +This folder contains minimal code usage examples that showcase the basic functionality of the Cluster-NMS implementation +provided by OpenDR. Specifically the following examples are provided: + +1. inference_demo.py: Perform inference on a single image. 
Setting `--device cpu` performs inference on CPU. + diff --git a/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py new file mode 100644 index 0000000000..e653f5820c --- /dev/null +++ b/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py @@ -0,0 +1,31 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from opendr.perception.object_detection_2d import ClusterNMS +from opendr.engine.data import Image +from opendr.perception.object_detection_2d import SingleShotDetectorLearner +from opendr.perception.object_detection_2d import draw_bounding_boxes +import os +OPENDR_HOME = os.environ['OPENDR_HOME'] + +ssd = SingleShotDetectorLearner(device='cuda') +ssd.download(".", mode="pretrained") +ssd.load("./ssd_default_person", verbose=True) +img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg') +if not isinstance(img, Image): + img = Image(img) +cluster_nms = ClusterNMS(device='cuda', nms_type='default', cross_class=True) +boxes = ssd.infer(img, threshold=0.3, custom_nms=cluster_nms) +draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True) diff --git a/projects/perception/object_detection_2d/nms/fast_nms/README.md b/projects/perception/object_detection_2d/nms/fast_nms/README.md new file mode 100644 index 0000000000..5a1ccb3fd6 --- /dev/null +++ b/projects/perception/object_detection_2d/nms/fast_nms/README.md @@ -0,0 +1,5 @@ +# Fast-NMS Demos + +This folder contains minimal code usage examples that showcase the basic functionality of the Fast-NMS implementation +provided by OpenDR. Specifically the following examples are provided: +1. inference_demo.py: Perform inference on a single image. Setting `--device cpu` performs inference on CPU. diff --git a/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py new file mode 100644 index 0000000000..5e0a5b48fa --- /dev/null +++ b/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py @@ -0,0 +1,31 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
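+
+# Demo body (below): downloads the pretrained OpenDR SSD person detector, loads a sample frame from
+# $OPENDR_HOME, and replaces the detector's built-in NMS with Fast-NMS by passing a FastNMS object
+# through the `custom_nms` argument of SingleShotDetectorLearner.infer().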
+ + +from opendr.perception.object_detection_2d import FastNMS +from opendr.engine.data import Image +from opendr.perception.object_detection_2d import SingleShotDetectorLearner +from opendr.perception.object_detection_2d import draw_bounding_boxes +import os +OPENDR_HOME = os.environ['OPENDR_HOME'] + +ssd = SingleShotDetectorLearner(device='cuda') +ssd.download(".", mode="pretrained") +ssd.load("./ssd_default_person", verbose=True) +img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg') +if not isinstance(img, Image): + img = Image(img) +cluster_nms = FastNMS(device='cpu', cross_class=True) +boxes = ssd.infer(img, threshold=0.3, custom_nms=cluster_nms) +draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True) diff --git a/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg b/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg new file mode 100644 index 0000000000..5efb4d9298 Binary files /dev/null and b/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg differ diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md b/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md new file mode 100644 index 0000000000..c831924349 --- /dev/null +++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md @@ -0,0 +1,17 @@ +# Seq2Seq-NMS Demos + +This folder contains minimal code usage examples that showcase the basic functionality of the Seq2Seq-NMS implementation +provided by OpenDR. Specifically the following examples are provided: + +1. inference_demo.py: Perform inference on a single image. Setting `--device cpu` performs inference on CPU. + +2. eval_demo.py: Perform evaluation on the `WiderPersonDataset`, implemented in OpenDR format. The user must first download + the dataset and provide the path to the dataset root via `--data-root /path/to/wider_person`. + Setting `--device cpu` performs evaluation on CPU. + +3. train_demo.py: Fit learner to dataset. PASCAL VOC and COCO datasets are supported via `ExternalDataset` class and any + `DetectionDataset` can be used as well. Provided is an example of training on `WiderPersonDataset`. The user must set the + dataset type using the `--dataset` argument and provide the dataset root path with the `--data-root` argument. + Setting `--device cpu` performs training on CPU. Additional command line arguments can be set to change various training + hyperparameters, and running `python3 train_demo.py -h` prints information about them on stdout. + diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py new file mode 100644 index 0000000000..01437e578b --- /dev/null +++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py @@ -0,0 +1,49 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
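+
+# Evaluation demo for Seq2Seq-NMS. All flags are defined by the argparse arguments below; a typical
+# invocation (assuming the PETS data are available under --data_root) would be:
+#   python3 eval_demo.py --dataset PETS --split test --max_dt_boxes 800 --device cuda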
+ +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner +import os +import argparse +OPENDR_HOME = os.environ['OPENDR_HOME'] + +parser = argparse.ArgumentParser() +parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod", + choices=["fmod", "zeros"]) +parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP", + choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"]) +parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0) +parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) +parser.add_argument("--pretrained_model", help="Name of pretrained model", type=str, default='seq2seq_pets_jpd_fmod', + choices=['seq2seq_pets_jpd']) +parser.add_argument("--split", help="The split of the corresponding dataset", type=str, default='test', + choices=["test", "val", "train"]) +parser.add_argument("--max_dt_boxes", help="Maximum number of input RoIs fed to Seq2Seq-NMS", type=int, default=600) +parser.add_argument("--dataset", help="Dataset to train on", type=str, default="PETS", choices=["PETS", "COCO", + "TEST_MODULE"]) +parser.add_argument("--data_root", help="Dataset root folder", type=str, + default=os.path.join(OPENDR_HOME, + 'projects/perception/object_detection_2d/nms/seq2seq-nms/datasets')) +parser.add_argument("--use_ssd", help="Train using SSD as detector", type=bool, default=False) +parser.add_argument("--post_thres", help="Confidence threshold, used for RoI selection after seq2seq-nms rescoring", + type=float, default=0.0) + +args = parser.parse_args() +tmp_path = os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp') +seq2SeqNMSLearner = Seq2SeqNMSLearner(device=args.device, app_feats=args.app_feats, fmod_map_type=args.fmod_type, + iou_filtering=args.iou_filtering, + temp_path=tmp_path) +seq2SeqNMSLearner.download(model_name=args.pretrained_model, path=tmp_path) +seq2SeqNMSLearner.load(os.path.join(tmp_path, args.pretrained_model), verbose=True) +seq2SeqNMSLearner.eval(dataset=args.dataset, use_ssd=args.use_ssd, split=args.split, max_dt_boxes=args.max_dt_boxes, + datasets_folder=args.data_root, threshold=args.post_thres) diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py new file mode 100755 index 0000000000..c260546d13 --- /dev/null +++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py @@ -0,0 +1,48 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
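+
+# Inference demo for Seq2Seq-NMS: downloads a pretrained Seq2Seq-NMS model and the pretrained SSD person
+# detector, runs SSD on a sample frame and applies Seq2Seq-NMS through SSD's `custom_nms` argument.
+# Typical invocation (flags defined below): python3 inference_demo.py --device cuda --app_feats fmod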
+ +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner +from opendr.perception.object_detection_2d import SingleShotDetectorLearner +from opendr.perception.object_detection_2d import draw_bounding_boxes +from opendr.engine.data import Image +import os +import argparse +OPENDR_HOME = os.environ['OPENDR_HOME'] + +parser = argparse.ArgumentParser() +parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod", + choices=["fmod", "zeros"]) +parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP", + choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"]) +parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0) +parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) +parser.add_argument("--pretrained_model", help="Name of pretrained model", type=str, default='seq2seq_pets_jpd_fmod', + choices=['seq2seq_pets_jpd']) + +args = parser.parse_args() +tmp_path = os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp') +seq2SeqNMSLearner = Seq2SeqNMSLearner(device=args.device, app_feats=args.app_feats, fmod_map_type=args.fmod_type, + iou_filtering=args.iou_filtering, + temp_path=tmp_path) +seq2SeqNMSLearner.download(model_name=args.pretrained_model, path=tmp_path) +seq2SeqNMSLearner.load(os.path.join(tmp_path, args.pretrained_model), verbose=True) + +ssd = SingleShotDetectorLearner(device=args.device) +ssd.download(".", mode="pretrained") +ssd.load("./ssd_default_person", verbose=True) +img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg') +if not isinstance(img, Image): + img = Image(img) +boxes = ssd.infer(img, threshold=0.3, custom_nms=seq2SeqNMSLearner) +draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True) diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py new file mode 100644 index 0000000000..4facf2696b --- /dev/null +++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py @@ -0,0 +1,50 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
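+
+# Training demo for Seq2Seq-NMS. All flags are defined by the argparse arguments below; a typical
+# invocation (assuming the selected dataset is available under --data-root) would be:
+#   python3 train_demo.py --dataset PETS --n_epochs 8 --lr 1e-4 --max_dt_boxes 500 --device cuda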
+ + +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner +import os +import argparse +OPENDR_HOME = os.environ['OPENDR_HOME'] + +parser = argparse.ArgumentParser() +parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod", + choices=["fmod", "zeros"]) +parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP", + choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"]) +parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0) +parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) +parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=1e-4) +parser.add_argument("--n_epochs", help="Number of total epochs", type=int, default=10) +parser.add_argument("--tmp_path", help="Temporary path where weights will be saved", type=str, + default=os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp')) +parser.add_argument("--checkpoint_freq", help="Frequency in-between checkpoint saving", type=int, default=1) +parser.add_argument("--resume-from", help="Epoch to load checkpoint file and resume training from", type=int, default=0) +parser.add_argument("--dataset", help="Dataset to train on", type=str, default="PETS", choices=["PETS", "COCO", + "TEST_MODULE"]) +parser.add_argument("--use_ssd", help="Train using SSD as default detector", type=bool, default=False) +parser.add_argument("--max_dt_boxes", help="Maximum number of input RoIs fed to Seq2Seq-NMS", type=int, default=500) +parser.add_argument("--data-root", help="Dataset root folder", type=str, + default=os.path.join(OPENDR_HOME, + 'projects/perception/object_detection_2d/nms/seq2seq-nms/datasets')) +args = parser.parse_args() +seq2SeqNMSLearner = Seq2SeqNMSLearner(epochs=args.n_epochs, lr=args.lr, device=args.device, app_feats=args.app_feats, + fmod_map_type=args.fmod_type, iou_filtering=args.iou_filtering, + temp_path=args.tmp_path, checkpoint_after_iter=args.checkpoint_freq, + checkpoint_load_iter=args.resume_from) +seq2SeqNMSLearner.fit(dataset=args.dataset, use_ssd=args.use_ssd, + datasets_folder=args.data_root, silent=False, verbose=True, + max_dt_boxes=args.max_dt_boxes) +seq2SeqNMSLearner.save(path=os.path.join(args.tmp_path, 'saved_model'), current_epoch=args.n_epochs-1, + max_dt_boxes=args.max_dt_boxes) diff --git a/projects/perception/object_detection_2d/nms/soft_nms/README.md b/projects/perception/object_detection_2d/nms/soft_nms/README.md new file mode 100644 index 0000000000..a4c778f35c --- /dev/null +++ b/projects/perception/object_detection_2d/nms/soft_nms/README.md @@ -0,0 +1,5 @@ +# Soft-NMS Demos + +This folder contains minimal code usage examples that showcase the basic functionality of the Soft-NMS implementation +provided by OpenDR. Specifically the following examples are provided: +1. inference_demo.py: Perform inference on a single image. Setting `--device cpu` performs inference on CPU. 
diff --git a/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py new file mode 100644 index 0000000000..c05ff4c7c2 --- /dev/null +++ b/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py @@ -0,0 +1,31 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from opendr.perception.object_detection_2d import SoftNMS +from opendr.engine.data import Image +from opendr.perception.object_detection_2d import SingleShotDetectorLearner +from opendr.perception.object_detection_2d import draw_bounding_boxes +import os +OPENDR_HOME = os.environ['OPENDR_HOME'] + +ssd = SingleShotDetectorLearner(device='cuda') +ssd.download(".", mode="pretrained") +ssd.load("./ssd_default_person", verbose=True) +img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg') +if not isinstance(img, Image): + img = Image(img) +cluster_nms = SoftNMS(device='cpu', nms_type='gaussian') +boxes = ssd.infer(img, threshold=0.3, custom_nms=cluster_nms) +draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True) diff --git a/src/opendr/perception/object_detection_2d/__init__.py b/src/opendr/perception/object_detection_2d/__init__.py index 61428cb1bd..9fac6ba424 100644 --- a/src/opendr/perception/object_detection_2d/__init__.py +++ b/src/opendr/perception/object_detection_2d/__init__.py @@ -11,6 +11,11 @@ from opendr.perception.object_detection_2d.utils.vis_utils import draw_bounding_boxes +from opendr.perception.object_detection_2d.nms.cluster_nms.cluster_nms import ClusterNMS +from opendr.perception.object_detection_2d.nms.fast_nms.fast_nms import FastNMS +from opendr.perception.object_detection_2d.nms.soft_nms.soft_nms import SoftNMS +from opendr.perception.object_detection_2d.nms.seq2seq_nms.seq2seq_nms_learner import Seq2SeqNMSLearner + __all__ = ['CenterNetDetectorLearner', 'DetrLearner', 'GemLearner', 'RetinaFaceLearner', 'SingleShotDetectorLearner', 'YOLOv3DetectorLearner', 'WiderPersonDataset', 'WiderFaceDataset', - 'transforms', 'draw_bounding_boxes'] + 'transforms', 'draw_bounding_boxes', 'ClusterNMS', 'FastNMS', 'SoftNMS', 'Seq2SeqNMSLearner'] diff --git a/src/opendr/perception/object_detection_2d/datasets/transforms.py b/src/opendr/perception/object_detection_2d/datasets/transforms.py index 5aa6f1e327..08c0f34ecf 100644 --- a/src/opendr/perception/object_detection_2d/datasets/transforms.py +++ b/src/opendr/perception/object_detection_2d/datasets/transforms.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ import cv2 import numpy as np import mxnet as mx @@ -141,3 +142,20 @@ def transform_test(imgs, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): if len(tensors) == 1: return tensors[0], origs[0] return tensors, origs + + +def pad_test(img, min_size=512): + h_pad_size = 0 + min_dim = 2 + np.argmin([img.shape[2:4]]) + img_padded = img + if img.shape[min_dim] < min_size: + h_pad_size = int((min_size - img.shape[min_dim]) / 2.0) + if min_dim == 2: + img_padded = mx.nd.pad(img, mode="constant", constant_value=0, + pad_width=(0, 0, 0, 0, h_pad_size, + h_pad_size, 0, 0)) + else: + img_padded = mx.nd.pad(img, mode="constant", constant_value=0, + pad_width=(0, 0, 0, 0, 0, 0, + h_pad_size, h_pad_size)) + return img_padded diff --git a/src/opendr/perception/object_detection_2d/dependencies.ini b/src/opendr/perception/object_detection_2d/dependencies.ini index c6beccc16e..c181807f92 100644 --- a/src/opendr/perception/object_detection_2d/dependencies.ini +++ b/src/opendr/perception/object_detection_2d/dependencies.ini @@ -7,6 +7,7 @@ python=mxnet==1.8.0 tqdm pycocotools>=2.0.4 easydict + gdown numba==0.53.0 linux=libopenblas-dev diff --git a/src/opendr/perception/object_detection_2d/nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md b/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md new file mode 100644 index 0000000000..410c887028 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md @@ -0,0 +1,28 @@ +Cluster-NMS +====== + +This folder contains an implementation of Cluster-NMS [[1]](#cluster_nms-1). + +Sources +------ +Large parts of code are taken from [here](https://github.com/Zzh-tju/CIoU) with modifications to make it compatible with OpenDR specifications. The original code is licensed under the GNU General Public License v3.0: + +``` +This folder contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +Copyright (c) 2020 Zheng, Zhaohui. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +``` + +[1] Enhancing Geometric Factors in Model Learning and Inference for Object Detection and Instance Segmentation, +[ArXiv](https://arxiv.org/abs/2005.03572). diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/cluster_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py b/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py new file mode 100644 index 0000000000..ee34323346 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py @@ -0,0 +1,510 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +# Copyright (c) 2020 Zheng, Zhaohui. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from opendr.perception.object_detection_2d.nms.utils import NMSCustom +from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard, diou, distance +from opendr.engine.target import BoundingBox, BoundingBoxList +import numpy as np +import torch + + +class ClusterNMS(NMSCustom): + def __init__(self, nms_type='default', cross_class=True, device='cuda', iou_thres=0.45, top_k=400, post_k=100): + self.device = device + self.nms_types = ['default', 'diou', 'spm', 'spm_dist', 'spm_dist_weighted'] + if nms_type not in self.nms_types: + raise ValueError('Type: ' + nms_type + ' of Cluster-NMS is not supported.') + else: + self.nms_type = nms_type + self.iou_thres = iou_thres + self.top_k = top_k + self.post_k = post_k + self.cross_class = cross_class + + def set_iou_thres(self, iou_thres=0.45): + self.iou_thres = iou_thres + + def top_k(self, top_k=400): + self.top_k = top_k + + def post_k(self, post_k=100): + self.post_k = post_k + + def set_type(self, nms_type=None): + if nms_type not in self.nms_types: + raise ValueError('Type: ' + nms_type + ' of Cluster-NMS is not supported.') + else: + self.nms_type = nms_type + + def set_cross_class(self, cross_class=True): + self.cross_class = cross_class + + def run_nms(self, boxes=None, scores=None, img=None, threshold=0.2): + + if isinstance(boxes, np.ndarray): + boxes = torch.tensor(boxes, device=self.device) + elif torch.is_tensor(boxes): + if self.device == 'cpu': + boxes = boxes.cpu() + elif self.device == 'cuda': + boxes = boxes.cuda() + + if isinstance(scores, np.ndarray): + scores = torch.tensor(scores, device=self.device) + elif torch.is_tensor(scores): + if self.device == 'cpu': + scores = scores.cpu() + elif self.device == 'cuda': + scores = scores.cuda() + + scores = torch.transpose(scores, dim0=1, dim1=0) + + if self.nms_type == 'default': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_nms_default(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_nms_default(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + elif self.nms_type == 'diou': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_diounms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_diounms(boxes=boxes, scores=scores, 
iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + elif self.nms_type == 'spm': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_SPM_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_SPM_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + elif self.nms_type == 'spm_dist': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_SPM_dist_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_SPM_dist_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + + elif self.nms_type == 'spm_dist_weighted': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_SPM_dist_weighted_nms(boxes=boxes, scores=scores, + iou_thres=self.iou_thres, + top_k=self.top_k, + post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_SPM_dist_weighted_nms(boxes=boxes, scores=scores, + iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + + keep_ids = torch.where(scores > threshold) + scores = scores[keep_ids].cpu().numpy() + classes = classes[keep_ids].cpu().numpy() + boxes = boxes[keep_ids].cpu().numpy() + bounding_boxes = BoundingBoxList([]) + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=classes[idx], + score=scores[idx]) + bounding_boxes.data.append(bbox) + + return bounding_boxes, [boxes, classes, scores] + + +def cc_cluster_nms_default(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + # Collapse all the classes into 1 + + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + + idx_out = torch.where(maxA > iou_thres) + scores[idx_out] = 0 + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_nms_default(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + maxA = None + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + keep = (scores > 0.00) + discard = (maxA > iou_thres) + scores[discard] = 0 + # Assign each kept detection to its corresponding class + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cc_cluster_diounms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, classes = scores.max(dim=0) + _, idx = 
scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + iou = diou(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + + idx_out = torch.where(maxA > iou_thres) + scores[idx_out] = 0 + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_diounms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + + iou = diou(boxes, boxes).triu_(diagonal=1) + B = iou + maxA = None + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + keep = (scores > 0.00) + discard = (maxA > iou_thres) + scores[discard] = 0 + # Assign each kept detection to its corresponding class + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + + return boxes, classes, scores + + +def cc_cluster_SPM_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + scores = torch.prod(torch.exp(-B ** 2 / 0.2), 0) * scores + + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_SPM_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + keep = (scores > 0.00) + scores = torch.prod(torch.exp(-B ** 2 / 0.2), 1) * scores + # Assign each kept detection to its corresponding class + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cc_cluster_SPM_dist_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = 
classes[idx] + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + D = distance(boxes, boxes) + X = (B >= 0).float() + scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 0) * scores + + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_SPM_dist_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + D = distance(boxes, boxes) + X = (B >= 0).float() + keep = (scores > 0.00) + scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 1) * scores + + # Assign each kept detection to its corresponding class + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + + return boxes, classes, scores + + +def cc_cluster_SPM_dist_weighted_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + n = len(scores) + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + D = distance(boxes, boxes) + X = (B >= 0).float() + scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 0) * scores + eye = torch.eye(n) + if boxes.device.type == 'cuda': + eye = eye.cuda() + weights = (B * (B > 0.8).float() + eye) * (scores.reshape((1, n))) + xx1 = boxes[:, 0].expand(n, n) + yy1 = boxes[:, 1].expand(n, n) + xx2 = boxes[:, 2].expand(n, n) + yy2 = boxes[:, 3].expand(n, n) + + weightsum = weights.sum(dim=1) + xx1 = (xx1 * weights).sum(dim=1) / (weightsum) + yy1 = (yy1 * weights).sum(dim=1) / (weightsum) + xx2 = (xx2 * weights).sum(dim=1) / (weightsum) + yy2 = (yy2 * weights).sum(dim=1) / (weightsum) + boxes = torch.stack([xx1, yy1, xx2, yy2], 1) + + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_SPM_dist_weighted_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + A = None + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= 
iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + D = distance(boxes, boxes) + X = (B >= 0).float() + keep = (scores > 0.0) + + scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 1) * scores + + E = keep.float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + _, n = scores.size() + eye = torch.eye(n).expand(num_classes, n, n) + if boxes.device.type == 'cuda': + eye = eye.cuda() + weights = (B * (B > 0.8).float() + eye) * ( + scores.unsqueeze(2).expand(num_classes, n, n)) + xx1 = boxes[:, :, 0].unsqueeze(1).expand(num_classes, n, n) + yy1 = boxes[:, :, 1].unsqueeze(1).expand(num_classes, n, n) + xx2 = boxes[:, :, 2].unsqueeze(1).expand(num_classes, n, n) + yy2 = boxes[:, :, 3].unsqueeze(1).expand(num_classes, n, n) + + weightsum = weights.sum(dim=2) + xx1 = (xx1 * weights).sum(dim=2) / (weightsum) + yy1 = (yy1 * weights).sum(dim=2) / (weightsum) + xx2 = (xx2 * weights).sum(dim=2) / (weightsum) + yy2 = (yy2 * weights).sum(dim=2) / (weightsum) + boxes = torch.stack([xx1, yy1, xx2, yy2], 2) + + # Assign each kept detection to its corresponding class + classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep) + classes = classes[keep] + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + + return boxes, classes, scores diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md b/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md new file mode 100644 index 0000000000..1b6165122d --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md @@ -0,0 +1,28 @@ +Fast-NMS +====== + +This folder contains an implementation of Fast-NMS [[1]](#fast_nms-1). + +Sources +------ +Large parts of code are taken from [here](https://github.com/Zzh-tju/CIoU) with modifications to make it compatible with OpenDR specifications. The original code is licensed under the GNU General Public License v3.0: + +``` +This folder contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +Copyright (c) 2020 Zheng, Zhaohui. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +``` + +[1] YOLACT: Real-time Instance Segmentation, +[ArXiv](https://arxiv.org/abs/1904.02689). 
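+
+A minimal usage sketch of the `FastNMS` wrapper added in this folder is given below.
+The boxes, scores, and import path are illustrative assumptions (a single-class, three-box example), not an official snippet:
+
+```python
+import numpy as np
+from opendr.perception.object_detection_2d.nms.fast_nms.fast_nms import FastNMS
+
+# Hypothetical detector output: candidate boxes in (x1, y1, x2, y2) format and
+# per-box class scores of shape (num_boxes, num_classes); here a single class.
+boxes = np.array([[10., 20., 110., 220.],
+                  [12., 22., 112., 218.],
+                  [300., 150., 380., 330.]], dtype=np.float32)
+scores = np.array([[0.90], [0.85], [0.60]], dtype=np.float32)
+
+nms = FastNMS(cross_class=False, device='cpu', iou_thres=0.45, top_k=400, post_k=100)
+bounding_boxes, _ = nms.run_nms(boxes=boxes, scores=scores, threshold=0.2)
+# The two heavily overlapping boxes collapse to a single detection; the third survives.
+print(len(bounding_boxes.data))
+```
+
+The wrapper converts NumPy inputs to torch tensors on the requested device, runs either the class-aware (`fast_nms`) or cross-class (`cc_fast_nms`) variant, and returns an OpenDR `BoundingBoxList` together with the raw boxes, classes, and scores.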
diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/fast_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py b/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py new file mode 100644 index 0000000000..ace8b37089 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py @@ -0,0 +1,147 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +# Copyright (c) 2020 Zheng, Zhaohui. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from opendr.perception.object_detection_2d.nms.utils import NMSCustom +from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard +from opendr.engine.target import BoundingBox, BoundingBoxList +import torch +import numpy as np + + +class FastNMS(NMSCustom): + def __init__(self, cross_class=False, device='cuda', iou_thres=0.45, top_k=400, post_k=100): + self.device = device + self.iou_thres = iou_thres + self.top_k = top_k + self.post_k = post_k + self.cross_class = cross_class + + def set_iou_thres(self, iou_thres=0.45): + self.iou_thres = iou_thres + + def top_k(self, top_k=400): + self.top_k = top_k + + def post_k(self, post_k=100): + self.post_k = post_k + + def set_cross_class(self, cross_class=False): + self.cross_class = cross_class + + def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None): + + if isinstance(boxes, np.ndarray): + boxes = torch.tensor(boxes, device=self.device) + elif torch.is_tensor(boxes): + if self.device == 'cpu': + boxes = boxes.cpu() + elif self.device == 'cuda': + boxes = boxes.cuda() + + if isinstance(scores, np.ndarray): + scores = torch.tensor(scores, device=self.device) + elif torch.is_tensor(scores): + if self.device == 'cpu': + scores = scores.cpu() + elif self.device == 'cuda': + scores = scores.cuda() + + scores = torch.transpose(scores, dim0=1, dim1=0) + if self.cross_class: + [boxes, classes, scores] = cc_fast_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = fast_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + + keep_ids = torch.where(scores > threshold) + scores = scores[keep_ids].cpu().numpy() + classes = classes[keep_ids].cpu().numpy() + boxes = boxes[keep_ids].cpu().numpy() + bounding_boxes = BoundingBoxList([]) + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=classes[idx], + score=scores[idx]) + bounding_boxes.data.append(bbox) + + return bounding_boxes, [boxes, classes, scores] + + +def fast_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + scores, idx = scores.sort(1, descending=True) + boxes = boxes[idx, :] + + scores = scores[:, :top_k] + boxes = boxes[:, :top_k] + + num_classes, num_dets = scores.shape + + boxes = boxes.view(num_classes, num_dets, 4) + + iou = jaccard(boxes, boxes).triu_(diagonal=1) + iou_max, _ = iou.max(dim=1) + + keep = (iou_max <= iou_thres) + keep *= (scores > 0.01) + classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep) + classes = classes[keep] + + boxes = boxes[keep] + scores = scores[keep] + + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cc_fast_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + iou = jaccard(boxes, boxes).triu_(diagonal=1) + maxA, _ = torch.max(iou, dim=0) + + idx_out = torch.where(maxA > iou_thres) + scores[idx_out] = 0 + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores diff --git 
a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md new file mode 100644 index 0000000000..4e03fce80c --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md @@ -0,0 +1,17 @@ +Seq2Seq-NMS +====== + +This folder contains an implementation of Seq2Seq-NMS [[1]](#seq2seq_nms-1). + +TABLE-1: Average Precision (AP) achieved by pretrained models on the person detection task on the validation and test sets. The maximum number of RoIs employed for the performance evaluation was set to 800. +| **Pretrained Model** | **Dataset** | **Detector** | **Type of Appearance-based Features** | **Pre-processing IoU Threshold** | **AP@0.5 on validation set** | **AP@0.5 on test set** | +|:----------------------:|:-----------:|:------------:|:-------------------------------------:|:--------------------------------:|:----------------------------:|:----------------------:| +| seq2seq_pets_jpd_fmod | PETS | JPD | FMoD | 0.8 | 80.2% | 84.3% | +| seq2seq_pets_ssd_fmod | PETS | SSD | FMoD | 0.8 | 77.4% | 79.1% | +| seq2seq_coco_frcn_fmod | COCO | FRCN | FMoD | - | 68.1% \* | 67.5% \*\* | +| seq2seq_coco_ssd_fmod | COCO | SSD | FMoD | - | 41.8% \* | 42.4% \*\* | + +\* The minival set was used as validation set.
+\*\* The minitest set was used as test set. + +[1] Neural Attention-driven Non-Maximum Suppression for Person Detection, [TechRxiv](https://www.techrxiv.org/articles/preprint/Neural_Attention-driven_Non-Maximum_Suppression_for_Person_Detection/16940275). diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py new file mode 100755 index 0000000000..4b5d5ec2f5 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py @@ -0,0 +1,200 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torchvision +import numpy as np +import cv2 +import random +from opendr.engine.data import Image + + +class FMoD: + def __init__(self, roi_pooling_dim=None, pyramid_depth=3, map_type="SIFT", map_bin=False, + resize_dim=None, device='cpu'): + if roi_pooling_dim is None: + roi_pooling_dim = 160 + self.roi_pooling_dim = [roi_pooling_dim, roi_pooling_dim] + self.pyramid_depth = pyramid_depth + self.boxes_p = [] + self.rp_size = [] + for p in range(self.pyramid_depth): + s = 1 / pow(2, p) + for i in np.arange(0, 1.0, s): + for j in np.arange(0, 1.0, s): + self.boxes_p.append([0, int(i * self.roi_pooling_dim[0]), int(j * self.roi_pooling_dim[1]), + int((i + s) * self.roi_pooling_dim[0]), + int((j + s) * self.roi_pooling_dim[1])]) + self.rp_size.append([int(self.roi_pooling_dim[0] * s), int(self.roi_pooling_dim[1] * s)]) + self.device = device + self.boxes_p = torch.tensor(self.boxes_p).float() + if "cuda" in self.device: + self.boxes_p = self.boxes_p.to(self.device) + self.resc = 1.0 + self.map = None + self.resize_dim = resize_dim + self.map_type = map_type + self.map_bin = map_bin + self.mean = None + self.std = None + + def set_mean_std(self, mean_values=None, std_values=None): + self.mean = torch.tensor(mean_values).float() + self.std = torch.tensor(std_values).float() + if "cuda" in self.device: + self.mean = self.mean.to(self.device) + self.std = self.std.to(self.device) + + def extract_maps(self, img=None, augm=False): + if img is None: + raise Exception('Image is not provided to FMoD...') + + if not isinstance(img, Image): + img = Image(img) + img = img.convert(format='channels_last', channel_order='bgr') + + if self.resize_dim is not None: + max_dim = max(img.shape[0], img.shape[1]) + if max_dim > self.resize_dim: + self.resc = float(self.resize_dim) / max_dim + img = cv2.resize(img, (int(img.shape[1] * self.resc), int(img.shape[0] * self.resc))) + if augm: + img = augm_brightness(img, 0.75, 1.25) + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if self.map_type == "EDGEMAP": + dst_img = np.copy(img) + dst_img = cv2.GaussianBlur(dst_img, (3, 3), 0, 0, cv2.BORDER_DEFAULT) + gradX = 
cv2.Scharr(dst_img, ddepth=cv2.CV_16S, dx=1, dy=0, scale=1, delta=0, + borderType=cv2.BORDER_DEFAULT) + gradY = cv2.Scharr(dst_img, ddepth=cv2.CV_16S, dx=0, dy=1, scale=1, delta=0, + borderType=cv2.BORDER_DEFAULT) + absGradX = cv2.convertScaleAbs(gradX) + absGradY = cv2.convertScaleAbs(gradY) + absGradXCV32 = absGradX.astype("float32") + absGradYCV32 = absGradY.astype("float32") + self.map = cv2.magnitude(absGradXCV32 / 255.0, absGradYCV32 / 255.0) + self.map = self.map * 255 + if self.map_bin: + self.map = cv2.threshold(self.map, 240, 255, cv2.THRESH_BINARY)[1] + else: + kps = None + if self.map_type == "FAST": + fast = cv2.FastFeatureDetector_create() + kps = fast.detect(img, None) + elif self.map_type == "AKAZE": + akaze = cv2.AKAZE_create() + kps, desc = akaze.detectAndCompute(img, None) + elif self.map_type == "BRISK": + brisk = cv2.BRISK_create() + kps = brisk.detect(img, None) + elif self.map_type == "ORB": + orb = cv2.ORB_create() + kps = orb.detect(img, None) + else: + raise Exception("Map type not supported...") + self.map = np.zeros(img.shape, dtype=np.uint8) + coords_x = [] + coords_y = [] + resps = [] + for kp in kps: + coords_x.append(int(kp.pt[0])) + coords_y.append(int(kp.pt[1])) + resps.append(255 * kp.response) + if not self.map_bin: + self.map[coords_y, coords_x] = resps + else: + self.map[coords_y, coords_x] = 255 + self.map = torch.from_numpy(self.map).float() + if "cuda" in self.device: + self.map = self.map.to(self.device) + + def extract_FMoD_feats(self, boxes): + num_rois = boxes.shape[0] + map_gpu = self.map / 255.0 + map_gpu = map_gpu.unsqueeze(0).unsqueeze(0) + descs = [] + pooled_regions = torchvision.ops.roi_align(map_gpu, [self.resc * boxes], + output_size=self.rp_size[0], spatial_scale=1.0, + aligned=True) + pooled_regions = pooled_regions.unsqueeze(1) + descs.append(self.get_descriptor(pooled_regions)) + for i in range(0, self.pyramid_depth - 1): + pooled_regions_pyr = pooled_regions.contiguous().view(num_rois, pooled_regions.shape[-2], + pooled_regions.shape[-1]) + pooled_regions_pyr = pooled_regions_pyr.unsqueeze(0) + pooled_regions_pyr = torchvision.ops.roi_align(pooled_regions_pyr, self.boxes_p[(pow(4 + 1, i)):( + (pow(4 + 1, i)) + pow(4, (i + 1))), :], output_size=self.rp_size[i + 1], aligned=True) + pooled_regions_pyr = pooled_regions_pyr.permute(1, 0, 2, 3) + pooled_regions_pyr = pooled_regions_pyr.contiguous().view(num_rois, 1, pooled_regions_pyr.shape[-3], + pooled_regions_pyr.shape[-2], + pooled_regions_pyr.shape[-1]) + descs.append(self.get_descriptor(pooled_regions_pyr)) + + descs = torch.cat(descs, dim=1) + if self.mean is not None and self.std is not None: + descs = (descs - self.mean) / self.std + descs = torch.clamp(descs, -50, 50) + return descs + + def release_maps(self): + self.map = None + + def get_descriptor(self, patches): + dt = [] + # row data + dt.append(patches.mean(dim=3)) + # collumn data + dt.append(patches.mean(dim=4)) + # block data + dt.append(torch.flatten(patches, start_dim=3)) + + means = [] + stds = [] + diffs = [] + zscores = [] + skews = [] + kurtoses = [] + powers = [] + for i in range(len(dt)): + if i == 2: + means.append(dt[i].mean(dim=3)) + else: + means.append(dt[i][:, :, :, 0:-1:5].mean(dim=3)) + stds.append(dt[i].std(dim=3)) + diffs.append((dt[i] - means[i].unsqueeze(-1).expand(dt[i].size()))) + zscores.append(diffs[i] / stds[i].unsqueeze(-1).expand(dt[i].size())) + zscores[i] = torch.where(stds[i].unsqueeze(-1).expand(zscores[i].shape) > 0, zscores[i], + torch.zeros_like(zscores[i])) + 
skews.append(torch.mean(torch.pow(zscores[i], 3.0), -1)) + kurtoses.append(torch.mean(torch.pow(zscores[i], 4.0), -1) - 3.0) + powers.append((dt[i] * dt[i]).mean(-1)) + descs = [] + for i in range(len(dt)): + descs.append(torch.cat((means[i], stds[i], skews[i], kurtoses[i], powers[i]), 2)) + desc = torch.cat((descs[0], descs[1], descs[2]), 2) + desc = desc.contiguous().view(desc.shape[0], desc.shape[1] * desc.shape[2]) + return desc + + +def augm_brightness(img, low, high): + value = random.uniform(low, high) + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + hsv = np.array(hsv, dtype=np.float64) + hsv[:, :, 1] = hsv[:, :, 1] * value + hsv[:, :, 1][hsv[:, :, 1] > 255] = 255 + hsv[:, :, 2] = hsv[:, :, 2] * value + hsv[:, :, 2][hsv[:, :, 2] > 255] = 255 + hsv = np.array(hsv, dtype=np.uint8) + img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) + return img diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py new file mode 100755 index 0000000000..953892d04e --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py @@ -0,0 +1,196 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
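+
+# Architecture overview: Seq2SeqNet scores each candidate RoI for suppression.
+# Geometric relations between RoIs (and, optionally, appearance descriptors such
+# as FMoD) are embedded into query/key representations, refined by a stack of
+# Joint_processing_unit blocks built on masked multi-head self-attention, and
+# mapped to a per-RoI confidence through a final sigmoid layer.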
+ +import torch.nn as nn +import torch +import math +import torch.nn.functional as F + + +class Seq2SeqNet(nn.Module): + def __init__(self, dropout=0.01, use_app_feats=True, app_input_dim=315, geom_input_dim=14, lq_dim=256, sq_dim=128, + num_JPUs=4, device='cuda'): + super().__init__() + self.use_app_feats = use_app_feats + self.dropout_q = nn.Dropout(dropout * 0.25) + self.num_JPUs = num_JPUs + self.joint_processing_units = [] + self.device = device + for i in range(self.num_JPUs): + self.joint_processing_units.append(Joint_processing_unit(lq_dim=lq_dim, sq_dim=sq_dim, dropout=dropout)) + if "cuda" in self.device: + self.joint_processing_units[i] = self.joint_processing_units[i].to(self.device) + self.joint_processing_units = nn.ModuleList(self.joint_processing_units) + if self.use_app_feats: + q_app_dims = [180, 180] + self.q_app_layers = nn.Sequential( + nn.Linear(app_input_dim, q_app_dims[0]), + nn.GELU(), + nn.Dropout(dropout * 0.25), + nn.LayerNorm(q_app_dims[0], eps=1e-6), + nn.Linear(q_app_dims[0], q_app_dims[1]), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(q_fmod_dims[1], eps=1e-6) + ) + + q_geom_dims = [180, 180] + self.q_geom_layers = nn.Sequential( + nn.Linear(geom_input_dim, q_geom_dims[0]), + nn.GELU(), + nn.LayerNorm(q_geom_dims[0], eps=1e-6), + nn.Linear(q_geom_dims[0], q_geom_dims[1]), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(q_geom_dims[1], eps=1e-6) + ) + + k_geom_dims = [180, 180] + self.k_geom_layers = nn.Sequential( + nn.Linear(geom_input_dim, k_geom_dims[0]), + nn.GELU(), + nn.LayerNorm(k_geom_dims[0], eps=1e-6), + nn.Linear(k_geom_dims[0], k_geom_dims[1]), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(k_geom_dims[1], eps=1e-6) + ) + + q_final_in_dim = q_geom_dims[-1] + k_final_in_dim = k_geom_dims[-1] + if self.use_app_feats: + q_final_in_dim = q_geom_dims[-1] + q_app_dims[-1] + k_final_in_dim = k_geom_dims[-1] + q_app_dims[-1] + + self.q_full_layers = nn.Sequential( + nn.LayerNorm(q_final_in_dim, eps=1e-6), + nn.Linear(q_final_in_dim, lq_dim), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(lq_dim, eps=1e-6) + ) + self.k_full_layers = nn.Sequential( + nn.LayerNorm(k_final_in_dim, eps=1e-6), + nn.Linear(k_final_in_dim, sq_dim), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(sq_dim, eps=1e-6) + ) + self.q_final_layers = nn.Sequential( + nn.LayerNorm(lq_dim, eps=1e-6), + nn.Linear(lq_dim, sq_dim), + nn.GELU(), + nn.Dropout(dropout * 0.25), + nn.LayerNorm(sq_dim, eps=1e-6), + nn.Linear(sq_dim, 1), + nn.Sigmoid() + ) + + def forward(self, q_geom_feats=None, k_geom_feats=None, msk=None, app_feats=None): + q_feats = self.q_geom_layers(q_geom_feats) + k_feats = self.k_geom_layers(k_geom_feats) + + if self.use_app_feats and app_feats is not None: + app_feats = self.q_app_layers(app_feats) + q_feats = torch.cat((q_feats, app_feats), dim=2) + k_feats = torch.cat((k_feats, app_feats.transpose(0, 1).repeat(k_feats.shape[1], 1, 1)), dim=2) + + elif app_feats is None: + raise UserWarning("Appearance-based representations not provided.") + q_feats = self.q_full_layers(q_feats) + k_feats = self.k_full_layers(k_feats) + for i in range(self.num_JPUs): + q_feats, k_feats = self.joint_processing_units[i](q_feats, k_feats, msk) + scores = self.q_final_layers(q_feats) + return scores.squeeze(1) + + +class Joint_processing_unit(nn.Module): + def __init__(self, heads=2, lq_dim=256, sq_dim=128, dropout=0.1): + super().__init__() + self.q_block1 = nn.Sequential( + nn.LayerNorm(lq_dim, eps=1e-6), + nn.Linear(lq_dim, 
sq_dim), + nn.GELU(), + nn.Dropout(dropout) + ) + self.norm_layer_q = nn.LayerNorm(sq_dim, eps=1e-6) + self.norm_layer_k = nn.LayerNorm(sq_dim, eps=1e-6) + self.self_attention_module = Self_attention_module(heads=heads, l_dim=lq_dim, s_dim=sq_dim, dropout=dropout) + self.scale_layer = Scale_layer(s_dim=sq_dim) + + self.q_block2 = nn.Sequential( + nn.LayerNorm(sq_dim, eps=1e-6), + nn.Linear(sq_dim, lq_dim), + nn.GELU(), + nn.Dropout(dropout) + ) + + def forward(self, q_feats, k_feats, msk): + q_atten = self.q_block1(q_feats) + kv_atten_in = self.norm_layer_k(k_feats) + q_atten_in = self.norm_layer_q(q_atten) + q_atten = q_atten + self.self_attention_module(q=q_atten_in, k=kv_atten_in, v=kv_atten_in, mask=msk) + k_feats = k_feats + self.scale_layer(q_atten).transpose(0, 1).repeat(q_atten.shape[0], 1, 1) + q_feats = q_feats + self.q_block2(q_atten) + return q_feats, k_feats + + +class Self_attention_module(nn.Module): + def __init__(self, heads, l_dim, s_dim, dropout=0.1): + super().__init__() + self.l_dim = l_dim + self.s_dim = s_dim + self.qkv_split_dim = s_dim // heads + self.h = heads + self.q_linear = nn.Linear(self.s_dim, self.s_dim) + self.v_linear = nn.Linear(self.s_dim, self.s_dim) + self.k_linear = nn.Linear(self.s_dim, self.s_dim) + + self.dropout = nn.Dropout(dropout) + self.q_out = nn.Sequential( + nn.Linear(self.s_dim, self.s_dim), + nn.GELU(), + nn.Dropout(dropout) + ) + + def forward(self, q, k, v, mask=None): + samples_dim = q.size(0) + k = self.k_linear(k).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2) + q = self.q_linear(q).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2) + v = self.v_linear(v).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2) + scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.qkv_split_dim) + + mask = mask.unsqueeze(1) + mask = mask.unsqueeze(1) + mask = mask.repeat(1, scores.shape[1], 1, 1) + scores = torch.mul(scores, mask) + scores = scores.masked_fill(mask == 0, -1e9) + + scores = F.softmax(scores, dim=-1) + scores = self.dropout(scores) + q = torch.matmul(scores, v) + q = q.transpose(1, 2).contiguous().view(samples_dim, -1, self.s_dim) + q = self.q_out(q) + return q + + +class Scale_layer(nn.Module): + def __init__(self, s_dim=1): + super().__init__() + self.scale_weights = nn.Parameter(torch.empty(s_dim), requires_grad=True) + nn.init.uniform_(self.scale_weights, a=0.01, b=2.0) + + def forward(self, feats): + return feats * self.scale_weights diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py new file mode 100644 index 0000000000..fd8a97d16c --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py @@ -0,0 +1,812 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
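+
+# Module overview: Seq2SeqNMSLearner wraps Seq2SeqNet behind the OpenDR Learner and
+# NMSCustom interfaces. fit() trains on a Dataset_NMS split with weighted binary
+# cross-entropy against matched ground-truth boxes, eval() reports COCO-style AP,
+# and infer()/run_nms() rescore detector RoIs (optionally pre-filtered with a plain
+# IoU-based NMS) and return an OpenDR BoundingBoxList plus the raw arrays.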
+ +from opendr.engine.learners import Learner +from opendr.engine.constants import OPENDR_SERVER_URL +from opendr.engine.target import BoundingBox, BoundingBoxList +from opendr.engine.data import Image +from opendr.perception.object_detection_2d.nms.seq2seq_nms.algorithm.seq2seq_model import Seq2SeqNet +from opendr.perception.object_detection_2d.nms.utils import NMSCustom +from opendr.perception.object_detection_2d.nms.utils.nms_dataset import Dataset_NMS +from opendr.perception.object_detection_2d.nms.seq2seq_nms.algorithm.fmod import FMoD +from opendr.perception.object_detection_2d.nms.utils.nms_utils import drop_dets, det_matching, \ + run_coco_eval, filter_iou_boxes, bb_intersection_over_union, compute_class_weights, apply_torchNMS +import torch +import torch.nn.functional as F +import pickle +import numpy as np +import os +from urllib.request import urlretrieve +import torch.nn as nn +from tensorboardX import SummaryWriter +import torch.optim as optim +from tqdm import tqdm +import collections +import json +import zipfile + + +class Seq2SeqNMSLearner(Learner, NMSCustom): + def __init__(self, lr=0.0001, epochs=8, device='cuda', temp_path='./temp', checkpoint_after_iter=0, + checkpoint_load_iter=0, log_after=10000, variant='medium', + iou_filtering=0.8, dropout=0.025, app_feats='fmod', + fmod_map_type='EDGEMAP', fmod_map_bin=True, app_input_dim=None): + super(Seq2SeqNMSLearner, self).__init__(lr=lr, batch_size=1, + checkpoint_after_iter=checkpoint_after_iter, + checkpoint_load_iter=checkpoint_load_iter, + temp_path=temp_path, device=device, backbone='default') + self.epochs = epochs + self.variant = variant + self.app_feats = app_feats + self.use_app_feats = False + if self.app_feats is not None: + self.use_app_feats = True + self.fmod_map_type = None + self.fmod_map_bin = None + self.fmod_map_res_dim = None + self.fmod_pyramid_lvl = None + self.fmod_roi_pooling_dim = None + if self.app_feats == 'fmod': + self.fmod_map_type = fmod_map_type + self.fmod_roi_pooling_dim = 160 + self.fmod_map_res_dim = 600 + self.fmod_pyramid_lvl = 3 + self.sef_fmod_architecture() + self.fmod_feats_dim = 0 + for i in range(0, self.fmod_pyramid_lvl): + self.fmod_feats_dim = self.fmod_feats_dim + 15 * (pow(4, i)) + self.fmod_map_bin = fmod_map_bin + self.app_input_dim = self.fmod_feats_dim + self.fmod_mean_std = None + elif self.app_feats == 'zeros' or self.app_feats == 'custom': + if app_input_dim is None: + raise Exception("The dimension of the input appearance-based features is not provided...") + else: + self.app_input_dim = app_input_dim + if self.app_feats == 'custom': + raise AttributeError("Custom appearance-based features are not yet supported.") + self.lq_dim = 256 + self.sq_dim = 128 + self.geom_input_dim = 14 + self.num_JPUs = 4 + self.geom_input_dim = 14 + self.set_architecture() + self.dropout = dropout + self.temp_path = temp_path + if not os.path.isdir(self.temp_path): + os.mkdir(self.temp_path) + self.checkpoint_load_iter = checkpoint_load_iter + self.log_after = log_after + self.iou_filtering = iou_filtering + self.classes = None + self.class_ids = None + self.fMoD = None + self.fmod_init_file = None + if self.app_feats == 'fmod': + self.fMoD = FMoD(roi_pooling_dim=self.fmod_roi_pooling_dim, pyramid_depth=self.fmod_pyramid_lvl, + resize_dim=self.fmod_map_res_dim, + map_type=self.fmod_map_type, map_bin=self.fmod_map_bin, device=self.device) + self.init_model() + if "cuda" in self.device: + self.model = self.model.to(self.device) + + def fit(self, dataset, logging_path='', 
logging_flush_secs=30, silent=True, + verbose=True, nms_gt_iou=0.5, max_dt_boxes=400, datasets_folder='./datasets', + use_ssd=False, lr_step=True): + + dataset_nms = Dataset_NMS(path=datasets_folder, dataset_name=dataset, split='train', use_ssd=use_ssd, + device=self.device) + if self.classes is None: + self.classes = dataset_nms.classes + self.class_ids = dataset_nms.class_ids + + if logging_path != '' and logging_path is not None: + logging = True + file_writer = SummaryWriter(logging_path, flush_secs=logging_flush_secs) + else: + logging = False + file_writer = None + + checkpoints_folder = self.temp_path + if self.checkpoint_after_iter != 0 and not os.path.exists(checkpoints_folder): + os.makedirs(checkpoints_folder) + + if not silent and verbose: + print("Model trainable parameters:", self.count_parameters()) + + self.model.train() + if "cuda" in self.device: + self.model = self.model.to(self.device) + + if self.epochs is None: + raise ValueError("Training epochs not specified") + elif self.epochs <= self.checkpoint_load_iter: + raise ValueError("Training epochs are less than those of the loaded model") + + if self.app_feats == 'fmod': + if self.fmod_mean_std is None: + self.fmod_mean_std = self.load_FMoD_init_from_dataset(dataset=dataset, map_type=self.fmod_map_type, + fmod_pyramid_lvl=self.fmod_pyramid_lvl, + datasets_folder=datasets_folder, + verbose=verbose) + self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std']) + + start_epoch = 0 + drop_after_epoch = [] + if lr_step and self.epochs > 1: + drop_after_epoch = [int(self.epochs * 0.5)] + if self.epochs > 3: + drop_after_epoch.append(int(self.epochs * 0.7)) + + train_ids = np.arange(len(dataset_nms.src_data)) + total_loss_iter = 0 + total_loss_epoch = 0 + optimizer = optim.Adam(self.model.parameters(), lr=self.lr, betas=(0.9, 0.99), eps=1e-9) # HERE + scheduler = None + if len(drop_after_epoch) > 0: + scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.1) + + num_iter = 0 + training_weights = compute_class_weights(pos_weights=[0.9, 0.1], max_dets=max_dt_boxes, dataset_nms=dataset_nms) + # Single class NMS only. 
+ class_index = 1 + training_dict = {"cross_entropy_loss": []} + for epoch in range(start_epoch, self.epochs): + pbar = None + if not silent: + pbarDesc = "Epoch #" + str(epoch) + " progress" + pbar = tqdm(desc=pbarDesc, total=len(train_ids)) + np.random.shuffle(train_ids) + for sample_id in train_ids: + + if self.log_after != 0 and num_iter > 0 and num_iter % self.log_after == 0: + if logging: + file_writer.add_scalar(tag="cross_entropy_loss", + scalar_value=total_loss_iter/self.log_after, + global_step=num_iter) + if verbose: + print(''.join(['\nEpoch: {}', + ' Iter: {}, cross_entropy_loss: {}']).format(epoch, num_iter, + total_loss_iter/self.log_after)) + total_loss_iter = 0 + + image_fln = dataset_nms.src_data[sample_id]['filename'] + if len(dataset_nms.src_data[sample_id]['dt_boxes'][class_index]) > 0: + dt_boxes = torch.tensor( + dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 0:4]).float() + dt_scores = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 4]).float() + dt_scores, dt_scores_ids = torch.sort(dt_scores, descending=True) + dt_boxes = dt_boxes[dt_scores_ids] + else: + if not silent: + pbar.update(1) + num_iter = num_iter + 1 + continue + gt_boxes = torch.tensor([]).float() + if len(dataset_nms.src_data[sample_id]['gt_boxes'][class_index]) > 0: + gt_boxes = torch.tensor(dataset_nms.src_data[sample_id]['gt_boxes'][class_index]).float() + image_path = os.path.join(datasets_folder, dataset, image_fln) + img_res = dataset_nms.src_data[sample_id]['resolution'][::-1] + + if "cuda" in self.device: + dt_boxes = dt_boxes.to(self.device) + dt_scores = dt_scores.to(self.device) + gt_boxes = gt_boxes.to(self.device) + + val_ids = torch.logical_and((dt_boxes[:, 2] - dt_boxes[:, 0]) > 4, + (dt_boxes[:, 3] - dt_boxes[:, 1]) > 4) + dt_boxes = dt_boxes[val_ids, :] + dt_scores = dt_scores[val_ids] + + dt_boxes, dt_scores = drop_dets(dt_boxes, dt_scores) + if dt_boxes.shape[0] < 1: + if not silent: + pbar.update(1) + num_iter = num_iter + 1 + continue + if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0: + dt_boxes, dt_scores = apply_torchNMS(boxes=dt_boxes, scores=dt_scores, + iou_thres=self.iou_filtering) + + dt_boxes = dt_boxes[:max_dt_boxes] + dt_scores = dt_scores[:max_dt_boxes] + app_feats = None + if self.app_feats == 'fmod': + img = Image.open(image_path) + img = img.convert(format='channels_last', channel_order='bgr') + self.fMoD.extract_maps(img=img, augm=True) + app_feats = self.fMoD.extract_FMoD_feats(dt_boxes) + app_feats = torch.unsqueeze(app_feats, dim=1) + elif self.app_feats == 'zeros': + app_feats = torch.zeros([dt_boxes.shape[0], 1, self.app_input_dim]) + if "cuda" in self.device: + app_feats = app_feats.to(self.device) + elif self.app_feats == 'custom': + raise AttributeError("Custom appearance-based features are not yet supported.") + + msk = self.compute_mask(dt_boxes, iou_thres=0.2, extra=0.1) + q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=dt_boxes, scores=dt_scores, + resolution=img_res) + preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk, + app_feats=app_feats) + preds = torch.clamp(preds, 0.001, 1 - 0.001) + + labels = det_matching(scores=preds, dt_boxes=dt_boxes, gt_boxes=gt_boxes, + iou_thres=nms_gt_iou, device=self.device) + weights = (training_weights[class_index][1] * labels + training_weights[class_index][0] * ( + 1 - labels)) + + e = torch.distributions.uniform.Uniform(0.001, 0.005).sample([labels.shape[0], 1]) + if "cuda" in self.device: + weights = 
weights.to(self.device) + e = e.to(self.device) + labels = labels * (1 - e) + (1 - labels) * e + ce_loss = F.binary_cross_entropy(preds, labels, reduction="none") + loss = (ce_loss * weights).sum() + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # Memory leak if not loss not detached in total_loss_iter and total_loss_epoch computations + loss_t = loss.detach().cpu().numpy() + total_loss_iter = total_loss_iter + loss_t + total_loss_epoch = total_loss_epoch + loss_t + num_iter = num_iter + 1 + if not silent: + pbar.update(1) + if not silent: + pbar.close() + if verbose: + print(''.join(['\nEpoch: {}', + ' cross_entropy_loss: {}\n']).format(epoch, + total_loss_epoch/len(train_ids))) + training_dict['cross_entropy_loss'].append(total_loss_epoch/len(train_ids)) + if self.checkpoint_after_iter != 0 and epoch % self.checkpoint_after_iter == self.checkpoint_after_iter - 1: + snapshot_name = '{}/checkpoint_epoch_{}'.format(checkpoints_folder, epoch) + self.save(path=snapshot_name, optimizer=optimizer, scheduler=scheduler, + current_epoch=epoch, max_dt_boxes=max_dt_boxes) + snapshot_name_lw = '{}/last_weights'.format(checkpoints_folder) + self.save(path=snapshot_name_lw, optimizer=optimizer, scheduler=scheduler, + current_epoch=epoch, max_dt_boxes=max_dt_boxes) + total_loss_epoch = 0 + if scheduler is not None: + scheduler.step() + if logging: + file_writer.close() + return training_dict + + def eval(self, dataset, split='test', verbose=True, max_dt_boxes=400, threshold=0.0, + datasets_folder='./datasets', use_ssd=False): + + dataset_nms = Dataset_NMS(path=datasets_folder, dataset_name=dataset, split=split, use_ssd=use_ssd, + device=self.device) + + if self.classes is None: + self.classes = dataset_nms.classes + self.class_ids = dataset_nms.class_ids + + annotations_filename = dataset_nms.annotation_file + + eval_folder = self.temp_path + if not os.path.isdir(os.path.join(self.temp_path)): + os.mkdir(os.path.join(self.temp_path)) + if not os.path.isdir(eval_folder): + os.mkdir(eval_folder) + output_file = os.path.join(eval_folder, 'detections.json') + + if self.app_feats == 'fmod': + if self.fmod_mean_std is None: + self.fmod_mean_std = self.load_FMoD_init_from_dataset(dataset=dataset, map_type=self.fmod_map_type, + fmod_pyramid_lvl=self.fmod_pyramid_lvl, + datasets_folder=datasets_folder, + verbose=verbose) + self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std']) + + self.model = self.model.eval() + if "cuda" in self.device: + self.model = self.model.to(self.device) + + train_ids = np.arange(len(dataset_nms.src_data)) + nms_results = [] + pbar_eval = None + if verbose: + pbarDesc = "Evaluation progress" + pbar_eval = tqdm(desc=pbarDesc, total=len(train_ids)) + for sample_id in train_ids: + image_fln = dataset_nms.src_data[sample_id]['filename'] + + image_path = os.path.join(datasets_folder, dataset, image_fln) + img_res = dataset_nms.src_data[sample_id]['resolution'][::-1] + # Single class NMS only. 
+ class_index = 1 + if len(dataset_nms.src_data[sample_id]['dt_boxes'][class_index]) > 0: + dt_boxes = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 0:4]).float() + dt_scores = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 4]).float() + dt_scores, dt_scores_ids = torch.sort(dt_scores, descending=True) + dt_boxes = dt_boxes[dt_scores_ids] + else: + pbar_eval.update(1) + continue + + if "cuda" in self.device: + dt_boxes = dt_boxes.to(self.device) + dt_scores = dt_scores.to(self.device) + + val_ids = torch.logical_and((dt_boxes[:, 2] - dt_boxes[:, 0]) > 4, + (dt_boxes[:, 3] - dt_boxes[:, 1]) > 4) + dt_boxes = dt_boxes[val_ids, :] + dt_scores = dt_scores[val_ids] + + if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0: + dt_boxes, dt_scores = apply_torchNMS(boxes=dt_boxes, scores=dt_scores, iou_thres=self.iou_filtering) + + dt_boxes = dt_boxes[:max_dt_boxes] + dt_scores = dt_scores[:max_dt_boxes] + app_feats = None + if self.app_feats == 'fmod': + img = Image.open(image_path) + img = img.convert(format='channels_last', channel_order='bgr') + self.fMoD.extract_maps(img=img, augm=False) + app_feats = self.fMoD.extract_FMoD_feats(dt_boxes) + app_feats = torch.unsqueeze(app_feats, dim=1) + elif self.app_feats == 'zeros': + app_feats = torch.zeros([dt_boxes.shape[0], 1, self.app_input_dim]) + if "cuda" in self.device: + app_feats = app_feats.to(self.device) + elif self.app_feats == 'custom': + raise AttributeError("Custom appearance-based features are not yet supported.") + msk = self.compute_mask(dt_boxes, iou_thres=0.2, extra=0.1) + q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=dt_boxes, scores=dt_scores, + resolution=img_res) + with torch.no_grad(): + preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk, + app_feats=app_feats) + bboxes = dt_boxes.cpu().numpy().astype('float64') + preds = preds.cpu().detach() + if threshold > 0.0: + ids = (preds > threshold) + preds = preds[ids] + bboxes = bboxes[ids.numpy().squeeze(-1), :] + for j in range(len(preds)): + nms_results.append({ + 'image_id': dataset_nms.src_data[sample_id]['id'], + 'bbox': [bboxes[j][0], bboxes[j][1], bboxes[j][2] - bboxes[j][0], bboxes[j][3] - bboxes[j][1]], + 'category_id': class_index, + 'score': np.float64(preds[j]) + }) + pbar_eval.update(1) + pbar_eval.close() + if verbose: + print('Writing results json to {}'.format(output_file)) + with open(output_file, 'w') as fid: + json.dump(nms_results, fid, indent=2) + eval_result = run_coco_eval(gt_file_path=os.path.join(dataset_nms.path, 'annotations', annotations_filename), + dt_file_path=output_file, only_classes=[1], + verbose=verbose, max_dets=[max_dt_boxes]) + os.remove(output_file) + if verbose: + for i in range(len(eval_result)): + print('Evaluation results (num_dets={})'.format(str(eval_result[i][1]))) + print(eval_result[i][0][0][1]) + print(eval_result[i][0][1][1]) + print(eval_result[i][0][2][1]) + print(eval_result[i][0][3][1]) + print('\n') + return eval_result + + def save(self, path, verbose=False, optimizer=None, scheduler=None, current_epoch=None, max_dt_boxes=400): + fname = path.split('/')[-1] + dir_name = path.replace('/' + fname, '') + if not os.path.isdir(dir_name): + os.makedirs(dir_name) + custom_dict = {'state_dict': self.model.state_dict(), 'current_epoch': current_epoch} + if optimizer is not None: + custom_dict['optimizer'] = optimizer.state_dict() + if scheduler is not None: + custom_dict['scheduler'] = scheduler.state_dict() + 
torch.save(custom_dict, path + '.pth') + + metadata = {"model_paths": [fname + '.pth'], "framework": "pytorch", "has_data": False, + "inference_params": {}, "optimized": False, "optimizer_info": {}, "backbone": {}, + "format": "pth", "classes": self.classes, "app_feats": self.app_feats, + "lq_dim": self.lq_dim, "sq_dim": self.sq_dim, "num_JPUs": self.num_JPUs, + "geom_input_dim": self.geom_input_dim, "app_input_dim": self.app_input_dim, + "max_dt_boxes": max_dt_boxes, "variant": self.variant} + if self.app_feats == 'fmod': + metadata["fmod_map_type"] = self.fmod_map_type + metadata["fmod_map_bin"] = self.fmod_map_bin + metadata["fmod_roi_pooling_dim"] = self.fmod_roi_pooling_dim + metadata["fmod_map_res_dim"] = self.fmod_map_res_dim + metadata["fmod_pyramid_lvl"] = self.fmod_pyramid_lvl + metadata["fmod_normalization"] = "fmod_normalization.pkl" + with open(os.path.join(dir_name, 'fmod_normalization.pkl'), 'wb') as f: + pickle.dump(self.fmod_mean_std, f) + with open(path + '.json', 'w', encoding='utf-8') as f: + json.dump(metadata, f, ensure_ascii=False, indent=4) + if verbose: + print("Saved Pytorch model.") + + def init_model(self): + if self.model is None: + self.model = Seq2SeqNet(dropout=self.dropout, use_app_feats=self.use_app_feats, + app_input_dim=self.app_input_dim, + geom_input_dim=self.geom_input_dim, lq_dim=self.lq_dim, sq_dim=self.sq_dim, + num_JPUs=self.num_JPUs, device=self.device) + for p in self.model.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + else: + raise UserWarning("Tried to initialize model while model is already initialized.") + + def load(self, path, verbose=False): + if os.path.isdir(path): + model_name = 'last_weights' + dir_path = path + else: + model_name = os.path.basename(os.path.normpath(path)).split('.')[0] + dir_path = os.path.dirname(os.path.normpath(path)) + + if verbose: + print("Model name:", model_name, "-->", os.path.join(dir_path, model_name + ".json")) + with open(os.path.join(dir_path, model_name + ".json"), encoding='utf-8-sig') as f: + metadata = json.load(f) + pth_path = os.path.join(dir_path, metadata["model_paths"][0]) + if verbose: + print("Loading checkpoint:", pth_path) + try: + checkpoint = torch.load(pth_path, map_location=torch.device(self.device)) + except FileNotFoundError as e: + e.strerror = "File " + pth_path + "not found." + raise e + if 'fmod_normalization' in metadata: + pkl_fmod = os.path.join(dir_path, metadata["fmod_normalization"]) + if verbose: + print("Loading FMoD normalization values:", pkl_fmod) + try: + with open(pkl_fmod, 'rb') as f: + self.fmod_mean_std = pickle.load(f) + self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std']) + except FileNotFoundError as e: + e.strerror = "File " + pkl_fmod + "not found." + raise e + + self.assign_params(metadata=metadata, verbose=verbose) + self.load_state(checkpoint) + if verbose: + print("Loaded parameters and metadata.") + return True + + def assign_params(self, metadata, verbose): + + if verbose and self.variant is not None and self.variant != metadata["variant"]: + print("Incompatible value for the attribute \"variant\". It is now set to: " + + str(metadata["variant"])) + self.variant = metadata["variant"] + if verbose and self.geom_input_dim is not None and self.geom_input_dim != metadata["geom_input_dim"]: + print("Incompatible value for the attribute \"geom_input_dim\". 
It is now set to: " + + str(metadata["geom_input_dim"])) + self.geom_input_dim = metadata["geom_input_dim"] + if verbose and self.app_input_dim is not None and self.app_input_dim != metadata["app_input_dim"]: + print("Incompatible value for the attribute \"app_input_dim\". It is now set to: " + + str(metadata["app_input_dim"])) + self.app_input_dim = metadata["app_input_dim"] + if verbose and self.app_feats != metadata["app_feats"]: + print("Incompatible value for the attribute \"app_feats\". It is now set to: " + + str(metadata["app_feats"])) + self.app_feats = metadata["app_feats"] + if verbose and self.fmod_map_type is not None and self.fmod_map_type != metadata["fmod_map_type"]: + print("Incompatible value for the attribute \"fmod_map_type\". It is now set to: " + + str(metadata["fmod_map_type"])) + if "fmod_map_type" in metadata: + self.fmod_map_type = metadata["fmod_map_type"] + if verbose and self.fmod_map_bin is not None and self.fmod_map_bin != metadata["fmod_map_bin"]: + print("Incompatible value for the attribute \"fmod_map_bin\". It is now set to: " + + str(metadata["fmod_map_bin"])) + if "fmod_map_bin" in metadata: + self.fmod_map_bin = metadata["fmod_map_bin"] + if verbose and self.fmod_roi_pooling_dim is not None and \ + self.fmod_roi_pooling_dim != metadata["fmod_roi_pooling_dim"]: + print("Incompatible value for the attribute \"fmod_roi_pooling_dim\". It is now set to: " + + str(metadata["fmod_roi_pooling_dim"])) + if "fmod_roi_pooling_dim" in metadata: + self.fmod_roi_pooling_dim = metadata["fmod_roi_pooling_dim"] + if verbose and self.fmod_map_res_dim is not None and \ + self.fmod_map_res_dim != metadata["fmod_map_res_dim"]: + print("Incompatible value for the attribute \"fmod_map_res_dim\". It is now set to: " + + str(metadata["fmod_map_res_dim"])) + if "fmod_roi_pooling_dim" in metadata: + self.fmod_roi_pooling_dim = metadata["fmod_roi_pooling_dim"] + if verbose and self.fmod_pyramid_lvl is not None and \ + self.fmod_pyramid_lvl != metadata["fmod_pyramid_lvl"]: + print("Incompatible value for the attribute \"fmod_pyramid_lvl\". It is now set to: " + + str(metadata["fmod_pyramid_lvl"])) + if "fmod_pyramid_lvl" in metadata: + self.fmod_pyramid_lvl = metadata["fmod_pyramid_lvl"] + if verbose and self.lq_dim is not None and \ + self.lq_dim != metadata["lq_dim"]: + print("Incompatible value for the attribute \"lq_dim\". It is now set to: " + + str(metadata["lq_dim"])) + self.lq_dim = metadata["lq_dim"] + if verbose and self.sq_dim is not None and self.sq_dim != metadata["sq_dim"]: + print("Incompatible value for the attribute \"sq_dim\". It is now set to: " + + str(metadata["sq_dim"])) + self.sq_dim = metadata["sq_dim"] + if verbose and self.num_JPUs is not None and self.num_JPUs != metadata["num_JPUs"]: + print("Incompatible value for the attribute \"num_JPUs\". 
It is now set to: " + + str(metadata["num_JPUs"])) + self.num_JPUs = metadata["num_JPUs"] + if verbose and 'max_dt_boxes' in metadata: + print('Model is trained with ' + str(metadata['max_dt_boxes']) + ' as the maximum number of detections.') + + def load_state(self, checkpoint=None): + if checkpoint is None: + for p in self.model.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + else: + try: + source_state = checkpoint['state_dict'] + except KeyError: + source_state = checkpoint + target_state = self.model.state_dict() + new_target_state = collections.OrderedDict() + for target_key, target_value in target_state.items(): + if target_key in source_state and source_state[target_key].size() == target_state[target_key].size(): + new_target_state[target_key] = source_state[target_key] + else: + new_target_state[target_key] = target_state[target_key] + + self.model.load_state_dict(new_target_state) + + def count_parameters(self): + + if self.model is None: + raise UserWarning("Model is not initialized, can't count trainable parameters.") + return sum(p.numel() for p in self.model.parameters() if p.requires_grad) + + def download(self, path=None, model_name='seq2seq_pets_jpd_fmod', verbose=False, + url=OPENDR_SERVER_URL + "perception/object_detection_2d/nms/"): + + supported_pretrained_models = ["seq2seq_pets_jpd_fmod", "seq2seq_pets_ssd_fmod", + "seq2seq_coco_frcn_fmod", "seq2seq_coco_ssd_fmod"] + + if model_name not in supported_pretrained_models: + str_error = model_name + " pretrained model is not supported. The available pretrained models are: " + for i in range(len(supported_pretrained_models)): + str_error = str_error + supported_pretrained_models[i] + ", " + str_error = str_error[:-2] + '.' + raise ValueError(str_error) + + if path is None: + path = self.temp_path + + if not os.path.exists(path): + os.makedirs(path) + + if verbose: + print("Downloading pretrained model...") + + file_url = os.path.join(url, "pretrained", model_name + '.zip') + try: + urlretrieve(file_url, os.path.join(path, model_name + '.zip')) + with zipfile.ZipFile(os.path.join(path, model_name + '.zip'), 'r') as zip_ref: + zip_ref.extractall(path) + os.remove(os.path.join(path, model_name + '.zip')) + except: + raise UserWarning('Pretrained model not found on server.') + + def infer(self, boxes=None, scores=None, boxes_sorted=False, max_dt_boxes=400, img_res=None, threshold=0.1): + bounding_boxes = BoundingBoxList([]) + if scores.shape[0] == 0: + return bounding_boxes + if scores.shape[1] > 1: + raise ValueError('Multi-class NMS is not supported in Seq2Seq-NMS yet.') + if boxes.shape[0] != scores.shape[0]: + raise ValueError('Scores and boxes must have the same size in dim 0.') + if "cuda" in self.device: + boxes = boxes.to(self.device) + scores = scores.to(self.device) + + scores = scores.squeeze(-1) + keep_ids = torch.where(scores > 0.05)[0] + scores = scores[keep_ids] + boxes = boxes[keep_ids, :] + if not boxes_sorted: + scores, scores_ids = torch.sort(scores, dim=0, descending=True) + boxes = boxes[scores_ids] + + val_ids = torch.logical_and((boxes[:, 2] - boxes[:, 0]) > 4, + (boxes[:, 3] - boxes[:, 1]) > 4) + boxes = boxes[val_ids, :] + scores = scores[val_ids] + + if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0: + boxes, scores = apply_torchNMS(boxes=boxes, scores=scores, iou_thres=self.iou_filtering) + + boxes = boxes[:max_dt_boxes] + scores = scores[:max_dt_boxes] + app_feats = None + + if self.app_feats == 'fmod': + app_feats = self.fMoD.extract_FMoD_feats(boxes) + app_feats = 
torch.unsqueeze(app_feats, dim=1) + elif self.app_feats == 'zeros': + app_feats = torch.zeros([boxes.shape[0], 1, self.app_input_dim]) + if "cuda" in self.device: + app_feats = app_feats.to(self.device) + elif self.app_feats == 'custom': + raise AttributeError("Custom appearance-based features are not yet supported.") + + msk = self.compute_mask(boxes, iou_thres=0.2, extra=0.1) + q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=boxes, scores=scores, + resolution=img_res) + + with torch.no_grad(): + preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk, + app_feats=app_feats) + + mask = torch.where(preds > threshold)[0] + if mask.size == 0: + return BoundingBoxList([]) + preds = preds[mask].cpu().detach().numpy() + boxes = boxes[mask, :].cpu().numpy() + + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=0, + score=preds[idx]) + bounding_boxes.data.append(bbox) + return bounding_boxes, [boxes, np.zeros(scores.shape[0]), preds] + + def optimize(self, **kwargs): + """This method is not used in this implementation.""" + raise NotImplementedError + + def reset(self): + """This method is not used in this implementation.""" + return NotImplementedError + + def run_nms(self, boxes=None, scores=None, boxes_sorted=False, top_k=400, img=None, threshold=0.2): + + if self.app_feats == 'fmod': + if not isinstance(img, Image): + img = Image(img) + _img = img.convert("channels_last", "rgb") + self.fMoD.extract_maps(img=_img, augm=False) + + if isinstance(boxes, np.ndarray): + boxes = torch.tensor(boxes, device=self.device) + elif torch.is_tensor(boxes): + if "cuda" in self.device: + boxes = boxes.to(self.device) + + if isinstance(scores, np.ndarray): + scores = torch.tensor(scores, device=self.device) + elif torch.is_tensor(scores): + if "cuda" in self.device: + scores = scores.to(self.device) + boxes = self.infer(boxes=boxes, scores=scores, boxes_sorted=boxes_sorted, max_dt_boxes=top_k, + img_res=img.opencv().shape[::-1][1:]) + return boxes + + def set_architecture(self): + if self.variant == 'light': + self.lq_dim = 160 + elif self.variant == 'full': + self.lq_dim = 320 + if self.variant == 'light': + self.sq_dim = 80 + elif self.variant == 'full': + self.sq_dim = 160 + if self.variant == 'light': + self.num_JPUs = 2 + + def sef_fmod_architecture(self): + if self.variant == 'light': + self.fmod_roi_pooling_dim = 120 + if self.variant == 'light': + self.fmod_map_res_dim = 480 + elif self.variant == 'full': + self.fmod_map_res_dim = 800 + if self.variant == 'light': + self.fmod_pyramid_lvl = 2 + + def compute_mask(self, boxes=None, iou_thres=0.2, extra=0.1): + relations = filter_iou_boxes(boxes, iou_thres=iou_thres) + mask1 = torch.tril(relations).float() + mask2 = extra * torch.triu(relations, diagonal=1).float() + mask = mask1 + mask2 + return mask + + def compute_geometrical_feats(self, boxes, scores, resolution): + boxBs = boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1) + boxAs = boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1) + scoresBs = scores.unsqueeze(0).unsqueeze(-1).repeat(scores.shape[0], 1, 1) + scoresAs = scores.unsqueeze(1).unsqueeze(1).repeat(1, scores.shape[0], 1) + + scale_div = [resolution[1] / 20, resolution[0] / 20] + dx = ((boxBs[:, :, 0] - boxAs[:, :, 0] + boxBs[:, :, 2] - boxAs[:, :, 2]) / 2).unsqueeze(-1) + dy = ((boxBs[:, :, 1] - boxAs[:, :, 1] + boxBs[:, :, 3] - boxAs[:, :, 3]) / 2).unsqueeze(-1) + dxy = dx * dx + dy * dy + dxy = dxy / 
(scale_div[0] * scale_div[0] + scale_div[1] * scale_div[1]) + dx = (dx / scale_div[0]) + dy = (dy / scale_div[1]) + sx = boxBs[:, :, 2] - boxBs[:, :, 0] + sx_1 = (sx / (boxAs[:, :, 2] - boxAs[:, :, 0])).unsqueeze(-1) + sx_2 = (sx / scale_div[0]).unsqueeze(-1) + sy = boxBs[:, :, 3] - boxBs[:, :, 1] + sy_1 = (sy / (boxAs[:, :, 3] - boxAs[:, :, 1])).unsqueeze(-1) + sy_2 = (sy / scale_div[1]).unsqueeze(-1) + scl = (boxBs[:, :, 2] - boxBs[:, :, 0]) * (boxBs[:, :, 3] - boxBs[:, :, 1]) + scl_1 = (scl / ((boxAs[:, :, 2] - boxAs[:, :, 0]) * (boxAs[:, :, 3] - boxAs[:, :, 1]))).unsqueeze(-1) + scl_2 = (scl / (scale_div[0] * scale_div[1])).unsqueeze(-1) + del scl + + scr_1 = 5 * scoresBs + scr_2 = scr_1 - 5 * scoresAs + + sr_1 = torch.unsqueeze((boxBs[:, :, 3] - boxBs[:, :, 1]) / (boxBs[:, :, 2] - boxBs[:, :, 0]), dim=-1) + sr_2 = torch.unsqueeze(((boxBs[:, :, 3] - boxBs[:, :, 1]) / (boxBs[:, :, 2] - boxBs[:, :, 0])) / ( + (boxAs[:, :, 3] - boxAs[:, :, 1]) / (boxAs[:, :, 2] - boxAs[:, :, 0])), dim=-1) + + ious = 5 * (bb_intersection_over_union(boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1), + boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1))).unsqueeze(-1) + enc_vers_all = torch.cat((dx, dy, dxy, sx_1, sx_2, sy_1, sy_2, ious, scl_1, scl_2, scr_1, scr_2, sr_1, sr_2), + dim=2) + enc_vers = enc_vers_all.diagonal(dim1=0, dim2=1).transpose(0, 1).unsqueeze(1) + return enc_vers, enc_vers_all + + def load_FMoD_init_from_dataset(self, dataset=None, map_type='edgemap', fmod_pyramid_lvl=3, + datasets_folder='./datasets', + map_bin=True, verbose=False): + fmod_dir = os.path.join(datasets_folder, dataset, 'FMoD') + if not os.path.exists(fmod_dir): + os.makedirs(fmod_dir, exist_ok=True) + map_type_c = map_type + if map_bin: + map_type_c = map_type_c + '_B' + fmod_filename = dataset + '_' + map_type_c + '_' + str(fmod_pyramid_lvl) + '.pkl' + fmod_filename = fmod_filename.lower() + fmod_stats = None + if not os.path.exists(os.path.join(fmod_dir, fmod_filename)): + file_url = os.path.join(OPENDR_SERVER_URL + 'perception/object_detection_2d/nms/FMoD', fmod_filename) + try: + urlretrieve(file_url, os.path.join(fmod_dir, fmod_filename)) + except: + if verbose: + print( + 'Normalization files not found on FTP server. Normalization will be performed setting \u03BC = ' + '0 and \u03C3 = 1.') + fmod_feats_dim = 0 + for i in range(0, fmod_pyramid_lvl): + fmod_feats_dim = fmod_feats_dim + 15 * (pow(4, i)) + self.fmod_init_file = None + return {'mean': np.zeros(fmod_feats_dim), 'std': np.ones(fmod_feats_dim)} + self.fmod_init_file = os.path.join(fmod_dir, fmod_filename) + fmod_stats = self.load_FMoD_init(self.fmod_init_file) + return fmod_stats + + def load_FMoD_init(self, path=None): + try: + with open(path, 'rb') as fp: + fmod_stats = pickle.load(fp) + map_type = list(fmod_stats.keys())[0] + fmod_stats = fmod_stats[map_type] + except EnvironmentError as e: + e.strerror = 'FMoD initialization .pkl file not found' + raise e + return fmod_stats diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md b/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md new file mode 100644 index 0000000000..6b8c2513d0 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md @@ -0,0 +1,35 @@ +Soft-NMS +====== + +This folder contains an implementation of Soft-NMS [[1]](#soft_nms-1). + +Sources +------ +Large parts of code are taken from [here](https://github.com/DocF/Soft-NMS) with modifications to make it compatible with OpenDR specifications. 
The original code is licensed under the MIT license: + +``` +MIT License + +Copyright (c) 2020 DocF + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +``` + +[1] Soft-NMS -- Improving Object Detection With One Line of Code, +[ArXiv](https://arxiv.org/abs/1704.04503). diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/soft_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py b/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py new file mode 100644 index 0000000000..a0c668c850 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py @@ -0,0 +1,129 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License +# +# Copyright (c) 2020 DocF +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
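# The SoftNMS class defined below wraps the classical Soft-NMS score-decay scheme behind the
# OpenDR NMSCustom interface: instead of discarding detections that overlap a higher-scoring
# box, their confidences are rescaled with either a linear (1 - IoU) or a Gaussian
# exp(-IoU^2 / sigma) weight, and boxes whose decayed score falls below the given threshold
# are dropped at the end.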
+ +from opendr.perception.object_detection_2d.nms.utils import NMSCustom +from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard +from opendr.engine.target import BoundingBox, BoundingBoxList +import torch +import numpy as np + + +class SoftNMS(NMSCustom): + def __init__(self, nms_type='linear', device='cuda', nms_thres=None, top_k=400, post_k=100): + self.nms_types = ['linear', 'gaussian'] + if nms_type not in self.nms_types: + raise ValueError('Type: ' + nms_type + ' of Soft-NMS is not supported.') + else: + self.nms_type = nms_type + if nms_thres is None: + if nms_type == 'linear': + nms_thres = 0.3 + elif nms_type == 'gaussian': + nms_thres = 0.5 + self.device = device + self.nms_thres = nms_thres + self.top_k = top_k + self.post_k = post_k + + def nms_thres(self, nms_thres=0.45): + self.nms_thres = nms_thres + + def set_top_k(self, top_k=400): + self.top_k = top_k + + def set_post_k(self, post_k=100): + self.post_k = post_k + + def set_nms_type(self, nms_type='linear'): + if nms_type not in self.nms_types: + raise ValueError('Type: ' + nms_type + ' of Soft-NMS is not supported.') + else: + self.nms_type = nms_type + + def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None): + + if isinstance(boxes, np.ndarray): + boxes = torch.tensor(boxes, device=self.device) + elif torch.is_tensor(boxes): + if self.device == 'cpu': + boxes = boxes.cpu() + elif self.device == 'cuda': + boxes = boxes.cuda() + + if isinstance(scores, np.ndarray): + scores = torch.tensor(scores, device=self.device) + elif torch.is_tensor(scores): + if self.device == 'cpu': + scores = scores.cpu() + elif self.device == 'cuda': + scores = scores.cuda() + + scores, classes = scores.max(dim=1) + _, idx = scores.sort(0, descending=True) + idx = idx[:self.top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + + dets = torch.cat((boxes, scores.unsqueeze(-1)), dim=1) + + i = 0 + while dets.shape[0] > 0: + scores[i] = dets[0, 4] + iou = jaccard(dets[:1, :-1], dets[1:, :-1]).triu_(diagonal=0).squeeze(0) + weight = torch.ones_like(iou) + if self.nms_type == 'linear': + weight[iou > self.nms_thres] -= iou[iou > self.nms_thres] + elif self.nms_type == 'gaussian': + weight = np.exp(-(iou * iou) / self.nms_thres) + + dets[1:, 4] *= weight + dets = dets[1:, :] + i = i + 1 + keep_ids = torch.where(scores > threshold) + scores = scores[keep_ids].cpu().numpy() + classes = classes[keep_ids].cpu().numpy() + boxes = boxes[keep_ids].cpu().numpy() + bounding_boxes = BoundingBoxList([]) + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=classes[idx], + score=scores[idx]) + bounding_boxes.data.append(bbox) + + return bounding_boxes, [boxes, classes, scores] diff --git a/src/opendr/perception/object_detection_2d/nms/utils/__init__.py b/src/opendr/perception/object_detection_2d/nms/utils/__init__.py new file mode 100644 index 0000000000..2d130e14b8 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/utils/__init__.py @@ -0,0 +1,3 @@ +from opendr.perception.object_detection_2d.nms.utils.nms_custom import NMSCustom + +__all__ = ['NMSCustom'] diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py new file mode 100644 index 0000000000..7d551cd401 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py @@ -0,0 +1,24 @@ +# Copyright 2020-2022 OpenDR European Project +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod + + +class NMSCustom(ABC): + def __init__(self, device='cpu'): + self.device = device + + @abstractmethod + def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None, device='cpu'): + pass diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py new file mode 100644 index 0000000000..202f7f18c5 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py @@ -0,0 +1,404 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from opendr.engine.datasets import Dataset +from opendr.engine.data import Image +from opendr.perception.object_detection_2d.datasets.transforms import BoundingBoxListToNumpyArray +from opendr.engine.constants import OPENDR_SERVER_URL +from pycocotools.coco import COCO +import os +from urllib.request import urlretrieve +import ssl +import time +from zipfile import ZipFile +import tarfile +import pickle +import numpy as np +import math +from tqdm import tqdm +import gc + + +class Dataset_NMS(Dataset): + def __init__(self, path=None, dataset_name=None, split=None, use_ssd=True, device='cuda'): + super().__init__() + available_dataset = ['COCO', 'PETS', 'TEST_MODULE'] + self.dataset_sets = {'train': None, + 'val': None, + 'test': None} + if dataset_name not in available_dataset: + except_str = 'Unsupported dataset: ' + dataset_name + '. Currently available are:' + for j in range(len(available_dataset)): + except_str = except_str + ' \'' + available_dataset[j] + '\'' + if j < len(available_dataset) - 1: + except_str = except_str + ',' + except_str = except_str + '.' 
+ raise ValueError(except_str) + + ssl._create_default_https_context = ssl._create_unverified_context + self.dataset_name = dataset_name + self.split = split + # self.__prepare_dataset() + self.path = os.path.join(path, dataset_name) + self.src_data = [] + if self.dataset_name == "PETS": + self.detector = 'JPD' + self.detector_type = 'default' + if use_ssd: + self.detector = 'SSD' + self.detector_type = 'custom' + + self.dataset_sets['train'] = 'train' + self.dataset_sets['val'] = 'val' + self.dataset_sets['test'] = 'test' + if self.dataset_sets[self.split] is None: + raise ValueError(self.split + ' split is not available...') + + if not os.path.exists(os.path.join(self.path, 'images/S1/L1')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S1_L1.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S1/L2')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S1_L2.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S2/L1')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L1.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S2/L2')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L2.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S2/L3')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L3.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S3/Multiple_Flow')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S3_MF.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists( + os.path.join(self.path, 'annotations', 'pets_' + self.dataset_sets[self.split] + '.json')): + self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_annotations_json.zip', + download_path=os.path.join(self.path, 'annotations'), file_format="zip", + create_dir=True) + pkl_filename = os.path.join(self.path, + 'data_' + self.detector + '_' + self.dataset_sets[self.split] + '_pets.pkl') + if not os.path.exists(pkl_filename): + ssd = None + if use_ssd: + from opendr.perception.object_detection_2d.ssd.ssd_learner import SingleShotDetectorLearner + ssd = SingleShotDetectorLearner(device=device) + ssd.download(".", mode="pretrained") + ssd.load("./ssd_default_person", verbose=True) + if not os.path.exists( + os.path.join(self.path, 'detections', + 'PETS-' + self.dataset_sets[self.split] + '_siyudpm_dets.idl')): + self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_detections.zip', + download_path=os.path.join(self.path, 'detections'), file_format="zip", + create_dir=True) + if not os.path.exists( + os.path.join(self.path, 'annotations', 'PETS-' + self.dataset_sets[self.split] + '.idl')): + self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_annotations.zip', + download_path=os.path.join(self.path, 
'annotations'), file_format="zip", + create_dir=True) + with open(os.path.join(self.path, 'annotations', + 'PETS-' + self.dataset_sets[self.split] + '.idl')) as fp_gt: + fp_dt = None + if self.detector_type == 'default': + fp_dt = open(os.path.join(self.path, 'detections', + 'PETS-' + self.dataset_sets[self.split] + '_siyudpm_dets.idl')) + print('Preparing PETS ' + self.dataset_sets[self.split] + ' set...') + current_id = 0 + number_samples = 1696 + if self.split == 'val': + current_id = 1696 + number_samples = 240 + elif self.split == 'test': + current_id = 1936 + number_samples = 436 + pbarDesc = "Overall progress" + pbar = tqdm(desc=pbarDesc, total=number_samples) + if self.detector_type == 'default': + line_dt = fp_dt.readline() + line_gt = fp_gt.readline() + while line_gt: + remove_strings = ['PETS09-', '\"', ':', '(', ')', ',', '', ';'] + data_gt = line_gt.replace(':', ' ') + for j in range(len(remove_strings)): + data_gt = data_gt.replace(remove_strings[j], '') + data_gt = data_gt.split() + filename_gt = data_gt[0][0:2] + '/' + data_gt[0][2:] + if filename_gt[0:6] == 'S2/L1/': + filename_gt = filename_gt.replace('img/00', 'Time_12-34/View_001/frame_') + num = int(filename_gt[-8:-4]) - 1 + filename_gt = filename_gt[:-8] + str(num).zfill(4) + '.jpg' + if filename_gt[0:6] == 'S2/L2/': + filename_gt = filename_gt.replace('img/00', 'Time_14-55/View_001/frame_') + num = int(filename_gt[-8:-4]) - 1 + filename_gt = filename_gt[:-8] + str(num).zfill(4) + '.jpg' + if filename_gt[0:2] == 'S3': + filename_gt = filename_gt.replace('_MF', 'Multiple_Flow') + + if self.detector_type == 'default': + data_dt = line_dt.replace(':', ' ') + for j in range(len(remove_strings)): + data_dt = data_dt.replace(remove_strings[j], '') + data_dt = data_dt.split() + filename_dt = data_dt[0][0:2] + '/' + data_dt[0][2:] + if filename_dt[0:6] == 'S2/L1/': + filename_dt = filename_dt.replace('img/00', 'Time_12-34/View_001/frame_') + num = int(filename_dt[-8:-4]) - 1 + filename_dt = filename_dt[:-8] + str(num).zfill(4) + '.jpg' + if filename_dt[0:6] == 'S2/L2/': + filename_dt = filename_dt.replace('img/00', 'Time_14-55/View_001/frame_') + num = int(filename_dt[-8:-4]) - 1 + filename_dt = filename_dt[:-8] + str(num).zfill(4) + '.jpg' + if filename_dt[0:2] == 'S3': + filename_dt = filename_dt.replace('_MF', 'Multiple_Flow') + if filename_gt != filename_dt: + raise ValueError('Errors in files...') + + img = Image.open(os.path.join(self.path, 'images/', filename_gt)) + + dt_boxes = [] + if self.detector_type == 'default': + for i in range(1, (len(data_dt)), 5): + dt_box = np.array((float(data_dt[i]), float(data_dt[i + 1]), float(data_dt[i + 2]), + float(data_dt[i + 3]), 1 / (1 + math.exp(- float(data_dt[i + 4]))))) + dt_boxes.append(dt_box) + else: + bboxes_list = ssd.infer(img, threshold=0.0, custom_nms=None, nms_thresh=0.975, + nms_topk=6000, post_nms=6000) + bboxes_list = BoundingBoxListToNumpyArray()(bboxes_list) + bboxes_list = bboxes_list[bboxes_list[:, 4] > 0.015] + bboxes_list = bboxes_list[np.argsort(bboxes_list[:, 4]), :][::-1] + bboxes_list = bboxes_list[:5000, :] + for b in range(len(bboxes_list)): + dt_boxes.append(np.array([bboxes_list[b, 0], bboxes_list[b, 1], bboxes_list[b, 2], + bboxes_list[b, 3], bboxes_list[b, 4][0]])) + gt_boxes = [] + for i in range(1, (len(data_gt)), 5): + gt_box = np.array((float(data_gt[i]), float(data_gt[i + 1]), float(data_gt[i + 2]), + float(data_gt[i + 3]))) + gt_boxes.append(gt_box) + self.src_data.append({ + 'id': current_id, + 'filename': os.path.join('images', filename_gt), 
+ 'resolution': img.opencv().shape[0:2][::-1], + 'gt_boxes': [np.asarray([]), np.asarray(gt_boxes)], + 'dt_boxes': [np.asarray([]), np.asarray(dt_boxes)] + }) + current_id = current_id + 1 + pbar.update(1) + if self.detector_type == 'default': + line_dt = fp_dt.readline() + line_gt = fp_gt.readline() + pbar.close() + if self.detector_type == 'default': + fp_dt.close() + elif self.detector == 'SSD': + del ssd + gc.collect() + with open(pkl_filename, 'wb') as handle: + pickle.dump(self.src_data, handle, protocol=pickle.DEFAULT_PROTOCOL) + else: + with open(pkl_filename, 'rb') as fp_pkl: + self.src_data = pickle.load(fp_pkl) + + self.classes = ['background', 'human'] + self.class_ids = [-1, 1] + self.annotation_file = 'pets_' + self.dataset_sets[self.split] + '.json' + elif self.dataset_name == "COCO": + self.dataset_sets['train'] = 'train' + self.dataset_sets['val'] = 'minival' + self.dataset_sets['test'] = 'valminusminival' + if self.dataset_sets[self.split] is None: + raise ValueError(self.split + ' split is not available...') + elif self.dataset_sets[self.split] == 'train': + imgs_split = 'train2014' + else: + imgs_split = 'val2014' + self.detector = 'FRCN' + self.detector_type = 'default' + ssd = None + if use_ssd: + self.detector = 'SSD' + self.detector_type = 'custom' + from opendr.perception.object_detection_2d.ssd.ssd_learner import SingleShotDetectorLearner + ssd = SingleShotDetectorLearner(device=device) + ssd.download(".", mode="pretrained") + ssd.load("./ssd_default_person", verbose=True) + if not os.path.exists(os.path.join(self.path, imgs_split)): + self.download('http://images.cocodataset.org/zips/' + imgs_split + '.zip', + download_path=os.path.join(self.path), file_format="zip", + create_dir=True) + pkl_filename = os.path.join(self.path, 'data_' + self.detector + '_' + + self.dataset_sets[self.split] + '_coco.pkl') + if not os.path.exists(pkl_filename): + if not os.path.exists(os.path.join(self.path, 'annotations', 'instances_' + + self.dataset_sets[self.split] + + '2014.json')): + if self.dataset_sets[self.split] == 'train': + ann_url = 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip' + self.download(ann_url, download_path=os.path.join(self.path), file_format="zip", + create_dir=True) + else: + if self.dataset_sets[self.split] == 'minival': + ann_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/' \ + 'instances_minival2014.json.zip?dl=0' + else: + ann_url = 'https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/' \ + 'instances_valminusminival2014.json.zip?dl=0' + self.download(ann_url, download_path=os.path.join(self.path, 'annotations'), file_format="zip", + create_dir=True) + if not os.path.exists(os.path.join(self.path, 'detections', 'coco_2014_' + + self.dataset_sets[self.split] + + '_FRCN_train.pkl')): + self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/coco_2014_FRCN.tar.gz', + download_path=os.path.join(self.path, 'detections'), file_format='tar.gz', + create_dir=True) + with open(os.path.join(self.path, 'detections', + 'coco_2014_' + self.dataset_sets[self.split] + '_FRCN_train.pkl'), 'rb') as f: + dets_default = pickle.load(f, encoding='latin1') + annots = COCO(annotation_file=os.path.join(self.path, 'annotations', 'instances_' + + self.dataset_sets[self.split] + '2014.json')) + pbarDesc = "Overall progress" + pbar = tqdm(desc=pbarDesc, total=len(dets_default[1])) + for i in range(len(dets_default[1])): + dt_boxes = [] + img_info = annots.loadImgs([dets_default[1][i]])[0] + img = Image.open(os.path.join(self.path, 
imgs_split, img_info["file_name"])) + if self.detector_type == 'default': + dt_boxes = dets_default[0][1][i] + elif self.detector == 'SSD': + bboxes_list = ssd.infer(img, threshold=0.0, custom_nms=None, nms_thresh=0.975, + nms_topk=6000, post_nms=6000) + bboxes_list = BoundingBoxListToNumpyArray()(bboxes_list) + if bboxes_list.shape[0] > 0: + bboxes_list = bboxes_list[bboxes_list[:, 4] > 0.015] + if bboxes_list.shape[0] > 0: + bboxes_list = bboxes_list[np.argsort(bboxes_list[:, 4]), :][::-1] + bboxes_list = bboxes_list[:5000, :] + for b in range(len(bboxes_list)): + dt_boxes.append(np.array([bboxes_list[b, 0], bboxes_list[b, 1], bboxes_list[b, 2], + bboxes_list[b, 3], bboxes_list[b, 4][0]])) + dt_boxes = np.asarray(dt_boxes) + annots_in_frame = annots.loadAnns( + annots.getAnnIds(imgIds=[dets_default[1][i]], catIds=[1], iscrowd=False)) + gt_boxes = [] + for j in range(len(annots_in_frame)): + gt_boxes.append(annots_in_frame[j]['bbox']) + gt_boxes = np.asarray(np.asarray(gt_boxes)) + if gt_boxes.shape[0] > 0: + gt_boxes[:, 2] = gt_boxes[:, 0] + gt_boxes[:, 2] + gt_boxes[:, 3] = gt_boxes[:, 1] + gt_boxes[:, 3] + self.src_data.append({ + 'id': dets_default[1][i], + 'filename': os.path.join(imgs_split, img_info["file_name"]), + 'resolution': [img_info['width'], img_info['height']], + 'gt_boxes': [np.asarray([]), gt_boxes], + 'dt_boxes': [np.asarray([]), dt_boxes] + }) + pbar.update(1) + pbar.close() + if self.detector == 'SSD': + del ssd + gc.collect() + with open(pkl_filename, 'wb') as handle: + pickle.dump(self.src_data, handle, protocol=pickle.DEFAULT_PROTOCOL) + else: + with open(pkl_filename, 'rb') as fp_pkl: + self.src_data = pickle.load(fp_pkl) + self.classes = ['background', 'person'] + self.class_ids = [-1, 1] + self.annotation_file = 'instances_' + self.dataset_sets[self.split] + '2014.json' + elif self.dataset_name == "TEST_MODULE": + self.dataset_sets['train'] = 'test' + self.dataset_sets['val'] = 'test' + self.dataset_sets['test'] = 'test' + if self.dataset_sets[self.split] is None: + raise ValueError(self.split + ' split is not available...') + pkl_filename = os.path.join(self.path, 'test_module.pkl') + if not os.path.exists(pkl_filename): + data_url = OPENDR_SERVER_URL + '/perception/object_detection_2d/nms/datasets/test_module.zip' + self.download(data_url, download_path=os.path.join(self.path).replace("TEST_MODULE", ""), file_format="zip", + create_dir=True) + with open(pkl_filename, 'rb') as fp_pkl: + self.src_data = pickle.load(fp_pkl) + self.classes = ['background', 'person'] + self.class_ids = [-1, 1] + self.annotation_file = 'test_module_anns.json' + + @staticmethod + def download( + url, download_path, dataset_sub_path=".", file_format="zip", create_dir=False): + + if create_dir: + os.makedirs(download_path, exist_ok=True) + + print("Downloading dataset from", url, "to", download_path) + + start_time = 0 + last_print = 0 + + def reporthook(count, block_size, total_size): + nonlocal start_time + nonlocal last_print + if count == 0: + start_time = time.time() + last_print = start_time + return + + duration = time.time() - start_time + progress_size = int(count * block_size) + speed = int(progress_size / (1024 * duration)) + if time.time() - last_print >= 1: + last_print = time.time() + print( + "\r%d MB, %d KB/s, %d seconds passed" % + (progress_size / (1024 * 1024), speed, duration), + end='' + ) + + if file_format == "zip": + zip_path = os.path.join(download_path, "dataset.zip") + urlretrieve(url, zip_path, reporthook=reporthook) + print() + print("Extracting data from 
zip file") + with ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(download_path) + os.remove(zip_path) + elif file_format == "tar.bz2" or file_format == "tar.gz": + tar_path = os.path.join(download_path, "dataset." + file_format) + urlretrieve(url, tar_path, reporthook=reporthook) + print() + + def members(tf): + l = len("Crowd_PETS09/") + for member in tf.getmembers(): + if member.path.startswith("Crowd_PETS09/"): + member.path = member.path[l:] + yield member + + with tarfile.open(tar_path, "r:" + file_format.split('.')[1]) as tar: + if file_format == "tar.bz2": + tar.extractall(path=download_path, members=members(tar)) + else: + tar.extractall(path=download_path) + tar.close() + os.remove(tar_path) + else: + raise ValueError("Unsupported file_format: " + file_format) diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py new file mode 100644 index 0000000000..93286bbc7a --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py @@ -0,0 +1,286 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +# Copyright (c) 2020 Zheng, Zhaohui. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import torch +import torchvision +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +import sys +import os + + +def jaccard(box_a, box_b, iscrowd: bool = False): + use_batch = True + if box_a.dim() == 2: + use_batch = False + box_a = box_a[None, ...] + box_b = box_b[None, ...] 
+ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, :, 2] - box_a[:, :, 0]) * + (box_a[:, :, 3] - box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B] + area_b = ((box_b[:, :, 2] - box_b[:, :, 0]) * + (box_b[:, :, 3] - box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B] + union = area_a + area_b - inter + + out = inter / area_a if iscrowd else inter / union + return out if use_batch else out.squeeze(0) + + +def intersect(box_a, box_b): + n = box_a.size(0) + A = box_a.size(1) + B = box_b.size(1) + max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2), + box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2)) + min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2), + box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2)) + return torch.clamp(max_xy - min_xy, min=0).prod(3) # inter + + +def diou(box_a, box_b, iscrowd: bool = False): + use_batch = True + if box_a.dim() == 2: + use_batch = False + box_a = box_a[None, ...] + box_b = box_b[None, ...] + + inter = intersect(box_a, box_b) + area_a = ((box_a[:, :, 2] - box_a[:, :, 0]) * + (box_a[:, :, 3] - box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B] + area_b = ((box_b[:, :, 2] - box_b[:, :, 0]) * + (box_b[:, :, 3] - box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B] + union = area_a + area_b - inter + x1 = ((box_a[:, :, 2] + box_a[:, :, 0]) / 2).unsqueeze(2).expand_as(inter) + y1 = ((box_a[:, :, 3] + box_a[:, :, 1]) / 2).unsqueeze(2).expand_as(inter) + x2 = ((box_b[:, :, 2] + box_b[:, :, 0]) / 2).unsqueeze(1).expand_as(inter) + y2 = ((box_b[:, :, 3] + box_b[:, :, 1]) / 2).unsqueeze(1).expand_as(inter) + + t1 = box_a[:, :, 1].unsqueeze(2).expand_as(inter) + b1 = box_a[:, :, 3].unsqueeze(2).expand_as(inter) + l1 = box_a[:, :, 0].unsqueeze(2).expand_as(inter) + r1 = box_a[:, :, 2].unsqueeze(2).expand_as(inter) + + t2 = box_b[:, :, 1].unsqueeze(1).expand_as(inter) + b2 = box_b[:, :, 3].unsqueeze(1).expand_as(inter) + l2 = box_b[:, :, 0].unsqueeze(1).expand_as(inter) + r2 = box_b[:, :, 2].unsqueeze(1).expand_as(inter) + cr = torch.max(r1, r2) + cl = torch.min(l1, l2) + ct = torch.min(t1, t2) + cb = torch.max(b1, b2) + D = (((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)) + out = inter / area_a if iscrowd else inter / union - D ** 0.9 + return out if use_batch else out.squeeze(0) + + +def distance(box_a, box_b, iscrowd: bool = False): + use_batch = True + if box_a.dim() == 2: + use_batch = False + box_a = box_a[None, ...] + box_b = box_b[None, ...] 
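# Centre-distance penalty used by the DIoU-style helpers above: the squared distance between
# the two box centres is divided by the squared diagonal of the smallest enclosing box
# (plus a small epsilon) and raised to the power 0.6. Unlike diou(), only this distance term
# is returned, without the IoU component.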
+ + inter = intersect(box_a, box_b) + x1 = ((box_a[:, :, 2] + box_a[:, :, 0]) / 2).unsqueeze(2).expand_as(inter) + y1 = ((box_a[:, :, 3] + box_a[:, :, 1]) / 2).unsqueeze(2).expand_as(inter) + x2 = ((box_b[:, :, 2] + box_b[:, :, 0]) / 2).unsqueeze(1).expand_as(inter) + y2 = ((box_b[:, :, 3] + box_b[:, :, 1]) / 2).unsqueeze(1).expand_as(inter) + + t1 = box_a[:, :, 1].unsqueeze(2).expand_as(inter) + b1 = box_a[:, :, 3].unsqueeze(2).expand_as(inter) + l1 = box_a[:, :, 0].unsqueeze(2).expand_as(inter) + r1 = box_a[:, :, 2].unsqueeze(2).expand_as(inter) + + t2 = box_b[:, :, 1].unsqueeze(1).expand_as(inter) + b2 = box_b[:, :, 3].unsqueeze(1).expand_as(inter) + l2 = box_b[:, :, 0].unsqueeze(1).expand_as(inter) + r2 = box_b[:, :, 2].unsqueeze(1).expand_as(inter) + + cr = torch.max(r1, r2) + cl = torch.min(l1, l2) + ct = torch.min(t1, t2) + cb = torch.max(b1, b2) + D = (((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)) ** 0.6 + out = D if iscrowd else D + return out if use_batch else out.squeeze(0) + + +def det_matching(scores, dt_boxes, gt_boxes, iou_thres, device='cuda'): + sorted_indices = torch.argsort(-scores, dim=0) + labels = torch.zeros(len(dt_boxes)) + if device == 'cuda': + labels = labels.cuda() + if gt_boxes.shape[0] == 0: + return labels.unsqueeze(-1) + assigned_GT = -torch.ones(len(gt_boxes)) + r = torch.tensor([-1, -1, -1, -1]).float().unsqueeze(0).unsqueeze(0) + if device == 'cuda': + r = r.cuda() + for s in sorted_indices: + gt_boxes_c = gt_boxes.clone().unsqueeze(0) + gt_boxes_c[0, assigned_GT > -1, :] = r + ious = bb_intersection_over_union(boxAs=dt_boxes[s].clone().unsqueeze(0), boxBs=gt_boxes_c) + annot_iou, annot_box_id = torch.sort(ious.squeeze(), descending=True) + if annot_box_id.ndim > 0: + annot_box_id = annot_box_id[0] + annot_iou = annot_iou[0] + if annot_iou > iou_thres: + assigned_GT[annot_box_id] = s + labels[s] = 1 + return labels.unsqueeze(-1) + + +def run_coco_eval(dt_file_path=None, gt_file_path=None, only_classes=None, max_dets=None, + verbose=False): + if max_dets is None: + max_dets = [200, 400, 600, 800, 1000, 1200] + results = [] + sys.stdout = open(os.devnull, 'w') + for i in range(len(max_dets)): + coco = COCO(gt_file_path) + coco_dt = coco.loadRes(dt_file_path) + cocoEval = COCOeval(coco, coco_dt, 'bbox') + cocoEval.params.iouType = 'bbox' + cocoEval.params.useCats = True + cocoEval.params.catIds = only_classes + cocoEval.params.maxDets = [max_dets[i]] + cocoEval.evaluate() + results.append([summarize_nms(coco_eval=cocoEval, maxDets=max_dets[i]), max_dets[i]]) + # print(results[i]) + del cocoEval, coco_dt, coco + sys.stdout = sys.__stdout__ + return results + + +def summarize_nms(coco_eval=None, maxDets=100): + def summarize(ap=1, iouThr=None, areaRng='all', maxDets=100): + p = coco_eval.params + iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}' + titleStr = 'Average Precision' if ap == 1 else 'Average Recall' + typeStr = '(AP)' if ap == 1 else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = coco_eval.eval['precision'] + # IoU + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, :, aind, mind] + else: + # dimension of recall: [TxKxAxM] + s = coco_eval.eval['recall'] + if iouThr is not None: + t = 
np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, aind, mind] + if len(s[s > -1]) == 0: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + stat_str = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s) + return [mean_s, stat_str] + + def summarizeDets(): + stats = [] + stat, stat_str = summarize(1, maxDets=maxDets) + stats.append([stat, stat_str]) + stat, stat_str = summarize(1, iouThr=.5, maxDets=maxDets) + stats.append([stat, stat_str]) + stat, stat_str = summarize(1, iouThr=.75, maxDets=maxDets) + stats.append([stat, stat_str]) + stat, stat_str = summarize(0, maxDets=maxDets) + stats.append([stat, stat_str]) + return stats + + coco_eval.accumulate() + summarized = summarizeDets() + return summarized + + +def drop_dets(boxes, scores, keep_ratio=0.85): + ids = np.arange(len(boxes)) + np.random.shuffle(ids) + ids_keep = ids[0:int(len(boxes) * keep_ratio)] + boxes_new = boxes[ids_keep, :] + scores_new = scores[ids_keep] + scores_new, scores_new_ids = torch.sort(scores_new, descending=True) + boxes_new = boxes_new[scores_new_ids] + return boxes_new, scores_new + + +def filter_iou_boxes(boxes=None, iou_thres=0.2): + ious = bb_intersection_over_union(boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1), + boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1)) + ids_boxes = ious >= iou_thres + return ids_boxes + + +def bb_intersection_over_union(boxAs=None, boxBs=None): + xA = torch.maximum(boxAs[:, :, 0], boxBs[:, :, 0]) + yA = torch.maximum(boxAs[:, :, 1], boxBs[:, :, 1]) + xB = torch.minimum(boxAs[:, :, 2], boxBs[:, :, 2]) + yB = torch.minimum(boxAs[:, :, 3], boxBs[:, :, 3]) + interAreas = torch.maximum(torch.zeros_like(xB), xB - xA + 1) * torch.maximum(torch.zeros_like(yB), yB - yA + 1) + boxAAreas = (boxAs[:, :, 2] - boxAs[:, :, 0] + 1) * (boxAs[:, :, 3] - boxAs[:, :, 1] + 1) + boxBAreas = (boxBs[:, :, 2] - boxBs[:, :, 0] + 1) * (boxBs[:, :, 3] - boxBs[:, :, 1] + 1) + ious = interAreas / (boxAAreas + boxBAreas - interAreas) + return ious + + +def compute_class_weights(pos_weights, max_dets=400, dataset_nms=None): + num_pos = np.ones([len(dataset_nms.classes), 1]) + num_bg = np.ones([len(dataset_nms.classes), 1]) + weights = np.zeros([len(dataset_nms.classes), 2]) + for i in range(len(dataset_nms.src_data)): + for cls_index in range(len(dataset_nms.classes)): + num_pos[cls_index] = num_pos[cls_index] + \ + min(max_dets, len(dataset_nms.src_data[i]['gt_boxes'][cls_index])) + num_bg[cls_index] = num_bg[cls_index] + max(0, min(max_dets, + len(dataset_nms.src_data[i]['dt_boxes'][cls_index])) - + min(max_dets, + len(dataset_nms.src_data[i]['gt_boxes'][cls_index]))) + for class_index in range(len(dataset_nms.classes)): + weights[class_index, 0] = (1 - pos_weights[class_index]) * (num_pos[class_index] + + num_bg[class_index]) / num_bg[class_index] + weights[class_index, 1] = pos_weights[class_index] * (num_pos[class_index] + + num_bg[class_index]) / num_pos[class_index] + return weights + + +def apply_torchNMS(boxes, scores, iou_thres): + ids_nms = torchvision.ops.nms(boxes, scores, iou_thres) + scores = scores[ids_nms] + boxes = boxes[ids_nms] + return boxes, scores diff --git a/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py b/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py index 386f5b5306..70b4656cf1 100644 --- a/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py +++ b/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py @@ -43,8 +43,10 @@ # algorithm imports from opendr.perception.object_detection_2d.utils.eval_utils import 
DetectionDatasetCOCOEval from opendr.perception.object_detection_2d.datasets import DetectionDataset -from opendr.perception.object_detection_2d.datasets.transforms import ImageToNDArrayTransform, BoundingBoxListToNumpyArray, \ - transform_test +from opendr.perception.object_detection_2d.datasets.transforms import ImageToNDArrayTransform, \ + BoundingBoxListToNumpyArray, \ + transform_test, pad_test +from opendr.perception.object_detection_2d.nms.utils import NMSCustom gutils.random.seed(0) @@ -90,7 +92,6 @@ def __init__(self, lr=1e-3, epochs=120, batch_size=8, self.ctx = mx.gpu(int(self.device.split(':')[1])) else: self.ctx = mx.cpu() - print("Device set to cuda but no GPU available, using CPU...") else: self.ctx = mx.cpu() @@ -141,7 +142,7 @@ def save(self, path, verbose=False): if verbose: print("Model parameters saved.") - with open(os.path.join(path, model_name + '.json'), 'w', encoding='utf-8') as f: + with open(os.path.join(path, model_name + '.json'), 'w', encoding='utf-8') as f: json.dump(metadata, f, ensure_ascii=False, indent=4) if verbose: print("Model metadata saved.") @@ -216,7 +217,7 @@ def download(self, path=None, mode="pretrained", verbose=False, if verbose: print("Downloading params...") file_url = os.path.join(url, "pretrained", "ssd_512_vgg16_atrous_wider_person", - "ssd_512_vgg16_atrous_wider_person.params") + "ssd_512_vgg16_atrous_wider_person.params") urlretrieve(file_url, os.path.join(path, "ssd_512_vgg16_atrous_wider_person.params")) @@ -461,18 +462,27 @@ def __get_lr_at(self, epoch): else: return self.lr - def eval(self, dataset, use_subset=False, subset_size=100, verbose=False): + def eval(self, dataset, use_subset=False, subset_size=100, verbose=False, + nms_thresh=0.45, nms_topk=400, post_nms=100): """ This method performs evaluation on a given dataset and returns a dictionary with the evaluation results. :param dataset: dataset object, to perform evaluation on :type dataset: opendr.perception.object_detection_2d.datasets.DetectionDataset or opendr.engine.data.ExternalDataset - :return: dictionary containing evaluation metric names nad values :param use_subset: if True, only a subset of the dataset is evaluated, defaults to False :type use_subset: bool, optional :param subset_size: if use_subset is True, subset_size controls the size of the subset to be evaluated :type subset_size: int, optional :param verbose: if True, additional information is printed on stdout :type verbose: bool, optional + :param nms_thresh: Non-maximum suppression threshold. You can specify < 0 or > 1 to disable NMS. + :type nms_thresh: float, default is 0.45 + :param nms_topk: Apply NMS to top k detection results, use -1 to disable so that every Detection result is used in NMS. + :type nms_topk: int, default is 400 + :param post_nms: Only return top post_nms detection results, the rest is discarded. + The number is based on COCO dataset which has maximum 100 objects per image. You can adjust this number if + expecting more objects. You can use -1 to return all detections. 
+ :type post_nms: int, default is 100 + :return: dictionary containing evaluation metric names nad values :rtype: dict """ autograd.set_training(False) @@ -494,7 +504,7 @@ def eval(self, dataset, use_subset=False, subset_size=100, verbose=False): self._model.initialize() self._model.collect_params().reset_ctx(ctx) self._model.hybridize(static_alloc=True, static_shape=True) - self._model.set_nms(nms_thresh=0.45, nms_topk=400) + self._model.set_nms(nms_thresh=nms_thresh, nms_topk=nms_topk, post_nms=post_nms) dataset, eval_metric = self.__prepare_val_dataset(dataset, data_shape=self.img_size) @@ -549,7 +559,8 @@ def eval(self, dataset, use_subset=False, subset_size=100, verbose=False): eval_dict = {k.lower(): v for k, v in zip(map_name, mean_ap)} return eval_dict - def infer(self, img, threshold=0.2, keep_size=False): + def infer(self, img, threshold=0.2, keep_size=False, custom_nms: NMSCustom=None, + nms_thresh=0.45, nms_topk=400, post_nms=100): """ Performs inference on a single image and returns the resulting bounding boxes. :param img: image to perform inference on @@ -558,13 +569,26 @@ def infer(self, img, threshold=0.2, keep_size=False): :type threshold: float, optional :param keep_size: if True, the image is not resized to fit the data shape used during training :type keep_size: bool, optional + :param custom_nms: Custom NMS method to be employed on inference + :type perception.object_detection_2d.nms.utils.nms_custom.NMSCustom + :param nms_thresh: Non-maximum suppression threshold. You can specify < 0 or > 1 to disable NMS. + :type nms_thresh: float, default is 0.45 + :param nms_topk: Apply NMS to top k detection results, use -1 to disable so that every Detection result is used in NMS. + :type nms_topk: int, default is 400 + :param post_nms: Only return top post_nms detection results, the rest is discarded. + The number is based on COCO dataset which has maximum 100 objects per image. You can adjust this number if + expecting more objects. You can use -1 to return all detections. 
+ :type post_nms: int, default is 100 :return: list of bounding boxes :rtype: BoundingBoxList """ - assert self._model is not None, "Model has not been loaded, call load(path) first" - self._model.set_nms(nms_thresh=0.45, nms_topk=400) + assert self._model is not None, "Model has not been loaded, call load(path) first" + if custom_nms: + self._model.set_nms(nms_thresh=0.85, nms_topk=5000, post_nms=1000) + else: + self._model.set_nms(nms_thresh=nms_thresh, nms_topk=nms_topk, post_nms=post_nms) if not isinstance(img, Image): img = Image(img) _img = img.convert("channels_last", "rgb") @@ -576,33 +600,43 @@ def infer(self, img, threshold=0.2, keep_size=False): x, img_mx = transform_test(img_mx) else: x, img_mx = presets.ssd.transform_test(img_mx, short=self.img_size) - h_mx, w_mx, _ = img_mx.shape + x = pad_test(x, min_size=self.img_size) x = x.as_in_context(self.ctx) class_IDs, scores, boxes = self._model(x) class_IDs = class_IDs[0, :, 0].asnumpy() scores = scores[0, :, 0].asnumpy() - mask = np.where((class_IDs >= 0) & (scores > threshold))[0] + mask = np.where(class_IDs >= 0)[0] + if custom_nms is None: + mask = np.intersect1d(mask, np.where(scores > threshold)[0]) if mask.size == 0: return BoundingBoxList([]) scores = scores[mask, np.newaxis] class_IDs = class_IDs[mask, np.newaxis] boxes = boxes[0, mask, :].asnumpy() + if x.shape[2] > h_mx: + boxes[:, [1, 3]] -= (x.shape[2] - h_mx) + elif x.shape[3] > w_mx: + boxes[:, [0, 2]] -= (x.shape[3] - w_mx) boxes[:, [0, 2]] /= w_mx boxes[:, [1, 3]] /= h_mx boxes[:, [0, 2]] *= width boxes[:, [1, 3]] *= height - bounding_boxes = BoundingBoxList([]) - for idx, box in enumerate(boxes): - bbox = BoundingBox(left=box[0], top=box[1], - width=box[2] - box[0], - height=box[3] - box[1], - name=class_IDs[idx, :], - score=scores[idx, :]) - bounding_boxes.data.append(bbox) + if custom_nms is not None: + bounding_boxes, _ = custom_nms.run_nms(boxes=boxes, scores=scores, threshold=threshold, img=_img) + else: + bounding_boxes = BoundingBoxList([]) + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=class_IDs[idx, :], + score=scores[idx, :]) + bounding_boxes.data.append(bbox) + return bounding_boxes @staticmethod diff --git a/tests/sources/tools/perception/object_detection_2d/nms/__init__.py b/tests/sources/tools/perception/object_detection_2d/nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/__init__.py b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py new file mode 100644 index 0000000000..66d06bf3a6 --- /dev/null +++ b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py @@ -0,0 +1,139 @@ +# Copyright 2020-2021 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import gc +import shutil +import os +import numpy as np +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner +from opendr.perception.object_detection_2d.nms.utils.nms_dataset import Dataset_NMS +from opendr.engine.data import Image + + +def rmfile(path): + try: + os.remove(path) + except OSError as e: + print("Error: %s - %s." % (e.filename, e.strerror)) + + +def rmdir(_dir): + try: + shutil.rmtree(_dir) + except OSError as e: + print("Error: %s - %s." % (e.filename, e.strerror)) + + +class TestSeq2SeqNMS(unittest.TestCase): + + @classmethod + def setUpClass(cls): + print("\n\n**********************************\nTEST Seq2Seq-NMS Learner\n" + "**********************************") + + cls.temp_dir = os.path.join(".", "tests", "sources", "tools", "perception", "object_detection_2d", + "nms", "seq2seq_nms", "temp") + cls.seq2SeqNMSLearner = Seq2SeqNMSLearner(iou_filtering=None, app_feats='fmod', temp_path=cls.temp_dir, + device='cpu', checkpoint_after_iter=1, epochs=1) + + # Download all required files for testing + cls.seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=cls.temp_dir) + + @classmethod + def tearDownClass(cls): + print('Removing temporary directories for Seq2Seq-NMS...') + # Clean up downloaded files + rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "test_module.pkl")) + rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "val2014", "COCO_val2014_000000262148.jpg")) + rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "FMoD", "coco_edgemap_b_3.pkl")) + rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "annotations", "test_module_anns.json")) + rmdir(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "val2014")) + rmdir(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "FMoD")) + rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "fmod_normalization.pkl")) + rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "last_weights.json")) + rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "last_weights.pth")) + rmdir(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod")) + + rmdir(os.path.join(cls.temp_dir)) + + del cls.seq2SeqNMSLearner + gc.collect() + print('Finished cleaning for Seq2Seq-NMS...') + + def test_fit(self): + print('Starting training test for Seq2Seq-NMS...') + + m = list(self.seq2SeqNMSLearner.model.parameters())[0].clone() + self.seq2SeqNMSLearner.fit(dataset='TEST_MODULE', use_ssd=False, + datasets_folder=self.temp_dir + '/datasets', + logging_path=None, silent=False, verbose=True, nms_gt_iou=0.50, + max_dt_boxes=200) + n = list(self.seq2SeqNMSLearner.model.parameters())[0].clone() + self.assertFalse(np.array_equal(m, n), + msg="Model parameters did not change after running fit.") + del m, n + gc.collect() + print('Finished training test for Seq2Seq-NMS...') + + def test_eval(self): + print('Starting evaluation test for Seq2Seq-NMS...') + self.seq2SeqNMSLearner.load(self.temp_dir + '/seq2seq_pets_jpd_fmod/', verbose=True) + results_dict = self.seq2SeqNMSLearner.eval(dataset='TEST_MODULE', split='test', max_dt_boxes=800, + datasets_folder=self.temp_dir + '/datasets', + use_ssd=False) + if results_dict is None: + self.assertIsNotNone(results_dict, + msg="Eval results dictionary not returned.") + else: + self.assertGreater(results_dict[0][0][1][0], 0.4) + del results_dict + gc.collect() + print('Finished evaluation test for Seq2Seq-NMS...') + + 
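# The inference test below feeds pre-computed detections (boxes and confidence scores) from
# the first sample of the TEST_MODULE split of Dataset_NMS, together with the corresponding
# image, through run_nms() of the pretrained seq2seq_pets_jpd_fmod model and checks that a
# BoundingBoxList is returned.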
def test_infer(self): + print('Starting inference test for Seq2Seq-NMS...') + self.seq2SeqNMSLearner.load(self.temp_dir + '/seq2seq_pets_jpd_fmod/', verbose=True) + dataset_nms = Dataset_NMS(path=self.temp_dir + '/datasets', dataset_name='TEST_MODULE', split='train', use_ssd=False) + image_fln = dataset_nms.src_data[0]['filename'] + img = Image.open(os.path.join(self.temp_dir, 'datasets', 'TEST_MODULE', image_fln)) + boxes = dataset_nms.src_data[0]['dt_boxes'][1][:, 0:4] + scores = np.expand_dims(dataset_nms.src_data[0]['dt_boxes'][1][:, 4], axis=-1) + + bounding_box_list = self.seq2SeqNMSLearner.run_nms(boxes=boxes, scores=scores, img=img, threshold=0.5) + + self.assertIsNotNone(bounding_box_list, + msg="Returned empty BoundingBoxList.") + del img + del bounding_box_list + del boxes + del scores + del dataset_nms + gc.collect() + print('Finished inference test for Seq2Seq-NMS...') + + def test_save_load(self): + print('Starting save/load test for Seq2Seq-NMS...') + self.seq2SeqNMSLearner.save(os.path.join(self.temp_dir, "test_model", "last_weights"), current_epoch=0) + self.seq2SeqNMSLearner.model = None + self.seq2SeqNMSLearner.init_model() + self.seq2SeqNMSLearner.load(os.path.join(self.temp_dir, "test_model")) + self.assertIsNotNone(self.seq2SeqNMSLearner.model, "model is None after loading model.") + # Cleanup + rmdir(os.path.join(self.temp_dir, "test_model")) + print('Finished save/load test for Seq2Seq-NMS...') + + +if __name__ == "__main__": + unittest.main()
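For reference, the `custom_nms` hook added to `SingleShotDetectorLearner.infer()` above can be exercised with any of the new `NMSCustom` implementations. The following is a minimal, illustrative sketch and not part of this patch: the image path, device and thresholds are assumptions, while the class names, import paths and call signatures follow the code introduced in this diff.

```python
from opendr.engine.data import Image
from opendr.perception.object_detection_2d.ssd.ssd_learner import SingleShotDetectorLearner
from opendr.perception.object_detection_2d.nms.soft_nms.soft_nms import SoftNMS

# Pretrained person detector; download() and load() are used as in nms_dataset.py.
ssd = SingleShotDetectorLearner(device='cpu')
ssd.download(".", mode="pretrained")
ssd.load("./ssd_default_person", verbose=True)

# Soft-NMS with Gaussian score decay, applied instead of the detector's built-in NMS.
soft_nms = SoftNMS(nms_type='gaussian', device='cpu', top_k=400, post_k=100)

# "input.jpg" is a placeholder path; infer() relaxes the internal NMS and delegates the
# final suppression to soft_nms.run_nms(boxes, scores, threshold, img).
img = Image.open("input.jpg")
boxes = ssd.infer(img, threshold=0.25, custom_nms=soft_nms)
print(len(boxes.data), "detections kept after Soft-NMS")
```

The Seq2Seq-NMS learner exposes a `run_nms()` method with the same `boxes`/`scores`/`threshold`/`img` keywords, so a loaded `Seq2SeqNMSLearner` could presumably be passed as `custom_nms` in the same way.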