diff --git a/.github/workflows/test_packages.yml b/.github/workflows/test_packages.yml
index 7fabc5b512..29c2511dc6 100644
--- a/.github/workflows/test_packages.yml
+++ b/.github/workflows/test_packages.yml
@@ -45,6 +45,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- perception/facial_expression_recognition
# - perception/object_detection_3d
# - control/mobile_manipulation
@@ -93,6 +94,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- perception/facial_expression_recognition
- perception/object_detection_3d
- control/mobile_manipulation
diff --git a/.github/workflows/tests_suite.yml b/.github/workflows/tests_suite.yml
index f084aac5a2..1a4e252b04 100644
--- a/.github/workflows/tests_suite.yml
+++ b/.github/workflows/tests_suite.yml
@@ -78,6 +78,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- simulation/human_model_generation
- perception/facial_expression_recognition
- control/single_demo_grasp
@@ -185,6 +186,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- perception/facial_expression_recognition
# - perception/object_detection_3d
# - control/mobile_manipulation
@@ -255,6 +257,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- perception/facial_expression_recognition
# - perception/object_detection_3d
# - control/mobile_manipulation
@@ -331,6 +334,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- perception/facial_expression_recognition
- perception/object_detection_3d
- control/mobile_manipulation
diff --git a/.github/workflows/tests_suite_develop.yml b/.github/workflows/tests_suite_develop.yml
index 6da62e4f47..38f8113974 100644
--- a/.github/workflows/tests_suite_develop.yml
+++ b/.github/workflows/tests_suite_develop.yml
@@ -78,6 +78,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- simulation/human_model_generation
- perception/facial_expression_recognition
- control/single_demo_grasp
@@ -190,6 +191,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- perception/facial_expression_recognition
# - perception/object_detection_3d
# - control/mobile_manipulation
@@ -260,6 +262,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- perception/facial_expression_recognition
# - perception/object_detection_3d
# - control/mobile_manipulation
@@ -336,6 +339,7 @@ jobs:
- perception/object_detection_2d/ssd
- perception/object_detection_2d/yolov3
- perception/object_detection_2d/retinaface
+ - perception/object_detection_2d/nms
- perception/facial_expression_recognition
- perception/object_detection_3d
- control/mobile_manipulation
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4cf9710ba..85847af40a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ Released on XX, XXth, 2022.
- New Features:
- Added end-to-end planning tool ([#223](https://github.com/opendr-eu/opendr/pull/223)).
+  - Added seq2seq-nms module, along with other custom NMS implementations for 2D object detection ([#232](https://github.com/opendr-eu/opendr/pull/232)).
- Enhancements:
- Added support for modular pip packages allowing tools to be installed separately ([#201](https://github.com/opendr-eu/opendr/pull/201)).
- Simplified the installation process for pip by including the appropriate post-installation scripts ([#201](https://github.com/opendr-eu/opendr/pull/201)).
diff --git a/docs/reference/index.md b/docs/reference/index.md
index 728f90a959..8d9a7d7202 100644
--- a/docs/reference/index.md
+++ b/docs/reference/index.md
@@ -43,6 +43,7 @@ Neither the copyright holder nor any applicable licensor will be liable for any
- [centernet Module](object-detection-2d-centernet.md)
- [ssd Module](object-detection-2d-ssd.md)
- [yolov3 Module](object-detection-2d-yolov3.md)
+ - [seq2seq-nms Module](object-detection-2d-nms-seq2seq_nms.md)
- object detection 3d:
- [voxel Module](voxel-object-detection-3d.md)
- object tracking 2d:
@@ -113,6 +114,7 @@ Neither the copyright holder nor any applicable licensor will be liable for any
- [centernet Demo](/projects/perception/object_detection_2d/centernet)
- [ssd Demo](/projects/perception/object_detection_2d/ssd)
- [yolov3 Demo](/projects/perception/object_detection_2d/yolov3)
+ - [seq2seq-nms Demo](/projects/perception/object_detection_2d/nms/seq2seq-nms)
- object detection 3d:
- [voxel Demo](/projects/perception/object_detection_3d/demos/voxel_object_detection_3d)
- object tracking 2d:
diff --git a/docs/reference/object-detection-2d-nms-seq2seq_nms.md b/docs/reference/object-detection-2d-nms-seq2seq_nms.md
new file mode 100644
index 0000000000..513233c833
--- /dev/null
+++ b/docs/reference/object-detection-2d-nms-seq2seq_nms.md
@@ -0,0 +1,305 @@
+## Seq2Seq-NMS module
+
+The *seq2seq-nms* module contains the *Seq2SeqNMSLearner* class, which inherits from the abstract class *Learner*.
+
+### Class Seq2SeqNMSLearner
+Bases: `engine.learners.Learner`
+
+It can be used to perform single-class non-maximum suppression (NMS) on images (inference) as well as to train new seq2seq-nms models. The implementation is based on [[1]](#seq2seq_nms-1). The method is set up for performing NMS on the person-detection task, using the [SSD](/docs/reference/object-detection-2d-ssd.md) detector implementation. Seq2Seq-NMS can also be employed for single-class NMS on any class other than the person/pedestrian class; in that case, the model needs to be trained from scratch. Finally, a pretrained model can be employed for evaluation or inference on the same class it was trained on, using RoIs from a different detector than the one used during training. In that case, we advise fine-tuning the pretrained Seq2Seq-NMS model with RoIs from the detector that will be deployed at inference/evaluation time, in order to achieve the best possible performance.
+
+The [Seq2SeqNMSLearner](/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py) class has the following
+public methods:
+
+#### `Seq2SeqNMSLearner` constructor
+```python
+Seq2SeqNMSLearner(self, lr, epochs, device, temp_path, checkpoint_after_iter, checkpoint_load_iter, log_after, variant,
+ iou_filtering, dropout, app_feats, fmod_map_type, fmod_map_bin, app_input_dim)
+```
+
+Constructor parameters:
+
+- **lr**: *float, default=0.0001*\
+ Specifies the initial learning rate to be used during training.
+- **epochs**: *int, default=8*\
+ Specifies the number of epochs to be used during training.
+- **device**: *{'cuda', 'cpu'}, default='cuda'*\
+ Specifies the device to be used.
+- **temp_path**: *str, default='./temp'*\
+ Specifies a path to be used for storage of checkpoints during training.
+- **checkpoint_after_iter**: *int, default=0*\
+ Specifies the epoch interval between checkpoints during training.
+ If set to 0 no checkpoint will be saved.
+- **checkpoint_load_iter**: *int, default=0*\
+ Specifies the epoch to load a saved checkpoint from.
+ If set to 0 no checkpoint will be loaded.
+- **log_after**: *int, default=500*\
+  Specifies the interval (in iterations/batches) between information logging on *stdout*.
+- **variant**: *{'light', 'medium', 'full'}, default='medium'*\
+ Specifies the variant of seq2seq-nms model.
+- **iou_filtering**: *float, default=0.8*\
+  Specifies the IoU threshold used for filtering RoIs before they are provided to the seq2seq-nms model.
+ If set to values <0 or >1, no filtering is applied.
+- **dropout**: *float, default=0.025*\
+ Specifies the dropout rate.
+- **app_feats**: *{'fmod', 'zeros', 'custom'}, default='fmod'*\
+ Specifies the type of the appearance-based features of RoIs used in the model.
+- **fmod_map_type**: *{'EDGEMAP', 'FAST', 'AKAZE', 'BRISK', 'ORB'}, default='EDGEMAP'*\
+ Specifies the type of maps used by FMoD, in the case where *app_feats*='fmod'.
+- **fmod_map_bin**: *bool, default=True*\
+ Specifies whether FMoD maps are binary or not, in the case where *app_feats*='fmod'.
+- **app_input_dim**: *int, default=None*\
+ Specifies the dimension of appearance-based RoI features.
+ In the case where *app_feats*='fmod', the corresponding dimension is automatically computed.
+
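+For example, a learner using FMoD appearance features can be created as follows (a minimal sketch; the parameter values are only indicative):
+```python
+from opendr.perception.object_detection_2d import Seq2SeqNMSLearner
+
+seq2seq_nms_learner = Seq2SeqNMSLearner(variant='medium', app_feats='fmod', fmod_map_type='EDGEMAP',
+                                        iou_filtering=0.8, device='cpu', temp_path='./temp')
+```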
+
+#### `Seq2SeqNMSLearner.fit`
+```python
+Seq2SeqNMSLearner.fit(self, dataset, logging_path, logging_flush_secs, silent, verbose, nms_gt_iou, max_dt_boxes, datasets_folder, use_ssd)
+```
+
+This method is used to train the algorithm on a `Dataset_NMS` dataset.
+Returns a dictionary containing stats regarding the training process.
+
+Parameters:
+
+- **dataset**: *{'PETS', 'COCO'}*\
+  Specifies the name of the dataset among those available for training.
+- **logging_path**: *str, default=None*\
+ Path to save log files.
+ If set to None, only the console will be used for logging.
+- **logging_flush_secs**: *int, default=30*\
+ How often, in seconds, to flush the TensorBoard data to disk.
+- **silent**: *bool, default=False*\
+ If set to True, disables all printing of training progress reports and other information to STDOUT.
+- **verbose**: *bool, default=True*\
+ If True, enables maximum verbosity.
+- **nms_gt_iou**: *float, default=0.5*\
+ Specifies the threshold used to determine whether a detection RoI must be suppressed or not based on its IoU with the image's ground-truth RoIs.
+- **max_dt_boxes**: *int, default=500*\
+  Specifies the maximum number of RoIs provided to the seq2seq-nms model as input.
+- **datasets_folder**: *str, default='./datasets'*\
+ Specifies the path to the folder where the datasets are stored.
+- **use_ssd**: *bool, default=False*\
+  If set to True, RoIs from SSD are fed to the seq2seq-nms model.
+ Otherwise, RoIs from the default detector of the specified dataset are used as input.
+
+#### `Seq2SeqNMSLearner.eval`
+```python
+Seq2SeqNMSLearner.eval(self, dataset, split, verbose, max_dt_boxes, datasets_folder, use_ssd, threshold)
+```
+
+Performs evaluation on a split of a dataset.
+
+Parameters:
+
+- **dataset**: *{'PETS', 'COCO'}*\
+  Specifies the name of the dataset among those available for training.
+- **split**: *{'train', 'val', 'test'}, default='test'*\
+  Specifies the split of the corresponding dataset on which the evaluation will be performed.
+- **verbose**: *bool, default=True*\
+ If True, enables maximum verbosity.
+- **max_dt_boxes**: *int, default=500*\
+  Specifies the maximum number of RoIs provided to the seq2seq-nms model as input.
+- **threshold**: *float, default=0.0*\
+ Specifies the confidence threshold, used for RoI selection after seq2seq-nms rescoring.
+- **datasets_folder**: *str, default='./datasets'*\
+ Specifies the path to the folder where the datasets are stored.
+- **use_ssd**: *bool, default=False*\
+  If set to True, RoIs from SSD are fed to the seq2seq-nms model.
+ Otherwise, RoIs from the default detector of the specified dataset are used as input.
+
+#### `Seq2SeqNMSLearner.infer`
+```python
+Seq2SeqNMSLearner.infer(self, boxes, scores, boxes_sorted, max_dt_boxes, img_res, threshold)
+```
+
+Performs non-maximum suppression, using seq2seq-nms.
+In the case where FMoD is selected for appearance-based RoI feature computation, FMoD maps are not computed.
+
+Parameters:
+
+- **boxes**: *torch.tensor, default=None*\
+  Image coordinates of candidate detection RoIs, expressed as the coordinates of their top-left and bottom-right corners (x_min, y_min, x_max, y_max).
+ For N candidate detection RoIs, the size of the *torch.tensor* is Nx4.
+- **scores**: *torch.tensor, default=None*\
+ Specifies the scores of the candidate detection RoIs, assigned previously by a detector.
+ For N candidate detection RoIs, the size of the *torch.tensor* is Nx1.
+- **boxes_sorted**: *bool, default=False*\
+ Specifies whether *boxes* and *scores* are sorted based on *scores* in descending order.
+- **max_dt_boxes**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+- **img_res**: *[int, int], default=None*\
+ Specifies the image resolution expressed as [width, height].
+- **threshold**: *float, default=0.1*\
+ Specifies the score threshold that will determine which RoIs will be kept after seq2seq-nms rescoring.
+
+#### `Seq2SeqNMSLearner.run_nms`
+```python
+Seq2SeqNMSLearner.run_nms(self, boxes, scores, img, threshold, boxes_sorted, top_k)
+```
+
+Performs non-maximum suppression, using seq2seq-nms.
+It incorporates the full pipeline needed for inference, including the FMoD's edge/interest-point map computation step.
+
+Parameters:
+
+- **boxes**: *numpy.ndarray, default=None*\
+  Image coordinates of candidate detection RoIs, expressed as the coordinates of their top-left and bottom-right corners (x_min, y_min, x_max, y_max).
+ For N candidate detection RoIs, the size of the array is Nx4.
+- **scores**: *numpy.ndarray, default=None*\
+ Specifies the scores of the candidate detection RoIs, assigned previously by a detector.
+ For N candidate detection RoIs, the size of the array is Nx1.
+- **boxes_sorted**: *bool, default=False*\
+ Specifies whether *boxes* and *scores* are sorted based on *scores* in descending order.
+- **top_k**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+- **img**: *object*\
+ Object of type engine.data.Image.
+- **threshold**: *float, default=0.1*\
+ Specifies the score threshold that will determine which RoIs will be kept after seq2seq-nms rescoring.
+
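+A minimal sketch of a direct call, assuming a learner with a loaded pretrained model (e.g., created as in the examples below) and detections already produced by a detector:
+```python
+import numpy as np
+from opendr.engine.data import Image
+
+img = Image.open('frame_0000.jpg')  # any test image
+# Hypothetical candidate RoIs (x_min, y_min, x_max, y_max) and their detector scores
+boxes = np.array([[55., 60., 185., 320.], [60., 65., 190., 325.]])
+scores = np.array([[0.9], [0.75]])
+results = seq2SeqNMSLearner.run_nms(boxes=boxes, scores=scores, img=img,
+                                    threshold=0.1, boxes_sorted=False, top_k=400)
+```
+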
+#### `Seq2SeqNMSLearner.save`
+```python
+Seq2SeqNMSLearner.save(self, path, verbose, optimizer, scheduler, current_epoch, max_dt_boxes)
+```
+
+Saves a model in OpenDR format at the specified path.
+
+Parameters:
+
+- **path**: *str*\
+ Specifies the folder where the model will be saved.
+- **verbose**: *bool, default=False*\
+  If True, enables maximum verbosity.
+- **optimizer**: *torch.optim.Optimizer, default=None*\
+  Specifies the optimizer used for training.
+- **scheduler**: *torch.optim.lr_scheduler, default=None*\
+  Specifies the learning rate scheduler used for training.
+- **current_epoch**: *int, default=None*\
+  Specifies the number of epochs for which the model has been trained.
+- **max_dt_boxes**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+
+
+
+#### `Seq2SeqNMSLearner.load`
+```python
+Seq2SeqNMSLearner.load(self, path, verbose)
+```
+
+Loads a model which was previously saved in OpenDR format at the specified path.
+
+Parameters:
+
+- **path**: *str*\
+ Specifies the folder where the model will be loaded from.
+- **verbose**: *bool, default=False*\
+ If True, enables maximum verbosity.
+
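+For example, a trained model can be stored and restored as follows (a sketch; the folder name is arbitrary and the learner is assumed to have been trained or loaded beforehand):
+```python
+save_path = './seq2seq_nms_saved_model'  # arbitrary output folder
+seq2SeqNMSLearner.save(path=save_path, current_epoch=7, max_dt_boxes=500, verbose=True)
+seq2SeqNMSLearner.load(save_path, verbose=True)
+```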
+
+#### `Seq2SeqNMSLearner.download`
+```python
+Seq2SeqNMSLearner.download(self, path, model_name, verbose, url)
+```
+
+Downloads data needed for the various functions of the learner, e.g., pretrained seq2seq-nms models as well as test data.
+
+Parameters:
+
+- **path**: *str, default=None*\
+ Specifies the folder where data will be downloaded.
+ If *None*, the *self.temp_path* directory is used instead.
+- **model_name**: *{'seq2seq_medium_pets_jpd_fmod_3', 'seq2seq_medium_pets_ssd_fmod_3', 'seq2seq_medium_coco_frcn_fmod_3'}, default='seq2seq_medium_pets_jpd_fmod_3'*\
+  Specifies the name of the pretrained seq2seq-nms model to be downloaded.
+- **verbose**: *bool, default=True*\
+ If True, enables maximum verbosity.
+- **url**: *str, default=OpenDR FTP URL*\
+ URL of the FTP server.
+
+#### Examples
+
+* **Training example.**
+  For training seq2seq-nms, the PETS and COCO datasets are supported; they are handled internally as `Dataset_NMS` types.
+
+ ```python
+ from opendr.perception.object_detection_2d.nms import Seq2SeqNMSLearner
+ import os
+ OPENDR_HOME = os.environ['OPENDR_HOME']
+
+ temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/tmp'
+ datasets_folder = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/datasets'
+
+ seq2SeqNMSLearner = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering=0.8,
+ app_feats='fmod', checkpoint_after_iter=1,
+ temp_path=temp_path, epochs=8)
+ seq2SeqNMSLearner.fit(dataset='PETS', use_ssd=False, datasets_folder=datasets_folder,
+ logging_path=os.path.join(temp_path, 'logs'), silent=False,
+ verbose=True, nms_gt_iou=0.50, max_dt_boxes=500)
+ ```
+
+* **Inference and result drawing example on a test .jpg image using OpenCV.**
+
+ ```python
+ from opendr.perception.object_detection_2d.nms import Seq2SeqNMSLearner
+ from opendr.engine.data import Image
+ from opendr.perception.object_detection_2d import SingleShotDetectorLearner
+ from opendr.perception.object_detection_2d import draw_bounding_boxes
+ import os
+ OPENDR_HOME = os.environ['OPENDR_HOME']
+ temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/tmp'
+
+ seq2SeqNMSLearner = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering = 0.8,
+ app_feats='fmod', device='cpu',
+ temp_path=temp_path)
+ seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=temp_path)
+  seq2SeqNMSLearner.load(os.path.join(temp_path, 'seq2seq_pets_jpd_fmod'), verbose=True)
+ ssd = SingleShotDetectorLearner(device='cuda')
+ ssd.download(".", mode="pretrained")
+ ssd.load("./ssd_default_person", verbose=True)
+ img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg')
+ if not isinstance(img, Image):
+ img = Image(img)
+ boxes = ssd.infer(img, threshold=0.25, custom_nms=seq2SeqNMSLearner)
+ draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True)
+ ```
+
+* **Evaluation of pretrained model on PETS dataset.**
+
+ ```python
+ from opendr.perception.object_detection_2d import Seq2SeqNMSLearner
+ import os
+ OPENDR_HOME = os.environ['OPENDR_HOME']
+
+ datasets_folder = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/datasets'
+ temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/tmp'
+
+ seq2SeqNMSLearner = Seq2SeqNMSLearner(iou_filtering=0.8, app_feats='fmod',
+ temp_path=temp_path, device='cuda')
+ seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=temp_path)
+  seq2SeqNMSLearner.load(os.path.join(temp_path, 'seq2seq_pets_jpd_fmod'), verbose=True)
+ seq2SeqNMSLearner.eval(dataset='PETS', split='test', max_dt_boxes=800,
+ datasets_folder=datasets_folder, use_ssd=False, threshold=0.0)
+ ```
+
+#### Performance Evaluation
+
+TABLE-1: Average Precision (AP) achieved by pretrained models on the person-detection task on the validation and test sets. The maximum number of RoIs employed for the performance evaluation was set to 800.
+| **Pretrained Model** | **Dataset** | **Detector** | **Type of Appearance-based Features** | **Pre-processing IoU Threshold** | **AP@0.5 on validation set** | **AP@0.5 on test set** |
+|:----------------------:|:-----------:|:------------:|:-------------------------------------:|:--------------------------------:|:----------------------------:|:----------------------:|
+| seq2seq_pets_jpd_fmod | PETS | JPD | FMoD | 0.8 | 80.2% | 84.3% |
+| seq2seq_pets_ssd_fmod | PETS | SSD | FMoD | 0.8 | 77.4% | 79.1% |
+| seq2seq_coco_frcn_fmod | COCO | FRCN | FMoD | - | 68.1% \* | 67.5% \*\* |
+| seq2seq_coco_ssd_fmod  | COCO        | SSD          | FMoD                                  | -                                | 41.8% \*                     | 42.4% \*\*             |
+
+\* The minival set was used as validation set.
+\*\* The minitest set was used as test set.
+
+
+#### References
+[1] Neural Attention-driven Non-Maximum Suppression for Person Detection, [TechRxiv](https://www.techrxiv.org/articles/preprint/Neural_Attention-driven_Non-Maximum_Suppression_for_Person_Detection/16940275).
diff --git a/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py b/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py
index 6f643e61cf..f0dd7ca1d3 100755
--- a/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py
+++ b/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py
@@ -22,11 +22,12 @@
from opendr.engine.data import Image
from opendr.perception.object_detection_2d import SingleShotDetectorLearner
from opendr.perception.object_detection_2d import draw_bounding_boxes
+from opendr.perception.object_detection_2d import Seq2SeqNMSLearner, SoftNMS, FastNMS, ClusterNMS
class ObjectDetectionSSDNode:
def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/opendr/image_boxes_annotated",
- detections_topic="/opendr/objects", device="cuda", backbone="vgg16_atrous"):
+ detections_topic="/opendr/objects", device="cuda", backbone="vgg16_atrous", nms_type='default'):
"""
Creates a ROS Node for face detection
:param input_image_topic: Topic from which we are reading the input image
@@ -41,6 +42,8 @@ def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/
:type device: str
:param backbone: backbone network
:type backbone: str
+        :param nms_type: type of NMS method
+        :type nms_type: str
"""
# Initialize the face detector
@@ -48,6 +51,20 @@ def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/
self.object_detector.download(path=".", verbose=True)
self.object_detector.load("ssd_default_person")
self.class_names = self.object_detector.classes
+ self.custom_nms = None
+
+ # Initialize Seq2Seq-NMS if selected
+ if nms_type == 'seq2seq-nms':
+ self.custom_nms = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering=0.8,
+ app_feats='fmod', device=self.device)
+ self.custom_nms.download(model_name='seq2seq_pets_jpd', path='.')
+ self.custom_nms.load('./seq2seq_pets_jpd/', verbose=True)
+ elif nms_type == 'soft-nms':
+ self.custom_nms = SoftNMS(nms_thres=0.45, device=self.device)
+ elif nms_type == 'fast-nms':
+ self.custom_nms = FastNMS(nms_thres=0.45, device=self.device)
+ elif nms_type == 'cluster-nms':
+ self.custom_nms = ClusterNMS(nms_thres=0.45, device=self.device)
# Initialize OpenDR ROSBridge object
self.bridge = ROSBridge()
@@ -76,7 +93,7 @@ def callback(self, data):
image = self.bridge.from_ros_image(data, encoding='bgr8')
# Run pose estimation
- boxes = self.object_detector.infer(image, threshold=0.45, keep_size=False)
+ boxes = self.object_detector.infer(image, threshold=0.45, keep_size=False, custom_nms=self.custom_nms)
# Get an OpenCV image back
image = np.float32(image.opencv())
diff --git a/projects/perception/object_detection_2d/nms/cluster_nms/README.md b/projects/perception/object_detection_2d/nms/cluster_nms/README.md
new file mode 100644
index 0000000000..0ff5c5fd9c
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/cluster_nms/README.md
@@ -0,0 +1,7 @@
+# Cluster-NMS Demos
+
+This folder contains minimal code usage examples that showcase the basic functionality of the Cluster-NMS implementation
+provided by OpenDR. Specifically, the following examples are provided:
+
+1. inference_demo.py: Perform inference on a single image. The target device can be selected by changing the `device` argument of the SSD detector and the Cluster-NMS constructor inside the script.
+
diff --git a/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py
new file mode 100644
index 0000000000..e653f5820c
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py
@@ -0,0 +1,31 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from opendr.perception.object_detection_2d import ClusterNMS
+from opendr.engine.data import Image
+from opendr.perception.object_detection_2d import SingleShotDetectorLearner
+from opendr.perception.object_detection_2d import draw_bounding_boxes
+import os
+OPENDR_HOME = os.environ['OPENDR_HOME']
+
+ssd = SingleShotDetectorLearner(device='cuda')
+ssd.download(".", mode="pretrained")
+ssd.load("./ssd_default_person", verbose=True)
+img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg')
+if not isinstance(img, Image):
+ img = Image(img)
+cluster_nms = ClusterNMS(device='cuda', nms_type='default', cross_class=True)
+boxes = ssd.infer(img, threshold=0.3, custom_nms=cluster_nms)
+draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True)
diff --git a/projects/perception/object_detection_2d/nms/fast_nms/README.md b/projects/perception/object_detection_2d/nms/fast_nms/README.md
new file mode 100644
index 0000000000..5a1ccb3fd6
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/fast_nms/README.md
@@ -0,0 +1,5 @@
+# Fast-NMS Demos
+
+This folder contains minimal code usage examples that showcase the basic functionality of the Fast-NMS implementation
+provided by OpenDR. Specifically, the following examples are provided:
+1. inference_demo.py: Perform inference on a single image. The target device can be selected by changing the `device` argument of the SSD detector and the Fast-NMS constructor inside the script.
diff --git a/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py
new file mode 100644
index 0000000000..5e0a5b48fa
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py
@@ -0,0 +1,31 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from opendr.perception.object_detection_2d import FastNMS
+from opendr.engine.data import Image
+from opendr.perception.object_detection_2d import SingleShotDetectorLearner
+from opendr.perception.object_detection_2d import draw_bounding_boxes
+import os
+OPENDR_HOME = os.environ['OPENDR_HOME']
+
+ssd = SingleShotDetectorLearner(device='cuda')
+ssd.download(".", mode="pretrained")
+ssd.load("./ssd_default_person", verbose=True)
+img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg')
+if not isinstance(img, Image):
+ img = Image(img)
+fast_nms = FastNMS(device='cpu', cross_class=True)
+boxes = ssd.infer(img, threshold=0.3, custom_nms=fast_nms)
+draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True)
diff --git a/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg b/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg
new file mode 100644
index 0000000000..5efb4d9298
Binary files /dev/null and b/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg differ
diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md b/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md
new file mode 100644
index 0000000000..c831924349
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md
@@ -0,0 +1,17 @@
+# Seq2Seq-NMS Demos
+
+This folder contains minimal code usage examples that showcase the basic functionality of the Seq2Seq-NMS implementation
+provided by OpenDR. Specifically the following examples are provided:
+
+1. inference_demo.py: Perform inference on a single image. Setting `--device cpu` performs inference on CPU.
+
+2. eval_demo.py: Perform evaluation of a pretrained Seq2Seq-NMS model on one of the supported datasets (PETS, COCO or the
+   small TEST_MODULE dataset). The user must set the dataset type using the `--dataset` argument and provide the dataset
+   root path with the `--data_root` argument. Setting `--device cpu` performs evaluation on CPU.
+
+3. train_demo.py: Fit learner to dataset. The PETS and COCO datasets are supported and are handled internally as
+   `Dataset_NMS` types. The user must set the dataset type using the `--dataset` argument and provide the dataset root
+   path with the `--data-root` argument. Setting `--device cpu` performs training on CPU. Additional command line
+   arguments can be set to change various training hyperparameters, and running `python3 train_demo.py -h` prints
+   information about them on stdout.
+
diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py
new file mode 100644
index 0000000000..01437e578b
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py
@@ -0,0 +1,49 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from opendr.perception.object_detection_2d import Seq2SeqNMSLearner
+import os
+import argparse
+OPENDR_HOME = os.environ['OPENDR_HOME']
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod",
+ choices=["fmod", "zeros"])
+parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP",
+ choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"])
+parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0)
+parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"])
+parser.add_argument("--pretrained_model", help="Name of pretrained model", type=str, default='seq2seq_pets_jpd_fmod',
+ choices=['seq2seq_pets_jpd'])
+parser.add_argument("--split", help="The split of the corresponding dataset", type=str, default='test',
+ choices=["test", "val", "train"])
+parser.add_argument("--max_dt_boxes", help="Maximum number of input RoIs fed to Seq2Seq-NMS", type=int, default=600)
+parser.add_argument("--dataset", help="Dataset to train on", type=str, default="PETS", choices=["PETS", "COCO",
+ "TEST_MODULE"])
+parser.add_argument("--data_root", help="Dataset root folder", type=str,
+ default=os.path.join(OPENDR_HOME,
+ 'projects/perception/object_detection_2d/nms/seq2seq-nms/datasets'))
+parser.add_argument("--use_ssd", help="Train using SSD as detector", type=bool, default=False)
+parser.add_argument("--post_thres", help="Confidence threshold, used for RoI selection after seq2seq-nms rescoring",
+ type=float, default=0.0)
+
+args = parser.parse_args()
+tmp_path = os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp')
+seq2SeqNMSLearner = Seq2SeqNMSLearner(device=args.device, app_feats=args.app_feats, fmod_map_type=args.fmod_type,
+ iou_filtering=args.iou_filtering,
+ temp_path=tmp_path)
+seq2SeqNMSLearner.download(model_name=args.pretrained_model, path=tmp_path)
+seq2SeqNMSLearner.load(os.path.join(tmp_path, args.pretrained_model), verbose=True)
+seq2SeqNMSLearner.eval(dataset=args.dataset, use_ssd=args.use_ssd, split=args.split, max_dt_boxes=args.max_dt_boxes,
+ datasets_folder=args.data_root, threshold=args.post_thres)
diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py
new file mode 100755
index 0000000000..c260546d13
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py
@@ -0,0 +1,48 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from opendr.perception.object_detection_2d import Seq2SeqNMSLearner
+from opendr.perception.object_detection_2d import SingleShotDetectorLearner
+from opendr.perception.object_detection_2d import draw_bounding_boxes
+from opendr.engine.data import Image
+import os
+import argparse
+OPENDR_HOME = os.environ['OPENDR_HOME']
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod",
+ choices=["fmod", "zeros"])
+parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP",
+ choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"])
+parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0)
+parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"])
+parser.add_argument("--pretrained_model", help="Name of pretrained model", type=str, default='seq2seq_pets_jpd_fmod',
+ choices=['seq2seq_pets_jpd'])
+
+args = parser.parse_args()
+tmp_path = os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp')
+seq2SeqNMSLearner = Seq2SeqNMSLearner(device=args.device, app_feats=args.app_feats, fmod_map_type=args.fmod_type,
+ iou_filtering=args.iou_filtering,
+ temp_path=tmp_path)
+seq2SeqNMSLearner.download(model_name=args.pretrained_model, path=tmp_path)
+seq2SeqNMSLearner.load(os.path.join(tmp_path, args.pretrained_model), verbose=True)
+
+ssd = SingleShotDetectorLearner(device=args.device)
+ssd.download(".", mode="pretrained")
+ssd.load("./ssd_default_person", verbose=True)
+img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg')
+if not isinstance(img, Image):
+ img = Image(img)
+boxes = ssd.infer(img, threshold=0.3, custom_nms=seq2SeqNMSLearner)
+draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True)
diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py
new file mode 100644
index 0000000000..4facf2696b
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py
@@ -0,0 +1,50 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from opendr.perception.object_detection_2d import Seq2SeqNMSLearner
+import os
+import argparse
+OPENDR_HOME = os.environ['OPENDR_HOME']
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod",
+ choices=["fmod", "zeros"])
+parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP",
+ choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"])
+parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0)
+parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"])
+parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=1e-4)
+parser.add_argument("--n_epochs", help="Number of total epochs", type=int, default=10)
+parser.add_argument("--tmp_path", help="Temporary path where weights will be saved", type=str,
+ default=os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp'))
+parser.add_argument("--checkpoint_freq", help="Frequency in-between checkpoint saving", type=int, default=1)
+parser.add_argument("--resume-from", help="Epoch to load checkpoint file and resume training from", type=int, default=0)
+parser.add_argument("--dataset", help="Dataset to train on", type=str, default="PETS", choices=["PETS", "COCO",
+ "TEST_MODULE"])
+parser.add_argument("--use_ssd", help="Train using SSD as default detector", type=bool, default=False)
+parser.add_argument("--max_dt_boxes", help="Maximum number of input RoIs fed to Seq2Seq-NMS", type=int, default=500)
+parser.add_argument("--data-root", help="Dataset root folder", type=str,
+ default=os.path.join(OPENDR_HOME,
+ 'projects/perception/object_detection_2d/nms/seq2seq-nms/datasets'))
+args = parser.parse_args()
+seq2SeqNMSLearner = Seq2SeqNMSLearner(epochs=args.n_epochs, lr=args.lr, device=args.device, app_feats=args.app_feats,
+ fmod_map_type=args.fmod_type, iou_filtering=args.iou_filtering,
+ temp_path=args.tmp_path, checkpoint_after_iter=args.checkpoint_freq,
+ checkpoint_load_iter=args.resume_from)
+seq2SeqNMSLearner.fit(dataset=args.dataset, use_ssd=args.use_ssd,
+ datasets_folder=args.data_root, silent=False, verbose=True,
+ max_dt_boxes=args.max_dt_boxes)
+seq2SeqNMSLearner.save(path=os.path.join(args.tmp_path, 'saved_model'), current_epoch=args.n_epochs-1,
+ max_dt_boxes=args.max_dt_boxes)
diff --git a/projects/perception/object_detection_2d/nms/soft_nms/README.md b/projects/perception/object_detection_2d/nms/soft_nms/README.md
new file mode 100644
index 0000000000..a4c778f35c
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/soft_nms/README.md
@@ -0,0 +1,5 @@
+# Soft-NMS Demos
+
+This folder contains minimal code usage examples that showcase the basic functionality of the Soft-NMS implementation
+provided by OpenDR. Specifically, the following examples are provided:
+1. inference_demo.py: Perform inference on a single image. The target device can be selected by changing the `device` argument of the SSD detector and the Soft-NMS constructor inside the script.
diff --git a/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py
new file mode 100644
index 0000000000..c05ff4c7c2
--- /dev/null
+++ b/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py
@@ -0,0 +1,31 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from opendr.perception.object_detection_2d import SoftNMS
+from opendr.engine.data import Image
+from opendr.perception.object_detection_2d import SingleShotDetectorLearner
+from opendr.perception.object_detection_2d import draw_bounding_boxes
+import os
+OPENDR_HOME = os.environ['OPENDR_HOME']
+
+ssd = SingleShotDetectorLearner(device='cuda')
+ssd.download(".", mode="pretrained")
+ssd.load("./ssd_default_person", verbose=True)
+img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg')
+if not isinstance(img, Image):
+ img = Image(img)
+soft_nms = SoftNMS(device='cpu', nms_type='gaussian')
+boxes = ssd.infer(img, threshold=0.3, custom_nms=soft_nms)
+draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True)
diff --git a/src/opendr/perception/object_detection_2d/__init__.py b/src/opendr/perception/object_detection_2d/__init__.py
index 61428cb1bd..9fac6ba424 100644
--- a/src/opendr/perception/object_detection_2d/__init__.py
+++ b/src/opendr/perception/object_detection_2d/__init__.py
@@ -11,6 +11,11 @@
from opendr.perception.object_detection_2d.utils.vis_utils import draw_bounding_boxes
+from opendr.perception.object_detection_2d.nms.cluster_nms.cluster_nms import ClusterNMS
+from opendr.perception.object_detection_2d.nms.fast_nms.fast_nms import FastNMS
+from opendr.perception.object_detection_2d.nms.soft_nms.soft_nms import SoftNMS
+from opendr.perception.object_detection_2d.nms.seq2seq_nms.seq2seq_nms_learner import Seq2SeqNMSLearner
+
__all__ = ['CenterNetDetectorLearner', 'DetrLearner', 'GemLearner', 'RetinaFaceLearner',
'SingleShotDetectorLearner', 'YOLOv3DetectorLearner', 'WiderPersonDataset', 'WiderFaceDataset',
- 'transforms', 'draw_bounding_boxes']
+ 'transforms', 'draw_bounding_boxes', 'ClusterNMS', 'FastNMS', 'SoftNMS', 'Seq2SeqNMSLearner']
diff --git a/src/opendr/perception/object_detection_2d/datasets/transforms.py b/src/opendr/perception/object_detection_2d/datasets/transforms.py
index 5aa6f1e327..08c0f34ecf 100644
--- a/src/opendr/perception/object_detection_2d/datasets/transforms.py
+++ b/src/opendr/perception/object_detection_2d/datasets/transforms.py
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+
import cv2
import numpy as np
import mxnet as mx
@@ -141,3 +142,20 @@ def transform_test(imgs, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)):
if len(tensors) == 1:
return tensors[0], origs[0]
return tensors, origs
+
+
+def pad_test(img, min_size=512):
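+    # Symmetrically zero-pads the smaller spatial dimension (height or width) of a 4-D NCHW
+    # image tensor towards min_size, splitting the padding evenly between the two sides.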
+ h_pad_size = 0
+ min_dim = 2 + np.argmin([img.shape[2:4]])
+ img_padded = img
+ if img.shape[min_dim] < min_size:
+ h_pad_size = int((min_size - img.shape[min_dim]) / 2.0)
+ if min_dim == 2:
+ img_padded = mx.nd.pad(img, mode="constant", constant_value=0,
+ pad_width=(0, 0, 0, 0, h_pad_size,
+ h_pad_size, 0, 0))
+ else:
+ img_padded = mx.nd.pad(img, mode="constant", constant_value=0,
+ pad_width=(0, 0, 0, 0, 0, 0,
+ h_pad_size, h_pad_size))
+ return img_padded
diff --git a/src/opendr/perception/object_detection_2d/dependencies.ini b/src/opendr/perception/object_detection_2d/dependencies.ini
index c6beccc16e..c181807f92 100644
--- a/src/opendr/perception/object_detection_2d/dependencies.ini
+++ b/src/opendr/perception/object_detection_2d/dependencies.ini
@@ -7,6 +7,7 @@ python=mxnet==1.8.0
tqdm
pycocotools>=2.0.4
easydict
+ gdown
numba==0.53.0
linux=libopenblas-dev
diff --git a/src/opendr/perception/object_detection_2d/nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md b/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md
new file mode 100644
index 0000000000..410c887028
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md
@@ -0,0 +1,28 @@
+Cluster-NMS
+======
+
+This folder contains an implementation of Cluster-NMS [[1]](#cluster_nms-1).
+
+Sources
+------
+Large parts of the code are taken from [here](https://github.com/Zzh-tju/CIoU) with modifications to make it compatible with OpenDR specifications. The original code is licensed under the GNU General Public License v3.0:
+
+```
+This folder contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU).
+Copyright (c) 2020 Zheng, Zhaohui.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+```
+
+[1] Enhancing Geometric Factors in Model Learning and Inference for Object Detection and Instance Segmentation,
+[ArXiv](https://arxiv.org/abs/2005.03572).
diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/cluster_nms/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py b/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py
new file mode 100644
index 0000000000..ee34323346
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py
@@ -0,0 +1,510 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU).
+# Copyright (c) 2020 Zheng, Zhaohui.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from opendr.perception.object_detection_2d.nms.utils import NMSCustom
+from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard, diou, distance
+from opendr.engine.target import BoundingBox, BoundingBoxList
+import numpy as np
+import torch
+
+
+class ClusterNMS(NMSCustom):
+ def __init__(self, nms_type='default', cross_class=True, device='cuda', iou_thres=0.45, top_k=400, post_k=100):
+ self.device = device
+ self.nms_types = ['default', 'diou', 'spm', 'spm_dist', 'spm_dist_weighted']
+ if nms_type not in self.nms_types:
+ raise ValueError('Type: ' + nms_type + ' of Cluster-NMS is not supported.')
+ else:
+ self.nms_type = nms_type
+ self.iou_thres = iou_thres
+ self.top_k = top_k
+ self.post_k = post_k
+ self.cross_class = cross_class
+
+ def set_iou_thres(self, iou_thres=0.45):
+ self.iou_thres = iou_thres
+
+    def set_top_k(self, top_k=400):
+        self.top_k = top_k
+
+    def set_post_k(self, post_k=100):
+        self.post_k = post_k
+
+ def set_type(self, nms_type=None):
+ if nms_type not in self.nms_types:
+ raise ValueError('Type: ' + nms_type + ' of Cluster-NMS is not supported.')
+ else:
+ self.nms_type = nms_type
+
+ def set_cross_class(self, cross_class=True):
+ self.cross_class = cross_class
+
+ def run_nms(self, boxes=None, scores=None, img=None, threshold=0.2):
+
+ if isinstance(boxes, np.ndarray):
+ boxes = torch.tensor(boxes, device=self.device)
+ elif torch.is_tensor(boxes):
+ if self.device == 'cpu':
+ boxes = boxes.cpu()
+ elif self.device == 'cuda':
+ boxes = boxes.cuda()
+
+ if isinstance(scores, np.ndarray):
+ scores = torch.tensor(scores, device=self.device)
+ elif torch.is_tensor(scores):
+ if self.device == 'cpu':
+ scores = scores.cpu()
+ elif self.device == 'cuda':
+ scores = scores.cuda()
+
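+        # Transpose scores to shape (num_classes, num_boxes), as expected by the NMS routines below.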
+ scores = torch.transpose(scores, dim0=1, dim1=0)
+
+ if self.nms_type == 'default':
+ if self.cross_class:
+ [boxes, classes, scores] = cc_cluster_nms_default(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+ else:
+ [boxes, classes, scores] = cluster_nms_default(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+ elif self.nms_type == 'diou':
+ if self.cross_class:
+ [boxes, classes, scores] = cc_cluster_diounms(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+ else:
+ [boxes, classes, scores] = cluster_diounms(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+ elif self.nms_type == 'spm':
+ if self.cross_class:
+ [boxes, classes, scores] = cc_cluster_SPM_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+ else:
+ [boxes, classes, scores] = cluster_SPM_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+ elif self.nms_type == 'spm_dist':
+ if self.cross_class:
+ [boxes, classes, scores] = cc_cluster_SPM_dist_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+ else:
+ [boxes, classes, scores] = cluster_SPM_dist_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+
+ elif self.nms_type == 'spm_dist_weighted':
+ if self.cross_class:
+ [boxes, classes, scores] = cc_cluster_SPM_dist_weighted_nms(boxes=boxes, scores=scores,
+ iou_thres=self.iou_thres,
+ top_k=self.top_k,
+ post_k=self.post_k)
+ else:
+ [boxes, classes, scores] = cluster_SPM_dist_weighted_nms(boxes=boxes, scores=scores,
+ iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+
+ keep_ids = torch.where(scores > threshold)
+ scores = scores[keep_ids].cpu().numpy()
+ classes = classes[keep_ids].cpu().numpy()
+ boxes = boxes[keep_ids].cpu().numpy()
+ bounding_boxes = BoundingBoxList([])
+ for idx, box in enumerate(boxes):
+ bbox = BoundingBox(left=box[0], top=box[1],
+ width=box[2] - box[0],
+ height=box[3] - box[1],
+ name=classes[idx],
+ score=scores[idx])
+ bounding_boxes.data.append(bbox)
+
+ return bounding_boxes, [boxes, classes, scores]
+
+
+def cc_cluster_nms_default(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+ # Collapse all the classes into 1
+
+ scores, classes = scores.max(dim=0)
+ _, idx = scores.sort(0, descending=True)
+ idx = idx[:top_k]
+ boxes = boxes[idx]
+ scores = scores[idx]
+ classes = classes[idx]
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ B = iou
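+    # Cluster-NMS iteration: for every box, compute its maximum IoU with higher-scoring boxes,
+    # using only the rows of boxes that are currently retained (max IoU <= iou_thres),
+    # and repeat until the suppression matrix B stops changing.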
+ for i in range(200):
+ A = B
+ maxA, _ = torch.max(A, dim=0)
+ E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
+
+ idx_out = torch.where(maxA > iou_thres)
+ scores[idx_out] = 0
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
+
+
+def cluster_nms_default(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+ scores, idx = scores.sort(1, descending=True)
+ idx = idx[:top_k]
+ scores = scores[:top_k]
+ boxes = boxes[idx, :]
+
+ num_classes, num_dets = scores.shape
+ boxes = boxes.view(num_classes, num_dets, 4)
+ _, classes = scores.max(dim=0)
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ maxA = None
+ for i in range(200):
+ A = B
+ maxA, _ = A.max(dim=1)
+ E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
+ keep = (scores > 0.00)
+ discard = (maxA > iou_thres)
+ scores[discard] = 0
+ # Assign each kept detection to its corresponding class
+ boxes = boxes[keep]
+ scores = scores[keep]
+
+    # Only keep the post_k highest-scoring detections across all classes
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
+
+
+def cc_cluster_diounms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+ scores, classes = scores.max(dim=0)
+ _, idx = scores.sort(0, descending=True)
+ idx = idx[:top_k]
+ boxes = boxes[idx]
+ scores = scores[idx]
+ classes = classes[idx]
+ iou = diou(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ for i in range(200):
+ A = B
+ maxA, _ = torch.max(A, dim=0)
+ E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
+
+ idx_out = torch.where(maxA > iou_thres)
+ scores[idx_out] = 0
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
+
+
+def cluster_diounms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+ scores, idx = scores.sort(1, descending=True)
+ idx = idx[:top_k]
+ scores = scores[:top_k]
+ boxes = boxes[idx, :]
+
+ num_classes, num_dets = scores.shape
+ boxes = boxes.view(num_classes, num_dets, 4)
+ _, classes = scores.max(dim=0)
+
+ iou = diou(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ maxA = None
+ for i in range(200):
+ A = B
+ maxA, _ = A.max(dim=1)
+ E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
+ keep = (scores > 0.00)
+ discard = (maxA > iou_thres)
+ scores[discard] = 0
+ # Assign each kept detection to its corresponding class
+ boxes = boxes[keep]
+ scores = scores[keep]
+
+    # Only keep the post_k highest-scoring detections across all classes
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+
+ return boxes, classes, scores
+
+
+def cc_cluster_SPM_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+ scores, classes = scores.max(dim=0)
+ _, idx = scores.sort(0, descending=True)
+ idx = idx[:top_k]
+ boxes = boxes[idx]
+ scores = scores[idx]
+ classes = classes[idx]
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ for i in range(200):
+ A = B
+ maxA, _ = torch.max(A, dim=0)
+ E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
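+    # SPM (score penalty mechanism): instead of hard suppression, decay each box's score by a
+    # Gaussian penalty of its IoU overlaps with retained higher-scoring boxes.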
+ scores = torch.prod(torch.exp(-B ** 2 / 0.2), 0) * scores
+
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
+
+
+def cluster_SPM_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+ scores, idx = scores.sort(1, descending=True)
+ idx = idx[:top_k]
+ scores = scores[:top_k]
+ boxes = boxes[idx, :]
+
+ num_classes, num_dets = scores.shape
+ boxes = boxes.view(num_classes, num_dets, 4)
+ _, classes = scores.max(dim=0)
+
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ for i in range(200):
+ A = B
+ maxA, _ = A.max(dim=1)
+ E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
+ keep = (scores > 0.00)
+ scores = torch.prod(torch.exp(-B ** 2 / 0.2), 1) * scores
+ # Assign each kept detection to its corresponding class
+ boxes = boxes[keep]
+ scores = scores[keep]
+
+    # Only keep the post_k highest-scoring detections across all classes
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
+
+
+def cc_cluster_SPM_dist_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+ scores, classes = scores.max(dim=0)
+ _, idx = scores.sort(0, descending=True)
+ idx = idx[:top_k]
+ boxes = boxes[idx]
+ scores = scores[idx]
+ classes = classes[idx]
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ for i in range(200):
+ A = B
+ maxA, _ = torch.max(A, dim=0)
+ E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
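+    # Distance-aware SPM: add a normalized centre-distance term to the Gaussian overlap penalty
+    # for overlapping pairs, clipping the combined penalty at 1 via the element-wise minimum with X.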
+ D = distance(boxes, boxes)
+ X = (B >= 0).float()
+ scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 0) * scores
+
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
+
+
+def cluster_SPM_dist_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+    scores, idx = scores.sort(1, descending=True)
+    idx = idx[:, :top_k]
+    scores = scores[:, :top_k]
+    boxes = boxes[idx, :]
+
+ num_classes, num_dets = scores.shape
+ boxes = boxes.view(num_classes, num_dets, 4)
+ _, classes = scores.max(dim=0)
+
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ for i in range(200):
+ A = B
+ maxA, _ = A.max(dim=1)
+ E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
+ D = distance(boxes, boxes)
+ X = (B >= 0).float()
+ keep = (scores > 0.00)
+ scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 1) * scores
+
+ # Assign each kept detection to its corresponding class
+ boxes = boxes[keep]
+ scores = scores[keep]
+
+ # Only keep the top cfg.max_num_detections highest scores across all classes
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+
+ return boxes, classes, scores
+
+
+def cc_cluster_SPM_dist_weighted_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+ scores, classes = scores.max(dim=0)
+ _, idx = scores.sort(0, descending=True)
+ idx = idx[:top_k]
+ boxes = boxes[idx]
+ scores = scores[idx]
+ classes = classes[idx]
+ n = len(scores)
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ for i in range(200):
+ A = B
+ maxA, _ = torch.max(A, dim=0)
+ E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
+ D = distance(boxes, boxes)
+ X = (B >= 0).float()
+ scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 0) * scores
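+    # Weighted box refinement: each surviving box becomes the score-weighted average of itself and
+    # all boxes overlapping it by more than 0.8 IoU.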
+ eye = torch.eye(n)
+ if boxes.device.type == 'cuda':
+ eye = eye.cuda()
+ weights = (B * (B > 0.8).float() + eye) * (scores.reshape((1, n)))
+ xx1 = boxes[:, 0].expand(n, n)
+ yy1 = boxes[:, 1].expand(n, n)
+ xx2 = boxes[:, 2].expand(n, n)
+ yy2 = boxes[:, 3].expand(n, n)
+
+ weightsum = weights.sum(dim=1)
+ xx1 = (xx1 * weights).sum(dim=1) / (weightsum)
+ yy1 = (yy1 * weights).sum(dim=1) / (weightsum)
+ xx2 = (xx2 * weights).sum(dim=1) / (weightsum)
+ yy2 = (yy2 * weights).sum(dim=1) / (weightsum)
+ boxes = torch.stack([xx1, yy1, xx2, yy2], 1)
+
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
+
+
+def cluster_SPM_dist_weighted_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+
+    scores, idx = scores.sort(1, descending=True)
+    idx = idx[:, :top_k]
+    scores = scores[:, :top_k]
+    boxes = boxes[idx, :]
+
+ num_classes, num_dets = scores.shape
+ boxes = boxes.view(num_classes, num_dets, 4)
+ _, classes = scores.max(dim=0)
+
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ B = iou
+ A = None
+ for i in range(200):
+ A = B
+ maxA, _ = A.max(dim=1)
+ E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A)
+ B = iou.mul(E)
+ if A.equal(B):
+ break
+ D = distance(boxes, boxes)
+ X = (B >= 0).float()
+ keep = (scores > 0.0)
+
+ scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 1) * scores
+
+ E = keep.float().unsqueeze(2).expand_as(A)
+ B = iou.mul(E)
+ _, n = scores.size()
+ eye = torch.eye(n).expand(num_classes, n, n)
+ if boxes.device.type == 'cuda':
+ eye = eye.cuda()
+ weights = (B * (B > 0.8).float() + eye) * (
+ scores.unsqueeze(2).expand(num_classes, n, n))
+ xx1 = boxes[:, :, 0].unsqueeze(1).expand(num_classes, n, n)
+ yy1 = boxes[:, :, 1].unsqueeze(1).expand(num_classes, n, n)
+ xx2 = boxes[:, :, 2].unsqueeze(1).expand(num_classes, n, n)
+ yy2 = boxes[:, :, 3].unsqueeze(1).expand(num_classes, n, n)
+
+ weightsum = weights.sum(dim=2)
+ xx1 = (xx1 * weights).sum(dim=2) / (weightsum)
+ yy1 = (yy1 * weights).sum(dim=2) / (weightsum)
+ xx2 = (xx2 * weights).sum(dim=2) / (weightsum)
+ yy2 = (yy2 * weights).sum(dim=2) / (weightsum)
+ boxes = torch.stack([xx1, yy1, xx2, yy2], 2)
+
+ # Assign each kept detection to its corresponding class
+ classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep)
+ classes = classes[keep]
+ boxes = boxes[keep]
+ scores = scores[keep]
+
+ # Only keep the top cfg.max_num_detections highest scores across all classes
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+
+ return boxes, classes, scores
diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md b/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md
new file mode 100644
index 0000000000..1b6165122d
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md
@@ -0,0 +1,28 @@
+Fast-NMS
+======
+
+This folder contains an implementation of Fast-NMS [[1]](#fast_nms-1).
+
+Sources
+------
+Large parts of the code are taken from [here](https://github.com/Zzh-tju/CIoU), with modifications to make it compatible with the OpenDR specifications. The original code is licensed under the GNU General Public License v3.0:
+
+```
+This folder contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU).
+Copyright (c) 2020 Zheng, Zhaohui.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, version 3.
+
+This program is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <https://www.gnu.org/licenses/>.
+```
+
+[1] YOLACT: Real-time Instance Segmentation,
+[ArXiv](https://arxiv.org/abs/1904.02689).
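+
+Usage example
+------
+A minimal usage sketch of the `FastNMS` class defined in `fast_nms.py` (the import path follows this folder's layout; the random boxes and scores are purely illustrative):
+
+```python
+import numpy as np
+from opendr.perception.object_detection_2d.nms.fast_nms.fast_nms import FastNMS
+
+# Illustrative candidate detections: (N, 4) boxes in (x1, y1, x2, y2) format and
+# (N, num_classes) confidence scores.
+boxes = np.random.rand(100, 4) * 300
+boxes[:, 2:] += boxes[:, :2]
+scores = np.random.rand(100, 1)
+
+nms = FastNMS(cross_class=False, device='cpu', iou_thres=0.45, top_k=400, post_k=100)
+bounding_boxes, _ = nms.run_nms(boxes=boxes, scores=scores, threshold=0.2)
+print(len(bounding_boxes.data))  # number of detections surviving NMS
+```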
diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/fast_nms/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py b/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py
new file mode 100644
index 0000000000..ace8b37089
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py
@@ -0,0 +1,147 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU).
+# Copyright (c) 2020 Zheng, Zhaohui.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from opendr.perception.object_detection_2d.nms.utils import NMSCustom
+from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard
+from opendr.engine.target import BoundingBox, BoundingBoxList
+import torch
+import numpy as np
+
+
+class FastNMS(NMSCustom):
+ def __init__(self, cross_class=False, device='cuda', iou_thres=0.45, top_k=400, post_k=100):
+ self.device = device
+ self.iou_thres = iou_thres
+ self.top_k = top_k
+ self.post_k = post_k
+ self.cross_class = cross_class
+
+ def set_iou_thres(self, iou_thres=0.45):
+ self.iou_thres = iou_thres
+
+    def set_top_k(self, top_k=400):
+        self.top_k = top_k
+
+    def set_post_k(self, post_k=100):
+        self.post_k = post_k
+
+ def set_cross_class(self, cross_class=False):
+ self.cross_class = cross_class
+
+ def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None):
+
+ if isinstance(boxes, np.ndarray):
+ boxes = torch.tensor(boxes, device=self.device)
+ elif torch.is_tensor(boxes):
+ if self.device == 'cpu':
+ boxes = boxes.cpu()
+ elif self.device == 'cuda':
+ boxes = boxes.cuda()
+
+ if isinstance(scores, np.ndarray):
+ scores = torch.tensor(scores, device=self.device)
+ elif torch.is_tensor(scores):
+ if self.device == 'cpu':
+ scores = scores.cpu()
+ elif self.device == 'cuda':
+ scores = scores.cuda()
+
+ scores = torch.transpose(scores, dim0=1, dim1=0)
+ if self.cross_class:
+ [boxes, classes, scores] = cc_fast_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+ else:
+ [boxes, classes, scores] = fast_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres,
+ top_k=self.top_k, post_k=self.post_k)
+
+ keep_ids = torch.where(scores > threshold)
+ scores = scores[keep_ids].cpu().numpy()
+ classes = classes[keep_ids].cpu().numpy()
+ boxes = boxes[keep_ids].cpu().numpy()
+ bounding_boxes = BoundingBoxList([])
+ for idx, box in enumerate(boxes):
+ bbox = BoundingBox(left=box[0], top=box[1],
+ width=box[2] - box[0],
+ height=box[3] - box[1],
+ name=classes[idx],
+ score=scores[idx])
+ bounding_boxes.data.append(bbox)
+
+ return bounding_boxes, [boxes, classes, scores]
+
+
+def fast_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+ scores, idx = scores.sort(1, descending=True)
+ boxes = boxes[idx, :]
+
+ scores = scores[:, :top_k]
+ boxes = boxes[:, :top_k]
+
+ num_classes, num_dets = scores.shape
+
+ boxes = boxes.view(num_classes, num_dets, 4)
+
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ iou_max, _ = iou.max(dim=1)
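+    # Fast-NMS: keep a box only if its maximum IoU with any higher-scoring box of the same class
+    # is below the threshold; suppressed boxes are not re-examined, so no iteration is needed.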
+
+ keep = (iou_max <= iou_thres)
+ keep *= (scores > 0.01)
+ classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep)
+ classes = classes[keep]
+
+ boxes = boxes[keep]
+ scores = scores[keep]
+
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
+
+
+def cc_fast_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200):
+ scores, classes = scores.max(dim=0)
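+    # Cross-class Fast-NMS: every detection keeps only its best-scoring class and all boxes are
+    # then suppressed jointly, regardless of class.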
+ _, idx = scores.sort(0, descending=True)
+ idx = idx[:top_k]
+ boxes = boxes[idx]
+ scores = scores[idx]
+ classes = classes[idx]
+ iou = jaccard(boxes, boxes).triu_(diagonal=1)
+ maxA, _ = torch.max(iou, dim=0)
+
+ idx_out = torch.where(maxA > iou_thres)
+ scores[idx_out] = 0
+ scores, idx = scores.sort(0, descending=True)
+ idx = idx[:post_k]
+ scores = scores[:post_k]
+ classes = classes[idx]
+ boxes = boxes[idx]
+ return boxes, classes, scores
diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md
new file mode 100644
index 0000000000..4e03fce80c
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md
@@ -0,0 +1,17 @@
+Seq2Seq-NMS
+======
+
+This folder contains an implementation of Seq2Seq-NMS [[1]](#seq2seq_nms-1).
+
+TABLE-1: Average Precision (AP) achieved by the pretrained models on the person detection task. The maximum number of RoIs employed for the performance evaluation was set to 800.
+| **Pretrained Model** | **Dataset** | **Detector** | **Type of Appearance-based Features** | **Pre-processing IoU Threshold** | **AP@0.5 on validation set** | **AP@0.5 on test set** |
+|:----------------------:|:-----------:|:------------:|:-------------------------------------:|:--------------------------------:|:----------------------------:|:----------------------:|
+| seq2seq_pets_jpd_fmod | PETS | JPD | FMoD | 0.8 | 80.2% | 84.3% |
+| seq2seq_pets_ssd_fmod | PETS | SSD | FMoD | 0.8 | 77.4% | 79.1% |
+| seq2seq_coco_frcn_fmod | COCO | FRCN | FMoD | - | 68.1% \* | 67.5% \*\* |
+| seq2seq_coco_ssd_fmod  | COCO        | SSD          | FMoD                                  | -                                | 41.8% \*                     | 42.4% \*\*             |
+
+\* The minival set was used as validation set.
+\*\* The minitest set was used as test set.
+
+[1] Neural Attention-driven Non-Maximum Suppression for Person Detection, [TechRxiv](https://www.techrxiv.org/articles/preprint/Neural_Attention-driven_Non-Maximum_Suppression_for_Person_Detection/16940275).
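+
+Usage example
+------
+A minimal usage sketch of the `Seq2SeqNMSLearner` class (the pretrained model name matches TABLE-1, while the image path, the extracted model folder name and the detector outputs below are illustrative assumptions):
+
+```python
+import cv2
+import numpy as np
+from opendr.perception.object_detection_2d.nms.seq2seq_nms.seq2seq_nms_learner import Seq2SeqNMSLearner
+
+learner = Seq2SeqNMSLearner(device='cpu', app_feats='fmod', temp_path='./temp')
+learner.download(model_name='seq2seq_pets_jpd_fmod', path='./temp')
+learner.load('./temp/seq2seq_pets_jpd_fmod', verbose=True)  # assumed extraction folder name
+
+img = cv2.imread('example.jpg')  # hypothetical input image
+# Candidate person detections: (N, 4) boxes in (x1, y1, x2, y2) format and (N, 1) scores.
+boxes = np.random.rand(50, 4) * 300
+boxes[:, 2:] += boxes[:, :2]
+scores = np.random.rand(50, 1)
+
+bounding_boxes, _ = learner.run_nms(boxes=boxes, scores=scores, img=img, threshold=0.2)
+```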
diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py
new file mode 100755
index 0000000000..4b5d5ec2f5
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py
@@ -0,0 +1,200 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch
+import torchvision
+import numpy as np
+import cv2
+import random
+from opendr.engine.data import Image
+
+
+class FMoD:
+ def __init__(self, roi_pooling_dim=None, pyramid_depth=3, map_type="SIFT", map_bin=False,
+ resize_dim=None, device='cpu'):
+ if roi_pooling_dim is None:
+ roi_pooling_dim = 160
+ self.roi_pooling_dim = [roi_pooling_dim, roi_pooling_dim]
+ self.pyramid_depth = pyramid_depth
+ self.boxes_p = []
+ self.rp_size = []
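+        # Spatial pyramid of RoI sub-regions: level p splits the pooled RoI into a 2^p x 2^p grid,
+        # producing 4^p cells per level.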
+ for p in range(self.pyramid_depth):
+ s = 1 / pow(2, p)
+ for i in np.arange(0, 1.0, s):
+ for j in np.arange(0, 1.0, s):
+ self.boxes_p.append([0, int(i * self.roi_pooling_dim[0]), int(j * self.roi_pooling_dim[1]),
+ int((i + s) * self.roi_pooling_dim[0]),
+ int((j + s) * self.roi_pooling_dim[1])])
+ self.rp_size.append([int(self.roi_pooling_dim[0] * s), int(self.roi_pooling_dim[1] * s)])
+ self.device = device
+ self.boxes_p = torch.tensor(self.boxes_p).float()
+ if "cuda" in self.device:
+ self.boxes_p = self.boxes_p.to(self.device)
+ self.resc = 1.0
+ self.map = None
+ self.resize_dim = resize_dim
+ self.map_type = map_type
+ self.map_bin = map_bin
+ self.mean = None
+ self.std = None
+
+ def set_mean_std(self, mean_values=None, std_values=None):
+ self.mean = torch.tensor(mean_values).float()
+ self.std = torch.tensor(std_values).float()
+ if "cuda" in self.device:
+ self.mean = self.mean.to(self.device)
+ self.std = self.std.to(self.device)
+
+ def extract_maps(self, img=None, augm=False):
+ if img is None:
+ raise Exception('Image is not provided to FMoD...')
+
+ if not isinstance(img, Image):
+ img = Image(img)
+ img = img.convert(format='channels_last', channel_order='bgr')
+
+ if self.resize_dim is not None:
+ max_dim = max(img.shape[0], img.shape[1])
+ if max_dim > self.resize_dim:
+ self.resc = float(self.resize_dim) / max_dim
+ img = cv2.resize(img, (int(img.shape[1] * self.resc), int(img.shape[0] * self.resc)))
+ if augm:
+ img = augm_brightness(img, 0.75, 1.25)
+ img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+ if self.map_type == "EDGEMAP":
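+            # Edge map: Scharr gradient magnitude of the blurred grayscale image, optionally
+            # thresholded to a binary map.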
+ dst_img = np.copy(img)
+ dst_img = cv2.GaussianBlur(dst_img, (3, 3), 0, 0, cv2.BORDER_DEFAULT)
+ gradX = cv2.Scharr(dst_img, ddepth=cv2.CV_16S, dx=1, dy=0, scale=1, delta=0,
+ borderType=cv2.BORDER_DEFAULT)
+ gradY = cv2.Scharr(dst_img, ddepth=cv2.CV_16S, dx=0, dy=1, scale=1, delta=0,
+ borderType=cv2.BORDER_DEFAULT)
+ absGradX = cv2.convertScaleAbs(gradX)
+ absGradY = cv2.convertScaleAbs(gradY)
+ absGradXCV32 = absGradX.astype("float32")
+ absGradYCV32 = absGradY.astype("float32")
+ self.map = cv2.magnitude(absGradXCV32 / 255.0, absGradYCV32 / 255.0)
+ self.map = self.map * 255
+ if self.map_bin:
+ self.map = cv2.threshold(self.map, 240, 255, cv2.THRESH_BINARY)[1]
+ else:
+ kps = None
+ if self.map_type == "FAST":
+ fast = cv2.FastFeatureDetector_create()
+ kps = fast.detect(img, None)
+ elif self.map_type == "AKAZE":
+ akaze = cv2.AKAZE_create()
+ kps, desc = akaze.detectAndCompute(img, None)
+ elif self.map_type == "BRISK":
+ brisk = cv2.BRISK_create()
+ kps = brisk.detect(img, None)
+ elif self.map_type == "ORB":
+ orb = cv2.ORB_create()
+ kps = orb.detect(img, None)
+ else:
+ raise Exception("Map type not supported...")
+ self.map = np.zeros(img.shape, dtype=np.uint8)
+ coords_x = []
+ coords_y = []
+ resps = []
+ for kp in kps:
+ coords_x.append(int(kp.pt[0]))
+ coords_y.append(int(kp.pt[1]))
+ resps.append(255 * kp.response)
+ if not self.map_bin:
+ self.map[coords_y, coords_x] = resps
+ else:
+ self.map[coords_y, coords_x] = 255
+ self.map = torch.from_numpy(self.map).float()
+ if "cuda" in self.device:
+ self.map = self.map.to(self.device)
+
+ def extract_FMoD_feats(self, boxes):
+ num_rois = boxes.shape[0]
+ map_gpu = self.map / 255.0
+ map_gpu = map_gpu.unsqueeze(0).unsqueeze(0)
+ descs = []
+ pooled_regions = torchvision.ops.roi_align(map_gpu, [self.resc * boxes],
+ output_size=self.rp_size[0], spatial_scale=1.0,
+ aligned=True)
+ pooled_regions = pooled_regions.unsqueeze(1)
+ descs.append(self.get_descriptor(pooled_regions))
+ for i in range(0, self.pyramid_depth - 1):
+ pooled_regions_pyr = pooled_regions.contiguous().view(num_rois, pooled_regions.shape[-2],
+ pooled_regions.shape[-1])
+ pooled_regions_pyr = pooled_regions_pyr.unsqueeze(0)
+ pooled_regions_pyr = torchvision.ops.roi_align(pooled_regions_pyr, self.boxes_p[(pow(4 + 1, i)):(
+ (pow(4 + 1, i)) + pow(4, (i + 1))), :], output_size=self.rp_size[i + 1], aligned=True)
+ pooled_regions_pyr = pooled_regions_pyr.permute(1, 0, 2, 3)
+ pooled_regions_pyr = pooled_regions_pyr.contiguous().view(num_rois, 1, pooled_regions_pyr.shape[-3],
+ pooled_regions_pyr.shape[-2],
+ pooled_regions_pyr.shape[-1])
+ descs.append(self.get_descriptor(pooled_regions_pyr))
+
+ descs = torch.cat(descs, dim=1)
+ if self.mean is not None and self.std is not None:
+ descs = (descs - self.mean) / self.std
+ descs = torch.clamp(descs, -50, 50)
+ return descs
+
+ def release_maps(self):
+ self.map = None
+
+ def get_descriptor(self, patches):
+ dt = []
+ # row data
+ dt.append(patches.mean(dim=3))
+        # column data
+ dt.append(patches.mean(dim=4))
+ # block data
+ dt.append(torch.flatten(patches, start_dim=3))
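+        # For each view (row means, column means, flattened patch) compute simple statistics:
+        # mean, standard deviation, skewness, kurtosis and mean power.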
+
+ means = []
+ stds = []
+ diffs = []
+ zscores = []
+ skews = []
+ kurtoses = []
+ powers = []
+ for i in range(len(dt)):
+ if i == 2:
+ means.append(dt[i].mean(dim=3))
+ else:
+ means.append(dt[i][:, :, :, 0:-1:5].mean(dim=3))
+ stds.append(dt[i].std(dim=3))
+ diffs.append((dt[i] - means[i].unsqueeze(-1).expand(dt[i].size())))
+ zscores.append(diffs[i] / stds[i].unsqueeze(-1).expand(dt[i].size()))
+ zscores[i] = torch.where(stds[i].unsqueeze(-1).expand(zscores[i].shape) > 0, zscores[i],
+ torch.zeros_like(zscores[i]))
+ skews.append(torch.mean(torch.pow(zscores[i], 3.0), -1))
+ kurtoses.append(torch.mean(torch.pow(zscores[i], 4.0), -1) - 3.0)
+ powers.append((dt[i] * dt[i]).mean(-1))
+ descs = []
+ for i in range(len(dt)):
+ descs.append(torch.cat((means[i], stds[i], skews[i], kurtoses[i], powers[i]), 2))
+ desc = torch.cat((descs[0], descs[1], descs[2]), 2)
+ desc = desc.contiguous().view(desc.shape[0], desc.shape[1] * desc.shape[2])
+ return desc
+
+
+def augm_brightness(img, low, high):
+ value = random.uniform(low, high)
+ hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+ hsv = np.array(hsv, dtype=np.float64)
+ hsv[:, :, 1] = hsv[:, :, 1] * value
+ hsv[:, :, 1][hsv[:, :, 1] > 255] = 255
+ hsv[:, :, 2] = hsv[:, :, 2] * value
+ hsv[:, :, 2][hsv[:, :, 2] > 255] = 255
+ hsv = np.array(hsv, dtype=np.uint8)
+ img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
+ return img
diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py
new file mode 100755
index 0000000000..953892d04e
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py
@@ -0,0 +1,196 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import torch.nn as nn
+import torch
+import math
+import torch.nn.functional as F
+
+
+class Seq2SeqNet(nn.Module):
+ def __init__(self, dropout=0.01, use_app_feats=True, app_input_dim=315, geom_input_dim=14, lq_dim=256, sq_dim=128,
+ num_JPUs=4, device='cuda'):
+ super().__init__()
+ self.use_app_feats = use_app_feats
+ self.dropout_q = nn.Dropout(dropout * 0.25)
+ self.num_JPUs = num_JPUs
+ self.joint_processing_units = []
+ self.device = device
+ for i in range(self.num_JPUs):
+ self.joint_processing_units.append(Joint_processing_unit(lq_dim=lq_dim, sq_dim=sq_dim, dropout=dropout))
+ if "cuda" in self.device:
+ self.joint_processing_units[i] = self.joint_processing_units[i].to(self.device)
+ self.joint_processing_units = nn.ModuleList(self.joint_processing_units)
+ if self.use_app_feats:
+ q_app_dims = [180, 180]
+ self.q_app_layers = nn.Sequential(
+ nn.Linear(app_input_dim, q_app_dims[0]),
+ nn.GELU(),
+ nn.Dropout(dropout * 0.25),
+ nn.LayerNorm(q_app_dims[0], eps=1e-6),
+ nn.Linear(q_app_dims[0], q_app_dims[1]),
+ nn.GELU(),
+ nn.Dropout(dropout * 0.25),
+ # nn.LayerNorm(q_fmod_dims[1], eps=1e-6)
+ )
+
+ q_geom_dims = [180, 180]
+ self.q_geom_layers = nn.Sequential(
+ nn.Linear(geom_input_dim, q_geom_dims[0]),
+ nn.GELU(),
+ nn.LayerNorm(q_geom_dims[0], eps=1e-6),
+ nn.Linear(q_geom_dims[0], q_geom_dims[1]),
+ nn.GELU(),
+ nn.Dropout(dropout * 0.25),
+ # nn.LayerNorm(q_geom_dims[1], eps=1e-6)
+ )
+
+ k_geom_dims = [180, 180]
+ self.k_geom_layers = nn.Sequential(
+ nn.Linear(geom_input_dim, k_geom_dims[0]),
+ nn.GELU(),
+ nn.LayerNorm(k_geom_dims[0], eps=1e-6),
+ nn.Linear(k_geom_dims[0], k_geom_dims[1]),
+ nn.GELU(),
+ nn.Dropout(dropout * 0.25),
+ # nn.LayerNorm(k_geom_dims[1], eps=1e-6)
+ )
+
+ q_final_in_dim = q_geom_dims[-1]
+ k_final_in_dim = k_geom_dims[-1]
+ if self.use_app_feats:
+ q_final_in_dim = q_geom_dims[-1] + q_app_dims[-1]
+ k_final_in_dim = k_geom_dims[-1] + q_app_dims[-1]
+
+ self.q_full_layers = nn.Sequential(
+ nn.LayerNorm(q_final_in_dim, eps=1e-6),
+ nn.Linear(q_final_in_dim, lq_dim),
+ nn.GELU(),
+ nn.Dropout(dropout * 0.25),
+ # nn.LayerNorm(lq_dim, eps=1e-6)
+ )
+ self.k_full_layers = nn.Sequential(
+ nn.LayerNorm(k_final_in_dim, eps=1e-6),
+ nn.Linear(k_final_in_dim, sq_dim),
+ nn.GELU(),
+ nn.Dropout(dropout * 0.25),
+ # nn.LayerNorm(sq_dim, eps=1e-6)
+ )
+ self.q_final_layers = nn.Sequential(
+ nn.LayerNorm(lq_dim, eps=1e-6),
+ nn.Linear(lq_dim, sq_dim),
+ nn.GELU(),
+ nn.Dropout(dropout * 0.25),
+ nn.LayerNorm(sq_dim, eps=1e-6),
+ nn.Linear(sq_dim, 1),
+ nn.Sigmoid()
+ )
+
+ def forward(self, q_geom_feats=None, k_geom_feats=None, msk=None, app_feats=None):
+ q_feats = self.q_geom_layers(q_geom_feats)
+ k_feats = self.k_geom_layers(k_geom_feats)
+
+ if self.use_app_feats and app_feats is not None:
+ app_feats = self.q_app_layers(app_feats)
+ q_feats = torch.cat((q_feats, app_feats), dim=2)
+ k_feats = torch.cat((k_feats, app_feats.transpose(0, 1).repeat(k_feats.shape[1], 1, 1)), dim=2)
+
+ elif app_feats is None:
+ raise UserWarning("Appearance-based representations not provided.")
+ q_feats = self.q_full_layers(q_feats)
+ k_feats = self.k_full_layers(k_feats)
+ for i in range(self.num_JPUs):
+ q_feats, k_feats = self.joint_processing_units[i](q_feats, k_feats, msk)
+ scores = self.q_final_layers(q_feats)
+ return scores.squeeze(1)
+
+
+class Joint_processing_unit(nn.Module):
+ def __init__(self, heads=2, lq_dim=256, sq_dim=128, dropout=0.1):
+ super().__init__()
+ self.q_block1 = nn.Sequential(
+ nn.LayerNorm(lq_dim, eps=1e-6),
+ nn.Linear(lq_dim, sq_dim),
+ nn.GELU(),
+ nn.Dropout(dropout)
+ )
+ self.norm_layer_q = nn.LayerNorm(sq_dim, eps=1e-6)
+ self.norm_layer_k = nn.LayerNorm(sq_dim, eps=1e-6)
+ self.self_attention_module = Self_attention_module(heads=heads, l_dim=lq_dim, s_dim=sq_dim, dropout=dropout)
+ self.scale_layer = Scale_layer(s_dim=sq_dim)
+
+ self.q_block2 = nn.Sequential(
+ nn.LayerNorm(sq_dim, eps=1e-6),
+ nn.Linear(sq_dim, lq_dim),
+ nn.GELU(),
+ nn.Dropout(dropout)
+ )
+
+ def forward(self, q_feats, k_feats, msk):
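+        # Joint processing: queries attend over the key representations (masked by box overlaps),
+        # keys receive a scaled feedback of the attended queries, and the queries get a residual
+        # update through q_block2.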
+ q_atten = self.q_block1(q_feats)
+ kv_atten_in = self.norm_layer_k(k_feats)
+ q_atten_in = self.norm_layer_q(q_atten)
+ q_atten = q_atten + self.self_attention_module(q=q_atten_in, k=kv_atten_in, v=kv_atten_in, mask=msk)
+ k_feats = k_feats + self.scale_layer(q_atten).transpose(0, 1).repeat(q_atten.shape[0], 1, 1)
+ q_feats = q_feats + self.q_block2(q_atten)
+ return q_feats, k_feats
+
+
+class Self_attention_module(nn.Module):
+ def __init__(self, heads, l_dim, s_dim, dropout=0.1):
+ super().__init__()
+ self.l_dim = l_dim
+ self.s_dim = s_dim
+ self.qkv_split_dim = s_dim // heads
+ self.h = heads
+ self.q_linear = nn.Linear(self.s_dim, self.s_dim)
+ self.v_linear = nn.Linear(self.s_dim, self.s_dim)
+ self.k_linear = nn.Linear(self.s_dim, self.s_dim)
+
+ self.dropout = nn.Dropout(dropout)
+ self.q_out = nn.Sequential(
+ nn.Linear(self.s_dim, self.s_dim),
+ nn.GELU(),
+ nn.Dropout(dropout)
+ )
+
+ def forward(self, q, k, v, mask=None):
+ samples_dim = q.size(0)
+ k = self.k_linear(k).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2)
+ q = self.q_linear(q).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2)
+ v = self.v_linear(v).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2)
+ scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.qkv_split_dim)
+
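+        # Apply the overlap mask: scale the attention scores by the mask weights and push fully
+        # masked pairs to a large negative value so they vanish after the softmax.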
+ mask = mask.unsqueeze(1)
+ mask = mask.unsqueeze(1)
+ mask = mask.repeat(1, scores.shape[1], 1, 1)
+ scores = torch.mul(scores, mask)
+ scores = scores.masked_fill(mask == 0, -1e9)
+
+ scores = F.softmax(scores, dim=-1)
+ scores = self.dropout(scores)
+ q = torch.matmul(scores, v)
+ q = q.transpose(1, 2).contiguous().view(samples_dim, -1, self.s_dim)
+ q = self.q_out(q)
+ return q
+
+
+class Scale_layer(nn.Module):
+ def __init__(self, s_dim=1):
+ super().__init__()
+ self.scale_weights = nn.Parameter(torch.empty(s_dim), requires_grad=True)
+ nn.init.uniform_(self.scale_weights, a=0.01, b=2.0)
+
+ def forward(self, feats):
+ return feats * self.scale_weights
diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py
new file mode 100644
index 0000000000..fd8a97d16c
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py
@@ -0,0 +1,812 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from opendr.engine.learners import Learner
+from opendr.engine.constants import OPENDR_SERVER_URL
+from opendr.engine.target import BoundingBox, BoundingBoxList
+from opendr.engine.data import Image
+from opendr.perception.object_detection_2d.nms.seq2seq_nms.algorithm.seq2seq_model import Seq2SeqNet
+from opendr.perception.object_detection_2d.nms.utils import NMSCustom
+from opendr.perception.object_detection_2d.nms.utils.nms_dataset import Dataset_NMS
+from opendr.perception.object_detection_2d.nms.seq2seq_nms.algorithm.fmod import FMoD
+from opendr.perception.object_detection_2d.nms.utils.nms_utils import drop_dets, det_matching, \
+ run_coco_eval, filter_iou_boxes, bb_intersection_over_union, compute_class_weights, apply_torchNMS
+import torch
+import torch.nn.functional as F
+import pickle
+import numpy as np
+import os
+from urllib.request import urlretrieve
+import torch.nn as nn
+from tensorboardX import SummaryWriter
+import torch.optim as optim
+from tqdm import tqdm
+import collections
+import json
+import zipfile
+
+
+class Seq2SeqNMSLearner(Learner, NMSCustom):
+ def __init__(self, lr=0.0001, epochs=8, device='cuda', temp_path='./temp', checkpoint_after_iter=0,
+ checkpoint_load_iter=0, log_after=10000, variant='medium',
+ iou_filtering=0.8, dropout=0.025, app_feats='fmod',
+ fmod_map_type='EDGEMAP', fmod_map_bin=True, app_input_dim=None):
+ super(Seq2SeqNMSLearner, self).__init__(lr=lr, batch_size=1,
+ checkpoint_after_iter=checkpoint_after_iter,
+ checkpoint_load_iter=checkpoint_load_iter,
+ temp_path=temp_path, device=device, backbone='default')
+ self.epochs = epochs
+ self.variant = variant
+ self.app_feats = app_feats
+ self.use_app_feats = False
+ if self.app_feats is not None:
+ self.use_app_feats = True
+ self.fmod_map_type = None
+ self.fmod_map_bin = None
+ self.fmod_map_res_dim = None
+ self.fmod_pyramid_lvl = None
+ self.fmod_roi_pooling_dim = None
+ if self.app_feats == 'fmod':
+ self.fmod_map_type = fmod_map_type
+ self.fmod_roi_pooling_dim = 160
+ self.fmod_map_res_dim = 600
+ self.fmod_pyramid_lvl = 3
+            self.set_fmod_architecture()
+ self.fmod_feats_dim = 0
+ for i in range(0, self.fmod_pyramid_lvl):
+ self.fmod_feats_dim = self.fmod_feats_dim + 15 * (pow(4, i))
+ self.fmod_map_bin = fmod_map_bin
+ self.app_input_dim = self.fmod_feats_dim
+ self.fmod_mean_std = None
+ elif self.app_feats == 'zeros' or self.app_feats == 'custom':
+ if app_input_dim is None:
+ raise Exception("The dimension of the input appearance-based features is not provided...")
+ else:
+ self.app_input_dim = app_input_dim
+ if self.app_feats == 'custom':
+ raise AttributeError("Custom appearance-based features are not yet supported.")
+ self.lq_dim = 256
+ self.sq_dim = 128
+ self.geom_input_dim = 14
+ self.num_JPUs = 4
+ self.set_architecture()
+ self.dropout = dropout
+ self.temp_path = temp_path
+ if not os.path.isdir(self.temp_path):
+ os.mkdir(self.temp_path)
+ self.checkpoint_load_iter = checkpoint_load_iter
+ self.log_after = log_after
+ self.iou_filtering = iou_filtering
+ self.classes = None
+ self.class_ids = None
+ self.fMoD = None
+ self.fmod_init_file = None
+ if self.app_feats == 'fmod':
+ self.fMoD = FMoD(roi_pooling_dim=self.fmod_roi_pooling_dim, pyramid_depth=self.fmod_pyramid_lvl,
+ resize_dim=self.fmod_map_res_dim,
+ map_type=self.fmod_map_type, map_bin=self.fmod_map_bin, device=self.device)
+ self.init_model()
+ if "cuda" in self.device:
+ self.model = self.model.to(self.device)
+
+ def fit(self, dataset, logging_path='', logging_flush_secs=30, silent=True,
+ verbose=True, nms_gt_iou=0.5, max_dt_boxes=400, datasets_folder='./datasets',
+ use_ssd=False, lr_step=True):
+
+ dataset_nms = Dataset_NMS(path=datasets_folder, dataset_name=dataset, split='train', use_ssd=use_ssd,
+ device=self.device)
+ if self.classes is None:
+ self.classes = dataset_nms.classes
+ self.class_ids = dataset_nms.class_ids
+
+ if logging_path != '' and logging_path is not None:
+ logging = True
+ file_writer = SummaryWriter(logging_path, flush_secs=logging_flush_secs)
+ else:
+ logging = False
+ file_writer = None
+
+ checkpoints_folder = self.temp_path
+ if self.checkpoint_after_iter != 0 and not os.path.exists(checkpoints_folder):
+ os.makedirs(checkpoints_folder)
+
+ if not silent and verbose:
+ print("Model trainable parameters:", self.count_parameters())
+
+ self.model.train()
+ if "cuda" in self.device:
+ self.model = self.model.to(self.device)
+
+ if self.epochs is None:
+ raise ValueError("Training epochs not specified")
+ elif self.epochs <= self.checkpoint_load_iter:
+ raise ValueError("Training epochs are less than those of the loaded model")
+
+ if self.app_feats == 'fmod':
+ if self.fmod_mean_std is None:
+ self.fmod_mean_std = self.load_FMoD_init_from_dataset(dataset=dataset, map_type=self.fmod_map_type,
+ fmod_pyramid_lvl=self.fmod_pyramid_lvl,
+ datasets_folder=datasets_folder,
+ verbose=verbose)
+ self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std'])
+
+ start_epoch = 0
+ drop_after_epoch = []
+ if lr_step and self.epochs > 1:
+ drop_after_epoch = [int(self.epochs * 0.5)]
+ if self.epochs > 3:
+ drop_after_epoch.append(int(self.epochs * 0.7))
+
+ train_ids = np.arange(len(dataset_nms.src_data))
+ total_loss_iter = 0
+ total_loss_epoch = 0
+        optimizer = optim.Adam(self.model.parameters(), lr=self.lr, betas=(0.9, 0.99), eps=1e-9)
+ scheduler = None
+ if len(drop_after_epoch) > 0:
+ scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.1)
+
+ num_iter = 0
+ training_weights = compute_class_weights(pos_weights=[0.9, 0.1], max_dets=max_dt_boxes, dataset_nms=dataset_nms)
+ # Single class NMS only.
+ class_index = 1
+ training_dict = {"cross_entropy_loss": []}
+ for epoch in range(start_epoch, self.epochs):
+ pbar = None
+ if not silent:
+ pbarDesc = "Epoch #" + str(epoch) + " progress"
+ pbar = tqdm(desc=pbarDesc, total=len(train_ids))
+ np.random.shuffle(train_ids)
+ for sample_id in train_ids:
+
+ if self.log_after != 0 and num_iter > 0 and num_iter % self.log_after == 0:
+ if logging:
+ file_writer.add_scalar(tag="cross_entropy_loss",
+ scalar_value=total_loss_iter/self.log_after,
+ global_step=num_iter)
+ if verbose:
+ print(''.join(['\nEpoch: {}',
+ ' Iter: {}, cross_entropy_loss: {}']).format(epoch, num_iter,
+ total_loss_iter/self.log_after))
+ total_loss_iter = 0
+
+ image_fln = dataset_nms.src_data[sample_id]['filename']
+ if len(dataset_nms.src_data[sample_id]['dt_boxes'][class_index]) > 0:
+ dt_boxes = torch.tensor(
+ dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 0:4]).float()
+ dt_scores = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 4]).float()
+ dt_scores, dt_scores_ids = torch.sort(dt_scores, descending=True)
+ dt_boxes = dt_boxes[dt_scores_ids]
+ else:
+ if not silent:
+ pbar.update(1)
+ num_iter = num_iter + 1
+ continue
+ gt_boxes = torch.tensor([]).float()
+ if len(dataset_nms.src_data[sample_id]['gt_boxes'][class_index]) > 0:
+ gt_boxes = torch.tensor(dataset_nms.src_data[sample_id]['gt_boxes'][class_index]).float()
+ image_path = os.path.join(datasets_folder, dataset, image_fln)
+ img_res = dataset_nms.src_data[sample_id]['resolution'][::-1]
+
+ if "cuda" in self.device:
+ dt_boxes = dt_boxes.to(self.device)
+ dt_scores = dt_scores.to(self.device)
+ gt_boxes = gt_boxes.to(self.device)
+
+ val_ids = torch.logical_and((dt_boxes[:, 2] - dt_boxes[:, 0]) > 4,
+ (dt_boxes[:, 3] - dt_boxes[:, 1]) > 4)
+ dt_boxes = dt_boxes[val_ids, :]
+ dt_scores = dt_scores[val_ids]
+
+ dt_boxes, dt_scores = drop_dets(dt_boxes, dt_scores)
+ if dt_boxes.shape[0] < 1:
+ if not silent:
+ pbar.update(1)
+ num_iter = num_iter + 1
+ continue
+ if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0:
+ dt_boxes, dt_scores = apply_torchNMS(boxes=dt_boxes, scores=dt_scores,
+ iou_thres=self.iou_filtering)
+
+ dt_boxes = dt_boxes[:max_dt_boxes]
+ dt_scores = dt_scores[:max_dt_boxes]
+ app_feats = None
+ if self.app_feats == 'fmod':
+ img = Image.open(image_path)
+ img = img.convert(format='channels_last', channel_order='bgr')
+ self.fMoD.extract_maps(img=img, augm=True)
+ app_feats = self.fMoD.extract_FMoD_feats(dt_boxes)
+ app_feats = torch.unsqueeze(app_feats, dim=1)
+ elif self.app_feats == 'zeros':
+ app_feats = torch.zeros([dt_boxes.shape[0], 1, self.app_input_dim])
+ if "cuda" in self.device:
+ app_feats = app_feats.to(self.device)
+ elif self.app_feats == 'custom':
+ raise AttributeError("Custom appearance-based features are not yet supported.")
+
+ msk = self.compute_mask(dt_boxes, iou_thres=0.2, extra=0.1)
+ q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=dt_boxes, scores=dt_scores,
+ resolution=img_res)
+ preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk,
+ app_feats=app_feats)
+ preds = torch.clamp(preds, 0.001, 1 - 0.001)
+
+ labels = det_matching(scores=preds, dt_boxes=dt_boxes, gt_boxes=gt_boxes,
+ iou_thres=nms_gt_iou, device=self.device)
+ weights = (training_weights[class_index][1] * labels + training_weights[class_index][0] * (
+ 1 - labels))
+
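+                # Label smoothing: move the hard 0/1 matching labels slightly towards the opposite
+                # class by a small random epsilon before computing the weighted BCE loss.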
+ e = torch.distributions.uniform.Uniform(0.001, 0.005).sample([labels.shape[0], 1])
+ if "cuda" in self.device:
+ weights = weights.to(self.device)
+ e = e.to(self.device)
+ labels = labels * (1 - e) + (1 - labels) * e
+ ce_loss = F.binary_cross_entropy(preds, labels, reduction="none")
+ loss = (ce_loss * weights).sum()
+
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
+                # Memory leak occurs if the loss is not detached before accumulating total_loss_iter and total_loss_epoch
+ loss_t = loss.detach().cpu().numpy()
+ total_loss_iter = total_loss_iter + loss_t
+ total_loss_epoch = total_loss_epoch + loss_t
+ num_iter = num_iter + 1
+ if not silent:
+ pbar.update(1)
+ if not silent:
+ pbar.close()
+ if verbose:
+ print(''.join(['\nEpoch: {}',
+ ' cross_entropy_loss: {}\n']).format(epoch,
+ total_loss_epoch/len(train_ids)))
+ training_dict['cross_entropy_loss'].append(total_loss_epoch/len(train_ids))
+ if self.checkpoint_after_iter != 0 and epoch % self.checkpoint_after_iter == self.checkpoint_after_iter - 1:
+ snapshot_name = '{}/checkpoint_epoch_{}'.format(checkpoints_folder, epoch)
+ self.save(path=snapshot_name, optimizer=optimizer, scheduler=scheduler,
+ current_epoch=epoch, max_dt_boxes=max_dt_boxes)
+ snapshot_name_lw = '{}/last_weights'.format(checkpoints_folder)
+ self.save(path=snapshot_name_lw, optimizer=optimizer, scheduler=scheduler,
+ current_epoch=epoch, max_dt_boxes=max_dt_boxes)
+ total_loss_epoch = 0
+ if scheduler is not None:
+ scheduler.step()
+ if logging:
+ file_writer.close()
+ return training_dict
+
+ def eval(self, dataset, split='test', verbose=True, max_dt_boxes=400, threshold=0.0,
+ datasets_folder='./datasets', use_ssd=False):
+
+ dataset_nms = Dataset_NMS(path=datasets_folder, dataset_name=dataset, split=split, use_ssd=use_ssd,
+ device=self.device)
+
+ if self.classes is None:
+ self.classes = dataset_nms.classes
+ self.class_ids = dataset_nms.class_ids
+
+ annotations_filename = dataset_nms.annotation_file
+
+ eval_folder = self.temp_path
+ if not os.path.isdir(os.path.join(self.temp_path)):
+ os.mkdir(os.path.join(self.temp_path))
+ if not os.path.isdir(eval_folder):
+ os.mkdir(eval_folder)
+ output_file = os.path.join(eval_folder, 'detections.json')
+
+ if self.app_feats == 'fmod':
+ if self.fmod_mean_std is None:
+ self.fmod_mean_std = self.load_FMoD_init_from_dataset(dataset=dataset, map_type=self.fmod_map_type,
+ fmod_pyramid_lvl=self.fmod_pyramid_lvl,
+ datasets_folder=datasets_folder,
+ verbose=verbose)
+ self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std'])
+
+ self.model = self.model.eval()
+ if "cuda" in self.device:
+ self.model = self.model.to(self.device)
+
+ train_ids = np.arange(len(dataset_nms.src_data))
+ nms_results = []
+ pbar_eval = None
+ if verbose:
+ pbarDesc = "Evaluation progress"
+ pbar_eval = tqdm(desc=pbarDesc, total=len(train_ids))
+ for sample_id in train_ids:
+ image_fln = dataset_nms.src_data[sample_id]['filename']
+
+ image_path = os.path.join(datasets_folder, dataset, image_fln)
+ img_res = dataset_nms.src_data[sample_id]['resolution'][::-1]
+ # Single class NMS only.
+ class_index = 1
+ if len(dataset_nms.src_data[sample_id]['dt_boxes'][class_index]) > 0:
+ dt_boxes = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 0:4]).float()
+ dt_scores = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 4]).float()
+ dt_scores, dt_scores_ids = torch.sort(dt_scores, descending=True)
+ dt_boxes = dt_boxes[dt_scores_ids]
+ else:
+ pbar_eval.update(1)
+ continue
+
+ if "cuda" in self.device:
+ dt_boxes = dt_boxes.to(self.device)
+ dt_scores = dt_scores.to(self.device)
+
+ val_ids = torch.logical_and((dt_boxes[:, 2] - dt_boxes[:, 0]) > 4,
+ (dt_boxes[:, 3] - dt_boxes[:, 1]) > 4)
+ dt_boxes = dt_boxes[val_ids, :]
+ dt_scores = dt_scores[val_ids]
+
+ if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0:
+ dt_boxes, dt_scores = apply_torchNMS(boxes=dt_boxes, scores=dt_scores, iou_thres=self.iou_filtering)
+
+ dt_boxes = dt_boxes[:max_dt_boxes]
+ dt_scores = dt_scores[:max_dt_boxes]
+ app_feats = None
+ if self.app_feats == 'fmod':
+ img = Image.open(image_path)
+ img = img.convert(format='channels_last', channel_order='bgr')
+ self.fMoD.extract_maps(img=img, augm=False)
+ app_feats = self.fMoD.extract_FMoD_feats(dt_boxes)
+ app_feats = torch.unsqueeze(app_feats, dim=1)
+ elif self.app_feats == 'zeros':
+ app_feats = torch.zeros([dt_boxes.shape[0], 1, self.app_input_dim])
+ if "cuda" in self.device:
+ app_feats = app_feats.to(self.device)
+ elif self.app_feats == 'custom':
+ raise AttributeError("Custom appearance-based features are not yet supported.")
+ msk = self.compute_mask(dt_boxes, iou_thres=0.2, extra=0.1)
+ q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=dt_boxes, scores=dt_scores,
+ resolution=img_res)
+ with torch.no_grad():
+ preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk,
+ app_feats=app_feats)
+ bboxes = dt_boxes.cpu().numpy().astype('float64')
+ preds = preds.cpu().detach()
+ if threshold > 0.0:
+ ids = (preds > threshold)
+ preds = preds[ids]
+ bboxes = bboxes[ids.numpy().squeeze(-1), :]
+ for j in range(len(preds)):
+ nms_results.append({
+ 'image_id': dataset_nms.src_data[sample_id]['id'],
+ 'bbox': [bboxes[j][0], bboxes[j][1], bboxes[j][2] - bboxes[j][0], bboxes[j][3] - bboxes[j][1]],
+ 'category_id': class_index,
+ 'score': np.float64(preds[j])
+ })
+ pbar_eval.update(1)
+ pbar_eval.close()
+ if verbose:
+ print('Writing results json to {}'.format(output_file))
+ with open(output_file, 'w') as fid:
+ json.dump(nms_results, fid, indent=2)
+ eval_result = run_coco_eval(gt_file_path=os.path.join(dataset_nms.path, 'annotations', annotations_filename),
+ dt_file_path=output_file, only_classes=[1],
+ verbose=verbose, max_dets=[max_dt_boxes])
+ os.remove(output_file)
+ if verbose:
+ for i in range(len(eval_result)):
+ print('Evaluation results (num_dets={})'.format(str(eval_result[i][1])))
+ print(eval_result[i][0][0][1])
+ print(eval_result[i][0][1][1])
+ print(eval_result[i][0][2][1])
+ print(eval_result[i][0][3][1])
+ print('\n')
+ return eval_result
+
+ def save(self, path, verbose=False, optimizer=None, scheduler=None, current_epoch=None, max_dt_boxes=400):
+ fname = path.split('/')[-1]
+ dir_name = path.replace('/' + fname, '')
+ if not os.path.isdir(dir_name):
+ os.makedirs(dir_name)
+ custom_dict = {'state_dict': self.model.state_dict(), 'current_epoch': current_epoch}
+ if optimizer is not None:
+ custom_dict['optimizer'] = optimizer.state_dict()
+ if scheduler is not None:
+ custom_dict['scheduler'] = scheduler.state_dict()
+ torch.save(custom_dict, path + '.pth')
+
+ metadata = {"model_paths": [fname + '.pth'], "framework": "pytorch", "has_data": False,
+ "inference_params": {}, "optimized": False, "optimizer_info": {}, "backbone": {},
+ "format": "pth", "classes": self.classes, "app_feats": self.app_feats,
+ "lq_dim": self.lq_dim, "sq_dim": self.sq_dim, "num_JPUs": self.num_JPUs,
+ "geom_input_dim": self.geom_input_dim, "app_input_dim": self.app_input_dim,
+ "max_dt_boxes": max_dt_boxes, "variant": self.variant}
+ if self.app_feats == 'fmod':
+ metadata["fmod_map_type"] = self.fmod_map_type
+ metadata["fmod_map_bin"] = self.fmod_map_bin
+ metadata["fmod_roi_pooling_dim"] = self.fmod_roi_pooling_dim
+ metadata["fmod_map_res_dim"] = self.fmod_map_res_dim
+ metadata["fmod_pyramid_lvl"] = self.fmod_pyramid_lvl
+ metadata["fmod_normalization"] = "fmod_normalization.pkl"
+ with open(os.path.join(dir_name, 'fmod_normalization.pkl'), 'wb') as f:
+ pickle.dump(self.fmod_mean_std, f)
+ with open(path + '.json', 'w', encoding='utf-8') as f:
+ json.dump(metadata, f, ensure_ascii=False, indent=4)
+ if verbose:
+ print("Saved Pytorch model.")
+
+ def init_model(self):
+ if self.model is None:
+ self.model = Seq2SeqNet(dropout=self.dropout, use_app_feats=self.use_app_feats,
+ app_input_dim=self.app_input_dim,
+ geom_input_dim=self.geom_input_dim, lq_dim=self.lq_dim, sq_dim=self.sq_dim,
+ num_JPUs=self.num_JPUs, device=self.device)
+ for p in self.model.parameters():
+ if p.dim() > 1:
+ nn.init.xavier_uniform_(p)
+ else:
+ raise UserWarning("Tried to initialize model while model is already initialized.")
+
+ def load(self, path, verbose=False):
+ if os.path.isdir(path):
+ model_name = 'last_weights'
+ dir_path = path
+ else:
+ model_name = os.path.basename(os.path.normpath(path)).split('.')[0]
+ dir_path = os.path.dirname(os.path.normpath(path))
+
+ if verbose:
+ print("Model name:", model_name, "-->", os.path.join(dir_path, model_name + ".json"))
+ with open(os.path.join(dir_path, model_name + ".json"), encoding='utf-8-sig') as f:
+ metadata = json.load(f)
+ pth_path = os.path.join(dir_path, metadata["model_paths"][0])
+ if verbose:
+ print("Loading checkpoint:", pth_path)
+ try:
+ checkpoint = torch.load(pth_path, map_location=torch.device(self.device))
+ except FileNotFoundError as e:
+ e.strerror = "File " + pth_path + "not found."
+ raise e
+ if 'fmod_normalization' in metadata:
+ pkl_fmod = os.path.join(dir_path, metadata["fmod_normalization"])
+ if verbose:
+ print("Loading FMoD normalization values:", pkl_fmod)
+ try:
+ with open(pkl_fmod, 'rb') as f:
+ self.fmod_mean_std = pickle.load(f)
+ self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std'])
+ except FileNotFoundError as e:
+ e.strerror = "File " + pkl_fmod + "not found."
+ raise e
+
+ self.assign_params(metadata=metadata, verbose=verbose)
+ self.load_state(checkpoint)
+ if verbose:
+ print("Loaded parameters and metadata.")
+ return True
+
+ def assign_params(self, metadata, verbose):
+
+ if verbose and self.variant is not None and self.variant != metadata["variant"]:
+ print("Incompatible value for the attribute \"variant\". It is now set to: " +
+ str(metadata["variant"]))
+ self.variant = metadata["variant"]
+ if verbose and self.geom_input_dim is not None and self.geom_input_dim != metadata["geom_input_dim"]:
+ print("Incompatible value for the attribute \"geom_input_dim\". It is now set to: " +
+ str(metadata["geom_input_dim"]))
+ self.geom_input_dim = metadata["geom_input_dim"]
+ if verbose and self.app_input_dim is not None and self.app_input_dim != metadata["app_input_dim"]:
+ print("Incompatible value for the attribute \"app_input_dim\". It is now set to: " +
+ str(metadata["app_input_dim"]))
+ self.app_input_dim = metadata["app_input_dim"]
+ if verbose and self.app_feats != metadata["app_feats"]:
+ print("Incompatible value for the attribute \"app_feats\". It is now set to: " +
+ str(metadata["app_feats"]))
+ self.app_feats = metadata["app_feats"]
+ if verbose and self.fmod_map_type is not None and self.fmod_map_type != metadata["fmod_map_type"]:
+ print("Incompatible value for the attribute \"fmod_map_type\". It is now set to: " +
+ str(metadata["fmod_map_type"]))
+ if "fmod_map_type" in metadata:
+ self.fmod_map_type = metadata["fmod_map_type"]
+ if verbose and self.fmod_map_bin is not None and self.fmod_map_bin != metadata["fmod_map_bin"]:
+ print("Incompatible value for the attribute \"fmod_map_bin\". It is now set to: " +
+ str(metadata["fmod_map_bin"]))
+ if "fmod_map_bin" in metadata:
+ self.fmod_map_bin = metadata["fmod_map_bin"]
+ if verbose and self.fmod_roi_pooling_dim is not None and \
+ self.fmod_roi_pooling_dim != metadata["fmod_roi_pooling_dim"]:
+ print("Incompatible value for the attribute \"fmod_roi_pooling_dim\". It is now set to: " +
+ str(metadata["fmod_roi_pooling_dim"]))
+ if "fmod_roi_pooling_dim" in metadata:
+ self.fmod_roi_pooling_dim = metadata["fmod_roi_pooling_dim"]
+ if verbose and self.fmod_map_res_dim is not None and \
+ self.fmod_map_res_dim != metadata["fmod_map_res_dim"]:
+ print("Incompatible value for the attribute \"fmod_map_res_dim\". It is now set to: " +
+ str(metadata["fmod_map_res_dim"]))
+ if "fmod_roi_pooling_dim" in metadata:
+ self.fmod_roi_pooling_dim = metadata["fmod_roi_pooling_dim"]
+ if verbose and self.fmod_pyramid_lvl is not None and \
+ self.fmod_pyramid_lvl != metadata["fmod_pyramid_lvl"]:
+ print("Incompatible value for the attribute \"fmod_pyramid_lvl\". It is now set to: " +
+ str(metadata["fmod_pyramid_lvl"]))
+ if "fmod_pyramid_lvl" in metadata:
+ self.fmod_pyramid_lvl = metadata["fmod_pyramid_lvl"]
+ if verbose and self.lq_dim is not None and \
+ self.lq_dim != metadata["lq_dim"]:
+ print("Incompatible value for the attribute \"lq_dim\". It is now set to: " +
+ str(metadata["lq_dim"]))
+ self.lq_dim = metadata["lq_dim"]
+ if verbose and self.sq_dim is not None and self.sq_dim != metadata["sq_dim"]:
+ print("Incompatible value for the attribute \"sq_dim\". It is now set to: " +
+ str(metadata["sq_dim"]))
+ self.sq_dim = metadata["sq_dim"]
+ if verbose and self.num_JPUs is not None and self.num_JPUs != metadata["num_JPUs"]:
+ print("Incompatible value for the attribute \"num_JPUs\". It is now set to: " +
+ str(metadata["num_JPUs"]))
+ self.num_JPUs = metadata["num_JPUs"]
+ if verbose and 'max_dt_boxes' in metadata:
+ print('Model is trained with ' + str(metadata['max_dt_boxes']) + ' as the maximum number of detections.')
+
+ def load_state(self, checkpoint=None):
+ if checkpoint is None:
+ for p in self.model.parameters():
+ if p.dim() > 1:
+ nn.init.xavier_uniform_(p)
+ else:
+ try:
+ source_state = checkpoint['state_dict']
+ except KeyError:
+ source_state = checkpoint
+ target_state = self.model.state_dict()
+ new_target_state = collections.OrderedDict()
+ for target_key, target_value in target_state.items():
+ if target_key in source_state and source_state[target_key].size() == target_state[target_key].size():
+ new_target_state[target_key] = source_state[target_key]
+ else:
+ new_target_state[target_key] = target_state[target_key]
+
+ self.model.load_state_dict(new_target_state)
+
+ def count_parameters(self):
+
+ if self.model is None:
+ raise UserWarning("Model is not initialized, can't count trainable parameters.")
+ return sum(p.numel() for p in self.model.parameters() if p.requires_grad)
+
+ def download(self, path=None, model_name='seq2seq_pets_jpd_fmod', verbose=False,
+ url=OPENDR_SERVER_URL + "perception/object_detection_2d/nms/"):
+
+ supported_pretrained_models = ["seq2seq_pets_jpd_fmod", "seq2seq_pets_ssd_fmod",
+ "seq2seq_coco_frcn_fmod", "seq2seq_coco_ssd_fmod"]
+
+ if model_name not in supported_pretrained_models:
+ str_error = model_name + " pretrained model is not supported. The available pretrained models are: "
+ for i in range(len(supported_pretrained_models)):
+ str_error = str_error + supported_pretrained_models[i] + ", "
+ str_error = str_error[:-2] + '.'
+ raise ValueError(str_error)
+
+ if path is None:
+ path = self.temp_path
+
+ if not os.path.exists(path):
+ os.makedirs(path)
+
+ if verbose:
+ print("Downloading pretrained model...")
+
+ file_url = os.path.join(url, "pretrained", model_name + '.zip')
+ try:
+ urlretrieve(file_url, os.path.join(path, model_name + '.zip'))
+ with zipfile.ZipFile(os.path.join(path, model_name + '.zip'), 'r') as zip_ref:
+ zip_ref.extractall(path)
+ os.remove(os.path.join(path, model_name + '.zip'))
+        except Exception:
+ raise UserWarning('Pretrained model not found on server.')
+
+ def infer(self, boxes=None, scores=None, boxes_sorted=False, max_dt_boxes=400, img_res=None, threshold=0.1):
+ bounding_boxes = BoundingBoxList([])
+ if scores.shape[0] == 0:
+ return bounding_boxes
+ if scores.shape[1] > 1:
+ raise ValueError('Multi-class NMS is not supported in Seq2Seq-NMS yet.')
+ if boxes.shape[0] != scores.shape[0]:
+ raise ValueError('Scores and boxes must have the same size in dim 0.')
+ if "cuda" in self.device:
+ boxes = boxes.to(self.device)
+ scores = scores.to(self.device)
+
+ scores = scores.squeeze(-1)
+ keep_ids = torch.where(scores > 0.05)[0]
+ scores = scores[keep_ids]
+ boxes = boxes[keep_ids, :]
+ if not boxes_sorted:
+ scores, scores_ids = torch.sort(scores, dim=0, descending=True)
+ boxes = boxes[scores_ids]
+
+ val_ids = torch.logical_and((boxes[:, 2] - boxes[:, 0]) > 4,
+ (boxes[:, 3] - boxes[:, 1]) > 4)
+ boxes = boxes[val_ids, :]
+ scores = scores[val_ids]
+
+ if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0:
+ boxes, scores = apply_torchNMS(boxes=boxes, scores=scores, iou_thres=self.iou_filtering)
+
+ boxes = boxes[:max_dt_boxes]
+ scores = scores[:max_dt_boxes]
+ app_feats = None
+
+ if self.app_feats == 'fmod':
+ app_feats = self.fMoD.extract_FMoD_feats(boxes)
+ app_feats = torch.unsqueeze(app_feats, dim=1)
+ elif self.app_feats == 'zeros':
+ app_feats = torch.zeros([boxes.shape[0], 1, self.app_input_dim])
+ if "cuda" in self.device:
+ app_feats = app_feats.to(self.device)
+ elif self.app_feats == 'custom':
+ raise AttributeError("Custom appearance-based features are not yet supported.")
+
+ msk = self.compute_mask(boxes, iou_thres=0.2, extra=0.1)
+ q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=boxes, scores=scores,
+ resolution=img_res)
+
+ with torch.no_grad():
+ preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk,
+ app_feats=app_feats)
+
+ mask = torch.where(preds > threshold)[0]
+        if mask.shape[0] == 0:
+ return BoundingBoxList([])
+ preds = preds[mask].cpu().detach().numpy()
+ boxes = boxes[mask, :].cpu().numpy()
+
+ for idx, box in enumerate(boxes):
+ bbox = BoundingBox(left=box[0], top=box[1],
+ width=box[2] - box[0],
+ height=box[3] - box[1],
+ name=0,
+ score=preds[idx])
+ bounding_boxes.data.append(bbox)
+ return bounding_boxes, [boxes, np.zeros(scores.shape[0]), preds]
+
+ def optimize(self, **kwargs):
+ """This method is not used in this implementation."""
+ raise NotImplementedError
+
+ def reset(self):
+ """This method is not used in this implementation."""
+        raise NotImplementedError
+
+ def run_nms(self, boxes=None, scores=None, boxes_sorted=False, top_k=400, img=None, threshold=0.2):
+
+ if self.app_feats == 'fmod':
+ if not isinstance(img, Image):
+ img = Image(img)
+ _img = img.convert("channels_last", "rgb")
+ self.fMoD.extract_maps(img=_img, augm=False)
+
+ if isinstance(boxes, np.ndarray):
+ boxes = torch.tensor(boxes, device=self.device)
+ elif torch.is_tensor(boxes):
+ if "cuda" in self.device:
+ boxes = boxes.to(self.device)
+
+ if isinstance(scores, np.ndarray):
+ scores = torch.tensor(scores, device=self.device)
+ elif torch.is_tensor(scores):
+ if "cuda" in self.device:
+ scores = scores.to(self.device)
+        boxes = self.infer(boxes=boxes, scores=scores, boxes_sorted=boxes_sorted, max_dt_boxes=top_k,
+                           img_res=img.opencv().shape[::-1][1:], threshold=threshold)
+ return boxes
+
+ def set_architecture(self):
+ if self.variant == 'light':
+ self.lq_dim = 160
+ self.sq_dim = 80
+ self.num_JPUs = 2
+ elif self.variant == 'full':
+ self.lq_dim = 320
+ self.sq_dim = 160
+
+ def sef_fmod_architecture(self):
+ if self.variant == 'light':
+ self.fmod_roi_pooling_dim = 120
+ self.fmod_map_res_dim = 480
+ self.fmod_pyramid_lvl = 2
+ elif self.variant == 'full':
+ self.fmod_map_res_dim = 800
+
+ def compute_mask(self, boxes=None, iou_thres=0.2, extra=0.1):
+ relations = filter_iou_boxes(boxes, iou_thres=iou_thres)
+ mask1 = torch.tril(relations).float()
+ mask2 = extra * torch.triu(relations, diagonal=1).float()
+ mask = mask1 + mask2
+ return mask
+
+ def compute_geometrical_feats(self, boxes, scores, resolution):
+ boxBs = boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1)
+ boxAs = boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1)
+ scoresBs = scores.unsqueeze(0).unsqueeze(-1).repeat(scores.shape[0], 1, 1)
+ scoresAs = scores.unsqueeze(1).unsqueeze(1).repeat(1, scores.shape[0], 1)
+
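+ # Pairwise geometric relations between candidate boxes: normalized center offsets
+ # (dx, dy, dxy), width/height/area/aspect-ratio ratios, scaled scores and score
+ # differences, and IoU, concatenated into one encoding per box pair.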
+ scale_div = [resolution[1] / 20, resolution[0] / 20]
+ dx = ((boxBs[:, :, 0] - boxAs[:, :, 0] + boxBs[:, :, 2] - boxAs[:, :, 2]) / 2).unsqueeze(-1)
+ dy = ((boxBs[:, :, 1] - boxAs[:, :, 1] + boxBs[:, :, 3] - boxAs[:, :, 3]) / 2).unsqueeze(-1)
+ dxy = dx * dx + dy * dy
+ dxy = dxy / (scale_div[0] * scale_div[0] + scale_div[1] * scale_div[1])
+ dx = (dx / scale_div[0])
+ dy = (dy / scale_div[1])
+ sx = boxBs[:, :, 2] - boxBs[:, :, 0]
+ sx_1 = (sx / (boxAs[:, :, 2] - boxAs[:, :, 0])).unsqueeze(-1)
+ sx_2 = (sx / scale_div[0]).unsqueeze(-1)
+ sy = boxBs[:, :, 3] - boxBs[:, :, 1]
+ sy_1 = (sy / (boxAs[:, :, 3] - boxAs[:, :, 1])).unsqueeze(-1)
+ sy_2 = (sy / scale_div[1]).unsqueeze(-1)
+ scl = (boxBs[:, :, 2] - boxBs[:, :, 0]) * (boxBs[:, :, 3] - boxBs[:, :, 1])
+ scl_1 = (scl / ((boxAs[:, :, 2] - boxAs[:, :, 0]) * (boxAs[:, :, 3] - boxAs[:, :, 1]))).unsqueeze(-1)
+ scl_2 = (scl / (scale_div[0] * scale_div[1])).unsqueeze(-1)
+ del scl
+
+ scr_1 = 5 * scoresBs
+ scr_2 = scr_1 - 5 * scoresAs
+
+ sr_1 = torch.unsqueeze((boxBs[:, :, 3] - boxBs[:, :, 1]) / (boxBs[:, :, 2] - boxBs[:, :, 0]), dim=-1)
+ sr_2 = torch.unsqueeze(((boxBs[:, :, 3] - boxBs[:, :, 1]) / (boxBs[:, :, 2] - boxBs[:, :, 0])) / (
+ (boxAs[:, :, 3] - boxAs[:, :, 1]) / (boxAs[:, :, 2] - boxAs[:, :, 0])), dim=-1)
+
+ ious = 5 * (bb_intersection_over_union(boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1),
+ boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1))).unsqueeze(-1)
+ enc_vers_all = torch.cat((dx, dy, dxy, sx_1, sx_2, sy_1, sy_2, ious, scl_1, scl_2, scr_1, scr_2, sr_1, sr_2),
+ dim=2)
+ enc_vers = enc_vers_all.diagonal(dim1=0, dim2=1).transpose(0, 1).unsqueeze(1)
+ return enc_vers, enc_vers_all
+
+ def load_FMoD_init_from_dataset(self, dataset=None, map_type='edgemap', fmod_pyramid_lvl=3,
+ datasets_folder='./datasets',
+ map_bin=True, verbose=False):
+ fmod_dir = os.path.join(datasets_folder, dataset, 'FMoD')
+ if not os.path.exists(fmod_dir):
+ os.makedirs(fmod_dir, exist_ok=True)
+ map_type_c = map_type
+ if map_bin:
+ map_type_c = map_type_c + '_B'
+ fmod_filename = dataset + '_' + map_type_c + '_' + str(fmod_pyramid_lvl) + '.pkl'
+ fmod_filename = fmod_filename.lower()
+ fmod_stats = None
+ if not os.path.exists(os.path.join(fmod_dir, fmod_filename)):
+ file_url = os.path.join(OPENDR_SERVER_URL + 'perception/object_detection_2d/nms/FMoD', fmod_filename)
+ try:
+ urlretrieve(file_url, os.path.join(fmod_dir, fmod_filename))
+ except Exception:
+ if verbose:
+ print(
+ 'Normalization files not found on FTP server. Normalization will be performed setting \u03BC = '
+ '0 and \u03C3 = 1.')
+ fmod_feats_dim = 0
+ for i in range(0, fmod_pyramid_lvl):
+ fmod_feats_dim = fmod_feats_dim + 15 * (pow(4, i))
+ self.fmod_init_file = None
+ return {'mean': np.zeros(fmod_feats_dim), 'std': np.ones(fmod_feats_dim)}
+ self.fmod_init_file = os.path.join(fmod_dir, fmod_filename)
+ fmod_stats = self.load_FMoD_init(self.fmod_init_file)
+ return fmod_stats
+
+ def load_FMoD_init(self, path=None):
+ try:
+ with open(path, 'rb') as fp:
+ fmod_stats = pickle.load(fp)
+ map_type = list(fmod_stats.keys())[0]
+ fmod_stats = fmod_stats[map_type]
+ except EnvironmentError as e:
+ e.strerror = 'FMoD initialization .pkl file not found'
+ raise e
+ return fmod_stats
diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md b/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md
new file mode 100644
index 0000000000..6b8c2513d0
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md
@@ -0,0 +1,35 @@
+Soft-NMS
+======
+
+This folder contains an implementation of Soft-NMS [[1]](#soft_nms-1).
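+
+Usage
+------
+A minimal usage sketch is given below (the boxes and scores are made-up detections, and the import path simply mirrors this folder's layout):
+
+```python
+import numpy as np
+from opendr.perception.object_detection_2d.nms.soft_nms.soft_nms import SoftNMS
+
+# Candidate detections: boxes as [x1, y1, x2, y2], scores with one column per class.
+boxes = np.array([[20., 30., 120., 200.], [25., 32., 118., 205.], [300., 40., 380., 180.]])
+scores = np.array([[0.95], [0.80], [0.60]])
+
+soft_nms = SoftNMS(nms_type='gaussian', device='cpu')
+bounding_boxes, _ = soft_nms.run_nms(boxes=boxes, scores=scores, threshold=0.2)
+print(len(bounding_boxes.data))
+```
+
+A `SoftNMS` object can also be passed to the SSD learner's `infer()` method through its `custom_nms` argument, in which case Soft-NMS replaces the detector's default NMS step.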
+
+Sources
+------
+Large parts of the code are taken from [here](https://github.com/DocF/Soft-NMS), with modifications to make it compatible with the OpenDR specifications. The original code is licensed under the MIT license:
+
+```
+MIT License
+
+Copyright (c) 2020 DocF
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+```
+
+<a name="soft_nms-1"></a>[1] N. Bodla, B. Singh, R. Chellappa and L. S. Davis, Soft-NMS -- Improving Object Detection With One Line of Code,
+[ArXiv](https://arxiv.org/abs/1704.04503).
diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/soft_nms/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py b/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py
new file mode 100644
index 0000000000..a0c668c850
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py
@@ -0,0 +1,129 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# MIT License
+#
+# Copyright (c) 2020 DocF
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+from opendr.perception.object_detection_2d.nms.utils import NMSCustom
+from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard
+from opendr.engine.target import BoundingBox, BoundingBoxList
+import torch
+import numpy as np
+
+
+class SoftNMS(NMSCustom):
+ def __init__(self, nms_type='linear', device='cuda', nms_thres=None, top_k=400, post_k=100):
+ self.nms_types = ['linear', 'gaussian']
+ if nms_type not in self.nms_types:
+ raise ValueError('Type: ' + nms_type + ' of Soft-NMS is not supported.')
+ else:
+ self.nms_type = nms_type
+ if nms_thres is None:
+ if nms_type == 'linear':
+ nms_thres = 0.3
+ elif nms_type == 'gaussian':
+ nms_thres = 0.5
+ self.device = device
+ self.nms_thres = nms_thres
+ self.top_k = top_k
+ self.post_k = post_k
+
+ def set_nms_thres(self, nms_thres=0.45):
+ self.nms_thres = nms_thres
+
+ def set_top_k(self, top_k=400):
+ self.top_k = top_k
+
+ def set_post_k(self, post_k=100):
+ self.post_k = post_k
+
+ def set_nms_type(self, nms_type='linear'):
+ if nms_type not in self.nms_types:
+ raise ValueError('Type: ' + nms_type + ' of Soft-NMS is not supported.')
+ else:
+ self.nms_type = nms_type
+
+ def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None):
+
+ if isinstance(boxes, np.ndarray):
+ boxes = torch.tensor(boxes, device=self.device)
+ elif torch.is_tensor(boxes):
+ if self.device == 'cpu':
+ boxes = boxes.cpu()
+ elif self.device == 'cuda':
+ boxes = boxes.cuda()
+
+ if isinstance(scores, np.ndarray):
+ scores = torch.tensor(scores, device=self.device)
+ elif torch.is_tensor(scores):
+ if self.device == 'cpu':
+ scores = scores.cpu()
+ elif self.device == 'cuda':
+ scores = scores.cuda()
+
+ scores, classes = scores.max(dim=1)
+ _, idx = scores.sort(0, descending=True)
+ idx = idx[:self.top_k]
+ boxes = boxes[idx]
+ scores = scores[idx]
+ classes = classes[idx]
+
+ dets = torch.cat((boxes, scores.unsqueeze(-1)), dim=1)
+
+ i = 0
+ while dets.shape[0] > 0:
+ scores[i] = dets[0, 4]
+ iou = jaccard(dets[:1, :-1], dets[1:, :-1]).triu_(diagonal=0).squeeze(0)
+ weight = torch.ones_like(iou)
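+ # Soft-NMS rescoring: 'linear' decays overlapping scores by (1 - IoU) when the IoU
+ # exceeds nms_thres, while 'gaussian' decays all scores by exp(-IoU^2 / nms_thres).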
+ if self.nms_type == 'linear':
+ weight[iou > self.nms_thres] -= iou[iou > self.nms_thres]
+ elif self.nms_type == 'gaussian':
+ weight = torch.exp(-(iou * iou) / self.nms_thres)
+
+ dets[1:, 4] *= weight
+ dets = dets[1:, :]
+ i = i + 1
+ keep_ids = torch.where(scores > threshold)
+ scores = scores[keep_ids].cpu().numpy()
+ classes = classes[keep_ids].cpu().numpy()
+ boxes = boxes[keep_ids].cpu().numpy()
+ bounding_boxes = BoundingBoxList([])
+ for idx, box in enumerate(boxes):
+ bbox = BoundingBox(left=box[0], top=box[1],
+ width=box[2] - box[0],
+ height=box[3] - box[1],
+ name=classes[idx],
+ score=scores[idx])
+ bounding_boxes.data.append(bbox)
+
+ return bounding_boxes, [boxes, classes, scores]
diff --git a/src/opendr/perception/object_detection_2d/nms/utils/__init__.py b/src/opendr/perception/object_detection_2d/nms/utils/__init__.py
new file mode 100644
index 0000000000..2d130e14b8
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/utils/__init__.py
@@ -0,0 +1,3 @@
+from opendr.perception.object_detection_2d.nms.utils.nms_custom import NMSCustom
+
+__all__ = ['NMSCustom']
diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py
new file mode 100644
index 0000000000..7d551cd401
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py
@@ -0,0 +1,24 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from abc import ABC, abstractmethod
+
+
+class NMSCustom(ABC):
+ def __init__(self, device='cpu'):
+ self.device = device
+
+ @abstractmethod
+ def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None, device='cpu'):
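+ # Implementations are expected to filter the input detections and return a tuple of
+ # (BoundingBoxList, [boxes, classes, scores]); the SSD learner unpacks this tuple when
+ # a custom_nms object is passed to its infer() method.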
+ pass
diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py
new file mode 100644
index 0000000000..202f7f18c5
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py
@@ -0,0 +1,404 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from opendr.engine.datasets import Dataset
+from opendr.engine.data import Image
+from opendr.perception.object_detection_2d.datasets.transforms import BoundingBoxListToNumpyArray
+from opendr.engine.constants import OPENDR_SERVER_URL
+from pycocotools.coco import COCO
+import os
+from urllib.request import urlretrieve
+import ssl
+import time
+from zipfile import ZipFile
+import tarfile
+import pickle
+import numpy as np
+import math
+from tqdm import tqdm
+import gc
+
+
+class Dataset_NMS(Dataset):
+ def __init__(self, path=None, dataset_name=None, split=None, use_ssd=True, device='cuda'):
+ super().__init__()
+ available_dataset = ['COCO', 'PETS', 'TEST_MODULE']
+ self.dataset_sets = {'train': None,
+ 'val': None,
+ 'test': None}
+ if dataset_name not in available_dataset:
+ raise ValueError('Unsupported dataset: ' + dataset_name + '. Currently available are: ' +
+ ', '.join("'" + d + "'" for d in available_dataset) + '.')
+
+ ssl._create_default_https_context = ssl._create_unverified_context
+ self.dataset_name = dataset_name
+ self.split = split
+ # self.__prepare_dataset()
+ self.path = os.path.join(path, dataset_name)
+ self.src_data = []
+ if self.dataset_name == "PETS":
+ self.detector = 'JPD'
+ self.detector_type = 'default'
+ if use_ssd:
+ self.detector = 'SSD'
+ self.detector_type = 'custom'
+
+ self.dataset_sets['train'] = 'train'
+ self.dataset_sets['val'] = 'val'
+ self.dataset_sets['test'] = 'test'
+ if self.dataset_sets[self.split] is None:
+ raise ValueError(self.split + ' split is not available...')
+
+ if not os.path.exists(os.path.join(self.path, 'images/S1/L1')):
+ self.download(
+ 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S1_L1.tar.bz2',
+ download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True)
+ if not os.path.exists(os.path.join(self.path, 'images/S1/L2')):
+ self.download(
+ 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S1_L2.tar.bz2',
+ download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True)
+ if not os.path.exists(os.path.join(self.path, 'images/S2/L1')):
+ self.download(
+ 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L1.tar.bz2',
+ download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True)
+ if not os.path.exists(os.path.join(self.path, 'images/S2/L2')):
+ self.download(
+ 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L2.tar.bz2',
+ download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True)
+ if not os.path.exists(os.path.join(self.path, 'images/S2/L3')):
+ self.download(
+ 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L3.tar.bz2',
+ download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True)
+ if not os.path.exists(os.path.join(self.path, 'images/S3/Multiple_Flow')):
+ self.download(
+ 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S3_MF.tar.bz2',
+ download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True)
+ if not os.path.exists(
+ os.path.join(self.path, 'annotations', 'pets_' + self.dataset_sets[self.split] + '.json')):
+ self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_annotations_json.zip',
+ download_path=os.path.join(self.path, 'annotations'), file_format="zip",
+ create_dir=True)
+ pkl_filename = os.path.join(self.path,
+ 'data_' + self.detector + '_' + self.dataset_sets[self.split] + '_pets.pkl')
+ if not os.path.exists(pkl_filename):
+ ssd = None
+ if use_ssd:
+ from opendr.perception.object_detection_2d.ssd.ssd_learner import SingleShotDetectorLearner
+ ssd = SingleShotDetectorLearner(device=device)
+ ssd.download(".", mode="pretrained")
+ ssd.load("./ssd_default_person", verbose=True)
+ if not os.path.exists(
+ os.path.join(self.path, 'detections',
+ 'PETS-' + self.dataset_sets[self.split] + '_siyudpm_dets.idl')):
+ self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_detections.zip',
+ download_path=os.path.join(self.path, 'detections'), file_format="zip",
+ create_dir=True)
+ if not os.path.exists(
+ os.path.join(self.path, 'annotations', 'PETS-' + self.dataset_sets[self.split] + '.idl')):
+ self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_annotations.zip',
+ download_path=os.path.join(self.path, 'annotations'), file_format="zip",
+ create_dir=True)
+ with open(os.path.join(self.path, 'annotations',
+ 'PETS-' + self.dataset_sets[self.split] + '.idl')) as fp_gt:
+ fp_dt = None
+ if self.detector_type == 'default':
+ fp_dt = open(os.path.join(self.path, 'detections',
+ 'PETS-' + self.dataset_sets[self.split] + '_siyudpm_dets.idl'))
+ print('Preparing PETS ' + self.dataset_sets[self.split] + ' set...')
+ current_id = 0
+ number_samples = 1696
+ if self.split == 'val':
+ current_id = 1696
+ number_samples = 240
+ elif self.split == 'test':
+ current_id = 1936
+ number_samples = 436
+ pbarDesc = "Overall progress"
+ pbar = tqdm(desc=pbarDesc, total=number_samples)
+ if self.detector_type == 'default':
+ line_dt = fp_dt.readline()
+ line_gt = fp_gt.readline()
+ while line_gt:
+ remove_strings = ['PETS09-', '\"', ':', '(', ')', ',', '', ';']
+ data_gt = line_gt.replace(':', ' ')
+ for j in range(len(remove_strings)):
+ data_gt = data_gt.replace(remove_strings[j], '')
+ data_gt = data_gt.split()
+ filename_gt = data_gt[0][0:2] + '/' + data_gt[0][2:]
+ if filename_gt[0:6] == 'S2/L1/':
+ filename_gt = filename_gt.replace('img/00', 'Time_12-34/View_001/frame_')
+ num = int(filename_gt[-8:-4]) - 1
+ filename_gt = filename_gt[:-8] + str(num).zfill(4) + '.jpg'
+ if filename_gt[0:6] == 'S2/L2/':
+ filename_gt = filename_gt.replace('img/00', 'Time_14-55/View_001/frame_')
+ num = int(filename_gt[-8:-4]) - 1
+ filename_gt = filename_gt[:-8] + str(num).zfill(4) + '.jpg'
+ if filename_gt[0:2] == 'S3':
+ filename_gt = filename_gt.replace('_MF', 'Multiple_Flow')
+
+ if self.detector_type == 'default':
+ data_dt = line_dt.replace(':', ' ')
+ for j in range(len(remove_strings)):
+ data_dt = data_dt.replace(remove_strings[j], '')
+ data_dt = data_dt.split()
+ filename_dt = data_dt[0][0:2] + '/' + data_dt[0][2:]
+ if filename_dt[0:6] == 'S2/L1/':
+ filename_dt = filename_dt.replace('img/00', 'Time_12-34/View_001/frame_')
+ num = int(filename_dt[-8:-4]) - 1
+ filename_dt = filename_dt[:-8] + str(num).zfill(4) + '.jpg'
+ if filename_dt[0:6] == 'S2/L2/':
+ filename_dt = filename_dt.replace('img/00', 'Time_14-55/View_001/frame_')
+ num = int(filename_dt[-8:-4]) - 1
+ filename_dt = filename_dt[:-8] + str(num).zfill(4) + '.jpg'
+ if filename_dt[0:2] == 'S3':
+ filename_dt = filename_dt.replace('_MF', 'Multiple_Flow')
+ if filename_gt != filename_dt:
+ raise ValueError('Errors in files...')
+
+ img = Image.open(os.path.join(self.path, 'images/', filename_gt))
+
+ dt_boxes = []
+ if self.detector_type == 'default':
+ for i in range(1, (len(data_dt)), 5):
+ dt_box = np.array((float(data_dt[i]), float(data_dt[i + 1]), float(data_dt[i + 2]),
+ float(data_dt[i + 3]), 1 / (1 + math.exp(- float(data_dt[i + 4])))))
+ dt_boxes.append(dt_box)
+ else:
+ bboxes_list = ssd.infer(img, threshold=0.0, custom_nms=None, nms_thresh=0.975,
+ nms_topk=6000, post_nms=6000)
+ bboxes_list = BoundingBoxListToNumpyArray()(bboxes_list)
+ bboxes_list = bboxes_list[bboxes_list[:, 4] > 0.015]
+ bboxes_list = bboxes_list[np.argsort(bboxes_list[:, 4]), :][::-1]
+ bboxes_list = bboxes_list[:5000, :]
+ for b in range(len(bboxes_list)):
+ dt_boxes.append(np.array([bboxes_list[b, 0], bboxes_list[b, 1], bboxes_list[b, 2],
+ bboxes_list[b, 3], bboxes_list[b, 4][0]]))
+ gt_boxes = []
+ for i in range(1, (len(data_gt)), 5):
+ gt_box = np.array((float(data_gt[i]), float(data_gt[i + 1]), float(data_gt[i + 2]),
+ float(data_gt[i + 3])))
+ gt_boxes.append(gt_box)
+ self.src_data.append({
+ 'id': current_id,
+ 'filename': os.path.join('images', filename_gt),
+ 'resolution': img.opencv().shape[0:2][::-1],
+ 'gt_boxes': [np.asarray([]), np.asarray(gt_boxes)],
+ 'dt_boxes': [np.asarray([]), np.asarray(dt_boxes)]
+ })
+ current_id = current_id + 1
+ pbar.update(1)
+ if self.detector_type == 'default':
+ line_dt = fp_dt.readline()
+ line_gt = fp_gt.readline()
+ pbar.close()
+ if self.detector_type == 'default':
+ fp_dt.close()
+ elif self.detector == 'SSD':
+ del ssd
+ gc.collect()
+ with open(pkl_filename, 'wb') as handle:
+ pickle.dump(self.src_data, handle, protocol=pickle.DEFAULT_PROTOCOL)
+ else:
+ with open(pkl_filename, 'rb') as fp_pkl:
+ self.src_data = pickle.load(fp_pkl)
+
+ self.classes = ['background', 'human']
+ self.class_ids = [-1, 1]
+ self.annotation_file = 'pets_' + self.dataset_sets[self.split] + '.json'
+ elif self.dataset_name == "COCO":
+ self.dataset_sets['train'] = 'train'
+ self.dataset_sets['val'] = 'minival'
+ self.dataset_sets['test'] = 'valminusminival'
+ if self.dataset_sets[self.split] is None:
+ raise ValueError(self.split + ' split is not available...')
+ elif self.dataset_sets[self.split] == 'train':
+ imgs_split = 'train2014'
+ else:
+ imgs_split = 'val2014'
+ self.detector = 'FRCN'
+ self.detector_type = 'default'
+ ssd = None
+ if use_ssd:
+ self.detector = 'SSD'
+ self.detector_type = 'custom'
+ from opendr.perception.object_detection_2d.ssd.ssd_learner import SingleShotDetectorLearner
+ ssd = SingleShotDetectorLearner(device=device)
+ ssd.download(".", mode="pretrained")
+ ssd.load("./ssd_default_person", verbose=True)
+ if not os.path.exists(os.path.join(self.path, imgs_split)):
+ self.download('http://images.cocodataset.org/zips/' + imgs_split + '.zip',
+ download_path=os.path.join(self.path), file_format="zip",
+ create_dir=True)
+ pkl_filename = os.path.join(self.path, 'data_' + self.detector + '_' +
+ self.dataset_sets[self.split] + '_coco.pkl')
+ if not os.path.exists(pkl_filename):
+ if not os.path.exists(os.path.join(self.path, 'annotations', 'instances_' +
+ self.dataset_sets[self.split] +
+ '2014.json')):
+ if self.dataset_sets[self.split] == 'train':
+ ann_url = 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip'
+ self.download(ann_url, download_path=os.path.join(self.path), file_format="zip",
+ create_dir=True)
+ else:
+ if self.dataset_sets[self.split] == 'minival':
+ ann_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/' \
+ 'instances_minival2014.json.zip?dl=0'
+ else:
+ ann_url = 'https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/' \
+ 'instances_valminusminival2014.json.zip?dl=0'
+ self.download(ann_url, download_path=os.path.join(self.path, 'annotations'), file_format="zip",
+ create_dir=True)
+ if not os.path.exists(os.path.join(self.path, 'detections', 'coco_2014_' +
+ self.dataset_sets[self.split] +
+ '_FRCN_train.pkl')):
+ self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/coco_2014_FRCN.tar.gz',
+ download_path=os.path.join(self.path, 'detections'), file_format='tar.gz',
+ create_dir=True)
+ with open(os.path.join(self.path, 'detections',
+ 'coco_2014_' + self.dataset_sets[self.split] + '_FRCN_train.pkl'), 'rb') as f:
+ dets_default = pickle.load(f, encoding='latin1')
+ annots = COCO(annotation_file=os.path.join(self.path, 'annotations', 'instances_' +
+ self.dataset_sets[self.split] + '2014.json'))
+ pbarDesc = "Overall progress"
+ pbar = tqdm(desc=pbarDesc, total=len(dets_default[1]))
+ for i in range(len(dets_default[1])):
+ dt_boxes = []
+ img_info = annots.loadImgs([dets_default[1][i]])[0]
+ img = Image.open(os.path.join(self.path, imgs_split, img_info["file_name"]))
+ if self.detector_type == 'default':
+ dt_boxes = dets_default[0][1][i]
+ elif self.detector == 'SSD':
+ bboxes_list = ssd.infer(img, threshold=0.0, custom_nms=None, nms_thresh=0.975,
+ nms_topk=6000, post_nms=6000)
+ bboxes_list = BoundingBoxListToNumpyArray()(bboxes_list)
+ if bboxes_list.shape[0] > 0:
+ bboxes_list = bboxes_list[bboxes_list[:, 4] > 0.015]
+ if bboxes_list.shape[0] > 0:
+ bboxes_list = bboxes_list[np.argsort(bboxes_list[:, 4]), :][::-1]
+ bboxes_list = bboxes_list[:5000, :]
+ for b in range(len(bboxes_list)):
+ dt_boxes.append(np.array([bboxes_list[b, 0], bboxes_list[b, 1], bboxes_list[b, 2],
+ bboxes_list[b, 3], bboxes_list[b, 4][0]]))
+ dt_boxes = np.asarray(dt_boxes)
+ annots_in_frame = annots.loadAnns(
+ annots.getAnnIds(imgIds=[dets_default[1][i]], catIds=[1], iscrowd=False))
+ gt_boxes = []
+ for j in range(len(annots_in_frame)):
+ gt_boxes.append(annots_in_frame[j]['bbox'])
+ gt_boxes = np.asarray(np.asarray(gt_boxes))
+ if gt_boxes.shape[0] > 0:
+ gt_boxes[:, 2] = gt_boxes[:, 0] + gt_boxes[:, 2]
+ gt_boxes[:, 3] = gt_boxes[:, 1] + gt_boxes[:, 3]
+ self.src_data.append({
+ 'id': dets_default[1][i],
+ 'filename': os.path.join(imgs_split, img_info["file_name"]),
+ 'resolution': [img_info['width'], img_info['height']],
+ 'gt_boxes': [np.asarray([]), gt_boxes],
+ 'dt_boxes': [np.asarray([]), dt_boxes]
+ })
+ pbar.update(1)
+ pbar.close()
+ if self.detector == 'SSD':
+ del ssd
+ gc.collect()
+ with open(pkl_filename, 'wb') as handle:
+ pickle.dump(self.src_data, handle, protocol=pickle.DEFAULT_PROTOCOL)
+ else:
+ with open(pkl_filename, 'rb') as fp_pkl:
+ self.src_data = pickle.load(fp_pkl)
+ self.classes = ['background', 'person']
+ self.class_ids = [-1, 1]
+ self.annotation_file = 'instances_' + self.dataset_sets[self.split] + '2014.json'
+ elif self.dataset_name == "TEST_MODULE":
+ self.dataset_sets['train'] = 'test'
+ self.dataset_sets['val'] = 'test'
+ self.dataset_sets['test'] = 'test'
+ if self.dataset_sets[self.split] is None:
+ raise ValueError(self.split + ' split is not available...')
+ pkl_filename = os.path.join(self.path, 'test_module.pkl')
+ if not os.path.exists(pkl_filename):
+ data_url = OPENDR_SERVER_URL + '/perception/object_detection_2d/nms/datasets/test_module.zip'
+ self.download(data_url, download_path=os.path.join(self.path).replace("TEST_MODULE", ""), file_format="zip",
+ create_dir=True)
+ with open(pkl_filename, 'rb') as fp_pkl:
+ self.src_data = pickle.load(fp_pkl)
+ self.classes = ['background', 'person']
+ self.class_ids = [-1, 1]
+ self.annotation_file = 'test_module_anns.json'
+
+ @staticmethod
+ def download(
+ url, download_path, dataset_sub_path=".", file_format="zip", create_dir=False):
+
+ if create_dir:
+ os.makedirs(download_path, exist_ok=True)
+
+ print("Downloading dataset from", url, "to", download_path)
+
+ start_time = 0
+ last_print = 0
+
+ def reporthook(count, block_size, total_size):
+ nonlocal start_time
+ nonlocal last_print
+ if count == 0:
+ start_time = time.time()
+ last_print = start_time
+ return
+
+ duration = time.time() - start_time
+ progress_size = int(count * block_size)
+ speed = int(progress_size / (1024 * duration))
+ if time.time() - last_print >= 1:
+ last_print = time.time()
+ print(
+ "\r%d MB, %d KB/s, %d seconds passed" %
+ (progress_size / (1024 * 1024), speed, duration),
+ end=''
+ )
+
+ if file_format == "zip":
+ zip_path = os.path.join(download_path, "dataset.zip")
+ urlretrieve(url, zip_path, reporthook=reporthook)
+ print()
+ print("Extracting data from zip file")
+ with ZipFile(zip_path, 'r') as zip_ref:
+ zip_ref.extractall(download_path)
+ os.remove(zip_path)
+ elif file_format == "tar.bz2" or file_format == "tar.gz":
+ tar_path = os.path.join(download_path, "dataset." + file_format)
+ urlretrieve(url, tar_path, reporthook=reporthook)
+ print()
+
+ def members(tf):
+ prefix_len = len("Crowd_PETS09/")
+ for member in tf.getmembers():
+ if member.path.startswith("Crowd_PETS09/"):
+ member.path = member.path[prefix_len:]
+ yield member
+
+ with tarfile.open(tar_path, "r:" + file_format.split('.')[1]) as tar:
+ if file_format == "tar.bz2":
+ tar.extractall(path=download_path, members=members(tar))
+ else:
+ tar.extractall(path=download_path)
+ tar.close()
+ os.remove(tar_path)
+ else:
+ raise ValueError("Unsupported file_format: " + file_format)
diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py
new file mode 100644
index 0000000000..93286bbc7a
--- /dev/null
+++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py
@@ -0,0 +1,286 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU).
+# Copyright (c) 2020 Zheng, Zhaohui.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, version 3.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+import torch
+import torchvision
+import numpy as np
+from pycocotools.coco import COCO
+from pycocotools.cocoeval import COCOeval
+import sys
+import os
+
+
+def jaccard(box_a, box_b, iscrowd: bool = False):
+ use_batch = True
+ if box_a.dim() == 2:
+ use_batch = False
+ box_a = box_a[None, ...]
+ box_b = box_b[None, ...]
+
+ inter = intersect(box_a, box_b)
+ area_a = ((box_a[:, :, 2] - box_a[:, :, 0]) *
+ (box_a[:, :, 3] - box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B]
+ area_b = ((box_b[:, :, 2] - box_b[:, :, 0]) *
+ (box_b[:, :, 3] - box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B]
+ union = area_a + area_b - inter
+
+ out = inter / area_a if iscrowd else inter / union
+ return out if use_batch else out.squeeze(0)
+
+
+def intersect(box_a, box_b):
+ n = box_a.size(0)
+ A = box_a.size(1)
+ B = box_b.size(1)
+ max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2),
+ box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2))
+ min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2),
+ box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2))
+ return torch.clamp(max_xy - min_xy, min=0).prod(3) # inter
+
+
+def diou(box_a, box_b, iscrowd: bool = False):
+ use_batch = True
+ if box_a.dim() == 2:
+ use_batch = False
+ box_a = box_a[None, ...]
+ box_b = box_b[None, ...]
+
+ inter = intersect(box_a, box_b)
+ area_a = ((box_a[:, :, 2] - box_a[:, :, 0]) *
+ (box_a[:, :, 3] - box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B]
+ area_b = ((box_b[:, :, 2] - box_b[:, :, 0]) *
+ (box_b[:, :, 3] - box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B]
+ union = area_a + area_b - inter
+ x1 = ((box_a[:, :, 2] + box_a[:, :, 0]) / 2).unsqueeze(2).expand_as(inter)
+ y1 = ((box_a[:, :, 3] + box_a[:, :, 1]) / 2).unsqueeze(2).expand_as(inter)
+ x2 = ((box_b[:, :, 2] + box_b[:, :, 0]) / 2).unsqueeze(1).expand_as(inter)
+ y2 = ((box_b[:, :, 3] + box_b[:, :, 1]) / 2).unsqueeze(1).expand_as(inter)
+
+ t1 = box_a[:, :, 1].unsqueeze(2).expand_as(inter)
+ b1 = box_a[:, :, 3].unsqueeze(2).expand_as(inter)
+ l1 = box_a[:, :, 0].unsqueeze(2).expand_as(inter)
+ r1 = box_a[:, :, 2].unsqueeze(2).expand_as(inter)
+
+ t2 = box_b[:, :, 1].unsqueeze(1).expand_as(inter)
+ b2 = box_b[:, :, 3].unsqueeze(1).expand_as(inter)
+ l2 = box_b[:, :, 0].unsqueeze(1).expand_as(inter)
+ r2 = box_b[:, :, 2].unsqueeze(1).expand_as(inter)
+ cr = torch.max(r1, r2)
+ cl = torch.min(l1, l2)
+ ct = torch.min(t1, t2)
+ cb = torch.max(b1, b2)
+ D = (((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7))
+ out = inter / area_a if iscrowd else inter / union - D ** 0.9
+ return out if use_batch else out.squeeze(0)
+
+
+def distance(box_a, box_b, iscrowd: bool = False):
+ use_batch = True
+ if box_a.dim() == 2:
+ use_batch = False
+ box_a = box_a[None, ...]
+ box_b = box_b[None, ...]
+
+ inter = intersect(box_a, box_b)
+ x1 = ((box_a[:, :, 2] + box_a[:, :, 0]) / 2).unsqueeze(2).expand_as(inter)
+ y1 = ((box_a[:, :, 3] + box_a[:, :, 1]) / 2).unsqueeze(2).expand_as(inter)
+ x2 = ((box_b[:, :, 2] + box_b[:, :, 0]) / 2).unsqueeze(1).expand_as(inter)
+ y2 = ((box_b[:, :, 3] + box_b[:, :, 1]) / 2).unsqueeze(1).expand_as(inter)
+
+ t1 = box_a[:, :, 1].unsqueeze(2).expand_as(inter)
+ b1 = box_a[:, :, 3].unsqueeze(2).expand_as(inter)
+ l1 = box_a[:, :, 0].unsqueeze(2).expand_as(inter)
+ r1 = box_a[:, :, 2].unsqueeze(2).expand_as(inter)
+
+ t2 = box_b[:, :, 1].unsqueeze(1).expand_as(inter)
+ b2 = box_b[:, :, 3].unsqueeze(1).expand_as(inter)
+ l2 = box_b[:, :, 0].unsqueeze(1).expand_as(inter)
+ r2 = box_b[:, :, 2].unsqueeze(1).expand_as(inter)
+
+ cr = torch.max(r1, r2)
+ cl = torch.min(l1, l2)
+ ct = torch.min(t1, t2)
+ cb = torch.max(b1, b2)
+ D = (((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)) ** 0.6
+ out = D
+ return out if use_batch else out.squeeze(0)
+
+
+def det_matching(scores, dt_boxes, gt_boxes, iou_thres, device='cuda'):
+ sorted_indices = torch.argsort(-scores, dim=0)
+ labels = torch.zeros(len(dt_boxes))
+ if device == 'cuda':
+ labels = labels.cuda()
+ if gt_boxes.shape[0] == 0:
+ return labels.unsqueeze(-1)
+ assigned_GT = -torch.ones(len(gt_boxes))
+ r = torch.tensor([-1, -1, -1, -1]).float().unsqueeze(0).unsqueeze(0)
+ if device == 'cuda':
+ r = r.cuda()
+ for s in sorted_indices:
+ gt_boxes_c = gt_boxes.clone().unsqueeze(0)
+ gt_boxes_c[0, assigned_GT > -1, :] = r
+ ious = bb_intersection_over_union(boxAs=dt_boxes[s].clone().unsqueeze(0), boxBs=gt_boxes_c)
+ annot_iou, annot_box_id = torch.sort(ious.squeeze(), descending=True)
+ if annot_box_id.ndim > 0:
+ annot_box_id = annot_box_id[0]
+ annot_iou = annot_iou[0]
+ if annot_iou > iou_thres:
+ assigned_GT[annot_box_id] = s
+ labels[s] = 1
+ return labels.unsqueeze(-1)
+
+
+def run_coco_eval(dt_file_path=None, gt_file_path=None, only_classes=None, max_dets=None,
+ verbose=False):
+ if max_dets is None:
+ max_dets = [200, 400, 600, 800, 1000, 1200]
+ results = []
+ sys.stdout = open(os.devnull, 'w')
+ for i in range(len(max_dets)):
+ coco = COCO(gt_file_path)
+ coco_dt = coco.loadRes(dt_file_path)
+ cocoEval = COCOeval(coco, coco_dt, 'bbox')
+ cocoEval.params.iouType = 'bbox'
+ cocoEval.params.useCats = True
+ cocoEval.params.catIds = only_classes
+ cocoEval.params.maxDets = [max_dets[i]]
+ cocoEval.evaluate()
+ results.append([summarize_nms(coco_eval=cocoEval, maxDets=max_dets[i]), max_dets[i]])
+ # print(results[i])
+ del cocoEval, coco_dt, coco
+ sys.stdout = sys.__stdout__
+ return results
+
+
+def summarize_nms(coco_eval=None, maxDets=100):
+ def summarize(ap=1, iouThr=None, areaRng='all', maxDets=100):
+ p = coco_eval.params
+ iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}'
+ titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
+ typeStr = '(AP)' if ap == 1 else '(AR)'
+ iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \
+ if iouThr is None else '{:0.2f}'.format(iouThr)
+ aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng]
+ mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets]
+ if ap == 1:
+ # dimension of precision: [TxRxKxAxM]
+ s = coco_eval.eval['precision']
+ # IoU
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ s = s[:, :, :, aind, mind]
+ else:
+ # dimension of recall: [TxKxAxM]
+ s = coco_eval.eval['recall']
+ if iouThr is not None:
+ t = np.where(iouThr == p.iouThrs)[0]
+ s = s[t]
+ s = s[:, :, aind, mind]
+ if len(s[s > -1]) == 0:
+ mean_s = -1
+ else:
+ mean_s = np.mean(s[s > -1])
+ stat_str = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s)
+ return [mean_s, stat_str]
+
+ def summarizeDets():
+ stats = []
+ stat, stat_str = summarize(1, maxDets=maxDets)
+ stats.append([stat, stat_str])
+ stat, stat_str = summarize(1, iouThr=.5, maxDets=maxDets)
+ stats.append([stat, stat_str])
+ stat, stat_str = summarize(1, iouThr=.75, maxDets=maxDets)
+ stats.append([stat, stat_str])
+ stat, stat_str = summarize(0, maxDets=maxDets)
+ stats.append([stat, stat_str])
+ return stats
+
+ coco_eval.accumulate()
+ summarized = summarizeDets()
+ return summarized
+
+
+def drop_dets(boxes, scores, keep_ratio=0.85):
+ ids = np.arange(len(boxes))
+ np.random.shuffle(ids)
+ ids_keep = ids[0:int(len(boxes) * keep_ratio)]
+ boxes_new = boxes[ids_keep, :]
+ scores_new = scores[ids_keep]
+ scores_new, scores_new_ids = torch.sort(scores_new, descending=True)
+ boxes_new = boxes_new[scores_new_ids]
+ return boxes_new, scores_new
+
+
+def filter_iou_boxes(boxes=None, iou_thres=0.2):
+ ious = bb_intersection_over_union(boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1),
+ boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1))
+ ids_boxes = ious >= iou_thres
+ return ids_boxes
+
+
+def bb_intersection_over_union(boxAs=None, boxBs=None):
+ xA = torch.maximum(boxAs[:, :, 0], boxBs[:, :, 0])
+ yA = torch.maximum(boxAs[:, :, 1], boxBs[:, :, 1])
+ xB = torch.minimum(boxAs[:, :, 2], boxBs[:, :, 2])
+ yB = torch.minimum(boxAs[:, :, 3], boxBs[:, :, 3])
+ interAreas = torch.maximum(torch.zeros_like(xB), xB - xA + 1) * torch.maximum(torch.zeros_like(yB), yB - yA + 1)
+ boxAAreas = (boxAs[:, :, 2] - boxAs[:, :, 0] + 1) * (boxAs[:, :, 3] - boxAs[:, :, 1] + 1)
+ boxBAreas = (boxBs[:, :, 2] - boxBs[:, :, 0] + 1) * (boxBs[:, :, 3] - boxBs[:, :, 1] + 1)
+ ious = interAreas / (boxAAreas + boxBAreas - interAreas)
+ return ious
+
+
+def compute_class_weights(pos_weights, max_dets=400, dataset_nms=None):
+ num_pos = np.ones([len(dataset_nms.classes), 1])
+ num_bg = np.ones([len(dataset_nms.classes), 1])
+ weights = np.zeros([len(dataset_nms.classes), 2])
+ for i in range(len(dataset_nms.src_data)):
+ for cls_index in range(len(dataset_nms.classes)):
+ num_pos[cls_index] = num_pos[cls_index] + \
+ min(max_dets, len(dataset_nms.src_data[i]['gt_boxes'][cls_index]))
+ num_bg[cls_index] = num_bg[cls_index] + max(0, min(max_dets,
+ len(dataset_nms.src_data[i]['dt_boxes'][cls_index])) -
+ min(max_dets,
+ len(dataset_nms.src_data[i]['gt_boxes'][cls_index])))
+ for class_index in range(len(dataset_nms.classes)):
+ weights[class_index, 0] = (1 - pos_weights[class_index]) * (num_pos[class_index] +
+ num_bg[class_index]) / num_bg[class_index]
+ weights[class_index, 1] = pos_weights[class_index] * (num_pos[class_index] +
+ num_bg[class_index]) / num_pos[class_index]
+ return weights
+
+
+def apply_torchNMS(boxes, scores, iou_thres):
+ ids_nms = torchvision.ops.nms(boxes, scores, iou_thres)
+ scores = scores[ids_nms]
+ boxes = boxes[ids_nms]
+ return boxes, scores
diff --git a/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py b/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py
index 386f5b5306..70b4656cf1 100644
--- a/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py
+++ b/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py
@@ -43,8 +43,10 @@
# algorithm imports
from opendr.perception.object_detection_2d.utils.eval_utils import DetectionDatasetCOCOEval
from opendr.perception.object_detection_2d.datasets import DetectionDataset
-from opendr.perception.object_detection_2d.datasets.transforms import ImageToNDArrayTransform, BoundingBoxListToNumpyArray, \
- transform_test
+from opendr.perception.object_detection_2d.datasets.transforms import ImageToNDArrayTransform, \
+ BoundingBoxListToNumpyArray, \
+ transform_test, pad_test
+from opendr.perception.object_detection_2d.nms.utils import NMSCustom
gutils.random.seed(0)
@@ -90,7 +92,6 @@ def __init__(self, lr=1e-3, epochs=120, batch_size=8,
self.ctx = mx.gpu(int(self.device.split(':')[1]))
else:
self.ctx = mx.cpu()
- print("Device set to cuda but no GPU available, using CPU...")
else:
self.ctx = mx.cpu()
@@ -141,7 +142,7 @@ def save(self, path, verbose=False):
if verbose:
print("Model parameters saved.")
- with open(os.path.join(path, model_name + '.json'), 'w', encoding='utf-8') as f:
+ with open(os.path.join(path, model_name + '.json'), 'w', encoding='utf-8') as f:
json.dump(metadata, f, ensure_ascii=False, indent=4)
if verbose:
print("Model metadata saved.")
@@ -216,7 +217,7 @@ def download(self, path=None, mode="pretrained", verbose=False,
if verbose:
print("Downloading params...")
file_url = os.path.join(url, "pretrained", "ssd_512_vgg16_atrous_wider_person",
- "ssd_512_vgg16_atrous_wider_person.params")
+ "ssd_512_vgg16_atrous_wider_person.params")
urlretrieve(file_url,
os.path.join(path, "ssd_512_vgg16_atrous_wider_person.params"))
@@ -461,18 +462,27 @@ def __get_lr_at(self, epoch):
else:
return self.lr
- def eval(self, dataset, use_subset=False, subset_size=100, verbose=False):
+ def eval(self, dataset, use_subset=False, subset_size=100, verbose=False,
+ nms_thresh=0.45, nms_topk=400, post_nms=100):
"""
This method performs evaluation on a given dataset and returns a dictionary with the evaluation results.
:param dataset: dataset object, to perform evaluation on
:type dataset: opendr.perception.object_detection_2d.datasets.DetectionDataset or opendr.engine.data.ExternalDataset
- :return: dictionary containing evaluation metric names nad values
:param use_subset: if True, only a subset of the dataset is evaluated, defaults to False
:type use_subset: bool, optional
:param subset_size: if use_subset is True, subset_size controls the size of the subset to be evaluated
:type subset_size: int, optional
:param verbose: if True, additional information is printed on stdout
:type verbose: bool, optional
+ :param nms_thresh: Non-maximum suppression threshold. You can specify < 0 or > 1 to disable NMS.
+ :type nms_thresh: float, default is 0.45
+ :param nms_topk: Apply NMS to the top k detection results; use -1 to disable so that every detection result is used in NMS.
+ :type nms_topk: int, default is 400
+ :param post_nms: Only return top post_nms detection results, the rest is discarded.
+ The number is based on COCO dataset which has maximum 100 objects per image. You can adjust this number if
+ expecting more objects. You can use -1 to return all detections.
+ :type post_nms: int, default is 100
+ :return: dictionary containing evaluation metric names and values
:rtype: dict
"""
autograd.set_training(False)
@@ -494,7 +504,7 @@ def eval(self, dataset, use_subset=False, subset_size=100, verbose=False):
self._model.initialize()
self._model.collect_params().reset_ctx(ctx)
self._model.hybridize(static_alloc=True, static_shape=True)
- self._model.set_nms(nms_thresh=0.45, nms_topk=400)
+ self._model.set_nms(nms_thresh=nms_thresh, nms_topk=nms_topk, post_nms=post_nms)
dataset, eval_metric = self.__prepare_val_dataset(dataset, data_shape=self.img_size)
@@ -549,7 +559,8 @@ def eval(self, dataset, use_subset=False, subset_size=100, verbose=False):
eval_dict = {k.lower(): v for k, v in zip(map_name, mean_ap)}
return eval_dict
- def infer(self, img, threshold=0.2, keep_size=False):
+ def infer(self, img, threshold=0.2, keep_size=False, custom_nms: NMSCustom=None,
+ nms_thresh=0.45, nms_topk=400, post_nms=100):
"""
Performs inference on a single image and returns the resulting bounding boxes.
:param img: image to perform inference on
@@ -558,13 +569,26 @@ def infer(self, img, threshold=0.2, keep_size=False):
:type threshold: float, optional
:param keep_size: if True, the image is not resized to fit the data shape used during training
:type keep_size: bool, optional
+ :param custom_nms: Custom NMS method to be employed on inference
+ :type custom_nms: opendr.perception.object_detection_2d.nms.utils.nms_custom.NMSCustom, optional
+ :param nms_thresh: Non-maximum suppression threshold. You can specify < 0 or > 1 to disable NMS.
+ :type nms_thresh: float, default is 0.45
+ :param nms_topk: Apply NMS to the top k detection results; use -1 to disable so that every detection result is used in NMS.
+ :type nms_topk: int, default is 400
+ :param post_nms: Only return top post_nms detection results, the rest is discarded.
+ The number is based on COCO dataset which has maximum 100 objects per image. You can adjust this number if
+ expecting more objects. You can use -1 to return all detections.
+ :type post_nms: int, default is 100
:return: list of bounding boxes
:rtype: BoundingBoxList
"""
- assert self._model is not None, "Model has not been loaded, call load(path) first"
- self._model.set_nms(nms_thresh=0.45, nms_topk=400)
+ assert self._model is not None, "Model has not been loaded, call load(path) first"
+ if custom_nms:
+ self._model.set_nms(nms_thresh=0.85, nms_topk=5000, post_nms=1000)
+ else:
+ self._model.set_nms(nms_thresh=nms_thresh, nms_topk=nms_topk, post_nms=post_nms)
if not isinstance(img, Image):
img = Image(img)
_img = img.convert("channels_last", "rgb")
@@ -576,33 +600,43 @@ def infer(self, img, threshold=0.2, keep_size=False):
x, img_mx = transform_test(img_mx)
else:
x, img_mx = presets.ssd.transform_test(img_mx, short=self.img_size)
-
h_mx, w_mx, _ = img_mx.shape
+ x = pad_test(x, min_size=self.img_size)
x = x.as_in_context(self.ctx)
class_IDs, scores, boxes = self._model(x)
class_IDs = class_IDs[0, :, 0].asnumpy()
scores = scores[0, :, 0].asnumpy()
- mask = np.where((class_IDs >= 0) & (scores > threshold))[0]
+ mask = np.where(class_IDs >= 0)[0]
+ if custom_nms is None:
+ mask = np.intersect1d(mask, np.where(scores > threshold)[0])
if mask.size == 0:
return BoundingBoxList([])
scores = scores[mask, np.newaxis]
class_IDs = class_IDs[mask, np.newaxis]
boxes = boxes[0, mask, :].asnumpy()
+ if x.shape[2] > h_mx:
+ boxes[:, [1, 3]] -= (x.shape[2] - h_mx)
+ elif x.shape[3] > w_mx:
+ boxes[:, [0, 2]] -= (x.shape[3] - w_mx)
boxes[:, [0, 2]] /= w_mx
boxes[:, [1, 3]] /= h_mx
boxes[:, [0, 2]] *= width
boxes[:, [1, 3]] *= height
- bounding_boxes = BoundingBoxList([])
- for idx, box in enumerate(boxes):
- bbox = BoundingBox(left=box[0], top=box[1],
- width=box[2] - box[0],
- height=box[3] - box[1],
- name=class_IDs[idx, :],
- score=scores[idx, :])
- bounding_boxes.data.append(bbox)
+ if custom_nms is not None:
+ bounding_boxes, _ = custom_nms.run_nms(boxes=boxes, scores=scores, threshold=threshold, img=_img)
+ else:
+ bounding_boxes = BoundingBoxList([])
+ for idx, box in enumerate(boxes):
+ bbox = BoundingBox(left=box[0], top=box[1],
+ width=box[2] - box[0],
+ height=box[3] - box[1],
+ name=class_IDs[idx, :],
+ score=scores[idx, :])
+ bounding_boxes.data.append(bbox)
+
return bounding_boxes
@staticmethod
diff --git a/tests/sources/tools/perception/object_detection_2d/nms/__init__.py b/tests/sources/tools/perception/object_detection_2d/nms/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/__init__.py b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py
new file mode 100644
index 0000000000..66d06bf3a6
--- /dev/null
+++ b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py
@@ -0,0 +1,139 @@
+# Copyright 2020-2022 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import gc
+import shutil
+import os
+import numpy as np
+from opendr.perception.object_detection_2d import Seq2SeqNMSLearner
+from opendr.perception.object_detection_2d.nms.utils.nms_dataset import Dataset_NMS
+from opendr.engine.data import Image
+
+
+def rmfile(path):
+ try:
+ os.remove(path)
+ except OSError as e:
+ print("Error: %s - %s." % (e.filename, e.strerror))
+
+
+def rmdir(_dir):
+ try:
+ shutil.rmtree(_dir)
+ except OSError as e:
+ print("Error: %s - %s." % (e.filename, e.strerror))
+
+
+class TestSeq2SeqNMS(unittest.TestCase):
+
+ @classmethod
+ def setUpClass(cls):
+ print("\n\n**********************************\nTEST Seq2Seq-NMS Learner\n"
+ "**********************************")
+
+ cls.temp_dir = os.path.join(".", "tests", "sources", "tools", "perception", "object_detection_2d",
+ "nms", "seq2seq_nms", "temp")
+ cls.seq2SeqNMSLearner = Seq2SeqNMSLearner(iou_filtering=None, app_feats='fmod', temp_path=cls.temp_dir,
+ device='cpu', checkpoint_after_iter=1, epochs=1)
+
+ # Download all required files for testing
+ cls.seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=cls.temp_dir)
+
+ @classmethod
+ def tearDownClass(cls):
+ print('Removing temporary directories for Seq2Seq-NMS...')
+ # Clean up downloaded files
+ rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "test_module.pkl"))
+ rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "val2014", "COCO_val2014_000000262148.jpg"))
+ rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "FMoD", "coco_edgemap_b_3.pkl"))
+ rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "annotations", "test_module_anns.json"))
+ rmdir(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "val2014"))
+ rmdir(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "FMoD"))
+ rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "fmod_normalization.pkl"))
+ rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "last_weights.json"))
+ rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "last_weights.pth"))
+ rmdir(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod"))
+
+ rmdir(os.path.join(cls.temp_dir))
+
+ del cls.seq2SeqNMSLearner
+ gc.collect()
+ print('Finished cleaning for Seq2Seq-NMS...')
+
+ def test_fit(self):
+ print('Starting training test for Seq2Seq-NMS...')
+
+ m = list(self.seq2SeqNMSLearner.model.parameters())[0].clone()
+ self.seq2SeqNMSLearner.fit(dataset='TEST_MODULE', use_ssd=False,
+ datasets_folder=self.temp_dir + '/datasets',
+ logging_path=None, silent=False, verbose=True, nms_gt_iou=0.50,
+ max_dt_boxes=200)
+ n = list(self.seq2SeqNMSLearner.model.parameters())[0].clone()
+ self.assertFalse(np.array_equal(m, n),
+ msg="Model parameters did not change after running fit.")
+ del m, n
+ gc.collect()
+ print('Finished training test for Seq2Seq-NMS...')
+
+ def test_eval(self):
+ print('Starting evaluation test for Seq2Seq-NMS...')
+ self.seq2SeqNMSLearner.load(self.temp_dir + '/seq2seq_pets_jpd_fmod/', verbose=True)
+ results_dict = self.seq2SeqNMSLearner.eval(dataset='TEST_MODULE', split='test', max_dt_boxes=800,
+ datasets_folder=self.temp_dir + '/datasets',
+ use_ssd=False)
+ if results_dict is None:
+ self.assertIsNotNone(results_dict,
+ msg="Eval results dictionary not returned.")
+ else:
+ self.assertGreater(results_dict[0][0][1][0], 0.4)
+ del results_dict
+ gc.collect()
+ print('Finished evaluation test for Seq2Seq-NMS...')
+
+ def test_infer(self):
+ print('Starting inference test for Seq2Seq-NMS...')
+ self.seq2SeqNMSLearner.load(self.temp_dir + '/seq2seq_pets_jpd_fmod/', verbose=True)
+ dataset_nms = Dataset_NMS(path=self.temp_dir + '/datasets', dataset_name='TEST_MODULE', split='train', use_ssd=False)
+ image_fln = dataset_nms.src_data[0]['filename']
+ img = Image.open(os.path.join(self.temp_dir, 'datasets', 'TEST_MODULE', image_fln))
+ boxes = dataset_nms.src_data[0]['dt_boxes'][1][:, 0:4]
+ scores = np.expand_dims(dataset_nms.src_data[0]['dt_boxes'][1][:, 4], axis=-1)
+
+ bounding_box_list = self.seq2SeqNMSLearner.run_nms(boxes=boxes, scores=scores, img=img, threshold=0.5)
+
+ self.assertIsNotNone(bounding_box_list,
+ msg="Returned empty BoundingBoxList.")
+ del img
+ del bounding_box_list
+ del boxes
+ del scores
+ del dataset_nms
+ gc.collect()
+ print('Finished inference test for Seq2Seq-NMS...')
+
+ def test_save_load(self):
+ print('Starting save/load test for Seq2Seq-NMS...')
+ self.seq2SeqNMSLearner.save(os.path.join(self.temp_dir, "test_model", "last_weights"), current_epoch=0)
+ self.seq2SeqNMSLearner.model = None
+ self.seq2SeqNMSLearner.init_model()
+ self.seq2SeqNMSLearner.load(os.path.join(self.temp_dir, "test_model"))
+ self.assertIsNotNone(self.seq2SeqNMSLearner.model, "model is None after loading model.")
+ # Cleanup
+ rmdir(os.path.join(self.temp_dir, "test_model"))
+ print('Finished save/load test for Seq2Seq-NMS...')
+
+
+if __name__ == "__main__":
+ unittest.main()