diff --git a/.github/workflows/test_packages.yml b/.github/workflows/test_packages.yml index 7fabc5b512..29c2511dc6 100644 --- a/.github/workflows/test_packages.yml +++ b/.github/workflows/test_packages.yml @@ -45,6 +45,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -93,6 +94,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition - perception/object_detection_3d - control/mobile_manipulation diff --git a/.github/workflows/tests_suite.yml b/.github/workflows/tests_suite.yml index f084aac5a2..1a4e252b04 100644 --- a/.github/workflows/tests_suite.yml +++ b/.github/workflows/tests_suite.yml @@ -78,6 +78,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - simulation/human_model_generation - perception/facial_expression_recognition - control/single_demo_grasp @@ -185,6 +186,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -255,6 +257,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -331,6 +334,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition - perception/object_detection_3d - control/mobile_manipulation diff --git a/.github/workflows/tests_suite_develop.yml b/.github/workflows/tests_suite_develop.yml index 6da62e4f47..38f8113974 100644 --- a/.github/workflows/tests_suite_develop.yml +++ b/.github/workflows/tests_suite_develop.yml @@ -78,6 +78,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - simulation/human_model_generation - perception/facial_expression_recognition - control/single_demo_grasp @@ -190,6 +191,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -260,6 +262,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition # - perception/object_detection_3d # - control/mobile_manipulation @@ -336,6 +339,7 @@ jobs: - perception/object_detection_2d/ssd - perception/object_detection_2d/yolov3 - perception/object_detection_2d/retinaface + - perception/object_detection_2d/nms - perception/facial_expression_recognition - perception/object_detection_3d - 
control/mobile_manipulation
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4cf9710ba..85847af40a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,7 @@ Released on XX, XXth, 2022.
   - New Features:
     - Added end-to-end planning tool ([#223](https://github.com/opendr-eu/opendr/pull/223)).
+    - Added seq2seq-nms module, along with other custom NMS implementations for 2D object detection ([#232](https://github.com/opendr-eu/opendr/pull/232)).
   - Enhancements:
     - Added support for modular pip packages allowing tools to be installed separately ([#201](https://github.com/opendr-eu/opendr/pull/201)).
     - Simplified the installation process for pip by including the appropriate post-installation scripts ([#201](https://github.com/opendr-eu/opendr/pull/201)).
diff --git a/docs/reference/index.md b/docs/reference/index.md
index 728f90a959..8d9a7d7202 100644
--- a/docs/reference/index.md
+++ b/docs/reference/index.md
@@ -43,6 +43,7 @@ Neither the copyright holder nor any applicable licensor will be liable for any
     - [centernet Module](object-detection-2d-centernet.md)
     - [ssd Module](object-detection-2d-ssd.md)
     - [yolov3 Module](object-detection-2d-yolov3.md)
+    - [seq2seq-nms Module](object-detection-2d-nms-seq2seq_nms.md)
   - object detection 3d:
     - [voxel Module](voxel-object-detection-3d.md)
   - object tracking 2d:
@@ -113,6 +114,7 @@ Neither the copyright holder nor any applicable licensor will be liable for any
     - [centernet Demo](/projects/perception/object_detection_2d/centernet)
     - [ssd Demo](/projects/perception/object_detection_2d/ssd)
     - [yolov3 Demo](/projects/perception/object_detection_2d/yolov3)
+    - [seq2seq-nms Demo](/projects/perception/object_detection_2d/nms/seq2seq-nms)
   - object detection 3d:
     - [voxel Demo](/projects/perception/object_detection_3d/demos/voxel_object_detection_3d)
   - object tracking 2d:
diff --git a/docs/reference/object-detection-2d-nms-seq2seq_nms.md b/docs/reference/object-detection-2d-nms-seq2seq_nms.md
new file mode 100644
index 0000000000..513233c833
--- /dev/null
+++ b/docs/reference/object-detection-2d-nms-seq2seq_nms.md
@@ -0,0 +1,305 @@
+## Seq2Seq-NMS module
+
+The *seq2seq-nms* module contains the *Seq2SeqNMSLearner* class, which inherits from the abstract class *Learner*.
+
+### Class Seq2SeqNMSLearner
+Bases: `engine.learners.Learner`
+
+It can be used to perform single-class non-maximum suppression (NMS) on images (inference), as well as to train new seq2seq-nms models. The implementation is based on [[1]](#seq2seq_nms-1). The method is set up for performing NMS on the person-detection task, using the implementation of the [SSD](/docs/reference/object-detection-2d-ssd.md) detector. The Seq2Seq-NMS method can also be employed for performing single-class NMS on any class other than the human/pedestrian class. In that case, the method needs to be trained from scratch. Finally, a pretrained model can be employed for evaluation or inference on the same class that it was trained on, using RoIs from a different detector than the one used in training. In that case, we advise fine-tuning the pretrained Seq2Seq-NMS model using RoIs from the detector deployed in the inference/evaluation of the method, in order to achieve the highest possible performance.
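+
+As a quick orientation (a minimal sketch, not part of the reference below), a pretrained model can be applied to raw RoIs produced by any detector through `run_nms`, which also computes the FMoD maps internally. The model name and file paths follow the examples at the end of this document and are placeholders:
+
+```python
+from opendr.perception.object_detection_2d.nms import Seq2SeqNMSLearner
+from opendr.engine.data import Image
+import numpy as np
+
+learner = Seq2SeqNMSLearner(app_feats='fmod', fmod_map_type='EDGEMAP', device='cpu', temp_path='./temp')
+learner.download(model_name='seq2seq_pets_jpd_fmod', path='./temp')   # pretrained person-detection model
+learner.load('./temp/seq2seq_pets_jpd_fmod', verbose=True)
+
+img = Image.open('frame_0000.jpg')                                    # any image wrapped as engine.data.Image
+boxes = np.array([[10., 20., 110., 220.], [12., 25., 115., 225.]])    # Nx4 RoIs: (x_min, y_min, x_max, y_max)
+scores = np.array([[0.9], [0.6]])                                     # Nx1 detector confidences
+detections = learner.run_nms(boxes=boxes, scores=scores, img=img, threshold=0.1)
+```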
+
+The [Seq2SeqNMSLearner](/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py) class has the following
+public methods:
+
+#### `Seq2SeqNMSLearner` constructor
+```python
+Seq2SeqNMSLearner(self, lr, epochs, device, temp_path, checkpoint_after_iter, checkpoint_load_iter, log_after, variant,
+                  iou_filtering, dropout, app_feats, fmod_map_type, fmod_map_bin, app_input_dim)
+```
+
+Constructor parameters:
+
+- **lr**: *float, default=0.0001*\
+  Specifies the initial learning rate to be used during training.
+- **epochs**: *int, default=8*\
+  Specifies the number of epochs to be used during training.
+- **device**: *{'cuda', 'cpu'}, default='cuda'*\
+  Specifies the device to be used.
+- **temp_path**: *str, default='./temp'*\
+  Specifies a path to be used for storage of checkpoints during training.
+- **checkpoint_after_iter**: *int, default=0*\
+  Specifies the epoch interval between checkpoints during training.
+  If set to 0 no checkpoint will be saved.
+- **checkpoint_load_iter**: *int, default=0*\
+  Specifies the epoch to load a saved checkpoint from.
+  If set to 0 no checkpoint will be loaded.
+- **log_after**: *int, default=500*\
+  Specifies the interval (in iterations/batches) between information logging on *stdout*.
+- **variant**: *{'light', 'medium', 'full'}, default='medium'*\
+  Specifies the variant of the seq2seq-nms model.
+- **iou_filtering**: *float, default=0.8*\
+  Specifies the IoU threshold used for filtering RoIs before they are provided to the seq2seq-nms model.
+  If set to values <0 or >1, no filtering is applied.
+- **dropout**: *float, default=0.025*\
+  Specifies the dropout rate.
+- **app_feats**: *{'fmod', 'zeros', 'custom'}, default='fmod'*\
+  Specifies the type of the appearance-based features of RoIs used in the model.
+- **fmod_map_type**: *{'EDGEMAP', 'FAST', 'AKAZE', 'BRISK', 'ORB'}, default='EDGEMAP'*\
+  Specifies the type of maps used by FMoD, in the case where *app_feats*='fmod'.
+- **fmod_map_bin**: *bool, default=True*\
+  Specifies whether FMoD maps are binary or not, in the case where *app_feats*='fmod'.
+- **app_input_dim**: *int, default=None*\
+  Specifies the dimension of appearance-based RoI features.
+  In the case where *app_feats*='fmod', the corresponding dimension is automatically computed.
+
+
+#### `Seq2SeqNMSLearner.fit`
+```python
+Seq2SeqNMSLearner.fit(self, dataset, logging_path, logging_flush_secs, silent, verbose, nms_gt_iou, max_dt_boxes, datasets_folder, use_ssd)
+```
+
+This method is used to train the algorithm on a `Dataset_NMS` dataset.
+Returns a dictionary containing stats regarding the training process.
+
+Parameters:
+
+- **dataset**: *{'PETS', 'COCO'}*\
+  Specifies the name of the dataset among those available for training.
+- **logging_path**: *str, default=None*\
+  Path to save log files.
+  If set to None, only the console will be used for logging.
+- **logging_flush_secs**: *int, default=30*\
+  How often, in seconds, to flush the TensorBoard data to disk.
+- **silent**: *bool, default=False*\
+  If set to True, disables all printing of training progress reports and other information to STDOUT.
+- **verbose**: *bool, default=True*\
+  If True, enables maximum verbosity.
+- **nms_gt_iou**: *float, default=0.5*\
+  Specifies the threshold used to determine whether a detection RoI must be suppressed or not, based on its IoU with the image's ground-truth RoIs.
+- **max_dt_boxes**: *int, default=500*\
+  Specifies the maximum number of RoIs provided to the seq2seq-nms model as input.
+- **datasets_folder**: *str, default='./datasets'*\
+  Specifies the path to the folder where the datasets are stored.
+- **use_ssd**: *bool, default=False*\
+  If set to True, RoIs from SSD are fed to the seq2seq-nms model.
+  Otherwise, RoIs from the default detector of the specified dataset are used as input.
+
+#### `Seq2SeqNMSLearner.eval`
+```python
+Seq2SeqNMSLearner.eval(self, dataset, split, verbose, max_dt_boxes, threshold, datasets_folder, use_ssd)
+```
+
+Performs evaluation on a split of the specified dataset.
+
+Parameters:
+
+- **dataset**: *{'PETS', 'COCO'}*\
+  Specifies the name of the dataset among those available for training.
+- **split**: *{'train', 'val', 'test'}, default='test'*\
+  Specifies the split of the corresponding dataset on which the evaluation will be performed.
+- **verbose**: *bool, default=True*\
+  If True, enables maximum verbosity.
+- **max_dt_boxes**: *int, default=500*\
+  Specifies the maximum number of RoIs provided to the seq2seq-nms model as input.
+- **threshold**: *float, default=0.0*\
+  Specifies the confidence threshold used for RoI selection after seq2seq-nms rescoring.
+- **datasets_folder**: *str, default='./datasets'*\
+  Specifies the path to the folder where the datasets are stored.
+- **use_ssd**: *bool, default=False*\
+  If set to True, RoIs from SSD are fed to the seq2seq-nms model.
+  Otherwise, RoIs from the default detector of the specified dataset are used as input.
+
+#### `Seq2SeqNMSLearner.infer`
+```python
+Seq2SeqNMSLearner.infer(self, boxes, scores, boxes_sorted, max_dt_boxes, img_res, threshold)
+```
+
+Performs non-maximum suppression, using seq2seq-nms.
+In the case where FMoD is selected for appearance-based RoI feature computation, FMoD maps are not computed.
+
+Parameters:
+
+- **boxes**: *torch.tensor, default=None*\
+  Image coordinates of candidate detection RoIs, expressed as the coordinates of their upper-left and bottom-right corners (x_min, y_min, x_max, y_max).
+  For N candidate detection RoIs, the size of the *torch.tensor* is Nx4.
+- **scores**: *torch.tensor, default=None*\
+  Specifies the scores of the candidate detection RoIs, assigned previously by a detector.
+  For N candidate detection RoIs, the size of the *torch.tensor* is Nx1.
+- **boxes_sorted**: *bool, default=False*\
+  Specifies whether *boxes* and *scores* are sorted based on *scores* in descending order.
+- **max_dt_boxes**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+- **img_res**: *[int, int], default=None*\
+  Specifies the image resolution expressed as [width, height].
+- **threshold**: *float, default=0.1*\
+  Specifies the score threshold that determines which RoIs are kept after seq2seq-nms rescoring.
+
+#### `Seq2SeqNMSLearner.run_nms`
+```python
+Seq2SeqNMSLearner.run_nms(self, boxes, scores, img, threshold, boxes_sorted, top_k)
+```
+
+Performs non-maximum suppression, using seq2seq-nms.
+It incorporates the full pipeline needed for inference, including FMoD's edge/interest-point map computation step.
+
+Parameters:
+
+- **boxes**: *numpy.ndarray, default=None*\
+  Image coordinates of candidate detection RoIs, expressed as the coordinates of their upper-left and bottom-right corners (x_min, y_min, x_max, y_max).
+  For N candidate detection RoIs, the size of the array is Nx4.
+- **scores**: *numpy.ndarray, default=None*\
+  Specifies the scores of the candidate detection RoIs, assigned previously by a detector.
+  For N candidate detection RoIs, the size of the array is Nx1.
+- **boxes_sorted**: *bool, default=False*\
+  Specifies whether *boxes* and *scores* are sorted based on *scores* in descending order.
+- **top_k**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+- **img**: *object*\
+  Object of type engine.data.Image.
+- **threshold**: *float, default=0.1*\
+  Specifies the score threshold that determines which RoIs are kept after seq2seq-nms rescoring.
+
+#### `Seq2SeqNMSLearner.save`
+```python
+Seq2SeqNMSLearner.save(self, path, verbose, optimizer, scheduler, current_epoch, max_dt_boxes)
+```
+
+Saves a model in OpenDR format at the specified path.
+
+Parameters:
+
+- **path**: *str*\
+  Specifies the folder where the model will be saved.
+- **verbose**: *bool, default=False*\
+  If True, enables maximum verbosity.
+- **optimizer**: *torch.optim.Optimizer, default=None*\
+  Specifies the optimizer used for training.
+- **scheduler**: *torch.optim.lr_scheduler, default=None*\
+  Specifies the learning rate scheduler used for training.
+- **current_epoch**: *int, default=None*\
+  Specifies the number of epochs for which the model has been trained.
+- **max_dt_boxes**: *int, default=400*\
+  Specifies the maximum number of detection RoIs that are fed as input to the seq2seq-nms model.
+
+
+#### `Seq2SeqNMSLearner.load`
+```python
+Seq2SeqNMSLearner.load(self, path, verbose)
+```
+
+Loads a model which was previously saved in OpenDR format at the specified path.
+
+Parameters:
+
+- **path**: *str*\
+  Specifies the folder where the model will be loaded from.
+- **verbose**: *bool, default=False*\
+  If True, enables maximum verbosity.
+
+
+#### `Seq2SeqNMSLearner.download`
+```python
+Seq2SeqNMSLearner.download(self, path, model_name, verbose, url)
+```
+
+Downloads data needed by the learner, i.e., pretrained seq2seq-nms models as well as test data.
+
+Parameters:
+
+- **path**: *str, default=None*\
+  Specifies the folder where data will be downloaded.
+  If *None*, the *self.temp_path* directory is used instead.
+- **model_name**: *{'seq2seq_medium_pets_jpd_fmod_3', 'seq2seq_medium_pets_ssd_fmod_3', 'seq2seq_medium_coco_frcn_fmod_3', 'seq2seq_medium_pets_ssd_fmod_3'}, default='seq2seq_medium_pets_jpd_fmod_3'*\
+  Specifies the name of the pretrained seq2seq-nms model to be downloaded.
+- **verbose**: *bool, default=True*\
+  If True, enables maximum verbosity.
+- **url**: *str, default=OpenDR FTP URL*\
+  URL of the FTP server.
+
+#### Examples
+
+* **Training example.**
+  For proper training of seq2seq-nms, the PETS and COCO datasets are supported as `Dataset_NMS` types.
+
+  ```python
+  from opendr.perception.object_detection_2d.nms import Seq2SeqNMSLearner
+  import os
+  OPENDR_HOME = os.environ['OPENDR_HOME']
+
+  temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/tmp'
+  datasets_folder = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/datasets'
+
+  seq2SeqNMSLearner = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering=0.8,
+                                        app_feats='fmod', checkpoint_after_iter=1,
+                                        temp_path=temp_path, epochs=8)
+  seq2SeqNMSLearner.fit(dataset='PETS', use_ssd=False, datasets_folder=datasets_folder,
+                        logging_path=os.path.join(temp_path, 'logs'), silent=False,
+                        verbose=True, nms_gt_iou=0.50, max_dt_boxes=500)
+  ```
+
+* **Inference and result drawing example on a test .jpg image using OpenCV.**
+
+  ```python
+  from opendr.perception.object_detection_2d.nms import Seq2SeqNMSLearner
+  from opendr.engine.data import Image
+  from opendr.perception.object_detection_2d import SingleShotDetectorLearner
+  from opendr.perception.object_detection_2d import draw_bounding_boxes
+  import os
+  OPENDR_HOME = os.environ['OPENDR_HOME']
+  temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/tmp'
+
+  seq2SeqNMSLearner = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering=0.8,
+                                        app_feats='fmod', device='cpu',
+                                        temp_path=temp_path)
+  seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=temp_path)
+  seq2SeqNMSLearner.load(os.path.join(temp_path, 'seq2seq_pets_jpd_fmod'), verbose=True)
+  ssd = SingleShotDetectorLearner(device='cuda')
+  ssd.download(".", mode="pretrained")
+  ssd.load("./ssd_default_person", verbose=True)
+  img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg')
+  if not isinstance(img, Image):
+      img = Image(img)
+  boxes = ssd.infer(img, threshold=0.25, custom_nms=seq2SeqNMSLearner)
+  draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True)
+  ```
+
+* **Evaluation of pretrained model on PETS dataset.**
+
+  ```python
+  from opendr.perception.object_detection_2d import Seq2SeqNMSLearner
+  import os
+  OPENDR_HOME = os.environ['OPENDR_HOME']
+
+  datasets_folder = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/datasets'
+  temp_path = OPENDR_HOME + '/src/opendr/perception/object_detection_2d/nms/tmp'
+
+  seq2SeqNMSLearner = Seq2SeqNMSLearner(iou_filtering=0.8, app_feats='fmod',
+                                        temp_path=temp_path, device='cuda')
+  seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=temp_path)
+  seq2SeqNMSLearner.load(os.path.join(temp_path, 'seq2seq_pets_jpd_fmod'), verbose=True)
+  seq2SeqNMSLearner.eval(dataset='PETS', split='test', max_dt_boxes=800,
+                         datasets_folder=datasets_folder, use_ssd=False, threshold=0.0)
+  ```
+
+#### Performance Evaluation
+
+TABLE-1: Average Precision (AP) achieved by pretrained models on the person detection task, on the validation and test sets. The maximum number of RoIs employed for the performance evaluation was set to 800.
+
+| **Pretrained Model** | **Dataset** | **Detector** | **Type of Appearance-based Features** | **Pre-processing IoU Threshold** | **AP@0.5 on validation set** | **AP@0.5 on test set** |
+|:----------------------:|:-----------:|:------------:|:-------------------------------------:|:--------------------------------:|:----------------------------:|:----------------------:|
+| seq2seq_pets_jpd_fmod | PETS | JPD | FMoD | 0.8 | 80.2% | 84.3% |
+| seq2seq_pets_ssd_fmod | PETS | SSD | FMoD | 0.8 | 77.4% | 79.1% |
+| seq2seq_coco_frcn_fmod | COCO | FRCN | FMoD | - | 68.1% \* | 67.5% \*\* |
+| seq2seq_coco_ssd_fmod | COCO | SSD | FMoD | - | 41.8% \* | 42.4% \*\* |
+
+\* The minival set was used as validation set.
+\*\* The minitest set was used as test set. + + +#### References +[1] Neural Attention-driven Non-Maximum Suppression for Person Detection, [TechRxiv](https://www.techrxiv.org/articles/preprint/Neural_Attention-driven_Non-Maximum_Suppression_for_Person_Detection/16940275). diff --git a/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py b/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py index 6f643e61cf..f0dd7ca1d3 100755 --- a/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py +++ b/projects/opendr_ws/src/perception/scripts/object_detection_2d_ssd.py @@ -22,11 +22,12 @@ from opendr.engine.data import Image from opendr.perception.object_detection_2d import SingleShotDetectorLearner from opendr.perception.object_detection_2d import draw_bounding_boxes +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner, SoftNMS, FastNMS, ClusterNMS class ObjectDetectionSSDNode: def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/opendr/image_boxes_annotated", - detections_topic="/opendr/objects", device="cuda", backbone="vgg16_atrous"): + detections_topic="/opendr/objects", device="cuda", backbone="vgg16_atrous", nms_type='default'): """ Creates a ROS Node for face detection :param input_image_topic: Topic from which we are reading the input image @@ -41,6 +42,8 @@ def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/ :type device: str :param backbone: backbone network :type backbone: str + :param ms_type: type of NMS method + :type nms_type: str """ # Initialize the face detector @@ -48,6 +51,20 @@ def __init__(self, input_image_topic="/usb_cam/image_raw", output_image_topic="/ self.object_detector.download(path=".", verbose=True) self.object_detector.load("ssd_default_person") self.class_names = self.object_detector.classes + self.custom_nms = None + + # Initialize Seq2Seq-NMS if selected + if nms_type == 'seq2seq-nms': + self.custom_nms = Seq2SeqNMSLearner(fmod_map_type='EDGEMAP', iou_filtering=0.8, + app_feats='fmod', device=self.device) + self.custom_nms.download(model_name='seq2seq_pets_jpd', path='.') + self.custom_nms.load('./seq2seq_pets_jpd/', verbose=True) + elif nms_type == 'soft-nms': + self.custom_nms = SoftNMS(nms_thres=0.45, device=self.device) + elif nms_type == 'fast-nms': + self.custom_nms = FastNMS(nms_thres=0.45, device=self.device) + elif nms_type == 'cluster-nms': + self.custom_nms = ClusterNMS(nms_thres=0.45, device=self.device) # Initialize OpenDR ROSBridge object self.bridge = ROSBridge() @@ -76,7 +93,7 @@ def callback(self, data): image = self.bridge.from_ros_image(data, encoding='bgr8') # Run pose estimation - boxes = self.object_detector.infer(image, threshold=0.45, keep_size=False) + boxes = self.object_detector.infer(image, threshold=0.45, keep_size=False, custom_nms=self.custom_nms) # Get an OpenCV image back image = np.float32(image.opencv()) diff --git a/projects/perception/object_detection_2d/nms/cluster_nms/README.md b/projects/perception/object_detection_2d/nms/cluster_nms/README.md new file mode 100644 index 0000000000..0ff5c5fd9c --- /dev/null +++ b/projects/perception/object_detection_2d/nms/cluster_nms/README.md @@ -0,0 +1,7 @@ +# Cluster-NMS Demos + +This folder contains minimal code usage examples that showcase the basic functionality of the Cluster-NMS implementation +provided by OpenDR. Specifically the following examples are provided: + +1. inference_demo.py: Perform inference on a single image. 
Setting `--device cpu` performs inference on CPU. + diff --git a/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py new file mode 100644 index 0000000000..e653f5820c --- /dev/null +++ b/projects/perception/object_detection_2d/nms/cluster_nms/inference_demo.py @@ -0,0 +1,31 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from opendr.perception.object_detection_2d import ClusterNMS +from opendr.engine.data import Image +from opendr.perception.object_detection_2d import SingleShotDetectorLearner +from opendr.perception.object_detection_2d import draw_bounding_boxes +import os +OPENDR_HOME = os.environ['OPENDR_HOME'] + +ssd = SingleShotDetectorLearner(device='cuda') +ssd.download(".", mode="pretrained") +ssd.load("./ssd_default_person", verbose=True) +img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg') +if not isinstance(img, Image): + img = Image(img) +cluster_nms = ClusterNMS(device='cuda', nms_type='default', cross_class=True) +boxes = ssd.infer(img, threshold=0.3, custom_nms=cluster_nms) +draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True) diff --git a/projects/perception/object_detection_2d/nms/fast_nms/README.md b/projects/perception/object_detection_2d/nms/fast_nms/README.md new file mode 100644 index 0000000000..5a1ccb3fd6 --- /dev/null +++ b/projects/perception/object_detection_2d/nms/fast_nms/README.md @@ -0,0 +1,5 @@ +# Fast-NMS Demos + +This folder contains minimal code usage examples that showcase the basic functionality of the Fast-NMS implementation +provided by OpenDR. Specifically the following examples are provided: +1. inference_demo.py: Perform inference on a single image. Setting `--device cpu` performs inference on CPU. diff --git a/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py new file mode 100644 index 0000000000..5e0a5b48fa --- /dev/null +++ b/projects/perception/object_detection_2d/nms/fast_nms/inference_demo.py @@ -0,0 +1,31 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
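+
+# Demo body (below): downloads the pretrained OpenDR SSD person detector, loads a sample frame from
+# $OPENDR_HOME, and replaces the detector's built-in NMS with Fast-NMS by passing a FastNMS object
+# through the `custom_nms` argument of SingleShotDetectorLearner.infer().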
+ + +from opendr.perception.object_detection_2d import FastNMS +from opendr.engine.data import Image +from opendr.perception.object_detection_2d import SingleShotDetectorLearner +from opendr.perception.object_detection_2d import draw_bounding_boxes +import os +OPENDR_HOME = os.environ['OPENDR_HOME'] + +ssd = SingleShotDetectorLearner(device='cuda') +ssd.download(".", mode="pretrained") +ssd.load("./ssd_default_person", verbose=True) +img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg') +if not isinstance(img, Image): + img = Image(img) +cluster_nms = FastNMS(device='cpu', cross_class=True) +boxes = ssd.infer(img, threshold=0.3, custom_nms=cluster_nms) +draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True) diff --git a/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg b/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg new file mode 100644 index 0000000000..5efb4d9298 Binary files /dev/null and b/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg differ diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md b/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md new file mode 100644 index 0000000000..c831924349 --- /dev/null +++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/README.md @@ -0,0 +1,17 @@ +# Seq2Seq-NMS Demos + +This folder contains minimal code usage examples that showcase the basic functionality of the Seq2Seq-NMS implementation +provided by OpenDR. Specifically the following examples are provided: + +1. inference_demo.py: Perform inference on a single image. Setting `--device cpu` performs inference on CPU. + +2. eval_demo.py: Perform evaluation on the `WiderPersonDataset`, implemented in OpenDR format. The user must first download + the dataset and provide the path to the dataset root via `--data-root /path/to/wider_person`. + Setting `--device cpu` performs evaluation on CPU. + +3. train_demo.py: Fit learner to dataset. PASCAL VOC and COCO datasets are supported via `ExternalDataset` class and any + `DetectionDataset` can be used as well. Provided is an example of training on `WiderPersonDataset`. The user must set the + dataset type using the `--dataset` argument and provide the dataset root path with the `--data-root` argument. + Setting `--device cpu` performs training on CPU. Additional command line arguments can be set to change various training + hyperparameters, and running `python3 train_demo.py -h` prints information about them on stdout. + diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py new file mode 100644 index 0000000000..01437e578b --- /dev/null +++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/eval_demo.py @@ -0,0 +1,49 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
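+
+# Evaluation demo for Seq2Seq-NMS. All flags are defined by the argparse arguments below; a typical
+# invocation (assuming the PETS data are available under --data_root) would be:
+#   python3 eval_demo.py --dataset PETS --split test --max_dt_boxes 800 --device cuda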
+ +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner +import os +import argparse +OPENDR_HOME = os.environ['OPENDR_HOME'] + +parser = argparse.ArgumentParser() +parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod", + choices=["fmod", "zeros"]) +parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP", + choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"]) +parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0) +parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) +parser.add_argument("--pretrained_model", help="Name of pretrained model", type=str, default='seq2seq_pets_jpd_fmod', + choices=['seq2seq_pets_jpd']) +parser.add_argument("--split", help="The split of the corresponding dataset", type=str, default='test', + choices=["test", "val", "train"]) +parser.add_argument("--max_dt_boxes", help="Maximum number of input RoIs fed to Seq2Seq-NMS", type=int, default=600) +parser.add_argument("--dataset", help="Dataset to train on", type=str, default="PETS", choices=["PETS", "COCO", + "TEST_MODULE"]) +parser.add_argument("--data_root", help="Dataset root folder", type=str, + default=os.path.join(OPENDR_HOME, + 'projects/perception/object_detection_2d/nms/seq2seq-nms/datasets')) +parser.add_argument("--use_ssd", help="Train using SSD as detector", type=bool, default=False) +parser.add_argument("--post_thres", help="Confidence threshold, used for RoI selection after seq2seq-nms rescoring", + type=float, default=0.0) + +args = parser.parse_args() +tmp_path = os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp') +seq2SeqNMSLearner = Seq2SeqNMSLearner(device=args.device, app_feats=args.app_feats, fmod_map_type=args.fmod_type, + iou_filtering=args.iou_filtering, + temp_path=tmp_path) +seq2SeqNMSLearner.download(model_name=args.pretrained_model, path=tmp_path) +seq2SeqNMSLearner.load(os.path.join(tmp_path, args.pretrained_model), verbose=True) +seq2SeqNMSLearner.eval(dataset=args.dataset, use_ssd=args.use_ssd, split=args.split, max_dt_boxes=args.max_dt_boxes, + datasets_folder=args.data_root, threshold=args.post_thres) diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py new file mode 100755 index 0000000000..c260546d13 --- /dev/null +++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/inference_demo.py @@ -0,0 +1,48 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
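+
+# Inference demo for Seq2Seq-NMS: downloads a pretrained Seq2Seq-NMS model and the pretrained SSD person
+# detector, runs SSD on a sample frame and applies Seq2Seq-NMS through SSD's `custom_nms` argument.
+# Typical invocation (flags defined below): python3 inference_demo.py --device cuda --app_feats fmod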
+ +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner +from opendr.perception.object_detection_2d import SingleShotDetectorLearner +from opendr.perception.object_detection_2d import draw_bounding_boxes +from opendr.engine.data import Image +import os +import argparse +OPENDR_HOME = os.environ['OPENDR_HOME'] + +parser = argparse.ArgumentParser() +parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod", + choices=["fmod", "zeros"]) +parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP", + choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"]) +parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0) +parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) +parser.add_argument("--pretrained_model", help="Name of pretrained model", type=str, default='seq2seq_pets_jpd_fmod', + choices=['seq2seq_pets_jpd']) + +args = parser.parse_args() +tmp_path = os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp') +seq2SeqNMSLearner = Seq2SeqNMSLearner(device=args.device, app_feats=args.app_feats, fmod_map_type=args.fmod_type, + iou_filtering=args.iou_filtering, + temp_path=tmp_path) +seq2SeqNMSLearner.download(model_name=args.pretrained_model, path=tmp_path) +seq2SeqNMSLearner.load(os.path.join(tmp_path, args.pretrained_model), verbose=True) + +ssd = SingleShotDetectorLearner(device=args.device) +ssd.download(".", mode="pretrained") +ssd.load("./ssd_default_person", verbose=True) +img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg') +if not isinstance(img, Image): + img = Image(img) +boxes = ssd.infer(img, threshold=0.3, custom_nms=seq2SeqNMSLearner) +draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True) diff --git a/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py b/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py new file mode 100644 index 0000000000..4facf2696b --- /dev/null +++ b/projects/perception/object_detection_2d/nms/seq2seq-nms/train_demo.py @@ -0,0 +1,50 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
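+
+# Training demo for Seq2Seq-NMS. All flags are defined by the argparse arguments below; a typical
+# invocation (assuming the selected dataset is available under --data-root) would be:
+#   python3 train_demo.py --dataset PETS --n_epochs 8 --lr 1e-4 --max_dt_boxes 500 --device cuda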
+ + +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner +import os +import argparse +OPENDR_HOME = os.environ['OPENDR_HOME'] + +parser = argparse.ArgumentParser() +parser.add_argument("--app_feats", help="Type of appearance-based features", type=str, default="fmod", + choices=["fmod", "zeros"]) +parser.add_argument("--fmod_type", help="Type of fmod maps", type=str, default="EDGEMAP", + choices=["EDGEMAP", "FAST", "AKAZE", "BRISK", "ORB"]) +parser.add_argument("--iou_filtering", help="Pre-processing IoU threshold", type=float, default=1.0) +parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) +parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=1e-4) +parser.add_argument("--n_epochs", help="Number of total epochs", type=int, default=10) +parser.add_argument("--tmp_path", help="Temporary path where weights will be saved", type=str, + default=os.path.join(OPENDR_HOME, 'projects/perception/object_detection_2d/nms/seq2seq-nms/tmp')) +parser.add_argument("--checkpoint_freq", help="Frequency in-between checkpoint saving", type=int, default=1) +parser.add_argument("--resume-from", help="Epoch to load checkpoint file and resume training from", type=int, default=0) +parser.add_argument("--dataset", help="Dataset to train on", type=str, default="PETS", choices=["PETS", "COCO", + "TEST_MODULE"]) +parser.add_argument("--use_ssd", help="Train using SSD as default detector", type=bool, default=False) +parser.add_argument("--max_dt_boxes", help="Maximum number of input RoIs fed to Seq2Seq-NMS", type=int, default=500) +parser.add_argument("--data-root", help="Dataset root folder", type=str, + default=os.path.join(OPENDR_HOME, + 'projects/perception/object_detection_2d/nms/seq2seq-nms/datasets')) +args = parser.parse_args() +seq2SeqNMSLearner = Seq2SeqNMSLearner(epochs=args.n_epochs, lr=args.lr, device=args.device, app_feats=args.app_feats, + fmod_map_type=args.fmod_type, iou_filtering=args.iou_filtering, + temp_path=args.tmp_path, checkpoint_after_iter=args.checkpoint_freq, + checkpoint_load_iter=args.resume_from) +seq2SeqNMSLearner.fit(dataset=args.dataset, use_ssd=args.use_ssd, + datasets_folder=args.data_root, silent=False, verbose=True, + max_dt_boxes=args.max_dt_boxes) +seq2SeqNMSLearner.save(path=os.path.join(args.tmp_path, 'saved_model'), current_epoch=args.n_epochs-1, + max_dt_boxes=args.max_dt_boxes) diff --git a/projects/perception/object_detection_2d/nms/soft_nms/README.md b/projects/perception/object_detection_2d/nms/soft_nms/README.md new file mode 100644 index 0000000000..a4c778f35c --- /dev/null +++ b/projects/perception/object_detection_2d/nms/soft_nms/README.md @@ -0,0 +1,5 @@ +# Soft-NMS Demos + +This folder contains minimal code usage examples that showcase the basic functionality of the Soft-NMS implementation +provided by OpenDR. Specifically the following examples are provided: +1. inference_demo.py: Perform inference on a single image. Setting `--device cpu` performs inference on CPU. 
diff --git a/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py b/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py new file mode 100644 index 0000000000..c05ff4c7c2 --- /dev/null +++ b/projects/perception/object_detection_2d/nms/soft_nms/inference_demo.py @@ -0,0 +1,31 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from opendr.perception.object_detection_2d import SoftNMS +from opendr.engine.data import Image +from opendr.perception.object_detection_2d import SingleShotDetectorLearner +from opendr.perception.object_detection_2d import draw_bounding_boxes +import os +OPENDR_HOME = os.environ['OPENDR_HOME'] + +ssd = SingleShotDetectorLearner(device='cuda') +ssd.download(".", mode="pretrained") +ssd.load("./ssd_default_person", verbose=True) +img = Image.open(OPENDR_HOME + '/projects/perception/object_detection_2d/nms/img_temp/frame_0000.jpg') +if not isinstance(img, Image): + img = Image(img) +cluster_nms = SoftNMS(device='cpu', nms_type='gaussian') +boxes = ssd.infer(img, threshold=0.3, custom_nms=cluster_nms) +draw_bounding_boxes(img.opencv(), boxes, class_names=ssd.classes, show=True) diff --git a/src/opendr/perception/object_detection_2d/__init__.py b/src/opendr/perception/object_detection_2d/__init__.py index 61428cb1bd..9fac6ba424 100644 --- a/src/opendr/perception/object_detection_2d/__init__.py +++ b/src/opendr/perception/object_detection_2d/__init__.py @@ -11,6 +11,11 @@ from opendr.perception.object_detection_2d.utils.vis_utils import draw_bounding_boxes +from opendr.perception.object_detection_2d.nms.cluster_nms.cluster_nms import ClusterNMS +from opendr.perception.object_detection_2d.nms.fast_nms.fast_nms import FastNMS +from opendr.perception.object_detection_2d.nms.soft_nms.soft_nms import SoftNMS +from opendr.perception.object_detection_2d.nms.seq2seq_nms.seq2seq_nms_learner import Seq2SeqNMSLearner + __all__ = ['CenterNetDetectorLearner', 'DetrLearner', 'GemLearner', 'RetinaFaceLearner', 'SingleShotDetectorLearner', 'YOLOv3DetectorLearner', 'WiderPersonDataset', 'WiderFaceDataset', - 'transforms', 'draw_bounding_boxes'] + 'transforms', 'draw_bounding_boxes', 'ClusterNMS', 'FastNMS', 'SoftNMS', 'Seq2SeqNMSLearner'] diff --git a/src/opendr/perception/object_detection_2d/datasets/transforms.py b/src/opendr/perception/object_detection_2d/datasets/transforms.py index 5aa6f1e327..08c0f34ecf 100644 --- a/src/opendr/perception/object_detection_2d/datasets/transforms.py +++ b/src/opendr/perception/object_detection_2d/datasets/transforms.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+ import cv2 import numpy as np import mxnet as mx @@ -141,3 +142,20 @@ def transform_test(imgs, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)): if len(tensors) == 1: return tensors[0], origs[0] return tensors, origs + + +def pad_test(img, min_size=512): + h_pad_size = 0 + min_dim = 2 + np.argmin([img.shape[2:4]]) + img_padded = img + if img.shape[min_dim] < min_size: + h_pad_size = int((min_size - img.shape[min_dim]) / 2.0) + if min_dim == 2: + img_padded = mx.nd.pad(img, mode="constant", constant_value=0, + pad_width=(0, 0, 0, 0, h_pad_size, + h_pad_size, 0, 0)) + else: + img_padded = mx.nd.pad(img, mode="constant", constant_value=0, + pad_width=(0, 0, 0, 0, 0, 0, + h_pad_size, h_pad_size)) + return img_padded diff --git a/src/opendr/perception/object_detection_2d/dependencies.ini b/src/opendr/perception/object_detection_2d/dependencies.ini index c6beccc16e..c181807f92 100644 --- a/src/opendr/perception/object_detection_2d/dependencies.ini +++ b/src/opendr/perception/object_detection_2d/dependencies.ini @@ -7,6 +7,7 @@ python=mxnet==1.8.0 tqdm pycocotools>=2.0.4 easydict + gdown numba==0.53.0 linux=libopenblas-dev diff --git a/src/opendr/perception/object_detection_2d/nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md b/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md new file mode 100644 index 0000000000..410c887028 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/cluster_nms/README.md @@ -0,0 +1,28 @@ +Cluster-NMS +====== + +This folder contains an implementation of Cluster-NMS [[1]](#cluster_nms-1). + +Sources +------ +Large parts of code are taken from [here](https://github.com/Zzh-tju/CIoU) with modifications to make it compatible with OpenDR specifications. The original code is licensed under the GNU General Public License v3.0: + +``` +This folder contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +Copyright (c) 2020 Zheng, Zhaohui. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +``` + +[1] Enhancing Geometric Factors in Model Learning and Inference for Object Detection and Instance Segmentation, +[ArXiv](https://arxiv.org/abs/2005.03572). diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/cluster_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py b/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py new file mode 100644 index 0000000000..ee34323346 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/cluster_nms/cluster_nms.py @@ -0,0 +1,510 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +# Copyright (c) 2020 Zheng, Zhaohui. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +from opendr.perception.object_detection_2d.nms.utils import NMSCustom +from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard, diou, distance +from opendr.engine.target import BoundingBox, BoundingBoxList +import numpy as np +import torch + + +class ClusterNMS(NMSCustom): + def __init__(self, nms_type='default', cross_class=True, device='cuda', iou_thres=0.45, top_k=400, post_k=100): + self.device = device + self.nms_types = ['default', 'diou', 'spm', 'spm_dist', 'spm_dist_weighted'] + if nms_type not in self.nms_types: + raise ValueError('Type: ' + nms_type + ' of Cluster-NMS is not supported.') + else: + self.nms_type = nms_type + self.iou_thres = iou_thres + self.top_k = top_k + self.post_k = post_k + self.cross_class = cross_class + + def set_iou_thres(self, iou_thres=0.45): + self.iou_thres = iou_thres + + def top_k(self, top_k=400): + self.top_k = top_k + + def post_k(self, post_k=100): + self.post_k = post_k + + def set_type(self, nms_type=None): + if nms_type not in self.nms_types: + raise ValueError('Type: ' + nms_type + ' of Cluster-NMS is not supported.') + else: + self.nms_type = nms_type + + def set_cross_class(self, cross_class=True): + self.cross_class = cross_class + + def run_nms(self, boxes=None, scores=None, img=None, threshold=0.2): + + if isinstance(boxes, np.ndarray): + boxes = torch.tensor(boxes, device=self.device) + elif torch.is_tensor(boxes): + if self.device == 'cpu': + boxes = boxes.cpu() + elif self.device == 'cuda': + boxes = boxes.cuda() + + if isinstance(scores, np.ndarray): + scores = torch.tensor(scores, device=self.device) + elif torch.is_tensor(scores): + if self.device == 'cpu': + scores = scores.cpu() + elif self.device == 'cuda': + scores = scores.cuda() + + scores = torch.transpose(scores, dim0=1, dim1=0) + + if self.nms_type == 'default': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_nms_default(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_nms_default(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + elif self.nms_type == 'diou': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_diounms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_diounms(boxes=boxes, scores=scores, 
iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + elif self.nms_type == 'spm': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_SPM_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_SPM_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + elif self.nms_type == 'spm_dist': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_SPM_dist_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_SPM_dist_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + + elif self.nms_type == 'spm_dist_weighted': + if self.cross_class: + [boxes, classes, scores] = cc_cluster_SPM_dist_weighted_nms(boxes=boxes, scores=scores, + iou_thres=self.iou_thres, + top_k=self.top_k, + post_k=self.post_k) + else: + [boxes, classes, scores] = cluster_SPM_dist_weighted_nms(boxes=boxes, scores=scores, + iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + + keep_ids = torch.where(scores > threshold) + scores = scores[keep_ids].cpu().numpy() + classes = classes[keep_ids].cpu().numpy() + boxes = boxes[keep_ids].cpu().numpy() + bounding_boxes = BoundingBoxList([]) + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=classes[idx], + score=scores[idx]) + bounding_boxes.data.append(bbox) + + return bounding_boxes, [boxes, classes, scores] + + +def cc_cluster_nms_default(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + # Collapse all the classes into 1 + + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + + idx_out = torch.where(maxA > iou_thres) + scores[idx_out] = 0 + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_nms_default(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + maxA = None + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + keep = (scores > 0.00) + discard = (maxA > iou_thres) + scores[discard] = 0 + # Assign each kept detection to its corresponding class + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cc_cluster_diounms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, classes = scores.max(dim=0) + _, idx = 
scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + iou = diou(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + + idx_out = torch.where(maxA > iou_thres) + scores[idx_out] = 0 + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_diounms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + + iou = diou(boxes, boxes).triu_(diagonal=1) + B = iou + maxA = None + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + keep = (scores > 0.00) + discard = (maxA > iou_thres) + scores[discard] = 0 + # Assign each kept detection to its corresponding class + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + + return boxes, classes, scores + + +def cc_cluster_SPM_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + scores = torch.prod(torch.exp(-B ** 2 / 0.2), 0) * scores + + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_SPM_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + keep = (scores > 0.00) + scores = torch.prod(torch.exp(-B ** 2 / 0.2), 1) * scores + # Assign each kept detection to its corresponding class + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cc_cluster_SPM_dist_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = 
classes[idx] + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + D = distance(boxes, boxes) + X = (B >= 0).float() + scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 0) * scores + + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_SPM_dist_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + D = distance(boxes, boxes) + X = (B >= 0).float() + keep = (scores > 0.00) + scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 1) * scores + + # Assign each kept detection to its corresponding class + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + + return boxes, classes, scores + + +def cc_cluster_SPM_dist_weighted_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + n = len(scores) + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + for i in range(200): + A = B + maxA, _ = torch.max(A, dim=0) + E = (maxA <= iou_thres).float().unsqueeze(1).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + D = distance(boxes, boxes) + X = (B >= 0).float() + scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 0) * scores + eye = torch.eye(n) + if boxes.device.type == 'cuda': + eye = eye.cuda() + weights = (B * (B > 0.8).float() + eye) * (scores.reshape((1, n))) + xx1 = boxes[:, 0].expand(n, n) + yy1 = boxes[:, 1].expand(n, n) + xx2 = boxes[:, 2].expand(n, n) + yy2 = boxes[:, 3].expand(n, n) + + weightsum = weights.sum(dim=1) + xx1 = (xx1 * weights).sum(dim=1) / (weightsum) + yy1 = (yy1 * weights).sum(dim=1) / (weightsum) + xx2 = (xx2 * weights).sum(dim=1) / (weightsum) + yy2 = (yy2 * weights).sum(dim=1) / (weightsum) + boxes = torch.stack([xx1, yy1, xx2, yy2], 1) + + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cluster_SPM_dist_weighted_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + + scores, idx = scores.sort(1, descending=True) + idx = idx[:top_k] + scores = scores[:top_k] + boxes = boxes[idx, :] + + num_classes, num_dets = scores.shape + boxes = boxes.view(num_classes, num_dets, 4) + _, classes = scores.max(dim=0) + + iou = jaccard(boxes, boxes).triu_(diagonal=1) + B = iou + A = None + for i in range(200): + A = B + maxA, _ = A.max(dim=1) + E = (maxA <= 
iou_thres).float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + if A.equal(B): + break + D = distance(boxes, boxes) + X = (B >= 0).float() + keep = (scores > 0.0) + + scores = torch.prod(torch.min(torch.exp(-B ** 2 / 0.2) + D * ((B > 0).float()), X), 1) * scores + + E = keep.float().unsqueeze(2).expand_as(A) + B = iou.mul(E) + _, n = scores.size() + eye = torch.eye(n).expand(num_classes, n, n) + if boxes.device.type == 'cuda': + eye = eye.cuda() + weights = (B * (B > 0.8).float() + eye) * ( + scores.unsqueeze(2).expand(num_classes, n, n)) + xx1 = boxes[:, :, 0].unsqueeze(1).expand(num_classes, n, n) + yy1 = boxes[:, :, 1].unsqueeze(1).expand(num_classes, n, n) + xx2 = boxes[:, :, 2].unsqueeze(1).expand(num_classes, n, n) + yy2 = boxes[:, :, 3].unsqueeze(1).expand(num_classes, n, n) + + weightsum = weights.sum(dim=2) + xx1 = (xx1 * weights).sum(dim=2) / (weightsum) + yy1 = (yy1 * weights).sum(dim=2) / (weightsum) + xx2 = (xx2 * weights).sum(dim=2) / (weightsum) + yy2 = (yy2 * weights).sum(dim=2) / (weightsum) + boxes = torch.stack([xx1, yy1, xx2, yy2], 2) + + # Assign each kept detection to its corresponding class + classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep) + classes = classes[keep] + boxes = boxes[keep] + scores = scores[keep] + + # Only keep the top cfg.max_num_detections highest scores across all classes + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + + return boxes, classes, scores diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md b/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md new file mode 100644 index 0000000000..1b6165122d --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/fast_nms/README.md @@ -0,0 +1,28 @@ +Fast-NMS +====== + +This folder contains an implementation of Fast-NMS [[1]](#fast_nms-1). + +Sources +------ +Large parts of code are taken from [here](https://github.com/Zzh-tju/CIoU) with modifications to make it compatible with OpenDR specifications. The original code is licensed under the GNU General Public License v3.0: + +``` +This folder contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +Copyright (c) 2020 Zheng, Zhaohui. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, version 3. + +This program is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . +``` + +[1] YOLACT: Real-time Instance Segmentation, +[ArXiv](https://arxiv.org/abs/1904.02689). 
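+
+A minimal usage sketch of the `FastNMS` wrapper added in this folder is given below.
+The boxes, scores, and import path are illustrative assumptions (a single-class, three-box example), not an official snippet:
+
+```python
+import numpy as np
+from opendr.perception.object_detection_2d.nms.fast_nms.fast_nms import FastNMS
+
+# Hypothetical detector output: candidate boxes in (x1, y1, x2, y2) format and
+# per-box class scores of shape (num_boxes, num_classes); here a single class.
+boxes = np.array([[10., 20., 110., 220.],
+                  [12., 22., 112., 218.],
+                  [300., 150., 380., 330.]], dtype=np.float32)
+scores = np.array([[0.90], [0.85], [0.60]], dtype=np.float32)
+
+nms = FastNMS(cross_class=False, device='cpu', iou_thres=0.45, top_k=400, post_k=100)
+bounding_boxes, _ = nms.run_nms(boxes=boxes, scores=scores, threshold=0.2)
+# The two heavily overlapping boxes collapse to a single detection; the third survives.
+print(len(bounding_boxes.data))
+```
+
+The wrapper converts NumPy inputs to torch tensors on the requested device, runs either the class-aware (`fast_nms`) or cross-class (`cc_fast_nms`) variant, and returns an OpenDR `BoundingBoxList` together with the raw boxes, classes, and scores.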
diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/fast_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py b/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py new file mode 100644 index 0000000000..ace8b37089 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/fast_nms/fast_nms.py @@ -0,0 +1,147 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +# Copyright (c) 2020 Zheng, Zhaohui. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
+ +from opendr.perception.object_detection_2d.nms.utils import NMSCustom +from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard +from opendr.engine.target import BoundingBox, BoundingBoxList +import torch +import numpy as np + + +class FastNMS(NMSCustom): + def __init__(self, cross_class=False, device='cuda', iou_thres=0.45, top_k=400, post_k=100): + self.device = device + self.iou_thres = iou_thres + self.top_k = top_k + self.post_k = post_k + self.cross_class = cross_class + + def set_iou_thres(self, iou_thres=0.45): + self.iou_thres = iou_thres + + def top_k(self, top_k=400): + self.top_k = top_k + + def post_k(self, post_k=100): + self.post_k = post_k + + def set_cross_class(self, cross_class=False): + self.cross_class = cross_class + + def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None): + + if isinstance(boxes, np.ndarray): + boxes = torch.tensor(boxes, device=self.device) + elif torch.is_tensor(boxes): + if self.device == 'cpu': + boxes = boxes.cpu() + elif self.device == 'cuda': + boxes = boxes.cuda() + + if isinstance(scores, np.ndarray): + scores = torch.tensor(scores, device=self.device) + elif torch.is_tensor(scores): + if self.device == 'cpu': + scores = scores.cpu() + elif self.device == 'cuda': + scores = scores.cuda() + + scores = torch.transpose(scores, dim0=1, dim1=0) + if self.cross_class: + [boxes, classes, scores] = cc_fast_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + else: + [boxes, classes, scores] = fast_nms(boxes=boxes, scores=scores, iou_thres=self.iou_thres, + top_k=self.top_k, post_k=self.post_k) + + keep_ids = torch.where(scores > threshold) + scores = scores[keep_ids].cpu().numpy() + classes = classes[keep_ids].cpu().numpy() + boxes = boxes[keep_ids].cpu().numpy() + bounding_boxes = BoundingBoxList([]) + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=classes[idx], + score=scores[idx]) + bounding_boxes.data.append(bbox) + + return bounding_boxes, [boxes, classes, scores] + + +def fast_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + scores, idx = scores.sort(1, descending=True) + boxes = boxes[idx, :] + + scores = scores[:, :top_k] + boxes = boxes[:, :top_k] + + num_classes, num_dets = scores.shape + + boxes = boxes.view(num_classes, num_dets, 4) + + iou = jaccard(boxes, boxes).triu_(diagonal=1) + iou_max, _ = iou.max(dim=1) + + keep = (iou_max <= iou_thres) + keep *= (scores > 0.01) + classes = torch.arange(num_classes, device=boxes.device)[:, None].expand_as(keep) + classes = classes[keep] + + boxes = boxes[keep] + scores = scores[keep] + + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores + + +def cc_fast_nms(boxes=None, scores=None, iou_thres=0.45, top_k=400, post_k=200): + scores, classes = scores.max(dim=0) + _, idx = scores.sort(0, descending=True) + idx = idx[:top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + iou = jaccard(boxes, boxes).triu_(diagonal=1) + maxA, _ = torch.max(iou, dim=0) + + idx_out = torch.where(maxA > iou_thres) + scores[idx_out] = 0 + scores, idx = scores.sort(0, descending=True) + idx = idx[:post_k] + scores = scores[:post_k] + classes = classes[idx] + boxes = boxes[idx] + return boxes, classes, scores diff --git 
a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md new file mode 100644 index 0000000000..4e03fce80c --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/README.md @@ -0,0 +1,17 @@ +Seq2Seq-NMS +====== + +This folder contains an implementation of Seq2Seq-NMS [[1]](#seq2seq_nms-1). + +TABLE-1: Average Precision (AP) achieved by pretrained models on the person detection task on the validation and test sets. The maximum number of RoIs employed for the performance evaluation was set to 800. +| **Pretrained Model** | **Dataset** | **Detector** | **Type of Appearance-based Features** | **Pre-processing IoU Threshold** | **AP@0.5 on validation set** | **AP@0.5 on test set** | +|:----------------------:|:-----------:|:------------:|:-------------------------------------:|:--------------------------------:|:----------------------------:|:----------------------:| +| seq2seq_pets_jpd_fmod | PETS | JPD | FMoD | 0.8 | 80.2% | 84.3% | +| seq2seq_pets_ssd_fmod | PETS | SSD | FMoD | 0.8 | 77.4% | 79.1% | +| seq2seq_coco_frcn_fmod | COCO | FRCN | FMoD | - | 68.1% \* | 67.5% \*\* | +| seq2seq_coco_ssd_fmod | COCO | SSD | FMoD | - | 41.8% \* | 42.4% \*\* | + +\* The minival set was used as validation set.
+\*\* The minitest set was used as test set. + +[1] Neural Attention-driven Non-Maximum Suppression for Person Detection, [TechRxiv](https://www.techrxiv.org/articles/preprint/Neural_Attention-driven_Non-Maximum_Suppression_for_Person_Detection/16940275). diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py new file mode 100755 index 0000000000..4b5d5ec2f5 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/fmod.py @@ -0,0 +1,200 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import torch +import torchvision +import numpy as np +import cv2 +import random +from opendr.engine.data import Image + + +class FMoD: + def __init__(self, roi_pooling_dim=None, pyramid_depth=3, map_type="SIFT", map_bin=False, + resize_dim=None, device='cpu'): + if roi_pooling_dim is None: + roi_pooling_dim = 160 + self.roi_pooling_dim = [roi_pooling_dim, roi_pooling_dim] + self.pyramid_depth = pyramid_depth + self.boxes_p = [] + self.rp_size = [] + for p in range(self.pyramid_depth): + s = 1 / pow(2, p) + for i in np.arange(0, 1.0, s): + for j in np.arange(0, 1.0, s): + self.boxes_p.append([0, int(i * self.roi_pooling_dim[0]), int(j * self.roi_pooling_dim[1]), + int((i + s) * self.roi_pooling_dim[0]), + int((j + s) * self.roi_pooling_dim[1])]) + self.rp_size.append([int(self.roi_pooling_dim[0] * s), int(self.roi_pooling_dim[1] * s)]) + self.device = device + self.boxes_p = torch.tensor(self.boxes_p).float() + if "cuda" in self.device: + self.boxes_p = self.boxes_p.to(self.device) + self.resc = 1.0 + self.map = None + self.resize_dim = resize_dim + self.map_type = map_type + self.map_bin = map_bin + self.mean = None + self.std = None + + def set_mean_std(self, mean_values=None, std_values=None): + self.mean = torch.tensor(mean_values).float() + self.std = torch.tensor(std_values).float() + if "cuda" in self.device: + self.mean = self.mean.to(self.device) + self.std = self.std.to(self.device) + + def extract_maps(self, img=None, augm=False): + if img is None: + raise Exception('Image is not provided to FMoD...') + + if not isinstance(img, Image): + img = Image(img) + img = img.convert(format='channels_last', channel_order='bgr') + + if self.resize_dim is not None: + max_dim = max(img.shape[0], img.shape[1]) + if max_dim > self.resize_dim: + self.resc = float(self.resize_dim) / max_dim + img = cv2.resize(img, (int(img.shape[1] * self.resc), int(img.shape[0] * self.resc))) + if augm: + img = augm_brightness(img, 0.75, 1.25) + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + if self.map_type == "EDGEMAP": + dst_img = np.copy(img) + dst_img = cv2.GaussianBlur(dst_img, (3, 3), 0, 0, cv2.BORDER_DEFAULT) + gradX = 
cv2.Scharr(dst_img, ddepth=cv2.CV_16S, dx=1, dy=0, scale=1, delta=0, + borderType=cv2.BORDER_DEFAULT) + gradY = cv2.Scharr(dst_img, ddepth=cv2.CV_16S, dx=0, dy=1, scale=1, delta=0, + borderType=cv2.BORDER_DEFAULT) + absGradX = cv2.convertScaleAbs(gradX) + absGradY = cv2.convertScaleAbs(gradY) + absGradXCV32 = absGradX.astype("float32") + absGradYCV32 = absGradY.astype("float32") + self.map = cv2.magnitude(absGradXCV32 / 255.0, absGradYCV32 / 255.0) + self.map = self.map * 255 + if self.map_bin: + self.map = cv2.threshold(self.map, 240, 255, cv2.THRESH_BINARY)[1] + else: + kps = None + if self.map_type == "FAST": + fast = cv2.FastFeatureDetector_create() + kps = fast.detect(img, None) + elif self.map_type == "AKAZE": + akaze = cv2.AKAZE_create() + kps, desc = akaze.detectAndCompute(img, None) + elif self.map_type == "BRISK": + brisk = cv2.BRISK_create() + kps = brisk.detect(img, None) + elif self.map_type == "ORB": + orb = cv2.ORB_create() + kps = orb.detect(img, None) + else: + raise Exception("Map type not supported...") + self.map = np.zeros(img.shape, dtype=np.uint8) + coords_x = [] + coords_y = [] + resps = [] + for kp in kps: + coords_x.append(int(kp.pt[0])) + coords_y.append(int(kp.pt[1])) + resps.append(255 * kp.response) + if not self.map_bin: + self.map[coords_y, coords_x] = resps + else: + self.map[coords_y, coords_x] = 255 + self.map = torch.from_numpy(self.map).float() + if "cuda" in self.device: + self.map = self.map.to(self.device) + + def extract_FMoD_feats(self, boxes): + num_rois = boxes.shape[0] + map_gpu = self.map / 255.0 + map_gpu = map_gpu.unsqueeze(0).unsqueeze(0) + descs = [] + pooled_regions = torchvision.ops.roi_align(map_gpu, [self.resc * boxes], + output_size=self.rp_size[0], spatial_scale=1.0, + aligned=True) + pooled_regions = pooled_regions.unsqueeze(1) + descs.append(self.get_descriptor(pooled_regions)) + for i in range(0, self.pyramid_depth - 1): + pooled_regions_pyr = pooled_regions.contiguous().view(num_rois, pooled_regions.shape[-2], + pooled_regions.shape[-1]) + pooled_regions_pyr = pooled_regions_pyr.unsqueeze(0) + pooled_regions_pyr = torchvision.ops.roi_align(pooled_regions_pyr, self.boxes_p[(pow(4 + 1, i)):( + (pow(4 + 1, i)) + pow(4, (i + 1))), :], output_size=self.rp_size[i + 1], aligned=True) + pooled_regions_pyr = pooled_regions_pyr.permute(1, 0, 2, 3) + pooled_regions_pyr = pooled_regions_pyr.contiguous().view(num_rois, 1, pooled_regions_pyr.shape[-3], + pooled_regions_pyr.shape[-2], + pooled_regions_pyr.shape[-1]) + descs.append(self.get_descriptor(pooled_regions_pyr)) + + descs = torch.cat(descs, dim=1) + if self.mean is not None and self.std is not None: + descs = (descs - self.mean) / self.std + descs = torch.clamp(descs, -50, 50) + return descs + + def release_maps(self): + self.map = None + + def get_descriptor(self, patches): + dt = [] + # row data + dt.append(patches.mean(dim=3)) + # collumn data + dt.append(patches.mean(dim=4)) + # block data + dt.append(torch.flatten(patches, start_dim=3)) + + means = [] + stds = [] + diffs = [] + zscores = [] + skews = [] + kurtoses = [] + powers = [] + for i in range(len(dt)): + if i == 2: + means.append(dt[i].mean(dim=3)) + else: + means.append(dt[i][:, :, :, 0:-1:5].mean(dim=3)) + stds.append(dt[i].std(dim=3)) + diffs.append((dt[i] - means[i].unsqueeze(-1).expand(dt[i].size()))) + zscores.append(diffs[i] / stds[i].unsqueeze(-1).expand(dt[i].size())) + zscores[i] = torch.where(stds[i].unsqueeze(-1).expand(zscores[i].shape) > 0, zscores[i], + torch.zeros_like(zscores[i])) + 
skews.append(torch.mean(torch.pow(zscores[i], 3.0), -1)) + kurtoses.append(torch.mean(torch.pow(zscores[i], 4.0), -1) - 3.0) + powers.append((dt[i] * dt[i]).mean(-1)) + descs = [] + for i in range(len(dt)): + descs.append(torch.cat((means[i], stds[i], skews[i], kurtoses[i], powers[i]), 2)) + desc = torch.cat((descs[0], descs[1], descs[2]), 2) + desc = desc.contiguous().view(desc.shape[0], desc.shape[1] * desc.shape[2]) + return desc + + +def augm_brightness(img, low, high): + value = random.uniform(low, high) + hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + hsv = np.array(hsv, dtype=np.float64) + hsv[:, :, 1] = hsv[:, :, 1] * value + hsv[:, :, 1][hsv[:, :, 1] > 255] = 255 + hsv[:, :, 2] = hsv[:, :, 2] * value + hsv[:, :, 2][hsv[:, :, 2] > 255] = 255 + hsv = np.array(hsv, dtype=np.uint8) + img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) + return img diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py new file mode 100755 index 0000000000..953892d04e --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/algorithm/seq2seq_model.py @@ -0,0 +1,196 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
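+
+# Architecture overview: Seq2SeqNet scores each candidate RoI for suppression.
+# Geometric relations between RoIs (and, optionally, appearance descriptors such
+# as FMoD) are embedded into query/key representations, refined by a stack of
+# Joint_processing_unit blocks built on masked multi-head self-attention, and
+# mapped to a per-RoI confidence through a final sigmoid layer.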
+ +import torch.nn as nn +import torch +import math +import torch.nn.functional as F + + +class Seq2SeqNet(nn.Module): + def __init__(self, dropout=0.01, use_app_feats=True, app_input_dim=315, geom_input_dim=14, lq_dim=256, sq_dim=128, + num_JPUs=4, device='cuda'): + super().__init__() + self.use_app_feats = use_app_feats + self.dropout_q = nn.Dropout(dropout * 0.25) + self.num_JPUs = num_JPUs + self.joint_processing_units = [] + self.device = device + for i in range(self.num_JPUs): + self.joint_processing_units.append(Joint_processing_unit(lq_dim=lq_dim, sq_dim=sq_dim, dropout=dropout)) + if "cuda" in self.device: + self.joint_processing_units[i] = self.joint_processing_units[i].to(self.device) + self.joint_processing_units = nn.ModuleList(self.joint_processing_units) + if self.use_app_feats: + q_app_dims = [180, 180] + self.q_app_layers = nn.Sequential( + nn.Linear(app_input_dim, q_app_dims[0]), + nn.GELU(), + nn.Dropout(dropout * 0.25), + nn.LayerNorm(q_app_dims[0], eps=1e-6), + nn.Linear(q_app_dims[0], q_app_dims[1]), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(q_fmod_dims[1], eps=1e-6) + ) + + q_geom_dims = [180, 180] + self.q_geom_layers = nn.Sequential( + nn.Linear(geom_input_dim, q_geom_dims[0]), + nn.GELU(), + nn.LayerNorm(q_geom_dims[0], eps=1e-6), + nn.Linear(q_geom_dims[0], q_geom_dims[1]), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(q_geom_dims[1], eps=1e-6) + ) + + k_geom_dims = [180, 180] + self.k_geom_layers = nn.Sequential( + nn.Linear(geom_input_dim, k_geom_dims[0]), + nn.GELU(), + nn.LayerNorm(k_geom_dims[0], eps=1e-6), + nn.Linear(k_geom_dims[0], k_geom_dims[1]), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(k_geom_dims[1], eps=1e-6) + ) + + q_final_in_dim = q_geom_dims[-1] + k_final_in_dim = k_geom_dims[-1] + if self.use_app_feats: + q_final_in_dim = q_geom_dims[-1] + q_app_dims[-1] + k_final_in_dim = k_geom_dims[-1] + q_app_dims[-1] + + self.q_full_layers = nn.Sequential( + nn.LayerNorm(q_final_in_dim, eps=1e-6), + nn.Linear(q_final_in_dim, lq_dim), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(lq_dim, eps=1e-6) + ) + self.k_full_layers = nn.Sequential( + nn.LayerNorm(k_final_in_dim, eps=1e-6), + nn.Linear(k_final_in_dim, sq_dim), + nn.GELU(), + nn.Dropout(dropout * 0.25), + # nn.LayerNorm(sq_dim, eps=1e-6) + ) + self.q_final_layers = nn.Sequential( + nn.LayerNorm(lq_dim, eps=1e-6), + nn.Linear(lq_dim, sq_dim), + nn.GELU(), + nn.Dropout(dropout * 0.25), + nn.LayerNorm(sq_dim, eps=1e-6), + nn.Linear(sq_dim, 1), + nn.Sigmoid() + ) + + def forward(self, q_geom_feats=None, k_geom_feats=None, msk=None, app_feats=None): + q_feats = self.q_geom_layers(q_geom_feats) + k_feats = self.k_geom_layers(k_geom_feats) + + if self.use_app_feats and app_feats is not None: + app_feats = self.q_app_layers(app_feats) + q_feats = torch.cat((q_feats, app_feats), dim=2) + k_feats = torch.cat((k_feats, app_feats.transpose(0, 1).repeat(k_feats.shape[1], 1, 1)), dim=2) + + elif app_feats is None: + raise UserWarning("Appearance-based representations not provided.") + q_feats = self.q_full_layers(q_feats) + k_feats = self.k_full_layers(k_feats) + for i in range(self.num_JPUs): + q_feats, k_feats = self.joint_processing_units[i](q_feats, k_feats, msk) + scores = self.q_final_layers(q_feats) + return scores.squeeze(1) + + +class Joint_processing_unit(nn.Module): + def __init__(self, heads=2, lq_dim=256, sq_dim=128, dropout=0.1): + super().__init__() + self.q_block1 = nn.Sequential( + nn.LayerNorm(lq_dim, eps=1e-6), + nn.Linear(lq_dim, 
sq_dim), + nn.GELU(), + nn.Dropout(dropout) + ) + self.norm_layer_q = nn.LayerNorm(sq_dim, eps=1e-6) + self.norm_layer_k = nn.LayerNorm(sq_dim, eps=1e-6) + self.self_attention_module = Self_attention_module(heads=heads, l_dim=lq_dim, s_dim=sq_dim, dropout=dropout) + self.scale_layer = Scale_layer(s_dim=sq_dim) + + self.q_block2 = nn.Sequential( + nn.LayerNorm(sq_dim, eps=1e-6), + nn.Linear(sq_dim, lq_dim), + nn.GELU(), + nn.Dropout(dropout) + ) + + def forward(self, q_feats, k_feats, msk): + q_atten = self.q_block1(q_feats) + kv_atten_in = self.norm_layer_k(k_feats) + q_atten_in = self.norm_layer_q(q_atten) + q_atten = q_atten + self.self_attention_module(q=q_atten_in, k=kv_atten_in, v=kv_atten_in, mask=msk) + k_feats = k_feats + self.scale_layer(q_atten).transpose(0, 1).repeat(q_atten.shape[0], 1, 1) + q_feats = q_feats + self.q_block2(q_atten) + return q_feats, k_feats + + +class Self_attention_module(nn.Module): + def __init__(self, heads, l_dim, s_dim, dropout=0.1): + super().__init__() + self.l_dim = l_dim + self.s_dim = s_dim + self.qkv_split_dim = s_dim // heads + self.h = heads + self.q_linear = nn.Linear(self.s_dim, self.s_dim) + self.v_linear = nn.Linear(self.s_dim, self.s_dim) + self.k_linear = nn.Linear(self.s_dim, self.s_dim) + + self.dropout = nn.Dropout(dropout) + self.q_out = nn.Sequential( + nn.Linear(self.s_dim, self.s_dim), + nn.GELU(), + nn.Dropout(dropout) + ) + + def forward(self, q, k, v, mask=None): + samples_dim = q.size(0) + k = self.k_linear(k).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2) + q = self.q_linear(q).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2) + v = self.v_linear(v).view(samples_dim, -1, self.h, self.qkv_split_dim).transpose(1, 2) + scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.qkv_split_dim) + + mask = mask.unsqueeze(1) + mask = mask.unsqueeze(1) + mask = mask.repeat(1, scores.shape[1], 1, 1) + scores = torch.mul(scores, mask) + scores = scores.masked_fill(mask == 0, -1e9) + + scores = F.softmax(scores, dim=-1) + scores = self.dropout(scores) + q = torch.matmul(scores, v) + q = q.transpose(1, 2).contiguous().view(samples_dim, -1, self.s_dim) + q = self.q_out(q) + return q + + +class Scale_layer(nn.Module): + def __init__(self, s_dim=1): + super().__init__() + self.scale_weights = nn.Parameter(torch.empty(s_dim), requires_grad=True) + nn.init.uniform_(self.scale_weights, a=0.01, b=2.0) + + def forward(self, feats): + return feats * self.scale_weights diff --git a/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py new file mode 100644 index 0000000000..fd8a97d16c --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/seq2seq_nms/seq2seq_nms_learner.py @@ -0,0 +1,812 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
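+
+# Module overview: Seq2SeqNMSLearner wraps Seq2SeqNet behind the OpenDR Learner and
+# NMSCustom interfaces. fit() trains on a Dataset_NMS split with weighted binary
+# cross-entropy against matched ground-truth boxes, eval() reports COCO-style AP,
+# and infer()/run_nms() rescore detector RoIs (optionally pre-filtered with a plain
+# IoU-based NMS) and return an OpenDR BoundingBoxList plus the raw arrays.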
+ +from opendr.engine.learners import Learner +from opendr.engine.constants import OPENDR_SERVER_URL +from opendr.engine.target import BoundingBox, BoundingBoxList +from opendr.engine.data import Image +from opendr.perception.object_detection_2d.nms.seq2seq_nms.algorithm.seq2seq_model import Seq2SeqNet +from opendr.perception.object_detection_2d.nms.utils import NMSCustom +from opendr.perception.object_detection_2d.nms.utils.nms_dataset import Dataset_NMS +from opendr.perception.object_detection_2d.nms.seq2seq_nms.algorithm.fmod import FMoD +from opendr.perception.object_detection_2d.nms.utils.nms_utils import drop_dets, det_matching, \ + run_coco_eval, filter_iou_boxes, bb_intersection_over_union, compute_class_weights, apply_torchNMS +import torch +import torch.nn.functional as F +import pickle +import numpy as np +import os +from urllib.request import urlretrieve +import torch.nn as nn +from tensorboardX import SummaryWriter +import torch.optim as optim +from tqdm import tqdm +import collections +import json +import zipfile + + +class Seq2SeqNMSLearner(Learner, NMSCustom): + def __init__(self, lr=0.0001, epochs=8, device='cuda', temp_path='./temp', checkpoint_after_iter=0, + checkpoint_load_iter=0, log_after=10000, variant='medium', + iou_filtering=0.8, dropout=0.025, app_feats='fmod', + fmod_map_type='EDGEMAP', fmod_map_bin=True, app_input_dim=None): + super(Seq2SeqNMSLearner, self).__init__(lr=lr, batch_size=1, + checkpoint_after_iter=checkpoint_after_iter, + checkpoint_load_iter=checkpoint_load_iter, + temp_path=temp_path, device=device, backbone='default') + self.epochs = epochs + self.variant = variant + self.app_feats = app_feats + self.use_app_feats = False + if self.app_feats is not None: + self.use_app_feats = True + self.fmod_map_type = None + self.fmod_map_bin = None + self.fmod_map_res_dim = None + self.fmod_pyramid_lvl = None + self.fmod_roi_pooling_dim = None + if self.app_feats == 'fmod': + self.fmod_map_type = fmod_map_type + self.fmod_roi_pooling_dim = 160 + self.fmod_map_res_dim = 600 + self.fmod_pyramid_lvl = 3 + self.sef_fmod_architecture() + self.fmod_feats_dim = 0 + for i in range(0, self.fmod_pyramid_lvl): + self.fmod_feats_dim = self.fmod_feats_dim + 15 * (pow(4, i)) + self.fmod_map_bin = fmod_map_bin + self.app_input_dim = self.fmod_feats_dim + self.fmod_mean_std = None + elif self.app_feats == 'zeros' or self.app_feats == 'custom': + if app_input_dim is None: + raise Exception("The dimension of the input appearance-based features is not provided...") + else: + self.app_input_dim = app_input_dim + if self.app_feats == 'custom': + raise AttributeError("Custom appearance-based features are not yet supported.") + self.lq_dim = 256 + self.sq_dim = 128 + self.geom_input_dim = 14 + self.num_JPUs = 4 + self.geom_input_dim = 14 + self.set_architecture() + self.dropout = dropout + self.temp_path = temp_path + if not os.path.isdir(self.temp_path): + os.mkdir(self.temp_path) + self.checkpoint_load_iter = checkpoint_load_iter + self.log_after = log_after + self.iou_filtering = iou_filtering + self.classes = None + self.class_ids = None + self.fMoD = None + self.fmod_init_file = None + if self.app_feats == 'fmod': + self.fMoD = FMoD(roi_pooling_dim=self.fmod_roi_pooling_dim, pyramid_depth=self.fmod_pyramid_lvl, + resize_dim=self.fmod_map_res_dim, + map_type=self.fmod_map_type, map_bin=self.fmod_map_bin, device=self.device) + self.init_model() + if "cuda" in self.device: + self.model = self.model.to(self.device) + + def fit(self, dataset, logging_path='', 
logging_flush_secs=30, silent=True, + verbose=True, nms_gt_iou=0.5, max_dt_boxes=400, datasets_folder='./datasets', + use_ssd=False, lr_step=True): + + dataset_nms = Dataset_NMS(path=datasets_folder, dataset_name=dataset, split='train', use_ssd=use_ssd, + device=self.device) + if self.classes is None: + self.classes = dataset_nms.classes + self.class_ids = dataset_nms.class_ids + + if logging_path != '' and logging_path is not None: + logging = True + file_writer = SummaryWriter(logging_path, flush_secs=logging_flush_secs) + else: + logging = False + file_writer = None + + checkpoints_folder = self.temp_path + if self.checkpoint_after_iter != 0 and not os.path.exists(checkpoints_folder): + os.makedirs(checkpoints_folder) + + if not silent and verbose: + print("Model trainable parameters:", self.count_parameters()) + + self.model.train() + if "cuda" in self.device: + self.model = self.model.to(self.device) + + if self.epochs is None: + raise ValueError("Training epochs not specified") + elif self.epochs <= self.checkpoint_load_iter: + raise ValueError("Training epochs are less than those of the loaded model") + + if self.app_feats == 'fmod': + if self.fmod_mean_std is None: + self.fmod_mean_std = self.load_FMoD_init_from_dataset(dataset=dataset, map_type=self.fmod_map_type, + fmod_pyramid_lvl=self.fmod_pyramid_lvl, + datasets_folder=datasets_folder, + verbose=verbose) + self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std']) + + start_epoch = 0 + drop_after_epoch = [] + if lr_step and self.epochs > 1: + drop_after_epoch = [int(self.epochs * 0.5)] + if self.epochs > 3: + drop_after_epoch.append(int(self.epochs * 0.7)) + + train_ids = np.arange(len(dataset_nms.src_data)) + total_loss_iter = 0 + total_loss_epoch = 0 + optimizer = optim.Adam(self.model.parameters(), lr=self.lr, betas=(0.9, 0.99), eps=1e-9) # HERE + scheduler = None + if len(drop_after_epoch) > 0: + scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=drop_after_epoch, gamma=0.1) + + num_iter = 0 + training_weights = compute_class_weights(pos_weights=[0.9, 0.1], max_dets=max_dt_boxes, dataset_nms=dataset_nms) + # Single class NMS only. 
+ class_index = 1 + training_dict = {"cross_entropy_loss": []} + for epoch in range(start_epoch, self.epochs): + pbar = None + if not silent: + pbarDesc = "Epoch #" + str(epoch) + " progress" + pbar = tqdm(desc=pbarDesc, total=len(train_ids)) + np.random.shuffle(train_ids) + for sample_id in train_ids: + + if self.log_after != 0 and num_iter > 0 and num_iter % self.log_after == 0: + if logging: + file_writer.add_scalar(tag="cross_entropy_loss", + scalar_value=total_loss_iter/self.log_after, + global_step=num_iter) + if verbose: + print(''.join(['\nEpoch: {}', + ' Iter: {}, cross_entropy_loss: {}']).format(epoch, num_iter, + total_loss_iter/self.log_after)) + total_loss_iter = 0 + + image_fln = dataset_nms.src_data[sample_id]['filename'] + if len(dataset_nms.src_data[sample_id]['dt_boxes'][class_index]) > 0: + dt_boxes = torch.tensor( + dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 0:4]).float() + dt_scores = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 4]).float() + dt_scores, dt_scores_ids = torch.sort(dt_scores, descending=True) + dt_boxes = dt_boxes[dt_scores_ids] + else: + if not silent: + pbar.update(1) + num_iter = num_iter + 1 + continue + gt_boxes = torch.tensor([]).float() + if len(dataset_nms.src_data[sample_id]['gt_boxes'][class_index]) > 0: + gt_boxes = torch.tensor(dataset_nms.src_data[sample_id]['gt_boxes'][class_index]).float() + image_path = os.path.join(datasets_folder, dataset, image_fln) + img_res = dataset_nms.src_data[sample_id]['resolution'][::-1] + + if "cuda" in self.device: + dt_boxes = dt_boxes.to(self.device) + dt_scores = dt_scores.to(self.device) + gt_boxes = gt_boxes.to(self.device) + + val_ids = torch.logical_and((dt_boxes[:, 2] - dt_boxes[:, 0]) > 4, + (dt_boxes[:, 3] - dt_boxes[:, 1]) > 4) + dt_boxes = dt_boxes[val_ids, :] + dt_scores = dt_scores[val_ids] + + dt_boxes, dt_scores = drop_dets(dt_boxes, dt_scores) + if dt_boxes.shape[0] < 1: + if not silent: + pbar.update(1) + num_iter = num_iter + 1 + continue + if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0: + dt_boxes, dt_scores = apply_torchNMS(boxes=dt_boxes, scores=dt_scores, + iou_thres=self.iou_filtering) + + dt_boxes = dt_boxes[:max_dt_boxes] + dt_scores = dt_scores[:max_dt_boxes] + app_feats = None + if self.app_feats == 'fmod': + img = Image.open(image_path) + img = img.convert(format='channels_last', channel_order='bgr') + self.fMoD.extract_maps(img=img, augm=True) + app_feats = self.fMoD.extract_FMoD_feats(dt_boxes) + app_feats = torch.unsqueeze(app_feats, dim=1) + elif self.app_feats == 'zeros': + app_feats = torch.zeros([dt_boxes.shape[0], 1, self.app_input_dim]) + if "cuda" in self.device: + app_feats = app_feats.to(self.device) + elif self.app_feats == 'custom': + raise AttributeError("Custom appearance-based features are not yet supported.") + + msk = self.compute_mask(dt_boxes, iou_thres=0.2, extra=0.1) + q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=dt_boxes, scores=dt_scores, + resolution=img_res) + preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk, + app_feats=app_feats) + preds = torch.clamp(preds, 0.001, 1 - 0.001) + + labels = det_matching(scores=preds, dt_boxes=dt_boxes, gt_boxes=gt_boxes, + iou_thres=nms_gt_iou, device=self.device) + weights = (training_weights[class_index][1] * labels + training_weights[class_index][0] * ( + 1 - labels)) + + e = torch.distributions.uniform.Uniform(0.001, 0.005).sample([labels.shape[0], 1]) + if "cuda" in self.device: + weights = 
weights.to(self.device) + e = e.to(self.device) + labels = labels * (1 - e) + (1 - labels) * e + ce_loss = F.binary_cross_entropy(preds, labels, reduction="none") + loss = (ce_loss * weights).sum() + + optimizer.zero_grad() + loss.backward() + optimizer.step() + + # Memory leak if not loss not detached in total_loss_iter and total_loss_epoch computations + loss_t = loss.detach().cpu().numpy() + total_loss_iter = total_loss_iter + loss_t + total_loss_epoch = total_loss_epoch + loss_t + num_iter = num_iter + 1 + if not silent: + pbar.update(1) + if not silent: + pbar.close() + if verbose: + print(''.join(['\nEpoch: {}', + ' cross_entropy_loss: {}\n']).format(epoch, + total_loss_epoch/len(train_ids))) + training_dict['cross_entropy_loss'].append(total_loss_epoch/len(train_ids)) + if self.checkpoint_after_iter != 0 and epoch % self.checkpoint_after_iter == self.checkpoint_after_iter - 1: + snapshot_name = '{}/checkpoint_epoch_{}'.format(checkpoints_folder, epoch) + self.save(path=snapshot_name, optimizer=optimizer, scheduler=scheduler, + current_epoch=epoch, max_dt_boxes=max_dt_boxes) + snapshot_name_lw = '{}/last_weights'.format(checkpoints_folder) + self.save(path=snapshot_name_lw, optimizer=optimizer, scheduler=scheduler, + current_epoch=epoch, max_dt_boxes=max_dt_boxes) + total_loss_epoch = 0 + if scheduler is not None: + scheduler.step() + if logging: + file_writer.close() + return training_dict + + def eval(self, dataset, split='test', verbose=True, max_dt_boxes=400, threshold=0.0, + datasets_folder='./datasets', use_ssd=False): + + dataset_nms = Dataset_NMS(path=datasets_folder, dataset_name=dataset, split=split, use_ssd=use_ssd, + device=self.device) + + if self.classes is None: + self.classes = dataset_nms.classes + self.class_ids = dataset_nms.class_ids + + annotations_filename = dataset_nms.annotation_file + + eval_folder = self.temp_path + if not os.path.isdir(os.path.join(self.temp_path)): + os.mkdir(os.path.join(self.temp_path)) + if not os.path.isdir(eval_folder): + os.mkdir(eval_folder) + output_file = os.path.join(eval_folder, 'detections.json') + + if self.app_feats == 'fmod': + if self.fmod_mean_std is None: + self.fmod_mean_std = self.load_FMoD_init_from_dataset(dataset=dataset, map_type=self.fmod_map_type, + fmod_pyramid_lvl=self.fmod_pyramid_lvl, + datasets_folder=datasets_folder, + verbose=verbose) + self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std']) + + self.model = self.model.eval() + if "cuda" in self.device: + self.model = self.model.to(self.device) + + train_ids = np.arange(len(dataset_nms.src_data)) + nms_results = [] + pbar_eval = None + if verbose: + pbarDesc = "Evaluation progress" + pbar_eval = tqdm(desc=pbarDesc, total=len(train_ids)) + for sample_id in train_ids: + image_fln = dataset_nms.src_data[sample_id]['filename'] + + image_path = os.path.join(datasets_folder, dataset, image_fln) + img_res = dataset_nms.src_data[sample_id]['resolution'][::-1] + # Single class NMS only. 
+ class_index = 1 + if len(dataset_nms.src_data[sample_id]['dt_boxes'][class_index]) > 0: + dt_boxes = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 0:4]).float() + dt_scores = torch.tensor(dataset_nms.src_data[sample_id]['dt_boxes'][class_index][:, 4]).float() + dt_scores, dt_scores_ids = torch.sort(dt_scores, descending=True) + dt_boxes = dt_boxes[dt_scores_ids] + else: + pbar_eval.update(1) + continue + + if "cuda" in self.device: + dt_boxes = dt_boxes.to(self.device) + dt_scores = dt_scores.to(self.device) + + val_ids = torch.logical_and((dt_boxes[:, 2] - dt_boxes[:, 0]) > 4, + (dt_boxes[:, 3] - dt_boxes[:, 1]) > 4) + dt_boxes = dt_boxes[val_ids, :] + dt_scores = dt_scores[val_ids] + + if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0: + dt_boxes, dt_scores = apply_torchNMS(boxes=dt_boxes, scores=dt_scores, iou_thres=self.iou_filtering) + + dt_boxes = dt_boxes[:max_dt_boxes] + dt_scores = dt_scores[:max_dt_boxes] + app_feats = None + if self.app_feats == 'fmod': + img = Image.open(image_path) + img = img.convert(format='channels_last', channel_order='bgr') + self.fMoD.extract_maps(img=img, augm=False) + app_feats = self.fMoD.extract_FMoD_feats(dt_boxes) + app_feats = torch.unsqueeze(app_feats, dim=1) + elif self.app_feats == 'zeros': + app_feats = torch.zeros([dt_boxes.shape[0], 1, self.app_input_dim]) + if "cuda" in self.device: + app_feats = app_feats.to(self.device) + elif self.app_feats == 'custom': + raise AttributeError("Custom appearance-based features are not yet supported.") + msk = self.compute_mask(dt_boxes, iou_thres=0.2, extra=0.1) + q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=dt_boxes, scores=dt_scores, + resolution=img_res) + with torch.no_grad(): + preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk, + app_feats=app_feats) + bboxes = dt_boxes.cpu().numpy().astype('float64') + preds = preds.cpu().detach() + if threshold > 0.0: + ids = (preds > threshold) + preds = preds[ids] + bboxes = bboxes[ids.numpy().squeeze(-1), :] + for j in range(len(preds)): + nms_results.append({ + 'image_id': dataset_nms.src_data[sample_id]['id'], + 'bbox': [bboxes[j][0], bboxes[j][1], bboxes[j][2] - bboxes[j][0], bboxes[j][3] - bboxes[j][1]], + 'category_id': class_index, + 'score': np.float64(preds[j]) + }) + pbar_eval.update(1) + pbar_eval.close() + if verbose: + print('Writing results json to {}'.format(output_file)) + with open(output_file, 'w') as fid: + json.dump(nms_results, fid, indent=2) + eval_result = run_coco_eval(gt_file_path=os.path.join(dataset_nms.path, 'annotations', annotations_filename), + dt_file_path=output_file, only_classes=[1], + verbose=verbose, max_dets=[max_dt_boxes]) + os.remove(output_file) + if verbose: + for i in range(len(eval_result)): + print('Evaluation results (num_dets={})'.format(str(eval_result[i][1]))) + print(eval_result[i][0][0][1]) + print(eval_result[i][0][1][1]) + print(eval_result[i][0][2][1]) + print(eval_result[i][0][3][1]) + print('\n') + return eval_result + + def save(self, path, verbose=False, optimizer=None, scheduler=None, current_epoch=None, max_dt_boxes=400): + fname = path.split('/')[-1] + dir_name = path.replace('/' + fname, '') + if not os.path.isdir(dir_name): + os.makedirs(dir_name) + custom_dict = {'state_dict': self.model.state_dict(), 'current_epoch': current_epoch} + if optimizer is not None: + custom_dict['optimizer'] = optimizer.state_dict() + if scheduler is not None: + custom_dict['scheduler'] = scheduler.state_dict() + 
torch.save(custom_dict, path + '.pth') + + metadata = {"model_paths": [fname + '.pth'], "framework": "pytorch", "has_data": False, + "inference_params": {}, "optimized": False, "optimizer_info": {}, "backbone": {}, + "format": "pth", "classes": self.classes, "app_feats": self.app_feats, + "lq_dim": self.lq_dim, "sq_dim": self.sq_dim, "num_JPUs": self.num_JPUs, + "geom_input_dim": self.geom_input_dim, "app_input_dim": self.app_input_dim, + "max_dt_boxes": max_dt_boxes, "variant": self.variant} + if self.app_feats == 'fmod': + metadata["fmod_map_type"] = self.fmod_map_type + metadata["fmod_map_bin"] = self.fmod_map_bin + metadata["fmod_roi_pooling_dim"] = self.fmod_roi_pooling_dim + metadata["fmod_map_res_dim"] = self.fmod_map_res_dim + metadata["fmod_pyramid_lvl"] = self.fmod_pyramid_lvl + metadata["fmod_normalization"] = "fmod_normalization.pkl" + with open(os.path.join(dir_name, 'fmod_normalization.pkl'), 'wb') as f: + pickle.dump(self.fmod_mean_std, f) + with open(path + '.json', 'w', encoding='utf-8') as f: + json.dump(metadata, f, ensure_ascii=False, indent=4) + if verbose: + print("Saved Pytorch model.") + + def init_model(self): + if self.model is None: + self.model = Seq2SeqNet(dropout=self.dropout, use_app_feats=self.use_app_feats, + app_input_dim=self.app_input_dim, + geom_input_dim=self.geom_input_dim, lq_dim=self.lq_dim, sq_dim=self.sq_dim, + num_JPUs=self.num_JPUs, device=self.device) + for p in self.model.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + else: + raise UserWarning("Tried to initialize model while model is already initialized.") + + def load(self, path, verbose=False): + if os.path.isdir(path): + model_name = 'last_weights' + dir_path = path + else: + model_name = os.path.basename(os.path.normpath(path)).split('.')[0] + dir_path = os.path.dirname(os.path.normpath(path)) + + if verbose: + print("Model name:", model_name, "-->", os.path.join(dir_path, model_name + ".json")) + with open(os.path.join(dir_path, model_name + ".json"), encoding='utf-8-sig') as f: + metadata = json.load(f) + pth_path = os.path.join(dir_path, metadata["model_paths"][0]) + if verbose: + print("Loading checkpoint:", pth_path) + try: + checkpoint = torch.load(pth_path, map_location=torch.device(self.device)) + except FileNotFoundError as e: + e.strerror = "File " + pth_path + "not found." + raise e + if 'fmod_normalization' in metadata: + pkl_fmod = os.path.join(dir_path, metadata["fmod_normalization"]) + if verbose: + print("Loading FMoD normalization values:", pkl_fmod) + try: + with open(pkl_fmod, 'rb') as f: + self.fmod_mean_std = pickle.load(f) + self.fMoD.set_mean_std(mean_values=self.fmod_mean_std['mean'], std_values=self.fmod_mean_std['std']) + except FileNotFoundError as e: + e.strerror = "File " + pkl_fmod + "not found." + raise e + + self.assign_params(metadata=metadata, verbose=verbose) + self.load_state(checkpoint) + if verbose: + print("Loaded parameters and metadata.") + return True + + def assign_params(self, metadata, verbose): + + if verbose and self.variant is not None and self.variant != metadata["variant"]: + print("Incompatible value for the attribute \"variant\". It is now set to: " + + str(metadata["variant"])) + self.variant = metadata["variant"] + if verbose and self.geom_input_dim is not None and self.geom_input_dim != metadata["geom_input_dim"]: + print("Incompatible value for the attribute \"geom_input_dim\". 
It is now set to: " + + str(metadata["geom_input_dim"])) + self.geom_input_dim = metadata["geom_input_dim"] + if verbose and self.app_input_dim is not None and self.app_input_dim != metadata["app_input_dim"]: + print("Incompatible value for the attribute \"app_input_dim\". It is now set to: " + + str(metadata["app_input_dim"])) + self.app_input_dim = metadata["app_input_dim"] + if verbose and self.app_feats != metadata["app_feats"]: + print("Incompatible value for the attribute \"app_feats\". It is now set to: " + + str(metadata["app_feats"])) + self.app_feats = metadata["app_feats"] + if verbose and self.fmod_map_type is not None and self.fmod_map_type != metadata["fmod_map_type"]: + print("Incompatible value for the attribute \"fmod_map_type\". It is now set to: " + + str(metadata["fmod_map_type"])) + if "fmod_map_type" in metadata: + self.fmod_map_type = metadata["fmod_map_type"] + if verbose and self.fmod_map_bin is not None and self.fmod_map_bin != metadata["fmod_map_bin"]: + print("Incompatible value for the attribute \"fmod_map_bin\". It is now set to: " + + str(metadata["fmod_map_bin"])) + if "fmod_map_bin" in metadata: + self.fmod_map_bin = metadata["fmod_map_bin"] + if verbose and self.fmod_roi_pooling_dim is not None and \ + self.fmod_roi_pooling_dim != metadata["fmod_roi_pooling_dim"]: + print("Incompatible value for the attribute \"fmod_roi_pooling_dim\". It is now set to: " + + str(metadata["fmod_roi_pooling_dim"])) + if "fmod_roi_pooling_dim" in metadata: + self.fmod_roi_pooling_dim = metadata["fmod_roi_pooling_dim"] + if verbose and self.fmod_map_res_dim is not None and \ + self.fmod_map_res_dim != metadata["fmod_map_res_dim"]: + print("Incompatible value for the attribute \"fmod_map_res_dim\". It is now set to: " + + str(metadata["fmod_map_res_dim"])) + if "fmod_roi_pooling_dim" in metadata: + self.fmod_roi_pooling_dim = metadata["fmod_roi_pooling_dim"] + if verbose and self.fmod_pyramid_lvl is not None and \ + self.fmod_pyramid_lvl != metadata["fmod_pyramid_lvl"]: + print("Incompatible value for the attribute \"fmod_pyramid_lvl\". It is now set to: " + + str(metadata["fmod_pyramid_lvl"])) + if "fmod_pyramid_lvl" in metadata: + self.fmod_pyramid_lvl = metadata["fmod_pyramid_lvl"] + if verbose and self.lq_dim is not None and \ + self.lq_dim != metadata["lq_dim"]: + print("Incompatible value for the attribute \"lq_dim\". It is now set to: " + + str(metadata["lq_dim"])) + self.lq_dim = metadata["lq_dim"] + if verbose and self.sq_dim is not None and self.sq_dim != metadata["sq_dim"]: + print("Incompatible value for the attribute \"sq_dim\". It is now set to: " + + str(metadata["sq_dim"])) + self.sq_dim = metadata["sq_dim"] + if verbose and self.num_JPUs is not None and self.num_JPUs != metadata["num_JPUs"]: + print("Incompatible value for the attribute \"num_JPUs\". 
It is now set to: " + + str(metadata["num_JPUs"])) + self.num_JPUs = metadata["num_JPUs"] + if verbose and 'max_dt_boxes' in metadata: + print('Model is trained with ' + str(metadata['max_dt_boxes']) + ' as the maximum number of detections.') + + def load_state(self, checkpoint=None): + if checkpoint is None: + for p in self.model.parameters(): + if p.dim() > 1: + nn.init.xavier_uniform_(p) + else: + try: + source_state = checkpoint['state_dict'] + except KeyError: + source_state = checkpoint + target_state = self.model.state_dict() + new_target_state = collections.OrderedDict() + for target_key, target_value in target_state.items(): + if target_key in source_state and source_state[target_key].size() == target_state[target_key].size(): + new_target_state[target_key] = source_state[target_key] + else: + new_target_state[target_key] = target_state[target_key] + + self.model.load_state_dict(new_target_state) + + def count_parameters(self): + + if self.model is None: + raise UserWarning("Model is not initialized, can't count trainable parameters.") + return sum(p.numel() for p in self.model.parameters() if p.requires_grad) + + def download(self, path=None, model_name='seq2seq_pets_jpd_fmod', verbose=False, + url=OPENDR_SERVER_URL + "perception/object_detection_2d/nms/"): + + supported_pretrained_models = ["seq2seq_pets_jpd_fmod", "seq2seq_pets_ssd_fmod", + "seq2seq_coco_frcn_fmod", "seq2seq_coco_ssd_fmod"] + + if model_name not in supported_pretrained_models: + str_error = model_name + " pretrained model is not supported. The available pretrained models are: " + for i in range(len(supported_pretrained_models)): + str_error = str_error + supported_pretrained_models[i] + ", " + str_error = str_error[:-2] + '.' + raise ValueError(str_error) + + if path is None: + path = self.temp_path + + if not os.path.exists(path): + os.makedirs(path) + + if verbose: + print("Downloading pretrained model...") + + file_url = os.path.join(url, "pretrained", model_name + '.zip') + try: + urlretrieve(file_url, os.path.join(path, model_name + '.zip')) + with zipfile.ZipFile(os.path.join(path, model_name + '.zip'), 'r') as zip_ref: + zip_ref.extractall(path) + os.remove(os.path.join(path, model_name + '.zip')) + except: + raise UserWarning('Pretrained model not found on server.') + + def infer(self, boxes=None, scores=None, boxes_sorted=False, max_dt_boxes=400, img_res=None, threshold=0.1): + bounding_boxes = BoundingBoxList([]) + if scores.shape[0] == 0: + return bounding_boxes + if scores.shape[1] > 1: + raise ValueError('Multi-class NMS is not supported in Seq2Seq-NMS yet.') + if boxes.shape[0] != scores.shape[0]: + raise ValueError('Scores and boxes must have the same size in dim 0.') + if "cuda" in self.device: + boxes = boxes.to(self.device) + scores = scores.to(self.device) + + scores = scores.squeeze(-1) + keep_ids = torch.where(scores > 0.05)[0] + scores = scores[keep_ids] + boxes = boxes[keep_ids, :] + if not boxes_sorted: + scores, scores_ids = torch.sort(scores, dim=0, descending=True) + boxes = boxes[scores_ids] + + val_ids = torch.logical_and((boxes[:, 2] - boxes[:, 0]) > 4, + (boxes[:, 3] - boxes[:, 1]) > 4) + boxes = boxes[val_ids, :] + scores = scores[val_ids] + + if self.iou_filtering is not None and 1.0 > self.iou_filtering > 0: + boxes, scores = apply_torchNMS(boxes=boxes, scores=scores, iou_thres=self.iou_filtering) + + boxes = boxes[:max_dt_boxes] + scores = scores[:max_dt_boxes] + app_feats = None + + if self.app_feats == 'fmod': + app_feats = self.fMoD.extract_FMoD_feats(boxes) + app_feats = 
torch.unsqueeze(app_feats, dim=1) + elif self.app_feats == 'zeros': + app_feats = torch.zeros([boxes.shape[0], 1, self.app_input_dim]) + if "cuda" in self.device: + app_feats = app_feats.to(self.device) + elif self.app_feats == 'custom': + raise AttributeError("Custom appearance-based features are not yet supported.") + + msk = self.compute_mask(boxes, iou_thres=0.2, extra=0.1) + q_geom_feats, k_geom_feats = self.compute_geometrical_feats(boxes=boxes, scores=scores, + resolution=img_res) + + with torch.no_grad(): + preds = self.model(q_geom_feats=q_geom_feats, k_geom_feats=k_geom_feats, msk=msk, + app_feats=app_feats) + + mask = torch.where(preds > threshold)[0] + if mask.size == 0: + return BoundingBoxList([]) + preds = preds[mask].cpu().detach().numpy() + boxes = boxes[mask, :].cpu().numpy() + + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=0, + score=preds[idx]) + bounding_boxes.data.append(bbox) + return bounding_boxes, [boxes, np.zeros(scores.shape[0]), preds] + + def optimize(self, **kwargs): + """This method is not used in this implementation.""" + raise NotImplementedError + + def reset(self): + """This method is not used in this implementation.""" + return NotImplementedError + + def run_nms(self, boxes=None, scores=None, boxes_sorted=False, top_k=400, img=None, threshold=0.2): + + if self.app_feats == 'fmod': + if not isinstance(img, Image): + img = Image(img) + _img = img.convert("channels_last", "rgb") + self.fMoD.extract_maps(img=_img, augm=False) + + if isinstance(boxes, np.ndarray): + boxes = torch.tensor(boxes, device=self.device) + elif torch.is_tensor(boxes): + if "cuda" in self.device: + boxes = boxes.to(self.device) + + if isinstance(scores, np.ndarray): + scores = torch.tensor(scores, device=self.device) + elif torch.is_tensor(scores): + if "cuda" in self.device: + scores = scores.to(self.device) + boxes = self.infer(boxes=boxes, scores=scores, boxes_sorted=boxes_sorted, max_dt_boxes=top_k, + img_res=img.opencv().shape[::-1][1:]) + return boxes + + def set_architecture(self): + if self.variant == 'light': + self.lq_dim = 160 + elif self.variant == 'full': + self.lq_dim = 320 + if self.variant == 'light': + self.sq_dim = 80 + elif self.variant == 'full': + self.sq_dim = 160 + if self.variant == 'light': + self.num_JPUs = 2 + + def sef_fmod_architecture(self): + if self.variant == 'light': + self.fmod_roi_pooling_dim = 120 + if self.variant == 'light': + self.fmod_map_res_dim = 480 + elif self.variant == 'full': + self.fmod_map_res_dim = 800 + if self.variant == 'light': + self.fmod_pyramid_lvl = 2 + + def compute_mask(self, boxes=None, iou_thres=0.2, extra=0.1): + relations = filter_iou_boxes(boxes, iou_thres=iou_thres) + mask1 = torch.tril(relations).float() + mask2 = extra * torch.triu(relations, diagonal=1).float() + mask = mask1 + mask2 + return mask + + def compute_geometrical_feats(self, boxes, scores, resolution): + boxBs = boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1) + boxAs = boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1) + scoresBs = scores.unsqueeze(0).unsqueeze(-1).repeat(scores.shape[0], 1, 1) + scoresAs = scores.unsqueeze(1).unsqueeze(1).repeat(1, scores.shape[0], 1) + + scale_div = [resolution[1] / 20, resolution[0] / 20] + dx = ((boxBs[:, :, 0] - boxAs[:, :, 0] + boxBs[:, :, 2] - boxAs[:, :, 2]) / 2).unsqueeze(-1) + dy = ((boxBs[:, :, 1] - boxAs[:, :, 1] + boxBs[:, :, 3] - boxAs[:, :, 3]) / 2).unsqueeze(-1) + dxy = dx * dx + dy * dy + dxy = dxy / 
(scale_div[0] * scale_div[0] + scale_div[1] * scale_div[1]) + dx = (dx / scale_div[0]) + dy = (dy / scale_div[1]) + sx = boxBs[:, :, 2] - boxBs[:, :, 0] + sx_1 = (sx / (boxAs[:, :, 2] - boxAs[:, :, 0])).unsqueeze(-1) + sx_2 = (sx / scale_div[0]).unsqueeze(-1) + sy = boxBs[:, :, 3] - boxBs[:, :, 1] + sy_1 = (sy / (boxAs[:, :, 3] - boxAs[:, :, 1])).unsqueeze(-1) + sy_2 = (sy / scale_div[1]).unsqueeze(-1) + scl = (boxBs[:, :, 2] - boxBs[:, :, 0]) * (boxBs[:, :, 3] - boxBs[:, :, 1]) + scl_1 = (scl / ((boxAs[:, :, 2] - boxAs[:, :, 0]) * (boxAs[:, :, 3] - boxAs[:, :, 1]))).unsqueeze(-1) + scl_2 = (scl / (scale_div[0] * scale_div[1])).unsqueeze(-1) + del scl + + scr_1 = 5 * scoresBs + scr_2 = scr_1 - 5 * scoresAs + + sr_1 = torch.unsqueeze((boxBs[:, :, 3] - boxBs[:, :, 1]) / (boxBs[:, :, 2] - boxBs[:, :, 0]), dim=-1) + sr_2 = torch.unsqueeze(((boxBs[:, :, 3] - boxBs[:, :, 1]) / (boxBs[:, :, 2] - boxBs[:, :, 0])) / ( + (boxAs[:, :, 3] - boxAs[:, :, 1]) / (boxAs[:, :, 2] - boxAs[:, :, 0])), dim=-1) + + ious = 5 * (bb_intersection_over_union(boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1), + boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1))).unsqueeze(-1) + enc_vers_all = torch.cat((dx, dy, dxy, sx_1, sx_2, sy_1, sy_2, ious, scl_1, scl_2, scr_1, scr_2, sr_1, sr_2), + dim=2) + enc_vers = enc_vers_all.diagonal(dim1=0, dim2=1).transpose(0, 1).unsqueeze(1) + return enc_vers, enc_vers_all + + def load_FMoD_init_from_dataset(self, dataset=None, map_type='edgemap', fmod_pyramid_lvl=3, + datasets_folder='./datasets', + map_bin=True, verbose=False): + fmod_dir = os.path.join(datasets_folder, dataset, 'FMoD') + if not os.path.exists(fmod_dir): + os.makedirs(fmod_dir, exist_ok=True) + map_type_c = map_type + if map_bin: + map_type_c = map_type_c + '_B' + fmod_filename = dataset + '_' + map_type_c + '_' + str(fmod_pyramid_lvl) + '.pkl' + fmod_filename = fmod_filename.lower() + fmod_stats = None + if not os.path.exists(os.path.join(fmod_dir, fmod_filename)): + file_url = os.path.join(OPENDR_SERVER_URL + 'perception/object_detection_2d/nms/FMoD', fmod_filename) + try: + urlretrieve(file_url, os.path.join(fmod_dir, fmod_filename)) + except: + if verbose: + print( + 'Normalization files not found on FTP server. Normalization will be performed setting \u03BC = ' + '0 and \u03C3 = 1.') + fmod_feats_dim = 0 + for i in range(0, fmod_pyramid_lvl): + fmod_feats_dim = fmod_feats_dim + 15 * (pow(4, i)) + self.fmod_init_file = None + return {'mean': np.zeros(fmod_feats_dim), 'std': np.ones(fmod_feats_dim)} + self.fmod_init_file = os.path.join(fmod_dir, fmod_filename) + fmod_stats = self.load_FMoD_init(self.fmod_init_file) + return fmod_stats + + def load_FMoD_init(self, path=None): + try: + with open(path, 'rb') as fp: + fmod_stats = pickle.load(fp) + map_type = list(fmod_stats.keys())[0] + fmod_stats = fmod_stats[map_type] + except EnvironmentError as e: + e.strerror = 'FMoD initialization .pkl file not found' + raise e + return fmod_stats diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md b/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md new file mode 100644 index 0000000000..6b8c2513d0 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/soft_nms/README.md @@ -0,0 +1,35 @@ +Soft-NMS +====== + +This folder contains an implementation of Soft-NMS [[1]](#soft_nms-1). + +Sources +------ +Large parts of code are taken from [here](https://github.com/DocF/Soft-NMS) with modifications to make it compatible with OpenDR specifications. 
The original code is licensed under the MIT license: + +``` +MIT License + +Copyright (c) 2020 DocF + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +``` + +[1] Soft-NMS -- Improving Object Detection With One Line of Code, +[ArXiv](https://arxiv.org/abs/1704.04503). diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/__init__.py b/src/opendr/perception/object_detection_2d/nms/soft_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py b/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py new file mode 100644 index 0000000000..a0c668c850 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/soft_nms/soft_nms.py @@ -0,0 +1,129 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# MIT License +# +# Copyright (c) 2020 DocF +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
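# The SoftNMS class defined below wraps the classical Soft-NMS score-decay scheme behind the
# OpenDR NMSCustom interface: instead of discarding detections that overlap a higher-scoring
# box, their confidences are rescaled with either a linear (1 - IoU) or a Gaussian
# exp(-IoU^2 / sigma) weight, and boxes whose decayed score falls below the given threshold
# are dropped at the end.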
+ +from opendr.perception.object_detection_2d.nms.utils import NMSCustom +from opendr.perception.object_detection_2d.nms.utils.nms_utils import jaccard +from opendr.engine.target import BoundingBox, BoundingBoxList +import torch +import numpy as np + + +class SoftNMS(NMSCustom): + def __init__(self, nms_type='linear', device='cuda', nms_thres=None, top_k=400, post_k=100): + self.nms_types = ['linear', 'gaussian'] + if nms_type not in self.nms_types: + raise ValueError('Type: ' + nms_type + ' of Soft-NMS is not supported.') + else: + self.nms_type = nms_type + if nms_thres is None: + if nms_type == 'linear': + nms_thres = 0.3 + elif nms_type == 'gaussian': + nms_thres = 0.5 + self.device = device + self.nms_thres = nms_thres + self.top_k = top_k + self.post_k = post_k + + def nms_thres(self, nms_thres=0.45): + self.nms_thres = nms_thres + + def set_top_k(self, top_k=400): + self.top_k = top_k + + def set_post_k(self, post_k=100): + self.post_k = post_k + + def set_nms_type(self, nms_type='linear'): + if nms_type not in self.nms_types: + raise ValueError('Type: ' + nms_type + ' of Soft-NMS is not supported.') + else: + self.nms_type = nms_type + + def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None): + + if isinstance(boxes, np.ndarray): + boxes = torch.tensor(boxes, device=self.device) + elif torch.is_tensor(boxes): + if self.device == 'cpu': + boxes = boxes.cpu() + elif self.device == 'cuda': + boxes = boxes.cuda() + + if isinstance(scores, np.ndarray): + scores = torch.tensor(scores, device=self.device) + elif torch.is_tensor(scores): + if self.device == 'cpu': + scores = scores.cpu() + elif self.device == 'cuda': + scores = scores.cuda() + + scores, classes = scores.max(dim=1) + _, idx = scores.sort(0, descending=True) + idx = idx[:self.top_k] + boxes = boxes[idx] + scores = scores[idx] + classes = classes[idx] + + dets = torch.cat((boxes, scores.unsqueeze(-1)), dim=1) + + i = 0 + while dets.shape[0] > 0: + scores[i] = dets[0, 4] + iou = jaccard(dets[:1, :-1], dets[1:, :-1]).triu_(diagonal=0).squeeze(0) + weight = torch.ones_like(iou) + if self.nms_type == 'linear': + weight[iou > self.nms_thres] -= iou[iou > self.nms_thres] + elif self.nms_type == 'gaussian': + weight = np.exp(-(iou * iou) / self.nms_thres) + + dets[1:, 4] *= weight + dets = dets[1:, :] + i = i + 1 + keep_ids = torch.where(scores > threshold) + scores = scores[keep_ids].cpu().numpy() + classes = classes[keep_ids].cpu().numpy() + boxes = boxes[keep_ids].cpu().numpy() + bounding_boxes = BoundingBoxList([]) + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=classes[idx], + score=scores[idx]) + bounding_boxes.data.append(bbox) + + return bounding_boxes, [boxes, classes, scores] diff --git a/src/opendr/perception/object_detection_2d/nms/utils/__init__.py b/src/opendr/perception/object_detection_2d/nms/utils/__init__.py new file mode 100644 index 0000000000..2d130e14b8 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/utils/__init__.py @@ -0,0 +1,3 @@ +from opendr.perception.object_detection_2d.nms.utils.nms_custom import NMSCustom + +__all__ = ['NMSCustom'] diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py new file mode 100644 index 0000000000..7d551cd401 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_custom.py @@ -0,0 +1,24 @@ +# Copyright 2020-2022 OpenDR European Project +# +# 
Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from abc import ABC, abstractmethod + + +class NMSCustom(ABC): + def __init__(self, device='cpu'): + self.device = device + + @abstractmethod + def run_nms(self, boxes=None, scores=None, threshold=0.2, img=None, device='cpu'): + pass diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py new file mode 100644 index 0000000000..202f7f18c5 --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_dataset.py @@ -0,0 +1,404 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from opendr.engine.datasets import Dataset +from opendr.engine.data import Image +from opendr.perception.object_detection_2d.datasets.transforms import BoundingBoxListToNumpyArray +from opendr.engine.constants import OPENDR_SERVER_URL +from pycocotools.coco import COCO +import os +from urllib.request import urlretrieve +import ssl +import time +from zipfile import ZipFile +import tarfile +import pickle +import numpy as np +import math +from tqdm import tqdm +import gc + + +class Dataset_NMS(Dataset): + def __init__(self, path=None, dataset_name=None, split=None, use_ssd=True, device='cuda'): + super().__init__() + available_dataset = ['COCO', 'PETS', 'TEST_MODULE'] + self.dataset_sets = {'train': None, + 'val': None, + 'test': None} + if dataset_name not in available_dataset: + except_str = 'Unsupported dataset: ' + dataset_name + '. Currently available are:' + for j in range(len(available_dataset)): + except_str = except_str + ' \'' + available_dataset[j] + '\'' + if j < len(available_dataset) - 1: + except_str = except_str + ',' + except_str = except_str + '.' 
+ raise ValueError(except_str) + + ssl._create_default_https_context = ssl._create_unverified_context + self.dataset_name = dataset_name + self.split = split + # self.__prepare_dataset() + self.path = os.path.join(path, dataset_name) + self.src_data = [] + if self.dataset_name == "PETS": + self.detector = 'JPD' + self.detector_type = 'default' + if use_ssd: + self.detector = 'SSD' + self.detector_type = 'custom' + + self.dataset_sets['train'] = 'train' + self.dataset_sets['val'] = 'val' + self.dataset_sets['test'] = 'test' + if self.dataset_sets[self.split] is None: + raise ValueError(self.split + ' split is not available...') + + if not os.path.exists(os.path.join(self.path, 'images/S1/L1')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S1_L1.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S1/L2')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S1_L2.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S2/L1')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L1.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S2/L2')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L2.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S2/L3')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S2_L3.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists(os.path.join(self.path, 'images/S3/Multiple_Flow')): + self.download( + 'http://ftp.cs.rdg.ac.uk/pub/PETS2009/Crowd_PETS09_dataset/a_data/Crowd_PETS09/S3_MF.tar.bz2', + download_path=os.path.join(self.path, 'images'), file_format="tar.bz2", create_dir=True) + if not os.path.exists( + os.path.join(self.path, 'annotations', 'pets_' + self.dataset_sets[self.split] + '.json')): + self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_annotations_json.zip', + download_path=os.path.join(self.path, 'annotations'), file_format="zip", + create_dir=True) + pkl_filename = os.path.join(self.path, + 'data_' + self.detector + '_' + self.dataset_sets[self.split] + '_pets.pkl') + if not os.path.exists(pkl_filename): + ssd = None + if use_ssd: + from opendr.perception.object_detection_2d.ssd.ssd_learner import SingleShotDetectorLearner + ssd = SingleShotDetectorLearner(device=device) + ssd.download(".", mode="pretrained") + ssd.load("./ssd_default_person", verbose=True) + if not os.path.exists( + os.path.join(self.path, 'detections', + 'PETS-' + self.dataset_sets[self.split] + '_siyudpm_dets.idl')): + self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_detections.zip', + download_path=os.path.join(self.path, 'detections'), file_format="zip", + create_dir=True) + if not os.path.exists( + os.path.join(self.path, 'annotations', 'PETS-' + self.dataset_sets[self.split] + '.idl')): + self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/PETS_annotations.zip', + download_path=os.path.join(self.path, 
'annotations'), file_format="zip", + create_dir=True) + with open(os.path.join(self.path, 'annotations', + 'PETS-' + self.dataset_sets[self.split] + '.idl')) as fp_gt: + fp_dt = None + if self.detector_type == 'default': + fp_dt = open(os.path.join(self.path, 'detections', + 'PETS-' + self.dataset_sets[self.split] + '_siyudpm_dets.idl')) + print('Preparing PETS ' + self.dataset_sets[self.split] + ' set...') + current_id = 0 + number_samples = 1696 + if self.split == 'val': + current_id = 1696 + number_samples = 240 + elif self.split == 'test': + current_id = 1936 + number_samples = 436 + pbarDesc = "Overall progress" + pbar = tqdm(desc=pbarDesc, total=number_samples) + if self.detector_type == 'default': + line_dt = fp_dt.readline() + line_gt = fp_gt.readline() + while line_gt: + remove_strings = ['PETS09-', '\"', ':', '(', ')', ',', '', ';'] + data_gt = line_gt.replace(':', ' ') + for j in range(len(remove_strings)): + data_gt = data_gt.replace(remove_strings[j], '') + data_gt = data_gt.split() + filename_gt = data_gt[0][0:2] + '/' + data_gt[0][2:] + if filename_gt[0:6] == 'S2/L1/': + filename_gt = filename_gt.replace('img/00', 'Time_12-34/View_001/frame_') + num = int(filename_gt[-8:-4]) - 1 + filename_gt = filename_gt[:-8] + str(num).zfill(4) + '.jpg' + if filename_gt[0:6] == 'S2/L2/': + filename_gt = filename_gt.replace('img/00', 'Time_14-55/View_001/frame_') + num = int(filename_gt[-8:-4]) - 1 + filename_gt = filename_gt[:-8] + str(num).zfill(4) + '.jpg' + if filename_gt[0:2] == 'S3': + filename_gt = filename_gt.replace('_MF', 'Multiple_Flow') + + if self.detector_type == 'default': + data_dt = line_dt.replace(':', ' ') + for j in range(len(remove_strings)): + data_dt = data_dt.replace(remove_strings[j], '') + data_dt = data_dt.split() + filename_dt = data_dt[0][0:2] + '/' + data_dt[0][2:] + if filename_dt[0:6] == 'S2/L1/': + filename_dt = filename_dt.replace('img/00', 'Time_12-34/View_001/frame_') + num = int(filename_dt[-8:-4]) - 1 + filename_dt = filename_dt[:-8] + str(num).zfill(4) + '.jpg' + if filename_dt[0:6] == 'S2/L2/': + filename_dt = filename_dt.replace('img/00', 'Time_14-55/View_001/frame_') + num = int(filename_dt[-8:-4]) - 1 + filename_dt = filename_dt[:-8] + str(num).zfill(4) + '.jpg' + if filename_dt[0:2] == 'S3': + filename_dt = filename_dt.replace('_MF', 'Multiple_Flow') + if filename_gt != filename_dt: + raise ValueError('Errors in files...') + + img = Image.open(os.path.join(self.path, 'images/', filename_gt)) + + dt_boxes = [] + if self.detector_type == 'default': + for i in range(1, (len(data_dt)), 5): + dt_box = np.array((float(data_dt[i]), float(data_dt[i + 1]), float(data_dt[i + 2]), + float(data_dt[i + 3]), 1 / (1 + math.exp(- float(data_dt[i + 4]))))) + dt_boxes.append(dt_box) + else: + bboxes_list = ssd.infer(img, threshold=0.0, custom_nms=None, nms_thresh=0.975, + nms_topk=6000, post_nms=6000) + bboxes_list = BoundingBoxListToNumpyArray()(bboxes_list) + bboxes_list = bboxes_list[bboxes_list[:, 4] > 0.015] + bboxes_list = bboxes_list[np.argsort(bboxes_list[:, 4]), :][::-1] + bboxes_list = bboxes_list[:5000, :] + for b in range(len(bboxes_list)): + dt_boxes.append(np.array([bboxes_list[b, 0], bboxes_list[b, 1], bboxes_list[b, 2], + bboxes_list[b, 3], bboxes_list[b, 4][0]])) + gt_boxes = [] + for i in range(1, (len(data_gt)), 5): + gt_box = np.array((float(data_gt[i]), float(data_gt[i + 1]), float(data_gt[i + 2]), + float(data_gt[i + 3]))) + gt_boxes.append(gt_box) + self.src_data.append({ + 'id': current_id, + 'filename': os.path.join('images', filename_gt), 
+ 'resolution': img.opencv().shape[0:2][::-1], + 'gt_boxes': [np.asarray([]), np.asarray(gt_boxes)], + 'dt_boxes': [np.asarray([]), np.asarray(dt_boxes)] + }) + current_id = current_id + 1 + pbar.update(1) + if self.detector_type == 'default': + line_dt = fp_dt.readline() + line_gt = fp_gt.readline() + pbar.close() + if self.detector_type == 'default': + fp_dt.close() + elif self.detector == 'SSD': + del ssd + gc.collect() + with open(pkl_filename, 'wb') as handle: + pickle.dump(self.src_data, handle, protocol=pickle.DEFAULT_PROTOCOL) + else: + with open(pkl_filename, 'rb') as fp_pkl: + self.src_data = pickle.load(fp_pkl) + + self.classes = ['background', 'human'] + self.class_ids = [-1, 1] + self.annotation_file = 'pets_' + self.dataset_sets[self.split] + '.json' + elif self.dataset_name == "COCO": + self.dataset_sets['train'] = 'train' + self.dataset_sets['val'] = 'minival' + self.dataset_sets['test'] = 'valminusminival' + if self.dataset_sets[self.split] is None: + raise ValueError(self.split + ' split is not available...') + elif self.dataset_sets[self.split] == 'train': + imgs_split = 'train2014' + else: + imgs_split = 'val2014' + self.detector = 'FRCN' + self.detector_type = 'default' + ssd = None + if use_ssd: + self.detector = 'SSD' + self.detector_type = 'custom' + from opendr.perception.object_detection_2d.ssd.ssd_learner import SingleShotDetectorLearner + ssd = SingleShotDetectorLearner(device=device) + ssd.download(".", mode="pretrained") + ssd.load("./ssd_default_person", verbose=True) + if not os.path.exists(os.path.join(self.path, imgs_split)): + self.download('http://images.cocodataset.org/zips/' + imgs_split + '.zip', + download_path=os.path.join(self.path), file_format="zip", + create_dir=True) + pkl_filename = os.path.join(self.path, 'data_' + self.detector + '_' + + self.dataset_sets[self.split] + '_coco.pkl') + if not os.path.exists(pkl_filename): + if not os.path.exists(os.path.join(self.path, 'annotations', 'instances_' + + self.dataset_sets[self.split] + + '2014.json')): + if self.dataset_sets[self.split] == 'train': + ann_url = 'http://images.cocodataset.org/annotations/annotations_trainval2014.zip' + self.download(ann_url, download_path=os.path.join(self.path), file_format="zip", + create_dir=True) + else: + if self.dataset_sets[self.split] == 'minival': + ann_url = 'https://dl.dropboxusercontent.com/s/o43o90bna78omob/' \ + 'instances_minival2014.json.zip?dl=0' + else: + ann_url = 'https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/' \ + 'instances_valminusminival2014.json.zip?dl=0' + self.download(ann_url, download_path=os.path.join(self.path, 'annotations'), file_format="zip", + create_dir=True) + if not os.path.exists(os.path.join(self.path, 'detections', 'coco_2014_' + + self.dataset_sets[self.split] + + '_FRCN_train.pkl')): + self.download('http://datasets.d2.mpi-inf.mpg.de/hosang17cvpr/coco_2014_FRCN.tar.gz', + download_path=os.path.join(self.path, 'detections'), file_format='tar.gz', + create_dir=True) + with open(os.path.join(self.path, 'detections', + 'coco_2014_' + self.dataset_sets[self.split] + '_FRCN_train.pkl'), 'rb') as f: + dets_default = pickle.load(f, encoding='latin1') + annots = COCO(annotation_file=os.path.join(self.path, 'annotations', 'instances_' + + self.dataset_sets[self.split] + '2014.json')) + pbarDesc = "Overall progress" + pbar = tqdm(desc=pbarDesc, total=len(dets_default[1])) + for i in range(len(dets_default[1])): + dt_boxes = [] + img_info = annots.loadImgs([dets_default[1][i]])[0] + img = Image.open(os.path.join(self.path, 
imgs_split, img_info["file_name"])) + if self.detector_type == 'default': + dt_boxes = dets_default[0][1][i] + elif self.detector == 'SSD': + bboxes_list = ssd.infer(img, threshold=0.0, custom_nms=None, nms_thresh=0.975, + nms_topk=6000, post_nms=6000) + bboxes_list = BoundingBoxListToNumpyArray()(bboxes_list) + if bboxes_list.shape[0] > 0: + bboxes_list = bboxes_list[bboxes_list[:, 4] > 0.015] + if bboxes_list.shape[0] > 0: + bboxes_list = bboxes_list[np.argsort(bboxes_list[:, 4]), :][::-1] + bboxes_list = bboxes_list[:5000, :] + for b in range(len(bboxes_list)): + dt_boxes.append(np.array([bboxes_list[b, 0], bboxes_list[b, 1], bboxes_list[b, 2], + bboxes_list[b, 3], bboxes_list[b, 4][0]])) + dt_boxes = np.asarray(dt_boxes) + annots_in_frame = annots.loadAnns( + annots.getAnnIds(imgIds=[dets_default[1][i]], catIds=[1], iscrowd=False)) + gt_boxes = [] + for j in range(len(annots_in_frame)): + gt_boxes.append(annots_in_frame[j]['bbox']) + gt_boxes = np.asarray(np.asarray(gt_boxes)) + if gt_boxes.shape[0] > 0: + gt_boxes[:, 2] = gt_boxes[:, 0] + gt_boxes[:, 2] + gt_boxes[:, 3] = gt_boxes[:, 1] + gt_boxes[:, 3] + self.src_data.append({ + 'id': dets_default[1][i], + 'filename': os.path.join(imgs_split, img_info["file_name"]), + 'resolution': [img_info['width'], img_info['height']], + 'gt_boxes': [np.asarray([]), gt_boxes], + 'dt_boxes': [np.asarray([]), dt_boxes] + }) + pbar.update(1) + pbar.close() + if self.detector == 'SSD': + del ssd + gc.collect() + with open(pkl_filename, 'wb') as handle: + pickle.dump(self.src_data, handle, protocol=pickle.DEFAULT_PROTOCOL) + else: + with open(pkl_filename, 'rb') as fp_pkl: + self.src_data = pickle.load(fp_pkl) + self.classes = ['background', 'person'] + self.class_ids = [-1, 1] + self.annotation_file = 'instances_' + self.dataset_sets[self.split] + '2014.json' + elif self.dataset_name == "TEST_MODULE": + self.dataset_sets['train'] = 'test' + self.dataset_sets['val'] = 'test' + self.dataset_sets['test'] = 'test' + if self.dataset_sets[self.split] is None: + raise ValueError(self.split + ' split is not available...') + pkl_filename = os.path.join(self.path, 'test_module.pkl') + if not os.path.exists(pkl_filename): + data_url = OPENDR_SERVER_URL + '/perception/object_detection_2d/nms/datasets/test_module.zip' + self.download(data_url, download_path=os.path.join(self.path).replace("TEST_MODULE", ""), file_format="zip", + create_dir=True) + with open(pkl_filename, 'rb') as fp_pkl: + self.src_data = pickle.load(fp_pkl) + self.classes = ['background', 'person'] + self.class_ids = [-1, 1] + self.annotation_file = 'test_module_anns.json' + + @staticmethod + def download( + url, download_path, dataset_sub_path=".", file_format="zip", create_dir=False): + + if create_dir: + os.makedirs(download_path, exist_ok=True) + + print("Downloading dataset from", url, "to", download_path) + + start_time = 0 + last_print = 0 + + def reporthook(count, block_size, total_size): + nonlocal start_time + nonlocal last_print + if count == 0: + start_time = time.time() + last_print = start_time + return + + duration = time.time() - start_time + progress_size = int(count * block_size) + speed = int(progress_size / (1024 * duration)) + if time.time() - last_print >= 1: + last_print = time.time() + print( + "\r%d MB, %d KB/s, %d seconds passed" % + (progress_size / (1024 * 1024), speed, duration), + end='' + ) + + if file_format == "zip": + zip_path = os.path.join(download_path, "dataset.zip") + urlretrieve(url, zip_path, reporthook=reporthook) + print() + print("Extracting data from 
zip file") + with ZipFile(zip_path, 'r') as zip_ref: + zip_ref.extractall(download_path) + os.remove(zip_path) + elif file_format == "tar.bz2" or file_format == "tar.gz": + tar_path = os.path.join(download_path, "dataset." + file_format) + urlretrieve(url, tar_path, reporthook=reporthook) + print() + + def members(tf): + l = len("Crowd_PETS09/") + for member in tf.getmembers(): + if member.path.startswith("Crowd_PETS09/"): + member.path = member.path[l:] + yield member + + with tarfile.open(tar_path, "r:" + file_format.split('.')[1]) as tar: + if file_format == "tar.bz2": + tar.extractall(path=download_path, members=members(tar)) + else: + tar.extractall(path=download_path) + tar.close() + os.remove(tar_path) + else: + raise ValueError("Unsupported file_format: " + file_format) diff --git a/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py b/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py new file mode 100644 index 0000000000..93286bbc7a --- /dev/null +++ b/src/opendr/perception/object_detection_2d/nms/utils/nms_utils.py @@ -0,0 +1,286 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This file contains code from the CIoU distribution (https://github.com/Zzh-tju/CIoU). +# Copyright (c) 2020 Zheng, Zhaohui. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, version 3. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +import torch +import torchvision +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +import sys +import os + + +def jaccard(box_a, box_b, iscrowd: bool = False): + use_batch = True + if box_a.dim() == 2: + use_batch = False + box_a = box_a[None, ...] + box_b = box_b[None, ...] 
+ + inter = intersect(box_a, box_b) + area_a = ((box_a[:, :, 2] - box_a[:, :, 0]) * + (box_a[:, :, 3] - box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B] + area_b = ((box_b[:, :, 2] - box_b[:, :, 0]) * + (box_b[:, :, 3] - box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B] + union = area_a + area_b - inter + + out = inter / area_a if iscrowd else inter / union + return out if use_batch else out.squeeze(0) + + +def intersect(box_a, box_b): + n = box_a.size(0) + A = box_a.size(1) + B = box_b.size(1) + max_xy = torch.min(box_a[:, :, 2:].unsqueeze(2).expand(n, A, B, 2), + box_b[:, :, 2:].unsqueeze(1).expand(n, A, B, 2)) + min_xy = torch.max(box_a[:, :, :2].unsqueeze(2).expand(n, A, B, 2), + box_b[:, :, :2].unsqueeze(1).expand(n, A, B, 2)) + return torch.clamp(max_xy - min_xy, min=0).prod(3) # inter + + +def diou(box_a, box_b, iscrowd: bool = False): + use_batch = True + if box_a.dim() == 2: + use_batch = False + box_a = box_a[None, ...] + box_b = box_b[None, ...] + + inter = intersect(box_a, box_b) + area_a = ((box_a[:, :, 2] - box_a[:, :, 0]) * + (box_a[:, :, 3] - box_a[:, :, 1])).unsqueeze(2).expand_as(inter) # [A,B] + area_b = ((box_b[:, :, 2] - box_b[:, :, 0]) * + (box_b[:, :, 3] - box_b[:, :, 1])).unsqueeze(1).expand_as(inter) # [A,B] + union = area_a + area_b - inter + x1 = ((box_a[:, :, 2] + box_a[:, :, 0]) / 2).unsqueeze(2).expand_as(inter) + y1 = ((box_a[:, :, 3] + box_a[:, :, 1]) / 2).unsqueeze(2).expand_as(inter) + x2 = ((box_b[:, :, 2] + box_b[:, :, 0]) / 2).unsqueeze(1).expand_as(inter) + y2 = ((box_b[:, :, 3] + box_b[:, :, 1]) / 2).unsqueeze(1).expand_as(inter) + + t1 = box_a[:, :, 1].unsqueeze(2).expand_as(inter) + b1 = box_a[:, :, 3].unsqueeze(2).expand_as(inter) + l1 = box_a[:, :, 0].unsqueeze(2).expand_as(inter) + r1 = box_a[:, :, 2].unsqueeze(2).expand_as(inter) + + t2 = box_b[:, :, 1].unsqueeze(1).expand_as(inter) + b2 = box_b[:, :, 3].unsqueeze(1).expand_as(inter) + l2 = box_b[:, :, 0].unsqueeze(1).expand_as(inter) + r2 = box_b[:, :, 2].unsqueeze(1).expand_as(inter) + cr = torch.max(r1, r2) + cl = torch.min(l1, l2) + ct = torch.min(t1, t2) + cb = torch.max(b1, b2) + D = (((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)) + out = inter / area_a if iscrowd else inter / union - D ** 0.9 + return out if use_batch else out.squeeze(0) + + +def distance(box_a, box_b, iscrowd: bool = False): + use_batch = True + if box_a.dim() == 2: + use_batch = False + box_a = box_a[None, ...] + box_b = box_b[None, ...] 
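# Centre-distance penalty used by the DIoU-style helpers above: the squared distance between
# the two box centres is divided by the squared diagonal of the smallest enclosing box
# (plus a small epsilon) and raised to the power 0.6. Unlike diou(), only this distance term
# is returned, without the IoU component.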
+ + inter = intersect(box_a, box_b) + x1 = ((box_a[:, :, 2] + box_a[:, :, 0]) / 2).unsqueeze(2).expand_as(inter) + y1 = ((box_a[:, :, 3] + box_a[:, :, 1]) / 2).unsqueeze(2).expand_as(inter) + x2 = ((box_b[:, :, 2] + box_b[:, :, 0]) / 2).unsqueeze(1).expand_as(inter) + y2 = ((box_b[:, :, 3] + box_b[:, :, 1]) / 2).unsqueeze(1).expand_as(inter) + + t1 = box_a[:, :, 1].unsqueeze(2).expand_as(inter) + b1 = box_a[:, :, 3].unsqueeze(2).expand_as(inter) + l1 = box_a[:, :, 0].unsqueeze(2).expand_as(inter) + r1 = box_a[:, :, 2].unsqueeze(2).expand_as(inter) + + t2 = box_b[:, :, 1].unsqueeze(1).expand_as(inter) + b2 = box_b[:, :, 3].unsqueeze(1).expand_as(inter) + l2 = box_b[:, :, 0].unsqueeze(1).expand_as(inter) + r2 = box_b[:, :, 2].unsqueeze(1).expand_as(inter) + + cr = torch.max(r1, r2) + cl = torch.min(l1, l2) + ct = torch.min(t1, t2) + cb = torch.max(b1, b2) + D = (((x2 - x1) ** 2 + (y2 - y1) ** 2) / ((cr - cl) ** 2 + (cb - ct) ** 2 + 1e-7)) ** 0.6 + out = D if iscrowd else D + return out if use_batch else out.squeeze(0) + + +def det_matching(scores, dt_boxes, gt_boxes, iou_thres, device='cuda'): + sorted_indices = torch.argsort(-scores, dim=0) + labels = torch.zeros(len(dt_boxes)) + if device == 'cuda': + labels = labels.cuda() + if gt_boxes.shape[0] == 0: + return labels.unsqueeze(-1) + assigned_GT = -torch.ones(len(gt_boxes)) + r = torch.tensor([-1, -1, -1, -1]).float().unsqueeze(0).unsqueeze(0) + if device == 'cuda': + r = r.cuda() + for s in sorted_indices: + gt_boxes_c = gt_boxes.clone().unsqueeze(0) + gt_boxes_c[0, assigned_GT > -1, :] = r + ious = bb_intersection_over_union(boxAs=dt_boxes[s].clone().unsqueeze(0), boxBs=gt_boxes_c) + annot_iou, annot_box_id = torch.sort(ious.squeeze(), descending=True) + if annot_box_id.ndim > 0: + annot_box_id = annot_box_id[0] + annot_iou = annot_iou[0] + if annot_iou > iou_thres: + assigned_GT[annot_box_id] = s + labels[s] = 1 + return labels.unsqueeze(-1) + + +def run_coco_eval(dt_file_path=None, gt_file_path=None, only_classes=None, max_dets=None, + verbose=False): + if max_dets is None: + max_dets = [200, 400, 600, 800, 1000, 1200] + results = [] + sys.stdout = open(os.devnull, 'w') + for i in range(len(max_dets)): + coco = COCO(gt_file_path) + coco_dt = coco.loadRes(dt_file_path) + cocoEval = COCOeval(coco, coco_dt, 'bbox') + cocoEval.params.iouType = 'bbox' + cocoEval.params.useCats = True + cocoEval.params.catIds = only_classes + cocoEval.params.maxDets = [max_dets[i]] + cocoEval.evaluate() + results.append([summarize_nms(coco_eval=cocoEval, maxDets=max_dets[i]), max_dets[i]]) + # print(results[i]) + del cocoEval, coco_dt, coco + sys.stdout = sys.__stdout__ + return results + + +def summarize_nms(coco_eval=None, maxDets=100): + def summarize(ap=1, iouThr=None, areaRng='all', maxDets=100): + p = coco_eval.params + iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6s} | maxDets={:>3d} ] = {:0.3f}' + titleStr = 'Average Precision' if ap == 1 else 'Average Recall' + typeStr = '(AP)' if ap == 1 else '(AR)' + iouStr = '{:0.2f}:{:0.2f}'.format(p.iouThrs[0], p.iouThrs[-1]) \ + if iouThr is None else '{:0.2f}'.format(iouThr) + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + if ap == 1: + # dimension of precision: [TxRxKxAxM] + s = coco_eval.eval['precision'] + # IoU + if iouThr is not None: + t = np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, :, aind, mind] + else: + # dimension of recall: [TxKxAxM] + s = coco_eval.eval['recall'] + if iouThr is not None: + t = 
np.where(iouThr == p.iouThrs)[0] + s = s[t] + s = s[:, :, aind, mind] + if len(s[s > -1]) == 0: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + stat_str = iStr.format(titleStr, typeStr, iouStr, areaRng, maxDets, mean_s) + return [mean_s, stat_str] + + def summarizeDets(): + stats = [] + stat, stat_str = summarize(1, maxDets=maxDets) + stats.append([stat, stat_str]) + stat, stat_str = summarize(1, iouThr=.5, maxDets=maxDets) + stats.append([stat, stat_str]) + stat, stat_str = summarize(1, iouThr=.75, maxDets=maxDets) + stats.append([stat, stat_str]) + stat, stat_str = summarize(0, maxDets=maxDets) + stats.append([stat, stat_str]) + return stats + + coco_eval.accumulate() + summarized = summarizeDets() + return summarized + + +def drop_dets(boxes, scores, keep_ratio=0.85): + ids = np.arange(len(boxes)) + np.random.shuffle(ids) + ids_keep = ids[0:int(len(boxes) * keep_ratio)] + boxes_new = boxes[ids_keep, :] + scores_new = scores[ids_keep] + scores_new, scores_new_ids = torch.sort(scores_new, descending=True) + boxes_new = boxes_new[scores_new_ids] + return boxes_new, scores_new + + +def filter_iou_boxes(boxes=None, iou_thres=0.2): + ious = bb_intersection_over_union(boxes.unsqueeze(1).repeat(1, boxes.shape[0], 1), + boxes.clone().unsqueeze(0).repeat(boxes.shape[0], 1, 1)) + ids_boxes = ious >= iou_thres + return ids_boxes + + +def bb_intersection_over_union(boxAs=None, boxBs=None): + xA = torch.maximum(boxAs[:, :, 0], boxBs[:, :, 0]) + yA = torch.maximum(boxAs[:, :, 1], boxBs[:, :, 1]) + xB = torch.minimum(boxAs[:, :, 2], boxBs[:, :, 2]) + yB = torch.minimum(boxAs[:, :, 3], boxBs[:, :, 3]) + interAreas = torch.maximum(torch.zeros_like(xB), xB - xA + 1) * torch.maximum(torch.zeros_like(yB), yB - yA + 1) + boxAAreas = (boxAs[:, :, 2] - boxAs[:, :, 0] + 1) * (boxAs[:, :, 3] - boxAs[:, :, 1] + 1) + boxBAreas = (boxBs[:, :, 2] - boxBs[:, :, 0] + 1) * (boxBs[:, :, 3] - boxBs[:, :, 1] + 1) + ious = interAreas / (boxAAreas + boxBAreas - interAreas) + return ious + + +def compute_class_weights(pos_weights, max_dets=400, dataset_nms=None): + num_pos = np.ones([len(dataset_nms.classes), 1]) + num_bg = np.ones([len(dataset_nms.classes), 1]) + weights = np.zeros([len(dataset_nms.classes), 2]) + for i in range(len(dataset_nms.src_data)): + for cls_index in range(len(dataset_nms.classes)): + num_pos[cls_index] = num_pos[cls_index] + \ + min(max_dets, len(dataset_nms.src_data[i]['gt_boxes'][cls_index])) + num_bg[cls_index] = num_bg[cls_index] + max(0, min(max_dets, + len(dataset_nms.src_data[i]['dt_boxes'][cls_index])) - + min(max_dets, + len(dataset_nms.src_data[i]['gt_boxes'][cls_index]))) + for class_index in range(len(dataset_nms.classes)): + weights[class_index, 0] = (1 - pos_weights[class_index]) * (num_pos[class_index] + + num_bg[class_index]) / num_bg[class_index] + weights[class_index, 1] = pos_weights[class_index] * (num_pos[class_index] + + num_bg[class_index]) / num_pos[class_index] + return weights + + +def apply_torchNMS(boxes, scores, iou_thres): + ids_nms = torchvision.ops.nms(boxes, scores, iou_thres) + scores = scores[ids_nms] + boxes = boxes[ids_nms] + return boxes, scores diff --git a/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py b/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py index 386f5b5306..70b4656cf1 100644 --- a/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py +++ b/src/opendr/perception/object_detection_2d/ssd/ssd_learner.py @@ -43,8 +43,10 @@ # algorithm imports from opendr.perception.object_detection_2d.utils.eval_utils import 
DetectionDatasetCOCOEval from opendr.perception.object_detection_2d.datasets import DetectionDataset -from opendr.perception.object_detection_2d.datasets.transforms import ImageToNDArrayTransform, BoundingBoxListToNumpyArray, \ - transform_test +from opendr.perception.object_detection_2d.datasets.transforms import ImageToNDArrayTransform, \ + BoundingBoxListToNumpyArray, \ + transform_test, pad_test +from opendr.perception.object_detection_2d.nms.utils import NMSCustom gutils.random.seed(0) @@ -90,7 +92,6 @@ def __init__(self, lr=1e-3, epochs=120, batch_size=8, self.ctx = mx.gpu(int(self.device.split(':')[1])) else: self.ctx = mx.cpu() - print("Device set to cuda but no GPU available, using CPU...") else: self.ctx = mx.cpu() @@ -141,7 +142,7 @@ def save(self, path, verbose=False): if verbose: print("Model parameters saved.") - with open(os.path.join(path, model_name + '.json'), 'w', encoding='utf-8') as f: + with open(os.path.join(path, model_name + '.json'), 'w', encoding='utf-8') as f: json.dump(metadata, f, ensure_ascii=False, indent=4) if verbose: print("Model metadata saved.") @@ -216,7 +217,7 @@ def download(self, path=None, mode="pretrained", verbose=False, if verbose: print("Downloading params...") file_url = os.path.join(url, "pretrained", "ssd_512_vgg16_atrous_wider_person", - "ssd_512_vgg16_atrous_wider_person.params") + "ssd_512_vgg16_atrous_wider_person.params") urlretrieve(file_url, os.path.join(path, "ssd_512_vgg16_atrous_wider_person.params")) @@ -461,18 +462,27 @@ def __get_lr_at(self, epoch): else: return self.lr - def eval(self, dataset, use_subset=False, subset_size=100, verbose=False): + def eval(self, dataset, use_subset=False, subset_size=100, verbose=False, + nms_thresh=0.45, nms_topk=400, post_nms=100): """ This method performs evaluation on a given dataset and returns a dictionary with the evaluation results. :param dataset: dataset object, to perform evaluation on :type dataset: opendr.perception.object_detection_2d.datasets.DetectionDataset or opendr.engine.data.ExternalDataset - :return: dictionary containing evaluation metric names nad values :param use_subset: if True, only a subset of the dataset is evaluated, defaults to False :type use_subset: bool, optional :param subset_size: if use_subset is True, subset_size controls the size of the subset to be evaluated :type subset_size: int, optional :param verbose: if True, additional information is printed on stdout :type verbose: bool, optional + :param nms_thresh: Non-maximum suppression threshold. You can specify < 0 or > 1 to disable NMS. + :type nms_thresh: float, default is 0.45 + :param nms_topk: Apply NMS to top k detection results, use -1 to disable so that every Detection result is used in NMS. + :type nms_topk: int, default is 400 + :param post_nms: Only return top post_nms detection results, the rest is discarded. + The number is based on COCO dataset which has maximum 100 objects per image. You can adjust this number if + expecting more objects. You can use -1 to return all detections. 
+ :type post_nms: int, default is 100 + :return: dictionary containing evaluation metric names nad values :rtype: dict """ autograd.set_training(False) @@ -494,7 +504,7 @@ def eval(self, dataset, use_subset=False, subset_size=100, verbose=False): self._model.initialize() self._model.collect_params().reset_ctx(ctx) self._model.hybridize(static_alloc=True, static_shape=True) - self._model.set_nms(nms_thresh=0.45, nms_topk=400) + self._model.set_nms(nms_thresh=nms_thresh, nms_topk=nms_topk, post_nms=post_nms) dataset, eval_metric = self.__prepare_val_dataset(dataset, data_shape=self.img_size) @@ -549,7 +559,8 @@ def eval(self, dataset, use_subset=False, subset_size=100, verbose=False): eval_dict = {k.lower(): v for k, v in zip(map_name, mean_ap)} return eval_dict - def infer(self, img, threshold=0.2, keep_size=False): + def infer(self, img, threshold=0.2, keep_size=False, custom_nms: NMSCustom=None, + nms_thresh=0.45, nms_topk=400, post_nms=100): """ Performs inference on a single image and returns the resulting bounding boxes. :param img: image to perform inference on @@ -558,13 +569,26 @@ def infer(self, img, threshold=0.2, keep_size=False): :type threshold: float, optional :param keep_size: if True, the image is not resized to fit the data shape used during training :type keep_size: bool, optional + :param custom_nms: Custom NMS method to be employed on inference + :type perception.object_detection_2d.nms.utils.nms_custom.NMSCustom + :param nms_thresh: Non-maximum suppression threshold. You can specify < 0 or > 1 to disable NMS. + :type nms_thresh: float, default is 0.45 + :param nms_topk: Apply NMS to top k detection results, use -1 to disable so that every Detection result is used in NMS. + :type nms_topk: int, default is 400 + :param post_nms: Only return top post_nms detection results, the rest is discarded. + The number is based on COCO dataset which has maximum 100 objects per image. You can adjust this number if + expecting more objects. You can use -1 to return all detections. 
+ :type post_nms: int, default is 100 :return: list of bounding boxes :rtype: BoundingBoxList """ - assert self._model is not None, "Model has not been loaded, call load(path) first" - self._model.set_nms(nms_thresh=0.45, nms_topk=400) + assert self._model is not None, "Model has not been loaded, call load(path) first" + if custom_nms: + self._model.set_nms(nms_thresh=0.85, nms_topk=5000, post_nms=1000) + else: + self._model.set_nms(nms_thresh=nms_thresh, nms_topk=nms_topk, post_nms=post_nms) if not isinstance(img, Image): img = Image(img) _img = img.convert("channels_last", "rgb") @@ -576,33 +600,43 @@ def infer(self, img, threshold=0.2, keep_size=False): x, img_mx = transform_test(img_mx) else: x, img_mx = presets.ssd.transform_test(img_mx, short=self.img_size) - h_mx, w_mx, _ = img_mx.shape + x = pad_test(x, min_size=self.img_size) x = x.as_in_context(self.ctx) class_IDs, scores, boxes = self._model(x) class_IDs = class_IDs[0, :, 0].asnumpy() scores = scores[0, :, 0].asnumpy() - mask = np.where((class_IDs >= 0) & (scores > threshold))[0] + mask = np.where(class_IDs >= 0)[0] + if custom_nms is None: + mask = np.intersect1d(mask, np.where(scores > threshold)[0]) if mask.size == 0: return BoundingBoxList([]) scores = scores[mask, np.newaxis] class_IDs = class_IDs[mask, np.newaxis] boxes = boxes[0, mask, :].asnumpy() + if x.shape[2] > h_mx: + boxes[:, [1, 3]] -= (x.shape[2] - h_mx) + elif x.shape[3] > w_mx: + boxes[:, [0, 2]] -= (x.shape[3] - w_mx) boxes[:, [0, 2]] /= w_mx boxes[:, [1, 3]] /= h_mx boxes[:, [0, 2]] *= width boxes[:, [1, 3]] *= height - bounding_boxes = BoundingBoxList([]) - for idx, box in enumerate(boxes): - bbox = BoundingBox(left=box[0], top=box[1], - width=box[2] - box[0], - height=box[3] - box[1], - name=class_IDs[idx, :], - score=scores[idx, :]) - bounding_boxes.data.append(bbox) + if custom_nms is not None: + bounding_boxes, _ = custom_nms.run_nms(boxes=boxes, scores=scores, threshold=threshold, img=_img) + else: + bounding_boxes = BoundingBoxList([]) + for idx, box in enumerate(boxes): + bbox = BoundingBox(left=box[0], top=box[1], + width=box[2] - box[0], + height=box[3] - box[1], + name=class_IDs[idx, :], + score=scores[idx, :]) + bounding_boxes.data.append(bbox) + return bounding_boxes @staticmethod diff --git a/tests/sources/tools/perception/object_detection_2d/nms/__init__.py b/tests/sources/tools/perception/object_detection_2d/nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/__init__.py b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py new file mode 100644 index 0000000000..66d06bf3a6 --- /dev/null +++ b/tests/sources/tools/perception/object_detection_2d/nms/seq2seq_nms/test_seq2seq_nms.py @@ -0,0 +1,139 @@ +# Copyright 2020-2021 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +import gc +import shutil +import os +import numpy as np +from opendr.perception.object_detection_2d import Seq2SeqNMSLearner +from opendr.perception.object_detection_2d.nms.utils.nms_dataset import Dataset_NMS +from opendr.engine.data import Image + + +def rmfile(path): + try: + os.remove(path) + except OSError as e: + print("Error: %s - %s." % (e.filename, e.strerror)) + + +def rmdir(_dir): + try: + shutil.rmtree(_dir) + except OSError as e: + print("Error: %s - %s." % (e.filename, e.strerror)) + + +class TestSeq2SeqNMS(unittest.TestCase): + + @classmethod + def setUpClass(cls): + print("\n\n**********************************\nTEST Seq2Seq-NMS Learner\n" + "**********************************") + + cls.temp_dir = os.path.join(".", "tests", "sources", "tools", "perception", "object_detection_2d", + "nms", "seq2seq_nms", "temp") + cls.seq2SeqNMSLearner = Seq2SeqNMSLearner(iou_filtering=None, app_feats='fmod', temp_path=cls.temp_dir, + device='cpu', checkpoint_after_iter=1, epochs=1) + + # Download all required files for testing + cls.seq2SeqNMSLearner.download(model_name='seq2seq_pets_jpd_fmod', path=cls.temp_dir) + + @classmethod + def tearDownClass(cls): + print('Removing temporary directories for Seq2Seq-NMS...') + # Clean up downloaded files + rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "test_module.pkl")) + rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "val2014", "COCO_val2014_000000262148.jpg")) + rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "FMoD", "coco_edgemap_b_3.pkl")) + rmfile(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "annotations", "test_module_anns.json")) + rmdir(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "val2014")) + rmdir(os.path.join(cls.temp_dir, "datasets", "TEST_MODULE", "FMoD")) + rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "fmod_normalization.pkl")) + rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "last_weights.json")) + rmfile(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod", "last_weights.pth")) + rmdir(os.path.join(cls.temp_dir, "seq2seq_pets_jpd_fmod")) + + rmdir(os.path.join(cls.temp_dir)) + + del cls.seq2SeqNMSLearner + gc.collect() + print('Finished cleaning for Seq2Seq-NMS...') + + def test_fit(self): + print('Starting training test for Seq2Seq-NMS...') + + m = list(self.seq2SeqNMSLearner.model.parameters())[0].clone() + self.seq2SeqNMSLearner.fit(dataset='TEST_MODULE', use_ssd=False, + datasets_folder=self.temp_dir + '/datasets', + logging_path=None, silent=False, verbose=True, nms_gt_iou=0.50, + max_dt_boxes=200) + n = list(self.seq2SeqNMSLearner.model.parameters())[0].clone() + self.assertFalse(np.array_equal(m, n), + msg="Model parameters did not change after running fit.") + del m, n + gc.collect() + print('Finished training test for Seq2Seq-NMS...') + + def test_eval(self): + print('Starting evaluation test for Seq2Seq-NMS...') + self.seq2SeqNMSLearner.load(self.temp_dir + '/seq2seq_pets_jpd_fmod/', verbose=True) + results_dict = self.seq2SeqNMSLearner.eval(dataset='TEST_MODULE', split='test', max_dt_boxes=800, + datasets_folder=self.temp_dir + '/datasets', + use_ssd=False) + if results_dict is None: + self.assertIsNotNone(results_dict, + msg="Eval results dictionary not returned.") + else: + self.assertGreater(results_dict[0][0][1][0], 0.4) + del results_dict + gc.collect() + print('Finished evaluation test for Seq2Seq-NMS...') + + 
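# The inference test below feeds pre-computed detections (boxes and confidence scores) from
# the first sample of the TEST_MODULE split of Dataset_NMS, together with the corresponding
# image, through run_nms() of the pretrained seq2seq_pets_jpd_fmod model and checks that a
# BoundingBoxList is returned.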
def test_infer(self): + print('Starting inference test for Seq2Seq-NMS...') + self.seq2SeqNMSLearner.load(self.temp_dir + '/seq2seq_pets_jpd_fmod/', verbose=True) + dataset_nms = Dataset_NMS(path=self.temp_dir + '/datasets', dataset_name='TEST_MODULE', split='train', use_ssd=False) + image_fln = dataset_nms.src_data[0]['filename'] + img = Image.open(os.path.join(self.temp_dir, 'datasets', 'TEST_MODULE', image_fln)) + boxes = dataset_nms.src_data[0]['dt_boxes'][1][:, 0:4] + scores = np.expand_dims(dataset_nms.src_data[0]['dt_boxes'][1][:, 4], axis=-1) + + bounding_box_list = self.seq2SeqNMSLearner.run_nms(boxes=boxes, scores=scores, img=img, threshold=0.5) + + self.assertIsNotNone(bounding_box_list, + msg="Returned empty BoundingBoxList.") + del img + del bounding_box_list + del boxes + del scores + del dataset_nms + gc.collect() + print('Finished inference test for Seq2Seq-NMS...') + + def test_save_load(self): + print('Starting save/load test for Seq2Seq-NMS...') + self.seq2SeqNMSLearner.save(os.path.join(self.temp_dir, "test_model", "last_weights"), current_epoch=0) + self.seq2SeqNMSLearner.model = None + self.seq2SeqNMSLearner.init_model() + self.seq2SeqNMSLearner.load(os.path.join(self.temp_dir, "test_model")) + self.assertIsNotNone(self.seq2SeqNMSLearner.model, "model is None after loading model.") + # Cleanup + rmdir(os.path.join(self.temp_dir, "test_model")) + print('Finished save/load test for Seq2Seq-NMS...') + + +if __name__ == "__main__": + unittest.main()
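For reference, the `custom_nms` hook added to `SingleShotDetectorLearner.infer()` above can be exercised with any of the new `NMSCustom` implementations. The following is a minimal, illustrative sketch and not part of this patch: the image path, device and thresholds are assumptions, while the class names, import paths and call signatures follow the code introduced in this diff.

```python
from opendr.engine.data import Image
from opendr.perception.object_detection_2d.ssd.ssd_learner import SingleShotDetectorLearner
from opendr.perception.object_detection_2d.nms.soft_nms.soft_nms import SoftNMS

# Pretrained person detector; download() and load() are used as in nms_dataset.py.
ssd = SingleShotDetectorLearner(device='cpu')
ssd.download(".", mode="pretrained")
ssd.load("./ssd_default_person", verbose=True)

# Soft-NMS with Gaussian score decay, applied instead of the detector's built-in NMS.
soft_nms = SoftNMS(nms_type='gaussian', device='cpu', top_k=400, post_k=100)

# "input.jpg" is a placeholder path; infer() relaxes the internal NMS and delegates the
# final suppression to soft_nms.run_nms(boxes, scores, threshold, img).
img = Image.open("input.jpg")
boxes = ssd.infer(img, threshold=0.25, custom_nms=soft_nms)
print(len(boxes.data), "detections kept after Soft-NMS")
```

The Seq2Seq-NMS learner exposes a `run_nms()` method with the same `boxes`/`scores`/`threshold`/`img` keywords, so a loaded `Seq2SeqNMSLearner` could presumably be passed as `custom_nms` in the same way.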