opendr-eu · passalis · Nov 30, 2023 · Nov 15, 2023 · Nov 15, 2023 · Nov 15, 2023
@@ -16,11 +16,12 @@ The *NanodetModelT* structure keeps all the necessary information that are requi
 
 ### Function *loadNanodetModel()*
 ```C
-void loadNanodetModel(char *modelPath, char *modelName, char *device, float scoreThreshold, int height, int width, NanodetModelT *model);
+void loadNanodetModel(char *modelPath, char *modelName, char *device, float scoreThreshold, int height, int width, int keepRatio, NanodetModelT *model);
 ```
 Loads a Nanodet object detection model of type (*modelName*) saved in the local filesystem (*modelPath*) in OpenDR format.
 This function also initializes a (*device*) JIT network for performing inference using this model.
 If *width* or *height* is equal to zero, the model will resize the images to the size that the model was trained on.
+If *keepRatio* is not equal to zero, the input image will keep its original aspect ratio during preprocessing.
 The pre-trained models should follow the OpenDR conventions.
 The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API.
 

@@ -160,6 +160,39 @@ Parameters:
 - **lazy_load**: *bool, default=True*\
   Enables loading optimized model from predetermined path without exporting it each time.
 
+#### `NanodetLearner.optimize_c_model`
+```python
+NanodetLearner.optimize_c_model(self, export_path, conf_threshold, iou_threshold, nms_max_num, hf, dynamic, verbose)
+```
+
+This method exports a JIT-optimized model, together with its metadata, in a format compatible with the C API.
+If a model is already present in the *export_path*, it will be replaced.
+Inside this folder, the model is saved as *nanodet_{model_name}.pth*
+and its metadata as *nanodet_{model_name}.json*.
+
+Parameters:
+
+- **export_path**: *str*\
+  Specifies the path to save the optimized model.
+- **conf_threshold**: *float*\
+  Specifies the threshold for object detection inference.
+  An object is detected if the confidence of the output is higher than the specified threshold.
+  The value must be between 0.0 and 1.0; tune it to achieve the best results.
+- **iou_threshold**: *float*\
+  Specifies the IOU threshold for NMS in inference.
+  The value must be between 0.0 and 1.0; tune it to achieve the best results.
+- **nms_max_num**: *int*\
+  Determines the maximum number of bounding boxes that will be retained after NMS.
+  The value must be greater than 0.
+  Adjust the value based on the specific needs of your application.
+  A larger value makes the model run slower.
+- **hf**: *bool, default=False*\
+  Determines the model's floating-point precision. If set to True, the model is converted to half precision.
+- **dynamic**: *bool, default=False*\
+  Determines if the optimized model runs with dynamic input. Dynamic input leads to slower inference times.
+- **verbose**: *bool, default=True*\
+  Enables the maximum verbosity.
+
 #### `NanodetLearner.save`
 ```python
 NanodetLearner.save(self, path, verbose)

@@ -52,10 +52,11 @@ typedef struct NanodetModel NanodetModelT;
  * @param scoreThreshold confidence threshold
  * @param height the height of model input, if set to zero the trained height will be used instead
  * @param width the width of model input, if set to zero the trained width will be used instead
+ * @param keepRatio flag to determine if the original aspect ratio of the image will be preserved during preprocessing
  * @param model the model to be loaded
  */
 void loadNanodetModel(const char *modelPath, const char *modelName, const char *device, float scoreThreshold, int height,
-                      int width, NanodetModelT *model);
+                      int width, int keepRatio, NanodetModelT *model);
 
 /**
  * This function performs inference using a nanodet object detection model and an input image.

@@ -24,7 +24,7 @@ int main(int argc, char **argv) {
   NanodetModelT model;
 
   printf("start init model\n");
-  loadNanodetModel("./data/object_detection_2d/nanodet/optimized_model", "m", "cuda", 0.35, 0, 0, &model);
+  loadNanodetModel("./data/object_detection_2d/nanodet/optimized_model", "m", "cuda", 0.35, 0, 0, 0, &model);
   printf("success\n");
 
   OpenDRImageT image;

@@ -24,4 +24,7 @@ provided by OpenDR. Specifically the following examples are provided:
     Example usage:
    `python3 train_demo.py --model m --dataset coco --data-root /path/to/coco_dataset`
 
-5. inference_tutorial.ipynb: A simple tutorial in jupyter for using the Nanodet tool for inference.
+5. export_c_compatible_network.py: A simple example to export any model to be used with the C API of OpenDR.
+   Note that the exported model is not the same as the JIT-optimized model used for inference in the Python API, although it performs the same.
+
+6. inference_tutorial.ipynb: A simple tutorial in jupyter for using the Nanodet tool for inference.
@@ -0,0 +1,34 @@
+# Copyright 2020-2023 OpenDR European Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+from opendr.perception.object_detection_2d import NanodetLearner
+
+
+if __name__ == '__main__':
+    # Command-line options: target device, model config name, and whether to export with dynamic input shape.
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"])
+    parser.add_argument("--model", help="Model for which a config file will be used", type=str, default="m")
+    parser.add_argument("--dynamic", help="Determines if the model runs with dynamic shape input or not",
+                        action="store_true")
+
+    args = parser.parse_args()
+
+    # Build the learner for the chosen model/device, download into ./predefined_examples (mode="pretrained"),
+    # and load the model from the matching nanodet_<model> folder.
+    nanodet = NanodetLearner(model_to_use=args.model, device=args.device)
+    nanodet.download("./predefined_examples", mode="pretrained")
+    nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True)
+
+    # Export via optimize_c_model with fixed settings: confidence threshold 0.35, IoU threshold 0.6,
+    # nms_max_num 100; only the dynamic flag comes from the command line.
+    nanodet.optimize_c_model("./c_compatible_jit/nanodet_{}".format(args.model), conf_threshold=0.35,
+                             iou_threshold=0.6, nms_max_num=100, dynamic=args.dynamic, verbose=True)
+    print("C compatible network was exported in directory ./c_compatible_jit/nanodet_{}".format(args.model))
@@ -16,6 +16,7 @@
 
 #include <document.h>
 #include <torch/script.h>
+#include <torch/torch.h>
 #include <torchvision/vision.h>
 #include <iostream>
 #include <opencv2/highgui/highgui.hpp>
@@ -71,6 +72,12 @@ torch::Tensor NanoDet::preProcess(cv::Mat *image) {
   tensorImage = tensorImage.add(this->mMeanTensor);
   tensorImage = tensorImage.mul(this->mStdTensor);
 
+  // divisible padding
+  int pad_width = (int((image->cols + 32 - 1) / 32) * 32) - image->cols;
+  int pad_height = (int((image->rows + 32 - 1) / 32) * 32) - image->rows;
+  torch::nn::functional::PadFuncOptions padding({0, pad_width, 0, pad_height});  // left, right, top, bottom,
+  tensorImage = torch::nn::functional::pad(tensorImage, padding);
+  tensorImage.unsqueeze_(0);
   return tensorImage;
 }
 
@@ -267,11 +274,11 @@ torch::DeviceType torchDevice(const char *deviceName, int verbose = 0) {
 }
 
 void loadNanodetModel(const char *modelPath, const char *modelName, const char *device, float scoreThreshold, int height,
-                      int width, NanodetModelT *model) {
+                      int width, int keepRatio, NanodetModelT *model) {
   // Initialize model
   model->network = NULL;
   model->scoreThreshold = scoreThreshold;
-  model->keepRatio = 0;
+  model->keepRatio = keepRatio;
 
   // Parse the model JSON file
   std::string basePath(modelPath);
@@ -338,14 +345,14 @@ void loadNanodetModel(const char *modelPath, const char *modelName, const char *
 }
 
 void ffNanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize,
-               std::vector<torch::Tensor> *outputs) {
+               torch::Tensor *outputs) {
   // Make all the inputs as tensors to use in jit model
   torch::Tensor srcHeight = torch::tensor(originalSize->height);
   torch::Tensor srcWidth = torch::tensor(originalSize->width);
   torch::Tensor warpMat = torch::from_blob(warpMatrix->data, {3, 3});
 
   // Model inference
-  *outputs = (model->network()).forward({*inputTensor, srcHeight, srcWidth, warpMat}).toTensorVector();
+  *outputs = (model->network()).forward({*inputTensor, srcHeight, srcWidth, warpMat}).toTensor();
 }
 
 OpenDRDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpenDRImageT *image) {
@@ -369,23 +376,24 @@ OpenDRDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpenDRImageT *im
   torch::Tensor input = networkPTR->preProcess(&resizedImg);
   cv::Size originalSize(opencvImage->cols, opencvImage->rows);
 
-  std::vector<torch::Tensor> outputs;
+  torch::Tensor outputs;
 
   ffNanodet(networkPTR, &input, &warpMatrix, &originalSize, &outputs);
 
   std::vector<OpenDRDetectionTarget> detections;
 
-  for (int label = 0; label < outputs.size(); label++) {
-    for (int box = 0; box < outputs[label].size(0); box++) {
-      OpenDRDetectionTargetT detection;
-      detection.name = outputs[label][box][5].item<int>();
-      detection.left = outputs[label][box][0].item<float>();
-      detection.top = outputs[label][box][1].item<float>();
-      detection.width = outputs[label][box][2].item<float>() - outputs[label][box][0].item<float>();
-      detection.height = outputs[label][box][3].item<float>() - outputs[label][box][1].item<float>();
-      detection.score = outputs[label][box][4].item<float>();
-      detections.push_back(detection);
-    }
+  if (outputs.numel() == 0)
+    return detectionsVector;
+
+  for (int box = 0; box < outputs.size(0); box++) {
+    OpenDRDetectionTargetT detection;
+    detection.name = outputs[box][5].item<int>();
+    detection.left = outputs[box][0].item<float>();
+    detection.top = outputs[box][1].item<float>();
+    detection.width = outputs[box][2].item<float>() - outputs[box][0].item<float>();
+    detection.height = outputs[box][3].item<float>() - outputs[box][1].item<float>();
+    detection.score = outputs[box][4].item<float>();
+    detections.push_back(detection);
   }
   // Put vector detection as C pointer and size
   if (static_cast<int>(detections.size()) > 0)

@@ -194,7 +194,7 @@ def scriptable_warp_boxes(boxes, M, width, height):
     n = boxes.shape[0]
     if n:
         # warp points
-        xy = torch.ones((n * 4, 3), dtype=torch.float32, device=boxes.device)
+        xy = torch.ones((n * 4, 3), dtype=torch.float32, device=M.device)
         xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
             n * 4, 2
         )  # x1y1, x2y2, x1y2, x2y1

@@ -22,6 +22,23 @@
 from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.arch import build_model
 
 
+# Module that bundles the JIT-compiled network with the head's post-processing so that a single
+# forward(input, height, width, warp_matrix) call returns the post-processed result.
+# Predictor.c_script builds one of these from a deep copy of its model and passes it to torch.jit.script.
+class ScriptedPredictor(nn.Module):
+    # model: network exposing `inference` and `head.post_process`; its `forward` attribute is overwritten here.
+    # dummy_input: indexable; only dummy_input[0] is used, as the example input for tracing.
+    # conf_thresh / iou_thresh / nms_max_num: stored and forwarded unchanged to head.post_process.
+    # dynamic: when true the model is compiled with torch.jit.script, otherwise with torch.jit.trace.
+    def __init__(self, model, dummy_input, conf_thresh=0.35, iou_thresh=0.6, nms_max_num=100, dynamic=False):
+        super(ScriptedPredictor, self).__init__()
+        # Point forward at the inference method before the model is scripted/traced below.
+        model.forward = model.inference
+        self.model = model
+        self.conf_thresh = conf_thresh
+        self.iou_thresh = iou_thresh
+        self.nms_max_num = nms_max_num
+        self.jit_model = torch.jit.script(self.model) if dynamic else torch.jit.trace(self.model, dummy_input[0])
+
+    # Runs the compiled model on `input`, then hands the predictions plus a meta dict
+    # (height, width, warp_matrix, img) to the head's post_process and returns its result.
+    def forward(self, input, height, width, warp_matrix):
+        preds = self.jit_model(input)
+        meta = dict(height=height, width=width, warp_matrix=warp_matrix, img=input)
+        return self.model.head.post_process(preds, meta, conf_thresh=self.conf_thresh, iou_thresh=self.iou_thresh,
+                                            nms_max_num=self.nms_max_num)
+
+
 class Predictor(nn.Module):
     def __init__(self, cfg, model, device="cuda", conf_thresh=0.35, iou_thresh=0.6, nms_max_num=100,
                  hf=False, dynamic=False, ch_l=False):
@@ -33,8 +50,7 @@ def __init__(self, cfg, model, device="cuda", conf_thresh=0.35, iou_thresh=0.6,
         self.nms_max_num = nms_max_num
         self.hf = hf
         self.ch_l = ch_l
-        self.dynamic = dynamic
-        self.traced_model = None
+        self.dynamic = dynamic and self.cfg.data.val.keep_ratio
         if self.cfg.model.arch.backbone.name == "RepVGG":
             deploy_config = self.cfg.model
             deploy_config.arch.backbone.update({"deploy": True})
@@ -51,18 +67,23 @@ def __init__(self, cfg, model, device="cuda", conf_thresh=0.35, iou_thresh=0.6,
         if self.hf:
             model = model.half()
         model.set_dynamic(self.dynamic)
+        model.set_inference_mode(True)
 
         self.model = model.to(device).eval()
 
         self.pipeline = Pipeline(self.cfg.data.val.pipeline, self.cfg.data.val.keep_ratio)
 
     def trace_model(self, dummy_input):
-        self.traced_model = torch.jit.trace(self, dummy_input[0])
-        return self.traced_model
+        return torch.jit.trace(self, dummy_input[0])
 
     def script_model(self):
-        self.traced_model = torch.jit.script(self)
-        return self.traced_model
+        return torch.jit.script(self)
+
+    def c_script(self, dummy_input):
+        import copy
+        jit_ready_predictor = ScriptedPredictor(copy.deepcopy(self.model), dummy_input, self.conf_thresh,
+                                                self.iou_thresh, self.nms_max_num, dynamic=self.dynamic)
+        return torch.jit.script(jit_ready_predictor)
 
     def forward(self, img):
         return self.model.inference(img)
@@ -90,8 +111,7 @@ def preprocessing(self, img):
         return _input, _height, _width, _warp_matrix
 
     def postprocessing(self, preds, input, height, width, warp_matrix):
-        img_info = dict(height=height, width=width, id=torch.zeros(1))
-        meta = dict(img_info=img_info, warp_matrix=warp_matrix, img=input)
+        meta = dict(height=height, width=width, warp_matrix=warp_matrix, img=input)
         res = self.model.head.post_process(preds, meta, conf_thresh=self.conf_thresh, iou_thresh=self.iou_thresh,
                                            nms_max_num=self.nms_max_num)
         return res
@@ -66,6 +66,13 @@ def set_dynamic(self, dynamic=False):
         if hasattr(self, "aux_head"):
             self.aux_head.dynamic = dynamic
 
+    def set_inference_mode(self, inference_mode=False):
+        # Store the flag as `inference_mode` on the backbone (always) and on the fpn and head
+        # sub-modules when this model has them.
+        self.backbone.inference_mode = inference_mode
+        if hasattr(self, "fpn"):
+            self.fpn.inference_mode = inference_mode
+        if hasattr(self, "head"):
+            self.head.inference_mode = inference_mode
+
     def forward_train(self, gt_meta):
         preds = self(gt_meta["img"])
         loss, loss_states = self.head.loss(preds, gt_meta)

@@ -42,15 +42,15 @@ def __init__(
             norm_cfg=norm_cfg,
             activation=activation,
         )
-        if res_type == "add":
-            self.out_conv = ConvModule(
-                in_channels // 2,
-                in_channels,
-                kernel_size,
-                padding=(kernel_size - 1) // 2,
-                norm_cfg=norm_cfg,
-                activation=activation,
-            )
+
+        self.out_conv = ConvModule(
+            in_channels // 2,
+            in_channels,
+            kernel_size,
+            padding=(kernel_size - 1) // 2,
+            norm_cfg=norm_cfg,
+            activation=activation,
+        ) if res_type == "add" else nn.Identity()
 
     def forward(self, x):
         x = self.in_conv(x)