From 02a2513685540890aa31b35f5289858c72b93ffe Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 22 Nov 2022 18:46:03 +0200 Subject: [PATCH 01/87] Onnx and Jit tracing-scripting implementation with python and c api inference. Docs and tests have changed acordingly --- docs/reference/nanodet.md | 163 ++++++--- include/nanodet_c.h | 82 +++++ include/opendr_utils.h | 15 + include/target.h | 22 ++ projects/c_api/Makefile | 17 +- projects/c_api/README.md | 1 + projects/c_api/samples/nanodet/README.md | 14 + .../c_api/samples/nanodet/nanodet_jit_demo.c | 51 +++ .../object_detection_2d/nanodet/README.md | 14 +- .../object_detection_2d/nanodet/eval_demo.py | 5 +- .../nanodet/export_onnx.py | 38 ++ .../nanodet/export_torchscript.py | 39 ++ .../object_detection_2d/nanodet/train_demo.py | 4 +- src/c_api/Makefile | 8 +- src/c_api/README.md | 2 +- src/c_api/nanodet_libtorch.cpp | 342 ++++++++++++++++++ src/c_api/opendr_utils.cpp | 12 + .../object_detection_2d/nanodet/README.md | 2 +- .../Transformer/nanodet_t.yml | 5 +- .../algorithm/nanodet/data/batch_process.py | 31 +- .../nanodet/data/transform/pipeline.py | 20 +- .../algorithm/nanodet/data/transform/warp.py | 85 ++--- .../nanodet/evaluator/coco_detection.py | 8 +- .../algorithm/nanodet/inferencer/utilities.py | 63 ++-- .../nanodet/model/arch/nanodet_plus.py | 4 +- .../nanodet/model/arch/one_stage_detector.py | 10 +- .../nanodet/model/backbone/custom_csp.py | 5 +- .../model/backbone/efficientnet_lite.py | 6 +- .../nanodet/model/backbone/ghostnet.py | 14 +- .../nanodet/model/backbone/mobilenetv2.py | 13 +- .../nanodet/model/backbone/repvgg.py | 3 +- .../nanodet/model/backbone/resnet.py | 6 +- .../nanodet/model/backbone/shufflenetv2.py | 9 +- .../algorithm/nanodet/model/fpn/fpn.py | 20 +- .../algorithm/nanodet/model/fpn/ghost_pan.py | 14 +- .../algorithm/nanodet/model/fpn/pan.py | 17 +- .../algorithm/nanodet/model/fpn/tan.py | 7 +- .../algorithm/nanodet/model/head/gfl_head.py | 174 ++++++--- 
.../nanodet/model/head/nanodet_head.py | 44 +-- .../nanodet/model/head/nanodet_plus_head.py | 133 ++++--- .../nanodet/model/head/simple_conv_head.py | 12 +- .../nanodet/model/loss/gfocal_loss.py | 2 + .../algorithm/nanodet/model/loss/iou_loss.py | 1 + .../algorithm/nanodet/model/module/conv.py | 26 +- .../algorithm/nanodet/model/module/nms.py | 48 ++- .../nanodet/model/module/transformer.py | 2 + .../nanodet/algorithm/nanodet/trainer/task.py | 24 +- .../algorithm/nanodet/util/box_transform.py | 5 +- .../algorithm/nanodet/util/check_point.py | 7 +- .../nanodet/algorithm/nanodet/util/logger.py | 13 +- .../nanodet/algorithm/nanodet/util/path.py | 4 +- .../nanodet/dependencies.ini | 5 +- .../nanodet/nanodet_learner.py | 259 ++++++++++--- tests/Makefile | 11 + tests/sources/c_api/test_nanodet.c | 86 +++++ .../nanodet/test_nanodet.py | 39 +- 56 files changed, 1601 insertions(+), 465 deletions(-) create mode 100644 include/nanodet_c.h create mode 100644 projects/c_api/samples/nanodet/README.md create mode 100644 projects/c_api/samples/nanodet/nanodet_jit_demo.c create mode 100644 projects/python/perception/object_detection_2d/nanodet/export_onnx.py create mode 100644 projects/python/perception/object_detection_2d/nanodet/export_torchscript.py create mode 100644 src/c_api/nanodet_libtorch.cpp create mode 100644 tests/sources/c_api/test_nanodet.c diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 765f210673..36d1319bf7 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -22,7 +22,7 @@ Constructor parameters: - **model_to_use**: *{"EfficientNet_Lite0_320", "EfficientNet_Lite1_416", "EfficientNet_Lite2_512", "RepVGG_A0_416", "t", "g", "m", "m_416", "m_0.5x", "m_1.5x", "m_1.5x_416", "plus_m_320", "plus_m_1.5x_320", "plus_m_416", - "plus_m_1.5x_416", "custom"}, default=plus_m_1.5x_416*\ + "plus_m_1.5x_416", "custom"}, default=m*\ Specifies the model to use and the config file that contains all hyperparameters for training, evaluation 
and inference as the original [Nanodet implementation](https://github.com/RangiLyu/nanodet). If you want to overwrite some of the parameters you can put them as parameters in the learner. @@ -52,7 +52,7 @@ Constructor parameters: #### `NanodetLearner.fit` ```python -NanodetLearner.fit(self, dataset, val_dataset, logging_path, verbose, seed) +NanodetLearner.fit(self, dataset, val_dataset, logging_path, verbose, seed, local_rank) ``` This method is used for training the algorithm on a train dataset and validating on a val dataset. @@ -71,10 +71,12 @@ Parameters: Enables the maximum verbosity and the logger. - **seed** : *int, default=123*\ Seed for repeatability. +- **local_rank** : *int, default=1*\ + Needed when training on multiple machines. #### `NanodetLearner.eval` ```python -NanodetLearner.eval(self, dataset, verbose) +NanodetLearner.eval(self, dataset, verbose, local_rank) ``` This method is used to evaluate a trained model on an evaluation dataset. @@ -86,6 +88,8 @@ Parameters: Object that holds the evaluation dataset. - **verbose**: *bool, default=True*\ Enables the maximum verbosity and logger. +- **local_rank** : *int, default=1*\ + Needed when evaluating on multiple machines. #### `NanodetLearner.infer` ```python @@ -105,6 +109,33 @@ Parameters: - **verbose**: *bool, default=True*\ Enables the maximum verbosity and logger. +#### `NanodetLearner.optimize` +```python +NanodetLearner.optimize(self, export_path, initial_img=None, verbose=True, optimization="jit") +``` + +This method is used to perform Jit or ONNX optimization and save a trained model with its metadata. +If a model is already saved in export_path, that model will be loaded instead. Provided with the "export_path", it creates +the "export_path" directory; if the directory already exists, it tries to load the optimized model from that path.
+Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* for Jit models or *"nanodet_{model_name}.onnx"* for ONNX +and a metadata file *"nanodet_{model_name}.json"*. + +Note: ONNX optimization optimizes and saves only the actual model inference. This is important if the user wants to use +the model with the C API: the preprocessing and postprocessing must be re-implemented to work exactly as in our Python +implementation in order to get the exact same results. +For the C API, the Jit optimization is recommended; see the example provided in our [c_api](../../projects/c_api/samples/nanodet/nanodet_jit_demo.c) + +Parameters: + +- **export_path**: *str*\ + Path to save or load the optimized model. +- **initial_img**: *Image*\ + A dummy OpenDR Image input; it is needed the first time optimize is called. +- **verbose**: *bool, default=True*\ + Enables the maximum verbosity and logger. +- **optimization**: *str, default="jit"*\ + It can be "jit" or "onnx". It determines what kind of optimization is used. + #### `NanodetLearner.save` ```python NanodetLearner.save(self, path, verbose) @@ -114,6 +145,7 @@ This method is used to save a trained model with its metadata. Provided with the path, it creates the "path" directory, if it does not already exist. Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* and a metadata file *"nanodet_{model_name}.json"*. If the directory already exists, the *"nanodet_{model_name}.pth"* and *"nanodet_{model_name}.json"* files are overwritten. +If optimization is performed the optimized model is saved instead. Parameters: @@ -129,6 +161,7 @@ NanodetLearner.load(self, path, verbose) This method is used to load a previously saved model from its saved folder. Loads the model from inside the directory of the path provided, using the metadata .json file included. +If optimization is performed the optimized model is loaded instead.
Parameters: @@ -171,7 +204,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det * **Training example using an `ExternalDataset`.** To train properly, the architecture weights must be downloaded in a predefined directory before fit is called, in this case the directory name is "predefined_examples". - Default architecture is *'plus-m-1.5x_416'*. + Default architecture is *'m'*. The training and evaluation dataset root should be present in the path provided, along with the annotation files. The default COCO 2017 training data can be found [here](https://cocodataset.org/#download) (train, val, annotations). All training parameters (optimizer, lr schedule, losses, model parameters etc.) can be changed in the model config file @@ -232,58 +265,98 @@ Furthermore, demos on performing [training](../../projects/perception/object_det if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--dataset", help="Dataset to train on", type=str, default="coco", choices=["voc", "coco"]) - parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str) - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--batch-size", help="Batch size to use for training", type=int, default=6) - parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=5e-4) - parser.add_argument("--checkpoint-freq", help="Frequency in-between checkpoint saving and evaluations", type=int, default=50) - parser.add_argument("--n-epochs", help="Number of total epochs", type=int, default=300) - parser.add_argument("--resume-from", help="Epoch to load checkpoint file and resume training from", type=int, default=0) - - args = parser.parse_args() - - if args.dataset == 'voc': - dataset = ExternalDataset(args.data_root, 'voc') - val_dataset = 
ExternalDataset(args.data_root, 'voc') - elif args.dataset == 'coco': - dataset = ExternalDataset(args.data_root, 'coco') - val_dataset = ExternalDataset(args.data_root, 'coco') - - nanodet = NanodetLearner(model_to_use=args.model, iters=args.n_epochs, lr=args.lr, batch_size=args.batch_size, - checkpoint_after_iter=args.checkpoint_freq, checkpoint_load_iter=args.resume_from, - device=args.device) - - nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet-{}/nanodet-{}.ckpt".format(args.model, args.model), verbose=True) - nanodet.fit(dataset, val_dataset) - nanodet.save() + parser = argparse.ArgumentParser() + parser.add_argument("--dataset", help="Dataset to train on", type=str, default="coco", choices=["voc", "coco"]) + parser.add_argument("--data-root", help="Dataset root folder", type=str) + parser.add_argument("--model", help="Model that config file will be used", type=str, default="m") + parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) + parser.add_argument("--batch-size", help="Batch size to use for training", type=int, default=6) + parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=5e-4) + parser.add_argument("--checkpoint-freq", help="Frequency in-between checkpoint saving and evaluations", + type=int, default=50) + parser.add_argument("--n-epochs", help="Number of total epochs", type=int, default=300) + parser.add_argument("--resume-from", help="Epoch to load checkpoint file and resume training from", + type=int, default=0) + + args = parser.parse_args() + + if args.dataset == 'voc': + dataset = ExternalDataset(args.data_root, 'voc') + val_dataset = ExternalDataset(args.data_root, 'voc') + elif args.dataset == 'coco': + dataset = ExternalDataset(args.data_root, 'coco') + val_dataset = ExternalDataset(args.data_root, 'coco') + + nanodet = NanodetLearner(model_to_use=args.model, iters=args.n_epochs, 
lr=args.lr, batch_size=args.batch_size, + checkpoint_after_iter=args.checkpoint_freq, checkpoint_load_iter=args.resume_from, + device=args.device) + + nanodet.download("./predefined_examples", mode="pretrained") + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) + nanodet.fit(dataset, val_dataset) + nanodet.save() + ``` * **Inference and result drawing example on a test image.** This example shows how to perform inference on an image and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. - Moreover, inference can be used in all images in a folder, frames of a video or a webcam feedback with the provided *mode*. In this example first is downloaded a pre-trained model as in training example and then an image to be inference. - With the same *path* parameter you can choose a folder or a video file to be used as inference. Last but not least, if 'webcam' is - used in *mode* the *camid* parameter of inference must be used to determine the webcam device in your machine. + With the *path* parameter you can choose an image file to be used as inference. 
```python import argparse from opendr.perception.object_detection_2d import NanodetLearner - + from opendr.engine.data import Image + from opendr.perception.object_detection_2d import draw_bounding_boxes + if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--model", help="Model that config file will be used", type=str) - args = parser.parse_args() + parser = argparse.ArgumentParser() + parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) + parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') + args = parser.parse_args() + + nanodet = NanodetLearner(model_to_use=args.model, device=args.device) + nanodet.download("./predefined_examples", mode="pretrained") + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) + nanodet.download("./predefined_examples", mode="images") + img = Image.open("./predefined_examples/000000000036.jpg") + boxes = nanodet.infer(input=img) + + draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) + ``` - nanodet = NanodetLearner(model_to_use=args.model, device=args.device) +* **Optimization framework with Inference and result drawing example on a test image.** + + This example shows how to perform optimization on a pretrained model and then inference and draw the resulting + bounding boxes using a nanodet model that is pretrained on the COCO dataset. In this example first is loaded a + pretrained model and then an opendr Image is used to perform optimization, in this exampel we use onnx optimization but + with `--optimization` can be used one of `[jit, onnx]`. + With the *path* parameter you can choose an image file to be used as dummy input in optimization and after in inference. 
+ The optimized model will be saves in `./optimization_models` folder + ```python + import argparse + from opendr.perception.object_detection_2d import NanodetLearner + from opendr.engine.data import Image + from opendr.perception.object_detection_2d import draw_bounding_boxes - nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet-{}/nanodet-{}.ckpt".format(args.model, args.model), verbose=True) - nanodet.download("./predefined_examples", mode="images") - boxes = nanodet.infer(path="./predefined_examples/000000000036.jpg") + + if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) + parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') + parser.add_argument("--optimization", help="Optimization framework that will be used", type=str, default='onnx') + parser.add_argument("--path", help="Path to the dummy image that will be used for optimization and inference", type=str) + args = parser.parse_args() + + nanodet = NanodetLearner(model_to_use=args.model, device=args.device) + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) + + # First read an openDR image from your dataset and run the optimizer: + img = Image.open(args.path) + nanodet.optimize("./optimization_models", img, optimization=args.optimization) + + boxes = nanodet.infer(input=img) + + draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) ``` \ No newline at end of file diff --git a/include/nanodet_c.h b/include/nanodet_c.h new file mode 100644 index 0000000000..614112901f --- /dev/null +++ b/include/nanodet_c.h @@ -0,0 +1,82 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef C_API_NANODET_H +#define C_API_NANODET_H + +#include "opendr_utils.h" +#include "target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct nanodet_model { + // Jit cpp class holder + void *net; + + // Device to be used + char *device; + + // Recognition threshold + float scoreThreshold; + + // Model input size + int inputSize[2]; + + // Keep ratio flag + int keep_ratio; +}; +typedef struct nanodet_model nanodet_model_t; + +/** + * Loads a nanodet object detection model saved in libtorch format + * @param model_path path to the libtorch nanodet model (as exported using OpenDR library) + * @param device the device that will be used for the inference + * @param height the height of model input + * @param width the width of model input + * @param scoreThreshold a threshold for score to be inferred + * @param model the loaded model + */ +void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); + +/** + * This function performs inference using a nanodet object detection model and an input image + * @param model nanodet model to be used for inference + * @param image OpenDR image + * @return vector of OpenDR bounding box targets containing the bounding boxes of the detected objects + */ +opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model); + +/** + * Releases the memory allocated for a nanodet object detection model + * @param model model to be de-allocated + */ +void free_nanodet_model(nanodet_model_t *model); + +/** + * draw
the bounding boxes from detections in given image + * @param opendr_image image that has been used for inference and wanted to be printed + * @param model nanodet model that has been used for inference + * @param detections output of the inference + */ +void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_detection_target_list_t *detections); + +#ifdef __cplusplus +} +#endif + +#endif // C_API_NANODET_H diff --git a/include/opendr_utils.h b/include/opendr_utils.h index 309c44a211..961b4a6cb4 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -18,6 +18,7 @@ #define C_API_OPENDR_UTILS_H #include "data.h" +#include "target.h" #ifdef __cplusplus extern "C" { @@ -36,6 +37,20 @@ void load_image(const char *path, opendr_image_t *image); */ void free_image(opendr_image_t *image); +/** + * Loads an OpenDR detection target list to be used in C api + * @param detections OpenDR detection_target_list structure to be loaded + * @param vectorDataPtr the pointer of the first OpenDR detection target in a vector + * @param vectorSize the size of the vector + */ +void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize); + +/** + * Releases the memory allocated for a detection list structure + * @param detections OpenDR detection_target_list structure to release + */ +void free_detections(opendr_detection_target_list_t *detections); + #ifdef __cplusplus } #endif diff --git a/include/target.h b/include/target.h index 99603e905d..7652cd8238 100644 --- a/include/target.h +++ b/include/target.h @@ -29,6 +29,28 @@ struct opendr_category_target { }; typedef struct opendr_category_target opendr_category_target_t; +/*** + * OpenDR data type for representing detection targets + */ +struct opendr_detection_target { + int name; + float left; + float top; + float width; + float height; + float score; +}; +typedef struct opendr_detection_target opendr_detection_target_t; + +/*** + * OpenDR 
data type for representing a structure of detections targets + */ +struct opendr_detection_target_list { + opendr_detection_target_t *starting_pointer; + int size; +}; +typedef struct opendr_detection_target_list opendr_detection_target_list_t; + #ifdef __cplusplus } #endif diff --git a/projects/c_api/Makefile b/projects/c_api/Makefile index 92d14951f9..9a6b35453c 100644 --- a/projects/c_api/Makefile +++ b/projects/c_api/Makefile @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +OPENDR_HOME= /home/manos/develop/final/opendr SHELL := /bin/bash CPP = g++ @@ -33,7 +34,10 @@ OPENDR_LD = -L$(OPENDR_HOME)/lib -lopendr all: download demos -demos: $(BUILD_DIR)/face_recognition_demo +demos: $(BUILD_DIR)/face_recognition_demo $(BUILD_DIR)/nanodet_demo + +face: $(BUILD_DIR)/face_recognition_demo +nano: $(BUILD_DIR)/nanodet_demo download: @+if [ -a $(DATA_DIR) ] ; \ @@ -47,6 +51,11 @@ download: $(MV) opendrdata.csd.auth.gr/perception/face_recognition/test_data/images $(DATA_DIR)/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/optimized_model/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/optimized_model $(DATA_DIR)/optimized_model; \ + $(MKDIR_P) $(DATA_DIR)/nanodet; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/nanodet/database; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/nanodet/optimized_model; \ $(RM) -r opendrdata.csd.auth.gr; \ fi; @@ -55,6 +64,12 @@ $(BUILD_DIR)/face_recognition_demo: @+echo "Building face recognition demo..." 
$(CC) $(CFLAGS) -o $(BUILD_DIR)/face_recognition_demo samples/face_recognition/face_recognition_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) +$(BUILD_DIR)/nanodet_demo: + $(MKDIR_P) $(BUILD_DIR) + @+echo "Building nanodet object detection demo..." + $(CC) $(CFLAGS) -o $(BUILD_DIR)/nanodet_libtorch_demo samples/nanodet/nanodet_jit_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + + clean: @+echo "Cleaning C API demo binaries and temporary files..." @+$(RM) $(BUILD_DIR)/* diff --git a/projects/c_api/README.md b/projects/c_api/README.md index 62dd65bea7..ebaccc45ea 100644 --- a/projects/c_api/README.md +++ b/projects/c_api/README.md @@ -10,4 +10,5 @@ Make sure that you have downloaded the necessary resources before running the de ## Supported tools Currently, the following tools are exposing a C API: 1. Face recognition +2. Nanodet Jit module diff --git a/projects/c_api/samples/nanodet/README.md b/projects/c_api/samples/nanodet/README.md new file mode 100644 index 0000000000..fb2329cb2c --- /dev/null +++ b/projects/c_api/samples/nanodet/README.md @@ -0,0 +1,14 @@ +# OpenDR C API Nanodet Demo + +C API implementation of nanodet models for inference. To use the models first must be exported with the optimization Jit from python. +After the installation can be run from projects/c_api directory with: +```sh +./built/nanodet_libtorch_demo ./path/to/your/model.pth device_name{cpu, cuda} ./path/to/your/image.jpg height width +``` + +After installation a temporal model and image are downloaded based on nanodet_m model from python. 
+You can run it as: + +```sh +./built/nanodet_libtorch_demo ./data/nanodet/optimized_model/nanodet_m.pth cuda ./data/nanodet/database/000000000036.jpg 320 320 +``` diff --git a/projects/c_api/samples/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/nanodet/nanodet_jit_demo.c new file mode 100644 index 0000000000..9b733bf4ff --- /dev/null +++ b/projects/c_api/samples/nanodet/nanodet_jit_demo.c @@ -0,0 +1,51 @@ +#include <stdio.h> +#include <stdlib.h> +#include "nanodet_c.h" +#include "opendr_utils.h" + +int main(int argc, char** argv) +{ + + if (argc != 6) + { + fprintf(stderr, "usage: %s [model_path] [device] [images_path] [input_sizes].\n" + "model_path = path/to/your/libtorch/model.pth \n device = cuda or cpu \n" + "images_path = \"xxx/xxx/*.jpg\" \n input_size = width height.\n", argv[0]); + return -1; + } + + nanodet_model_t model; + + int height = atoi(argv[4]); + int width = atoi(argv[5]); + printf("start init model\n"); + load_nanodet_model(argv[1], argv[2], height, width, 0.35, &model); + printf("success\n"); + + //Initialize opendr image + opendr_image_t image; + + //Load opendr image + load_image(argv[3], &image); + if (!image.data) + { + printf("Image not found!"); + return 1; + } + + //Initialize opendr detection target list; + opendr_detection_target_list_t results; + + //Infer nanodet model + results = infer_nanodet(&image, &model); + + //Draw the results + drawBboxes(&image, &model, &results); + + //Free the memory + free_detections(&results); + free_image(&image); + free_nanodet_model(&model); + + return 0; +} diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index 92c456c235..721aceec80 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -13,6 +13,16 @@ provided by OpenDR. Specifically the following examples are provided: model is done with `--model "wanted model name"`.
Setting `--device cpu` performs training on CPU. Additional command line arguments can be set to overwrite various training hyperparameters from the provided config file, and running `python3 train_demo.py -h` prints information about them on stdout. - + Example usage: - `python3 train_demo.py --model plus-m_416 --dataset coco --data-root /path/to/coco_dataset` \ No newline at end of file + `python3 train_demo.py --model plus-m_416 --dataset coco --data-root /path/to/coco_dataset` + +4. export_onnx: Export the pretrained model into the ONNX optimization format. Setting the config file for the specific + model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. + Note! The ONNX model only runs the inference of the actual model. If you want the postprocessing of the output you must use Jit optimization. + It is recommended to additionally install the onnxsim dependency with `pip install onnxsim` for smaller and better optimized models. +5. export_torchscript: Export the pretrained model into the Jit optimization format. Setting the config file for the specific + model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. + Note! If you're planning on using the C API, Jit optimization is preferred, so that the same postprocessing of the output is used + and the detections are exactly the same as with the Python API. +6. inference_tutorial: A simple tutorial in jupyter for easier use of Nanodet in inference.
\ No newline at end of file diff --git a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py index 759c6aa4bd..ac5de4e8d5 100644 --- a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py @@ -21,7 +21,7 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str) + parser.add_argument("--model", help="Model that config file will be used", type=str, default="m") parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) args = parser.parse_args() @@ -30,5 +30,6 @@ nanodet = NanodetLearner(model_to_use=args.model, device=args.device) nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet-{}/nanodet-{}.ckpt".format(args.model, args.model), verbose=True) + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) nanodet.eval(val_dataset) + nanodet.save("./temp") diff --git a/projects/python/perception/object_detection_2d/nanodet/export_onnx.py b/projects/python/perception/object_detection_2d/nanodet/export_onnx.py new file mode 100644 index 0000000000..8054ccce97 --- /dev/null +++ b/projects/python/perception/object_detection_2d/nanodet/export_onnx.py @@ -0,0 +1,38 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +from opendr.perception.object_detection_2d import NanodetLearner +from opendr.engine.data import Image +from opendr.perception.object_detection_2d import draw_bounding_boxes + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) + parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') + args = parser.parse_args() + + nanodet = NanodetLearner(model_to_use=args.model, device=args.device) + nanodet.download("./predefined_examples", mode="pretrained") + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) + + nanodet.download("./predefined_examples", mode="images") + # First read an openDR image from your dataset and run the optimizer: + img = Image.open("./predefined_examples/000000000036.jpg") + nanodet.optimize("./onnx", img, optimization="onnx") + + boxes = nanodet.infer(input=img) + + draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) diff --git a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py new file mode 100644 index 0000000000..1bfa2c1482 --- /dev/null +++ b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py @@ -0,0 +1,39 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file 
except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +from opendr.perception.object_detection_2d import NanodetLearner +from opendr.engine.data import Image +from opendr.perception.object_detection_2d import draw_bounding_boxes + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) + parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') + args = parser.parse_args() + + nanodet = NanodetLearner(model_to_use=args.model, device=args.device) + + nanodet.download("./predefined_examples", mode="pretrained") + nanodet.load(f"./predefined_examples/nanodet_{args.model}", verbose=True) + + nanodet.download("./predefined_examples", mode="images") + # First read an openDR image from your dataset and run the optimizer: + img = Image.open("./predefined_examples/000000000036.jpg") + nanodet.optimize(f"./jit/nanodet_{args.model}", img, optimization="jit") + img = Image.open("./predefined_examples/000000000036.jpg") + boxes = nanodet.infer(input=img) + + draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) diff --git a/projects/python/perception/object_detection_2d/nanodet/train_demo.py b/projects/python/perception/object_detection_2d/nanodet/train_demo.py index 3ef0394392..8fda02650c 100644 --- a/projects/python/perception/object_detection_2d/nanodet/train_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/train_demo.py @@ -22,7 +22,7 @@ parser = 
argparse.ArgumentParser() parser.add_argument("--dataset", help="Dataset to train on", type=str, default="coco", choices=["voc", "coco"]) parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str) + parser.add_argument("--model", help="Model that config file will be used", type=str, default="m") parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--batch-size", help="Batch size to use for training", type=int, default=6) parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=5e-4) @@ -46,6 +46,6 @@ device=args.device) nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet-{}/nanodet-{}.ckpt".format(args.model, args.model), verbose=True) + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) nanodet.fit(dataset, val_dataset) nanodet.save() diff --git a/src/c_api/Makefile b/src/c_api/Makefile index 1e1d783c8d..ed9bbeb80c 100644 --- a/src/c_api/Makefile +++ b/src/c_api/Makefile @@ -30,6 +30,10 @@ LD = -L/usr/lib/ -L/usr/local/lib/ -lstdc++ -lm -lonnxruntime `pkg-config --lib OPENDR_INC = -I$(OPENDR_HOME)/include OPENDR_LD = -L$(OPENDR_HOME)//lib -lopendr +LIBTORCH_DIR = /usr/local/libtorch +TORCHSCRIPT_INC = -I$(LIBTORCH_DIR)/include -I$(LIBTORCH_DIR)/include/torch/csrc/api/include +TORCHSCRIPT_LD = -L$(LIBTORCH_DIR)/lib -L$(LIBTORCH_DIR)/share -ltorchvision -ltorch + all: libopendr libopendr: $(OPENDR_HOME)/lib/libopendr.so @@ -39,9 +43,9 @@ $(OPENDR_HOME)/lib/libopendr.so: @+$(MKDIR_P) $(BUILD_DIR) $(CPP) $(CFLAGS) -c opendr_utils.cpp -o $(BUILD_DIR)/opendr_utils.o $(INC) $(OPENDR_INC) $(CPP) $(CFLAGS) -c face_recognition.cpp -o $(BUILD_DIR)/opendr_face_recognition.o $(INC) $(OPENDR_INC) + $(CPP) $(CFLAGS) -c nanodet_libtorch.cpp -o 
$(BUILD_DIR)/opendr_nanodet_libtorch.o $(INC) $(OPENDR_INC) $(TORCHSCRIPT_INC) @$(MKDIR_P) $(LIB_DIR) - $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(LD) --shared - + $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(BUILD_DIR)/opendr_nanodet_libtorch.o $(LD) $(TORCHSCRIPT_LD) --shared clean: @+echo "Cleaning C API binaries and temporary files..." @+$(RM) $(BUILD_DIR)/* diff --git a/src/c_api/README.md b/src/c_api/README.md index 1377f753a7..ceaab456e7 100644 --- a/src/c_api/README.md +++ b/src/c_api/README.md @@ -3,7 +3,7 @@ ## Description This module contains a C API that can be used for performing inference on models trained using the Python API of OpenDR. -Therefore, to use the C API you should first use the Python API to train a model and then export it to ONNX format using the `optimize()` method. +Therefore, to use the C API you should first use the Python API to train a model and then export it to ONNX or Jit format using the `optimize()` method. ## Setup diff --git a/src/c_api/nanodet_libtorch.cpp b/src/c_api/nanodet_libtorch.cpp new file mode 100644 index 0000000000..9f8cdee17e --- /dev/null +++ b/src/c_api/nanodet_libtorch.cpp @@ -0,0 +1,342 @@ +// Copyright 2020-2022 OpenDR European Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include +#include +#include +#include "nanodet_c.h" + +/** + * Helper class holder of c++ values and jit model. + */ +class NanoDet { +private: + torch::DeviceType device; + torch::jit::script::Module network; + torch::Tensor meanTensor; + torch::Tensor stdTensor; + std::vector labels; + +public: + NanoDet(torch::jit::script::Module net, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, + std::vector labels); + ~NanoDet(); + + torch::Tensor mPreProcess(cv::Mat *image); + torch::jit::script::Module net() const; + torch::Tensor meanValues() const; + torch::Tensor stdValues() const; + std::vector classes() const; + std::vector outputs; +}; + +NanoDet::NanoDet(torch::jit::script::Module net, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, + const std::vector labels) { + this->device = device; + this->network = net; + this->meanTensor = meanValues.clone().to(device); + this->stdTensor = stdValues.clone().to(device); + this->labels = labels; +} + +NanoDet::~NanoDet() { +} + +/** + * Helper function for preprocessing images for normalization. + * This function follows the OpenDR's Nanodet pre-processing pipeline for color normalization. + * Mean and Standard deviation are already part of NanoDet class when is initialized. 
+ * @param image, image to be preprocesses + */ +torch::Tensor NanoDet::mPreProcess(cv::Mat *image) { + torch::Tensor tensorImage = torch::from_blob(image->data, {image->rows, image->cols, 3}, torch::kByte); + tensorImage = tensorImage.toType(torch::kFloat); + tensorImage = tensorImage.to(this->device); + tensorImage = tensorImage.permute({2, 0, 1}); + tensorImage = tensorImage.add(this->meanTensor); + tensorImage = tensorImage.mul(this->stdTensor); + + return tensorImage; +} + +/** + * Getter for jit model + */ +torch::jit::script::Module NanoDet::net() const { + return this->network; +} + +/** + * Getter for tensor with the mean values + */ +torch::Tensor NanoDet::meanValues() const { + return this->meanTensor; +} + +/** + * Getter for tensor with the standard deviation values + */ +torch::Tensor NanoDet::stdValues() const { + return this->stdTensor; +} + +/** + * Getter of labels for printing + */ +std::vector NanoDet::classes() const { + return labels; +} + +/** + * Helper function to calculate the final shape of the model input relative to size ratio of input image. + */ +void get_minimum_dst_shape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { + float ratio; + float src_ratio = ((float)srcSize->width / (float)srcSize->height); + float dst_ratio = ((float)dstSize->width / (float)dstSize->height); + if (src_ratio < dst_ratio) + ratio = ((float)dstSize->height / (float)srcSize->height); + else + ratio = ((float)dstSize->width / (float)srcSize->width); + + dstSize->width = static_cast(ratio * srcSize->width); + dstSize->height = static_cast(ratio * srcSize->height); + + if (divisible > 0) { + dstSize->width = std::max(divisible, ((int)((dstSize->width + divisible - 1) / divisible) * divisible)); + dstSize->height = std::max(divisible, ((int)((dstSize->height + divisible - 1) / divisible) * divisible)); + } +} + +/** + * Helper function to calculate the warp matrix for resizing. 
+ */ +void get_resize_matrix(cv::Size *src_shape, cv::Size *dst_shape, cv::Mat *Rs, int keep_ratio) { + if (keep_ratio == 1) { + float ratio; + cv::Mat C = cv::Mat::eye(3, 3, CV_32FC1); + + C.at(0, 2) = -src_shape->width / 2.0; + C.at(1, 2) = -src_shape->height / 2.0; + float src_ratio = ((float)src_shape->width / (float)src_shape->height); + float dst_ratio = ((float)dst_shape->width / (float)dst_shape->height); + if (src_ratio < dst_ratio) { + ratio = ((float)dst_shape->height / (float)src_shape->height); + } else { + ratio = ((float)dst_shape->width / (float)src_shape->width); + } + + Rs->at(0, 0) *= ratio; + Rs->at(1, 1) *= ratio; + + cv::Mat T = cv::Mat::eye(3, 3, CV_32FC1); + T.at(0, 2) = 0.5 * dst_shape->width; + T.at(1, 2) = 0.5 * dst_shape->height; + + *Rs = T * (*Rs) * C; + } else { + Rs->at(0, 0) *= (float)dst_shape->width / (float)src_shape->width; + Rs->at(1, 1) *= (float)dst_shape->height / (float)src_shape->height; + } +} + +/** + * Helper function for preprocessing images for resizing. + * This function follows the OpenDR's Nanodet pre-processing pipeline for shape transformation, which include + * find the actual final size of model input if keep ratio is enabled, calculate the warp matrix and finally + * resize and warp perspective of the input image. 
+ * @param src, image to be preprocesses + * @param dst, output image to be used as model input + * @param dstSize, final size of the dst + * @param Rs, matrix to be used for warp perspective + * @param keep_ratio, flag for targeting the resized image size relative to input image ratio + */ +void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warp_matrix, int keep_ratio) { + cv::Size srcSize = cv::Size(src->cols, src->rows); + const float divisible = 0.0; + + // Get new destination size if keep ratio is wanted + if (keep_ratio == 1) { + get_minimum_dst_shape(&srcSize, dstSize, divisible); + } + + get_resize_matrix(&srcSize, dstSize, warp_matrix, keep_ratio); + cv::warpPerspective(*src, *dst, *warp_matrix, *dstSize); +} + +/** + * Helper function to determine the device of jit model and tensors. + */ +torch::DeviceType torchDevice(char *device_name, int verbose = 0) { + torch::DeviceType device; + if (std::string(device_name) == "cuda") { + if (verbose == 1) + printf("to cuda\n"); + device = torch::kCUDA; + } else { + if (verbose == 1) + printf("to cpu\n"); + device = torch::kCPU; + } + return device; +} + +void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model) { + // Initialize model + model->inputSize[0] = width; + model->inputSize[1] = height; + + model->scoreThreshold = scoreThreshold; + model->keep_ratio = 1; + + const std::vector labels{ + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", + "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", + "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", + "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", + "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", + 
"orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", + "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", + "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", + "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", + "teddy bear", "hair drier", "toothbrush"}; + + // mean and standard deviation tensors for normalization of input + torch::Tensor meanTensor = torch::tensor({{{-103.53f}}, {{-116.28f}}, {{-123.675f}}}); + torch::Tensor stdValues = torch::tensor({{{0.017429f}}, {{0.017507f}}, {{0.017125f}}}); + + // initialization of jit model and class as holder of c++ values. + torch::DeviceType torch_device = torchDevice(device, 1); + torch::jit::script::Module net = torch::jit::load(model_path, torch_device); + net.eval(); + + NanoDet *detector = new NanoDet(net, meanTensor, stdValues, torch_device, labels); + + model->net = (void *)detector; +} + +opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model) { + NanoDet *networkPTR = static_cast(model->net); + opendr_detection_target_list_t detections; + + std::vector dets; + cv::Mat *opencv_image = static_cast(image->data); + if (!opencv_image) { + std::cerr << "Cannot load image for inference." 
<< std::endl; + + load_detections(&detections, dets.data(), (int)dets.size()); + return detections; + } + + // Preprocess image and keep values as input in jit model + cv::Mat resizedImg; + cv::Size dstSize = cv::Size(model->inputSize[0], model->inputSize[1]); + cv::Mat warp_matrix = cv::Mat::eye(3, 3, CV_32FC1); + preprocess(opencv_image, &resizedImg, &dstSize, &warp_matrix, model->keep_ratio); + torch::Tensor input = networkPTR->mPreProcess(&resizedImg); + + // Make all the inputs as tensors to use in jit model + torch::Tensor srcHeight = torch::tensor(opencv_image->rows); + torch::Tensor srcWidth = torch::tensor(opencv_image->cols); + torch::Tensor warpMatrix = torch::from_blob(warp_matrix.data, {3, 3}); + + // Model inference + torch::Tensor outputs = (networkPTR->net()).forward({input, srcHeight, srcWidth, warpMatrix}).toTensor(); + outputs = outputs.to(torch::Device(torch::kCPU, 0)); + + // Postprocessing, find which outputs have better score than threshold and keep them. + for (int label = 0; label < outputs.size(0); label++) { + for (int box = 0; box < outputs.size(1); box++) { + if (outputs[label][box][4].item() > model->scoreThreshold) { + opendr_detection_target_t det; + det.name = label; + det.left = outputs[label][box][0].item(); + det.top = outputs[label][box][1].item(); + det.width = outputs[label][box][2].item() - outputs[label][box][0].item(); + det.height = outputs[label][box][3].item() - outputs[label][box][1].item(); + det.score = outputs[label][box][4].item(); + dets.push_back(det); + } + } + } + + // Put vector detection as C pointer and size + load_detections(&detections, dets.data(), (int)dets.size()); + return detections; +} + +void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_detection_target_list_t *detections) { + const int colorList[80][3] = { + //{255 ,255 ,255}, //bg + {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, + {153, 153, 153}, {255, 0, 0}, 
{255, 127, 0}, {190, 190, 0}, {0, 255, 0}, {0, 0, 255}, {170, 0, 255}, + {84, 84, 0}, {84, 170, 0}, {84, 255, 0}, {170, 84, 0}, {170, 170, 0}, {170, 255, 0}, {255, 84, 0}, + {255, 170, 0}, {255, 255, 0}, {0, 84, 127}, {0, 170, 127}, {0, 255, 127}, {84, 0, 127}, {84, 84, 127}, + {84, 170, 127}, {84, 255, 127}, {170, 0, 127}, {170, 84, 127}, {170, 170, 127}, {170, 255, 127}, {255, 0, 127}, + {255, 84, 127}, {255, 170, 127}, {255, 255, 127}, {0, 84, 255}, {0, 170, 255}, {0, 255, 255}, {84, 0, 255}, + {84, 84, 255}, {84, 170, 255}, {84, 255, 255}, {170, 0, 255}, {170, 84, 255}, {170, 170, 255}, {170, 255, 255}, + {255, 0, 255}, {255, 84, 255}, {255, 170, 255}, {42, 0, 0}, {84, 0, 0}, {127, 0, 0}, {170, 0, 0}, + {212, 0, 0}, {255, 0, 0}, {0, 42, 0}, {0, 84, 0}, {0, 127, 0}, {0, 170, 0}, {0, 212, 0}, + {0, 255, 0}, {0, 0, 42}, {0, 0, 84}, {0, 0, 127}, {0, 0, 170}, {0, 0, 212}, {0, 0, 255}, + {0, 0, 0}, {36, 36, 36}, {72, 72, 72}, {109, 109, 109}, {145, 145, 145}, {182, 182, 182}, {218, 218, 218}, + {0, 113, 188}, {80, 182, 188}, {127, 127, 0}, + }; + + std::vector classNames = (static_cast(model->net))->classes(); + + cv::Mat *opencv_image = static_cast(opendr_image->data); + if (!opencv_image) { + std::cerr << "Cannot load image for inference." << std::endl; + return; + } + + cv::Mat image = (*opencv_image).clone(); + for (size_t i = 0; i < detections->size; i++) { + const opendr_detection_target bbox = (detections->starting_pointer)[i]; + cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); + cv::rectangle( + image, cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), color); + + char text[256]; + float score = bbox.score > 1 ? 
1 : bbox.score; + sprintf(text, "%s %.1f%%", (classNames)[bbox.name].c_str(), score * 100); + + int baseLine = 0; + cv::Size labelSize = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + + int x = (int)bbox.left; + int y = (int)bbox.top; + if (y < 0) + y = 0; + if (x + labelSize.width > image.cols) + x = image.cols - labelSize.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), color, -1); + cv::putText(image, text, cv::Point(x, y + labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); + } + + cv::imshow("image", image); + cv::waitKey(0); +} + +void free_nanodet_model(nanodet_model_t *model) { + if (model->net) { + NanoDet *networkPTR = static_cast(model->net); + delete networkPTR; + } +} diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index b28f398303..a44ed1da39 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -35,3 +35,15 @@ void free_image(opendr_image_t *image) { delete opencv_image; } } + +void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize) { + detections->size = vectorSize; + int sizeOfOutput = (vectorSize) * sizeof(opendr_detection_target_t); + detections->starting_pointer = (opendr_detection_target_t *)malloc(sizeOfOutput); + std::memcpy(detections->starting_pointer, vectorDataPtr, sizeOfOutput); +} + +void free_detections(opendr_detection_target_list_t *detections) { + if (detections->starting_pointer != NULL) + free(detections->starting_pointer); +} \ No newline at end of file diff --git a/src/opendr/perception/object_detection_2d/nanodet/README.md b/src/opendr/perception/object_detection_2d/nanodet/README.md index 409e07a847..1efb8bae5e 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/README.md +++ b/src/opendr/perception/object_detection_2d/nanodet/README.md @@ -10,4 +10,4 @@ Large parts of the implementation are 
taken from [Nanodet Github](https://github Usage ------ - For VOC and COCO like datasets, an ```ExternalDataset``` with the root path and dataset name (```voc```, ```coco```) must be passed to the fit function. -- The ```temp_path``` folder is used to save checkpoints during training. \ No newline at end of file +- The ```workspace``` folder is used to save checkpoints during training. \ No newline at end of file diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml index a8c312cd61..77d064c72a 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml @@ -54,7 +54,7 @@ model: data: train: input_size: [320,320] #[w,h] - keep_ratio: True + keep_ratio: False pipeline: perspective: 0.0 scale: [0.6, 1.4] @@ -69,7 +69,8 @@ data: normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: input_size: [320,320] #[w,h] - keep_ratio: True + keep_ratio: False #feature_hw: [20,20]. Sizes for position embedding are hard coded and cannot have various values, + #Please use images with standard ratio and change the value accordingly if you want to keep_ratio.
pipeline: normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] device: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/batch_process.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/batch_process.py index f84170a275..4b0c910d0f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/batch_process.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/batch_process.py @@ -27,11 +27,38 @@ def stack_batch_img( img_widths.append(img.shape[-1]) max_h, max_w = max(img_heights), max(img_widths) if divisible > 0: - max_h = (max_h + divisible - 1) // divisible * divisible - max_w = (max_w + divisible - 1) // divisible * divisible + max_h = torch.div((max_h + divisible - 1), divisible, rounding_mode='trunc') * divisible + max_w = torch.div((max_w + divisible - 1), divisible, rounding_mode='trunc') * divisible batch_imgs = [] for img in img_tensors: padding_size = [0, max_w - img.shape[-1], 0, max_h - img.shape[-2]] batch_imgs.append(F.pad(img, padding_size, value=pad_value)) return torch.stack(batch_imgs, dim=0).contiguous() + + +def divisible_padding( + img_tensor: torch.Tensor, divisible: torch.Tensor = torch.tensor(0), pad_value: float = 0.0 +) -> torch.Tensor: + """ + Helper function which uses only pytorch api for scripting and tracing. + Args: + img_tensor (torch.Tensor): + divisible (int): + pad_value (float): value to pad + + Returns: + torch.Tensor. 
+ """ + assert divisible >= 0 + + img_heights = img_tensor.shape[-2] + img_widths = img_tensor.shape[-1] + + if divisible > 0: + img_heights = torch.div((img_heights + divisible - 1), divisible, rounding_mode='trunc') * divisible + img_widths = torch.div((img_widths + divisible - 1), divisible, rounding_mode='trunc') * divisible + + padding_size = [0, img_widths - img_tensor.shape[-1], 0, img_heights - img_tensor.shape[-2]] + batch_img = F.pad(img_tensor, padding_size, value=pad_value) + return batch_img.unsqueeze(0) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/pipeline.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/pipeline.py index 24acdb1880..2b9cfe6d32 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/pipeline.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/pipeline.py @@ -13,30 +13,12 @@ # limitations under the License. import functools -import warnings from typing import Dict, Tuple from torch.utils.data import Dataset from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.color import color_aug_and_norm -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp import ShapeTransform, warp_and_resize - - -class LegacyPipeline: - def __init__(self, cfg, keep_ratio): - warnings.warn( - "Deprecated warning! Pipeline from nanodet v0.x has been deprecated," - "Please use new Pipeline and update your config!" 
- ) - self.warp = functools.partial( - warp_and_resize, warp_kwargs=cfg, keep_ratio=keep_ratio - ) - self.color = functools.partial(color_aug_and_norm, kwargs=cfg) - - def __call__(self, meta, dst_shape): - meta = self.warp(meta, dst_shape=dst_shape) - meta = self.color(meta=meta) - return meta +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp import ShapeTransform class Pipeline: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/warp.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/warp.py index 6ffd1b66d3..29fa91d242 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/warp.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/warp.py @@ -14,10 +14,11 @@ import math import random -from typing import Dict, Optional, Tuple +from typing import Tuple import cv2 import numpy as np +import torch def get_flip_matrix(prob=0.5): @@ -93,7 +94,8 @@ def get_shear_matrix(degree): def get_translate_matrix(translate, width, height): """ - + :param width: + :param height: :param translate: :return: """ @@ -136,60 +138,31 @@ def get_resize_matrix(raw_shape, dst_shape, keep_ratio): return Rs -def warp_and_resize( - meta: Dict, - warp_kwargs: Dict, - dst_shape: Tuple[int, int], - keep_ratio: bool = True, -): - # TODO: background, type - raw_img = meta["img"] - height = raw_img.shape[0] # shape(h,w,c) - width = raw_img.shape[1] - - # center - C = np.eye(3) - C[0, 2] = -width / 2 - C[1, 2] = -height / 2 - - # do not change the order of mat mul - if "perspective" in warp_kwargs and random.randint(0, 1): - P = get_perspective_matrix(warp_kwargs["perspective"]) - C = P @ C - if "scale" in warp_kwargs and random.randint(0, 1): - Scl = get_scale_matrix(warp_kwargs["scale"]) - C = Scl @ C - if "stretch" in warp_kwargs and random.randint(0, 1): - Str = 
get_stretch_matrix(*warp_kwargs["stretch"]) - C = Str @ C - if "rotation" in warp_kwargs and random.randint(0, 1): - R = get_rotation_matrix(warp_kwargs["rotation"]) - C = R @ C - if "shear" in warp_kwargs and random.randint(0, 1): - Sh = get_shear_matrix(warp_kwargs["shear"]) - C = Sh @ C - if "flip" in warp_kwargs: - F = get_flip_matrix(warp_kwargs["flip"]) - C = F @ C - if "translate" in warp_kwargs and random.randint(0, 1): - T = get_translate_matrix(warp_kwargs["translate"], width, height) +def scriptable_warp_boxes(boxes, M, width, height): + """ + Warp boxes function that uses pytorch api, so it can be used with scripting and tracing for optimization. + """ + n = boxes.shape[0] + if n: + # warp points + xy = torch.ones((n * 4, 3), dtype=torch.float32) + xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape( + n * 4, 2 + ) # x1y1, x2y2, x1y2, x2y1 + M = torch.transpose(M, 0, 1).float() + xy = torch.mm(xy, M) # transform + xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + xy = torch.cat((x.min(1).values, y.min(1).values, x.max(1).values, y.max(1).values)).reshape(4, n) + xy = torch.transpose(xy, 0, 1).float() + # clip boxes + xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) + xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) + return xy else: - T = get_translate_matrix(0, width, height) - M = T @ C - # M = T @ Sh @ R @ Str @ P @ C - ResizeM = get_resize_matrix((width, height), dst_shape, keep_ratio) - M = ResizeM @ M - img = cv2.warpPerspective(raw_img, M, dsize=tuple(dst_shape)) - meta["img"] = img - meta["warp_matrix"] = M - if "gt_bboxes" in meta: - boxes = meta["gt_bboxes"] - meta["gt_bboxes"] = warp_boxes(boxes, M, dst_shape[0], dst_shape[1]) - if "gt_masks" in meta: - for i, mask in enumerate(meta["gt_masks"]): - meta["gt_masks"][i] = cv2.warpPerspective(mask, M, dsize=tuple(dst_shape)) - - return meta + return boxes def warp_boxes(boxes, M, width, height): @@ -217,7 +190,7 @@ def 
warp_boxes(boxes, M, width, height): def get_minimum_dst_shape( src_shape: Tuple[int, int], dst_shape: Tuple[int, int], - divisible: Optional[int] = None, + divisible: int = 0, ) -> Tuple[int, int]: """Calculate minimum dst shape""" src_w, src_h = src_shape diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py index c408d996a6..764da3fa01 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py @@ -74,7 +74,7 @@ def results2json(self, results): json_results.append(detection) return json_results - def evaluate(self, results, save_dir): # rank=-1 + def evaluate(self, results, save_dir, rank=-1): results_json = self.results2json(results) if len(results_json) == 0: warnings.warn( @@ -87,8 +87,10 @@ def evaluate(self, results, save_dir): # rank=-1 for key in self.metric_names: empty_eval_results[key] = 0 return empty_eval_results - # json_path = os.path.join(save_dir, "results{}.json".format(rank)) - json_path = os.path.join(save_dir, "results.json") + if rank > 0: + json_path = os.path.join(save_dir, "results{}.json".format(rank)) + else: + json_path = os.path.join(save_dir, "results.json") json.dump(results_json, open(json_path, "w")) coco_dets = self.coco_api.loadRes(json_path) coco_eval = COCOeval( diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py index b20b891d58..586f396eee 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py @@ -14,20 +14,17 @@ # See the License for 
the specific language governing permissions and # limitations under the License. -import os import torch +import torch.nn as nn -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.batch_process import stack_batch_img -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.collate import naive_collate +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.batch_process import divisible_padding from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform import Pipeline from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.arch import build_model -image_ext = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] -video_ext = ["mp4", "mov", "avi", "mkv"] - -class Predictor(object): +class Predictor(nn.Module): def __init__(self, cfg, model, device="cuda"): + super(Predictor, self).__init__() self.cfg = cfg self.device = device @@ -41,9 +38,33 @@ def __init__(self, cfg, model, device="cuda"): self.model = model.to(device).eval() + for para in self.model.parameters(): + para.requires_grad = False + self.pipeline = Pipeline(self.cfg.data.val.pipeline, self.cfg.data.val.keep_ratio) + self.traced_model = None + + def trace_model(self, dummy_input): + self.traced_model = torch.jit.trace(self, dummy_input) + return True + + def script_model(self, img, height, width, warp_matrix): + preds = self.traced_model(img, height, width, warp_matrix) + scripted_model = self.postprocessing(preds, img, height, width, warp_matrix) + return scripted_model - def inference(self, img, verbose=True): + def forward(self, img, height, width, warp_matrix): + if torch.jit.is_scripting(): + return self.script_model(img, height, width, warp_matrix) + # In tracing (Jit and Onnx optimizations) we must first run the pipeline before the graf, + # cv2 is needed, and it is installed with abi cxx11 but torch is in cxx<11 + meta = {"height": height, "width": width, "img": img, "warp_matrix": warp_matrix} + 
meta["img"] = divisible_padding(meta["img"], divisible=torch.tensor(32)) + with torch.no_grad(): + results = self.model.inference(meta) + return results + + def preprocessing(self, img): img_info = {"id": 0} height, width = img.shape[:2] img_info["height"] = height @@ -51,19 +72,17 @@ def inference(self, img, verbose=True): meta = dict(img_info=img_info, raw_img=img, img=img) meta = self.pipeline(None, meta, self.cfg.data.val.input_size) meta["img"] = torch.from_numpy(meta["img"].transpose(2, 0, 1)).to(self.device) - meta = naive_collate([meta]) - meta["img"] = stack_batch_img(meta["img"], divisible=32) - with torch.no_grad(): - results = self.model.inference(meta, verbose) - return meta, results + _input = meta["img"] + print(f"[{_input[0][50][50]}, {_input[1][50][50]}, {_input[2][50][50]}]") + _height = torch.tensor(height) + _width = torch.tensor(width) + _warp_matrix = torch.from_numpy(meta["warp_matrix"]) + + return _input, _height, _width, _warp_matrix -def get_image_list(path): - image_names = [] - for maindir, subdir, file_name_list in os.walk(path): - for filename in file_name_list: - apath = os.path.join(maindir, filename) - ext = os.path.splitext(apath)[1] - if ext in image_ext: - image_names.append(apath) - return image_names + def postprocessing(self, preds, input, height, width, warp_matrix): + meta = {"height": height, "width": width, 'img': input, 'warp_matrix': warp_matrix} + meta["img"] = divisible_padding(meta["img"], divisible=torch.tensor(32)) + res = self.model.head.post_process(preds, meta) + return res diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py index 518c0af01b..a4ae17e39b 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py @@ 
-48,9 +48,9 @@ def forward_train(self, gt_meta): ) else: aux_fpn_feat = self.aux_fpn(feat) - dual_fpn_feat = ( + dual_fpn_feat = [ torch.cat([f, aux_f], dim=1) for f, aux_f in zip(fpn_feat, aux_fpn_feat) - ) + ] head_out = self.head(fpn_feat) aux_head_out = self.aux_head(dual_fpn_feat) loss, loss_states = self.head.loss(head_out, gt_meta, aux_preds=aux_head_out) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py index e1ce7a650e..909dbb78bf 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py @@ -15,6 +15,7 @@ import torch import torch.nn as nn +from typing import Dict from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.backbone import build_backbone from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.fpn import build_fpn from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.head import build_head @@ -43,9 +44,16 @@ def forward(self, x): x = self.head(x) return x - def inference(self, meta, verbose=True): + def inference(self, meta: Dict[str, torch.Tensor]): with torch.no_grad(): preds = self(meta["img"]) + if torch.jit.is_tracing(): + return preds + if torch.onnx.is_in_onnx_export(): + # torch.linalg.inv is not supported from onnx opset 11. + # problem with constant folding although is set to false. 
+ # export scriptable model have problem with barchnorm2d + return preds results = self.head.post_process(preds, meta) return results diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/custom_csp.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/custom_csp.py index 17cd08402e..ef8c1cd368 100755 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/custom_csp.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/custom_csp.py @@ -52,6 +52,7 @@ def __init__( activation=activation, ) + @torch.jit.unused def forward(self, x): x = self.in_conv(x) x1 = self.mid_conv(x) @@ -96,6 +97,7 @@ def __init__( activation=activation, ) + @torch.jit.unused def forward(self, x): x = self.in_conv(x) x1 = self.res_blocks(x) @@ -145,13 +147,14 @@ def __init__( self.stages.append(stage) self._init_weight() + @torch.jit.unused def forward(self, x): output = [] for i, stage in enumerate(self.stages): x = stage(x) if i in self.out_stages: output.append(x) - return tuple(output) + return output def _init_weight(self): for m in self.modules(): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/efficientnet_lite.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/efficientnet_lite.py index 9cd6e41baf..6d2f6d4d55 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/efficientnet_lite.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/efficientnet_lite.py @@ -123,7 +123,8 @@ def __init__( ) self._relu = act_layers(activation) - def forward(self, x, drop_connect_rate=None): + @torch.jit.unused + def forward(self, x, drop_connect_rate: bool = None): """ :param x: input tensor :param drop_connect_rate: drop connect rate (float, between 0 and 1) @@ -148,7 +149,7 @@ def forward(self, x, 
drop_connect_rate=None): if self.id_skip and self.stride == 1 and self.input_filters == self.output_filters: if drop_connect_rate: x = drop_connect(x, drop_connect_rate, training=self.training) - x += identity # skip connection + x = x + identity # skip connection return x @@ -246,6 +247,7 @@ def __init__( self.blocks.append(stage) self._initialize_weights(pretrain) + @torch.jit.unused def forward(self, x): x = self.stem(x) output = [] diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/ghostnet.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/ghostnet.py index 353939fe5d..4acf98e01f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/ghostnet.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/ghostnet.py @@ -10,7 +10,6 @@ """ import logging import math -import warnings import torch import torch.nn as nn @@ -39,7 +38,7 @@ def _make_divisible(v, divisor, min_value=None): new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. if new_v < 0.9 * v: - new_v += divisor + new_v = new_v + divisor return new_v @@ -193,6 +192,7 @@ def __init__( nn.BatchNorm2d(out_chs), ) + @torch.jit.unused def forward(self, x): residual = x @@ -211,7 +211,7 @@ def forward(self, x): # 2nd ghost bottleneck x = self.ghost2(x) - x += self.shortcut(residual) + x = x + self.shortcut(residual) return x @@ -222,7 +222,6 @@ def __init__( out_stages=(4, 6, 9), activation="ReLU", pretrain=True, - act=None, ): super(GhostNet, self).__init__() assert set(out_stages).issubset(i for i in range(10)) @@ -260,11 +259,6 @@ def __init__( # ------conv+bn+act----------# 9 1/32 self.activation = activation - if act is not None: - warnings.warn( - "Warning! act argument has been deprecated, " "use activation instead!" 
- ) - self.activation = act # building first layer output_channel = _make_divisible(16 * width_mult, 4) @@ -315,7 +309,7 @@ def forward(self, x): x = self.blocks[i](x) if i in self.out_stages: output.append(x) - return tuple(output) + return output def _initialize_weights(self, pretrain=True): print("init weights...") diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/mobilenetv2.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/mobilenetv2.py index 19fcae379e..a08f4abb38 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/mobilenetv2.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/mobilenetv2.py @@ -1,7 +1,6 @@ from __future__ import absolute_import, division, print_function -import warnings - +import torch.jit import torch.nn as nn from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.activation import act_layers @@ -79,7 +78,6 @@ def __init__( out_stages=(1, 2, 4, 6), last_channel=1280, activation="ReLU", - act=None, ): super(MobileNetV2, self).__init__() # TODO: support load torchvison pretrained weight @@ -89,11 +87,6 @@ def __init__( input_channel = 32 self.last_channel = last_channel self.activation = activation - if act is not None: - warnings.warn( - "Warning! act argument has been deprecated, " "use activation instead!" 
- ) - self.activation = act self.interverted_residual_setting = [ # t, c, n, s [1, 16, 1, 1], @@ -154,6 +147,7 @@ def build_mobilenet_stage(self, stage_num): stage = nn.Sequential(*stage) return stage + @torch.jit.unused def forward(self, x): x = self.first_layer(x) output = [] @@ -162,8 +156,7 @@ def forward(self, x): x = stage(x) if i in self.out_stages: output.append(x) - - return tuple(output) + return output def _initialize_weights(self): for m in self.modules(): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/repvgg.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/repvgg.py index fa30508f13..c6c090276f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/repvgg.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/repvgg.py @@ -176,6 +176,7 @@ def _make_stage(self, planes, num_blocks, stride): self.cur_layer_idx += 1 return nn.Sequential(*blocks) + @torch.jit.unused def forward(self, x): x = self.stage0(x) output = [] @@ -184,7 +185,7 @@ def forward(self, x): x = stage(x) if i in self.out_stages: output.append(x) - return tuple(output) + return output def repvgg_model_convert(model, deploy_model, save_path=None): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/resnet.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/resnet.py index cbd84f7546..d4cdacb0b7 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/resnet.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/resnet.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, division, print_function +import torch.jit import torch.nn as nn import torch.utils.model_zoo as model_zoo @@ -99,8 +100,6 @@ def fill_fc_weights(layers): for m in layers.modules(): if isinstance(m, 
nn.Conv2d): nn.init.normal_(m.weight, std=0.001) - # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') - # torch.nn.init.xavier_normal_(m.weight.data) if m.bias is not None: nn.init.constant_(m.bias, 0) @@ -161,6 +160,7 @@ def _make_layer(self, block, planes, blocks, stride=1): return nn.Sequential(*layers) + @torch.jit.unused def forward(self, x): x = self.conv1(x) x = self.bn1(x) @@ -173,7 +173,7 @@ def forward(self, x): if i in self.out_stages: output.append(x) - return tuple(output) + return output def init_weights(self, pretrain=True): if pretrain: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/shufflenetv2.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/shufflenetv2.py index 013f22a8c1..75a322f179 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/shufflenetv2.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/shufflenetv2.py @@ -14,11 +14,11 @@ def channel_shuffle(x, groups): # type: (torch.Tensor, int) -> torch.Tensor - batchsize, num_channels, height, width = x.data.size() - channels_per_group = num_channels // groups + batchsize, num_channels, height, width = x.size() + channels_per_group = int(num_channels/groups) # reshape - x = x.view(batchsize, groups, channels_per_group, height, width) + x = x.view([batchsize, groups, channels_per_group, height, width]) x = torch.transpose(x, 1, 2).contiguous() @@ -173,6 +173,7 @@ def __init__( self.stage4.add_module("conv5", conv5) self._initialize_weights(pretrain) + @torch.jit.unused def forward(self, x): x = self.conv1(x) x = self.maxpool(x) @@ -182,7 +183,7 @@ def forward(self, x): x = stage(x) if i in self.out_stages: output.append(x) - return tuple(output) + return output def _initialize_weights(self, pretrain=True): print("init weights...") diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/fpn.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/fpn.py index 4549c7409e..f373f9f5d6 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/fpn.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/fpn.py @@ -1,6 +1,6 @@ # Modification 2020 RangiLyu # Copyright 2018-2019 Open-MMLab. - +import torch.jit # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,6 +15,8 @@ import torch.nn as nn import torch.nn.functional as F +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.init_weights import xavier_init @@ -72,7 +74,8 @@ def init_weights(self): if isinstance(m, nn.Conv2d): xavier_init(m, distribution="uniform") - def forward(self, inputs): + @torch.jit.unused + def forward(self, inputs: List[Tensor]): assert len(inputs) == len(self.in_channels) # build laterals @@ -84,17 +87,10 @@ def forward(self, inputs): # build top-down path used_backbone_levels = len(laterals) for i in range(used_backbone_levels - 1, 0, -1): - laterals[i - 1] += F.interpolate( + laterals[i - 1] = laterals[i - 1] + F.interpolate( laterals[i], scale_factor=2, mode="bilinear" ) # build outputs - outs = [ - # self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) - laterals[i] - for i in range(used_backbone_levels) - ] - return tuple(outs) - - -# if __name__ == '__main__': + outs = [laterals[i] for i in range(used_backbone_levels)] + return outs diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/ghost_pan.py 
b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/ghost_pan.py index 76e043179c..cf03e3fb4e 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/ghost_pan.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/ghost_pan.py @@ -13,6 +13,8 @@ # limitations under the License. import torch import torch.nn as nn +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.backbone.ghostnet import GhostBottleneck from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule, DepthwiseConvModule @@ -65,6 +67,7 @@ def __init__( ) self.blocks = nn.Sequential(*blocks) + @torch.jit.unused def forward(self, x): out = self.blocks(x) if self.use_res: @@ -118,7 +121,7 @@ def __init__( conv = DepthwiseConvModule if use_depthwise else ConvModule # build top-down blocks - self.upsample = nn.Upsample(**upsample_cfg) + self.upsample = nn.Upsample(**upsample_cfg, align_corners=False) self.reduce_layers = nn.ModuleList() for idx in range(len(in_channels)): self.reduce_layers.append( @@ -198,12 +201,13 @@ def __init__( ) ) - def forward(self, inputs): + @torch.jit.unused + def forward(self, inputs: List[Tensor]): """ Args: - inputs (tuple[Tensor]): input features. + inputs (List[Tensor]): input features. Returns: - tuple[Tensor]: multi level features. + List[Tensor]: multi level features. 
""" assert len(inputs) == len(self.in_channels) inputs = [ @@ -241,4 +245,4 @@ def forward(self, inputs): ): outs.append(extra_in_layer(inputs[-1]) + extra_out_layer(outs[-1])) - return tuple(outs) + return outs diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/pan.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/pan.py index c12482f294..8bb2114b76 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/pan.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/pan.py @@ -13,7 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import torch.jit import torch.nn.functional as F +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.fpn.fpn import FPN @@ -61,7 +64,8 @@ def __init__( ) self.init_weights() - def forward(self, inputs): + @torch.jit.unused + def forward(self, inputs: List[Tensor]): """Forward function.""" assert len(inputs) == len(self.in_channels) @@ -74,8 +78,8 @@ def forward(self, inputs): # build top-down path used_backbone_levels = len(laterals) for i in range(used_backbone_levels - 1, 0, -1): - laterals[i - 1] += F.interpolate( - laterals[i], scale_factor=2, mode="bilinear" + laterals[i - 1] = laterals[i - 1] + F.interpolate( + laterals[i], scale_factor=2.0, mode="bilinear" ) # build outputs @@ -84,11 +88,10 @@ def forward(self, inputs): # part 2: add bottom-up path for i in range(0, used_backbone_levels - 1): - inter_outs[i + 1] += F.interpolate( + inter_outs[i + 1] = inter_outs[i + 1] + F.interpolate( inter_outs[i], scale_factor=0.5, mode="bilinear" ) - outs = [] - outs.append(inter_outs[0]) + outs = [inter_outs[0]] outs.extend([inter_outs[i] for i in range(1, used_backbone_levels)]) - return tuple(outs) + return outs diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/tan.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/tan.py index 42efd128b9..b079dde44f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/tan.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/tan.py @@ -15,6 +15,8 @@ import torch import torch.nn as nn import torch.nn.functional as F +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.init_weights import normal_init @@ -92,7 +94,8 @@ def init_weights(self): elif isinstance(m, nn.Conv2d): normal_init(m, 0.01) - def forward(self, inputs): + @torch.jit.unused + def forward(self, inputs: List[Tensor]): assert len(inputs) == len(self.in_channels) # build laterals @@ -118,4 +121,4 @@ def forward(self, inputs): laterals[1] + mid_lvl, laterals[2] + F.interpolate(mid_lvl, size=laterals[2].shape[2:], mode="bilinear"), ] - return tuple(outs) + return outs diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py index e26e083b37..471d2951fd 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py @@ -5,6 +5,8 @@ import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F +from torch import Tensor +from typing import List, Dict, Tuple from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util import ( bbox2distance, @@ -13,7 +15,8 @@ multi_apply, ) -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp 
import warp_boxes +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp import warp_boxes,\ + scriptable_warp_boxes from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.loss.gfocal_loss\ import DistributionFocalLoss, QualityFocalLoss from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.loss.iou_loss import GIoULoss, bbox_overlaps @@ -62,6 +65,11 @@ def forward(self, x): offsets from the box center in four directions, shape (N, 4). """ shape = x.size() + if torch.jit.is_scripting(): + x = F.softmax(x.reshape(shape[0], shape[1], 4, self.reg_max + 1), dim=-1) + x = F.linear(x, self.project.type_as(x)).reshape(shape[0], shape[1], 4) + return x + x = F.softmax(x.reshape(*shape[:-1], 4, self.reg_max + 1), dim=-1) x = F.linear(x, self.project.type_as(x)).reshape(*shape[:-1], 4) return x @@ -185,13 +193,11 @@ def init_weights(self): normal_init(self.gfl_cls, std=0.01, bias=bias_cls) normal_init(self.gfl_reg, std=0.01) - def forward(self, feats): - if torch.onnx.is_in_onnx_export(): - return self._forward_onnx(feats) + def forward(self, feats: List[Tensor]): outputs = [] - for x, scale in zip(feats, self.scales): - cls_feat = x - reg_feat = x + for idx, scale in enumerate(self.scales): + cls_feat = feats[idx] + reg_feat = feats[idx] for cls_conv in self.cls_convs: cls_feat = cls_conv(cls_feat) for reg_conv in self.reg_convs: @@ -371,7 +377,8 @@ def target_assign( ): """ Assign target for a batch of images. 
- :param batch_size: num of images in one batch + :param cls_preds: predictions of class in images in one batch + :param reg_preds: predictions of bbox in images in one batch :param featmap_sizes: A list of all grid cell boxes in all image :param gt_bboxes_list: A list of ground truth boxes in all image :param gt_bboxes_ignore_list: A list of all ignored boxes in all image @@ -405,8 +412,6 @@ def target_assign( if gt_labels_list is None: gt_labels_list = [None for _ in range(batch_size)] # target assign on all images, get list of tensors - # list length = batch size - # tensor first dim = num of all grid cell ( all_grid_cells, all_labels, @@ -449,6 +454,7 @@ def target_assign( num_total_neg, ) + @torch.no_grad() def target_assign_single_img( self, grid_cells, num_level_cells, gt_bboxes, gt_bboxes_ignore, gt_labels ): @@ -480,7 +486,7 @@ def target_assign_single_img( label_weights = grid_cells.new_zeros(num_cells, dtype=torch.float) if len(pos_inds) > 0: - pos_bbox_targets = pos_gt_bboxes + pos_bbox_targets = pos_gt_bboxes.float() bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 if gt_labels is None: @@ -505,6 +511,7 @@ def target_assign_single_img( ) def sample(self, assign_result, gt_bboxes): + """Sample positive and negative bboxes.""" pos_inds = ( torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) .squeeze(-1) @@ -527,11 +534,51 @@ def sample(self, assign_result, gt_bboxes): pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds - def post_process(self, preds, meta): + def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): + """Prediction results postprocessing. Decode bboxes and rescale + to original image size. + Args: + preds (Tensor): Prediction output. + meta (dict): Meta info. + mode (str): Determines if it uses batches and numpy or tensors for scripting. 
+ """ + if mode == "eval" and not torch.jit.is_scripting(): + # Inference do not use batches and tries to have + # tensors exclusively for better optimization during scripting. + return self._eval_post_process(preds, meta) + cls_scores, bbox_preds = preds.split( [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 ) - result_list = self.get_bboxes(cls_scores, bbox_preds, meta) + results = self.get_bboxes(cls_scores, bbox_preds, meta["img"]) + (det_bboxes, det_labels) = results + + det_bboxes[:, :4] = scriptable_warp_boxes( + det_bboxes[:, :4], + torch.linalg.inv(meta["warp_matrix"]), meta["width"], meta["height"] + ) + + # constant output of model every time for tracing + det_result = torch.zeros((self.num_classes, 100, 5)) + for i in range(self.num_classes): + inds = det_labels == i + det = torch.cat(( + det_bboxes[inds, :4], + det_bboxes[inds, 4:5] + ), + dim=1 + ) + + pad = det.new_zeros((100 - det.size(0), 5)) + det = torch.cat([det, pad], dim=0) + det_result[i] = det + return det_result + + def _eval_post_process(self, preds, meta): + cls_scores, bbox_preds = preds.split( + [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 + ) + result_list = self.get_bboxes(cls_scores, bbox_preds, meta["img"], mode="eval") det_results = {} warp_matrixes = ( meta["warp_matrix"] @@ -576,67 +623,105 @@ def post_process(self, preds, meta): det_results[img_id] = det_result return det_results - def get_bboxes(self, cls_preds, reg_preds, img_metas): + def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer"): """Decode the outputs to bboxes. Args: cls_preds (Tensor): Shape (num_imgs, num_points, num_classes). reg_preds (Tensor): Shape (num_imgs, num_points, 4 * (regmax + 1)). - img_metas (dict): Dict of image info. - + input_img (Tensor): Input image to net. + mode (str): Determines if it uses batches and numpy or tensors for scripting. Returns: results_list (list[tuple]): List of detection bboxes and labels. 
""" device = cls_preds.device b = cls_preds.shape[0] - input_height, input_width = img_metas["img"].shape[2:] + input_height, input_width = input_img.shape[2:] input_shape = (input_height, input_width) featmap_sizes = [ - (math.ceil(input_height / stride), math.ceil(input_width) / stride) + (int(math.ceil(input_height / stride)), int(math.ceil(input_width / stride))) for stride in self.strides ] # get grid cells of one image mlvl_center_priors = [] for i, stride in enumerate(self.strides): - y, x = self.get_single_level_center_point( - featmap_sizes[i], stride, torch.float32, device + proiors = self.get_single_level_center_priors( + b, featmap_sizes[i], stride, torch.float32, device ) - strides = x.new_full((x.shape[0],), stride) - proiors = torch.stack([x, y, strides, strides], dim=-1) - mlvl_center_priors.append(proiors.unsqueeze(0).repeat(b, 1, 1)) + mlvl_center_priors.append(proiors) center_priors = torch.cat(mlvl_center_priors, dim=1) dis_preds = self.distribution_project(reg_preds) * center_priors[..., 2, None] bboxes = distance2bbox(center_priors[..., :2], dis_preds, max_shape=input_shape) - scores = cls_preds.sigmoid() + cls_preds = cls_preds.sigmoid() + # add a dummy background class at the end of all labels + if torch.jit.is_scripting() or mode == "infer": + # for faster inference and jit scripting in most common cases we do not try to go through for statement + score, bbox = cls_preds[0], bboxes[0] + padding = score.new_zeros(score.shape[0], 1) + score = torch.cat([score, padding], dim=1) + + return multiclass_nms(bbox, score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), max_num=100) + result_list = [] for i in range(b): # add a dummy background class at the end of all labels # same with mmdetection2.0 - score, bbox = scores[i], bboxes[i] + score, bbox = cls_preds[i], bboxes[i] padding = score.new_zeros(score.shape[0], 1) score = torch.cat([score, padding], dim=1) results = multiclass_nms( bbox, score, score_thr=0.05, - nms_cfg=dict(type="nms", 
iou_threshold=0.6), + nms_cfg=dict(iou_threshold=0.6), max_num=100, ) result_list.append(results) return result_list - def get_single_level_center_point( - self, featmap_size, stride, dtype, device, flatten=True + def get_single_level_center_priors( + self, + batch_size: int, + featmap_size: Tuple[int, int], + stride: int, + dtype: torch.dtype, + device: torch.device, + flatten: bool = True ): + """Generate centers of a single stage feature map. + Args: + batch_size (int): Number of images in one batch. + featmap_size (tuple[int]): height and width of the feature map + stride (int): down sample stride of the feature map + dtype (obj:`torch.dtype`): data type of the tensors + device (obj:`torch.device`): device of the tensors + flatten (bool): flatten the x and y tensors + Return: + priors (Tensor): center priors of a single level feature map. """ - Generate pixel centers of a single stage feature map. - :param featmap_size: height and width of the feature map - :param stride: down sample stride of the feature map - :param dtype: data type of the tensors - :param device: device of the tensors - :param flatten: flatten the x and y tensors - :return: y and x of the center points + x, y = self.get_single_level_center_point(featmap_size, stride, dtype, device, flatten) + strides = x.new_full((x.shape[0],), stride) + proiors = torch.stack([x, y, strides, strides], dim=-1) + return proiors.unsqueeze(0).repeat(batch_size, 1, 1) + + def get_single_level_center_point( + self, + featmap_size: Tuple[int, int], + stride: int, + dtype: torch.dtype, + device: torch.device, + flatten: bool = True + ): + """Generate pixel centers of a single stage feature map. 
+ Args: + featmap_size (tuple[int]): height and width of the feature map + stride (int): down sample stride of the feature map + dtype (obj:`torch.dtype`): data type of the tensors + device (obj:`torch.device`): device of the tensors + flatten (bool): flatten the x and y tensors + Return: + x, y (Tuple[Tensor, Tensor]): y and x of the center points. """ h, w = featmap_size x_range = (torch.arange(w, dtype=dtype, device=device) + 0.5) * stride @@ -645,7 +730,7 @@ def get_single_level_center_point( if flatten: y = y.flatten() x = x.flatten() - return y, x + return x, y def get_grid_cells(self, featmap_size, scale, stride, dtype, device): """ @@ -681,20 +766,3 @@ def grid_cells_to_center(self, grid_cells): cells_cx = (grid_cells[:, 2] + grid_cells[:, 0]) / 2 cells_cy = (grid_cells[:, 3] + grid_cells[:, 1]) / 2 return torch.stack([cells_cx, cells_cy], dim=-1) - - def _forward_onnx(self, feats): - """only used for onnx export""" - outputs = [] - for x, scale in zip(feats, self.scales): - cls_feat = x - reg_feat = x - for cls_conv in self.cls_convs: - cls_feat = cls_conv(cls_feat) - for reg_conv in self.reg_convs: - reg_feat = reg_conv(reg_feat) - cls_pred = self.gfl_cls(cls_feat) - reg_pred = scale(self.gfl_reg(reg_feat)) - cls_pred = cls_pred.sigmoid() - out = torch.cat([cls_pred, reg_pred], dim=1) - outputs.append(out.flatten(start_dim=2)) - return torch.cat(outputs, dim=2).permute(0, 2, 1) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_head.py index 01eac4146e..2e50867a21 100755 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_head.py @@ -14,6 +14,8 @@ import torch import torch.nn as nn +from torch import Tensor +from typing import List from 
opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule, DepthwiseConvModule from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.init_weights import normal_init @@ -135,15 +137,14 @@ def init_weights(self): normal_init(self.gfl_reg[i], std=0.01) print("Finish initialize NanoDet Head.") - def forward(self, feats): - if torch.onnx.is_in_onnx_export(): - return self._forward_onnx(feats) + @torch.jit.unused + def forward(self, feats: List[Tensor]): outputs = [] - for x, cls_convs, reg_convs, gfl_cls, gfl_reg in zip( - feats, self.cls_convs, self.reg_convs, self.gfl_cls, self.gfl_reg - ): - cls_feat = x - reg_feat = x + for idx, (cls_convs, reg_convs, gfl_cls, gfl_reg) in enumerate(zip( + self.cls_convs, self.reg_convs, self.gfl_cls, self.gfl_reg + )): + cls_feat = feats[idx] + reg_feat = feats[idx] for cls_conv in cls_convs: cls_feat = cls_conv(cls_feat) for reg_conv in reg_convs: @@ -155,31 +156,6 @@ def forward(self, feats): bbox_pred = gfl_reg(reg_feat) output = torch.cat([cls_score, bbox_pred], dim=1) outputs.append(output.flatten(start_dim=2)) + outputs = torch.cat(outputs, dim=2).permute(0, 2, 1) return outputs - - def _forward_onnx(self, feats): - """only used for onnx export""" - outputs = [] - for x, cls_convs, reg_convs, gfl_cls, gfl_reg in zip( - feats, self.cls_convs, self.reg_convs, self.gfl_cls, self.gfl_reg - ): - cls_feat = x - reg_feat = x - for cls_conv in cls_convs: - cls_feat = cls_conv(cls_feat) - for reg_conv in reg_convs: - reg_feat = reg_conv(reg_feat) - if self.share_cls_reg: - output = gfl_cls(cls_feat) - cls_pred, reg_pred = output.split( - [self.num_classes, 4 * (self.reg_max + 1)], dim=1 - ) - else: - cls_pred = gfl_cls(cls_feat) - reg_pred = gfl_reg(reg_feat) - - cls_pred = cls_pred.sigmoid() - out = torch.cat([cls_pred, reg_pred], dim=1) - outputs.append(out.flatten(start_dim=2)) - return torch.cat(outputs, dim=2).permute(0, 2, 1) diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py index 5d853d5ecf..d2eb0a19f8 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py @@ -3,10 +3,13 @@ import numpy as np import torch import torch.nn as nn +from torch import Tensor +from typing import List, Tuple, Dict from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util\ import bbox2distance, distance2bbox, multi_apply -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp import warp_boxes +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp \ + import warp_boxes, scriptable_warp_boxes from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.loss.gfocal_loss \ import DistributionFocalLoss, QualityFocalLoss from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.loss.iou_loss import GIoULoss @@ -133,15 +136,10 @@ def init_weights(self): normal_init(self.gfl_cls[i], std=0.01, bias=bias_cls) print("Finish initialize NanoDet-Plus Head.") - def forward(self, feats): - if torch.onnx.is_in_onnx_export(): - return self._forward_onnx(feats) + def forward(self, feats: List[Tensor]): outputs = [] - for feat, cls_convs, gfl_cls in zip( - feats, - self.cls_convs, - self.gfl_cls, - ): + for idx, (cls_convs, gfl_cls) in enumerate(zip(self.cls_convs, self.gfl_cls)): + feat = feats[idx] for conv in cls_convs: feat = conv(feat) output = gfl_cls(feat) @@ -166,7 +164,7 @@ def loss(self, preds, gt_meta, aux_preds=None): batch_size = preds.shape[0] input_height, input_width = gt_meta["img"].shape[2:] featmap_sizes = [ - (math.ceil(input_height / stride), math.ceil(input_width) / stride) + 
(math.ceil(input_height / stride), math.ceil(input_width / stride)) for stride in self.strides ] # get grid cells of one image @@ -361,17 +359,51 @@ def sample(self, assign_result, gt_bboxes): pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds - def post_process(self, preds, meta): - """Prediction results post processing. Decode bboxes and rescale + def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): + """Prediction results postprocessing. Decode bboxes and rescale to original image size. Args: preds (Tensor): Prediction output. meta (dict): Meta info. + mode (str): Determines if it uses batches and numpy or tensors for scripting. """ + if mode == "eval" and not torch.jit.is_scripting(): + # Inference do not use batches and tries to have + # tensors exclusively for better optimization during scripting. + return self._eval_post_process(preds, meta) + + cls_scores, bbox_preds = preds.split( + [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 + ) + results = self.get_bboxes(cls_scores, bbox_preds, meta["img"]) + (det_bboxes, det_labels) = results + + det_bboxes[:, :4] = scriptable_warp_boxes( + det_bboxes[:, :4], + torch.linalg.inv(meta["warp_matrix"]), meta["width"], meta["height"] + ) + + # constant output of model every time for tracing + det_result = torch.zeros((self.num_classes, 100, 5)) + for i in range(self.num_classes): + inds = det_labels == i + det = torch.cat(( + det_bboxes[inds, :4], + det_bboxes[inds, 4:5] + ), + dim=1 + ) + + pad = det.new_zeros((100 - det.size(0), 5)) + det = torch.cat([det, pad], dim=0) + det_result[i] = det + return det_result + + def _eval_post_process(self, preds, meta): cls_scores, bbox_preds = preds.split( [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 ) - result_list = self.get_bboxes(cls_scores, bbox_preds, meta) + result_list = self.get_bboxes(cls_scores, bbox_preds, meta["img"], mode="eval") det_results = {} warp_matrixes = ( 
meta["warp_matrix"] @@ -395,7 +427,7 @@ def post_process(self, preds, meta): ) for result, img_width, img_height, img_id, warp_matrix in zip( - result_list, img_widths, img_heights, img_ids, warp_matrixes + result_list, img_widths, img_heights, img_ids, warp_matrixes ): det_result = {} det_bboxes, det_labels = result @@ -416,59 +448,71 @@ def post_process(self, preds, meta): det_results[img_id] = det_result return det_results - def get_bboxes(self, cls_preds, reg_preds, img_metas): + def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer"): """Decode the outputs to bboxes. Args: cls_preds (Tensor): Shape (num_imgs, num_points, num_classes). reg_preds (Tensor): Shape (num_imgs, num_points, 4 * (regmax + 1)). - img_metas (dict): Dict of image info. - + input_img (Tensor): Input image to net. + mode (str): Determines if it uses batches and numpy or tensors for scripting. Returns: results_list (list[tuple]): List of detection bboxes and labels. """ device = cls_preds.device b = cls_preds.shape[0] - input_height, input_width = img_metas["img"].shape[2:] + input_height, input_width = input_img.shape[2:] input_shape = (input_height, input_width) featmap_sizes = [ - (math.ceil(input_height / stride), math.ceil(input_width) / stride) + (int(math.ceil(input_height / stride)), int(math.ceil(input_width / stride))) for stride in self.strides ] # get grid cells of one image - mlvl_center_priors = [ - self.get_single_level_center_priors( - b, - featmap_sizes[i], - stride, - dtype=torch.float32, - device=device, + mlvl_center_priors = [] + for i, stride in enumerate(self.strides): + proiors = self.get_single_level_center_priors( + b, featmap_sizes[i], stride, torch.float32, device ) - for i, stride in enumerate(self.strides) - ] + mlvl_center_priors.append(proiors) + center_priors = torch.cat(mlvl_center_priors, dim=1) dis_preds = self.distribution_project(reg_preds) * center_priors[..., 2, None] bboxes = distance2bbox(center_priors[..., :2], dis_preds, 
max_shape=input_shape) - scores = cls_preds.sigmoid() + cls_preds = cls_preds.sigmoid() + # add a dummy background class at the end of all labels + if torch.jit.is_scripting() or mode == "infer": + # for faster inference and jit scripting in most common cases we do not try to go through for statement + score, bbox = cls_preds[0], bboxes[0] + padding = score.new_zeros(score.shape[0], 1) + score = torch.cat([score, padding], dim=1) + + return multiclass_nms(bbox, score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), max_num=100) + result_list = [] for i in range(b): # add a dummy background class at the end of all labels # same with mmdetection2.0 - score, bbox = scores[i], bboxes[i] + score, bbox = cls_preds[i], bboxes[i] padding = score.new_zeros(score.shape[0], 1) score = torch.cat([score, padding], dim=1) results = multiclass_nms( bbox, score, score_thr=0.05, - nms_cfg=dict(type="nms", iou_threshold=0.6), + nms_cfg=dict(iou_threshold=0.6), max_num=100, ) result_list.append(results) return result_list def get_single_level_center_priors( - self, batch_size, featmap_size, stride, dtype, device + self, + batch_size: int, + featmap_size: Tuple[int, int], + stride: int, + dtype: torch.dtype, + device: torch.device, + flatten: bool = True ): """Generate centers of a single stage feature map. Args: @@ -477,6 +521,7 @@ def get_single_level_center_priors( stride (int): down sample stride of the feature map dtype (obj:`torch.dtype`): data type of the tensors device (obj:`torch.device`): device of the tensors + flatten (bool): flatten the x and y tensors Return: priors (Tensor): center priors of a single level feature map. 
""" @@ -484,27 +529,9 @@ def get_single_level_center_priors( x_range = (torch.arange(w, dtype=dtype, device=device)) * stride y_range = (torch.arange(h, dtype=dtype, device=device)) * stride y, x = torch.meshgrid(y_range, x_range) - y = y.flatten() - x = x.flatten() + if flatten: + y = y.flatten() + x = x.flatten() strides = x.new_full((x.shape[0],), stride) proiors = torch.stack([x, y, strides, strides], dim=-1) return proiors.unsqueeze(0).repeat(batch_size, 1, 1) - - def _forward_onnx(self, feats): - """only used for onnx export""" - outputs = [] - for feat, cls_convs, gfl_cls in zip( - feats, - self.cls_convs, - self.gfl_cls, - ): - for conv in cls_convs: - feat = conv(feat) - output = gfl_cls(feat) - cls_pred, reg_pred = output.split( - [self.num_classes, 4 * (self.reg_max + 1)], dim=1 - ) - cls_pred = cls_pred.sigmoid() - out = torch.cat([cls_pred, reg_pred], dim=1) - outputs.append(out.flatten(start_dim=2)) - return torch.cat(outputs, dim=2).permute(0, 2, 1) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/simple_conv_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/simple_conv_head.py index b3d4d95ff7..5a8e1a737a 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/simple_conv_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/simple_conv_head.py @@ -1,6 +1,9 @@ import torch import torch.nn as nn +from torch import Tensor +from typing import List + from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.init_weights import normal_init from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.scale import Scale @@ -83,11 +86,12 @@ def init_weights(self): normal_init(self.gfl_cls, std=0.01, bias=bias_cls) normal_init(self.gfl_reg, std=0.01) - def 
forward(self, feats): + @torch.jit.unused + def forward(self, feats: List[Tensor]): outputs = [] - for x, scale in zip(feats, self.scales): - cls_feat = x - reg_feat = x + for idx, scale in enumerate(self.scales): + cls_feat = feats[idx] + reg_feat = feats[idx] for cls_conv in self.cls_convs: cls_feat = cls_conv(cls_feat) for reg_conv in self.reg_convs: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/gfocal_loss.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/gfocal_loss.py index af0b4251c2..b089a8d1f4 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/gfocal_loss.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/gfocal_loss.py @@ -101,6 +101,7 @@ def __init__(self, use_sigmoid=True, beta=2.0, reduction="mean", loss_weight=1.0 self.reduction = reduction self.loss_weight = loss_weight + @torch.jit.unused def forward( self, pred, target, weight=None, avg_factor=None, reduction_override=None ): @@ -151,6 +152,7 @@ def __init__(self, reduction="mean", loss_weight=1.0): self.reduction = reduction self.loss_weight = loss_weight + @torch.jit.unused def forward( self, pred, target, weight=None, avg_factor=None, reduction_override=None ): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/iou_loss.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/iou_loss.py index 7ee9d324a3..73809580da 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/iou_loss.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/iou_loss.py @@ -449,6 +449,7 @@ def __init__(self, eps=1e-6, reduction="mean", loss_weight=1.0): self.reduction = reduction self.loss_weight = loss_weight + @torch.jit.unused def forward( self, pred, diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/conv.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/conv.py index 693e6fd0fe..0e55d157b6 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/conv.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/conv.py @@ -115,9 +115,10 @@ def __init__( # Use msra init by default self.init_weights() + @torch.jit.unused @property def norm(self): - if self.norm_name: + if self.norm_name is not None: return getattr(self, self.norm_name) else: return None @@ -131,13 +132,14 @@ def init_weights(self): if self.with_norm: constant_init(self.norm, 1, bias=0) - def forward(self, x, norm=True): + @torch.jit.unused + def forward(self, x, norm: bool = True): for layer in self.order: if layer == "conv": x = self.conv(x) - elif layer == "norm" and norm and self.with_norm: + elif layer == "norm" and (norm is not None) and (self.with_norm is not None) and (self.norm is not None): x = self.norm(x) - elif layer == "act" and self.activation: + elif layer == "act" and (self.activation is not None): x = self.act(x) return x @@ -211,7 +213,6 @@ def __init__( # norm layer is after conv layer _, self.dwnorm = build_norm_layer(norm_cfg, in_channels) _, self.pwnorm = build_norm_layer(norm_cfg, out_channels) - # build activation layer if self.activation: self.act = act_layers(self.activation) @@ -230,12 +231,17 @@ def init_weights(self): constant_init(self.dwnorm, 1, bias=0) constant_init(self.pwnorm, 1, bias=0) - def forward(self, x, norm=True): + def forward(self, x): for layer_name in self.order: - if layer_name != "act": - layer = self.__getattr__(layer_name) - x = layer(x) - elif layer_name == "act" and self.activation: + if layer_name == "depthwise": + x = self.depthwise(x) + elif layer_name == "pointwise": + x = self.pointwise(x) + elif layer_name == "dwnorm" and (self.dwnorm is not None): + x = 
self.dwnorm(x) + elif layer_name == "pwnorm" and (self.pwnorm is not None): + x = self.pwnorm(x) + elif layer_name == "act" and (self.activation is not None): x = self.act(x) return x diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/nms.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/nms.py index e5fa3e216c..abb97a62ca 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/nms.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/nms.py @@ -1,9 +1,15 @@ import torch from torchvision.ops import nms +from typing import Dict def multiclass_nms( - multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1, score_factors=None + multi_bboxes, + multi_scores, + score_thr: float, + nms_cfg: Dict[str, float], + max_num: int = -1, + score_factors: torch.Tensor = torch.empty(0) ): """NMS for multi-class bboxes. @@ -13,7 +19,7 @@ def multiclass_nms( contains scores of the background class, but this will be ignored. score_thr (float): bbox threshold, bboxes with scores lower than it will not be considered. - nms_thr (float): NMS IoU threshold + nms_cfg (dictionary): dictionary of the type and threshold of IoU max_num (int): if there are more than max_num bboxes after NMS, only top max_num will be kept. 
score_factors (Tensor): The factors multiplied to scores before @@ -40,20 +46,19 @@ def multiclass_nms( bboxes = torch.masked_select( bboxes, torch.stack((valid_mask, valid_mask, valid_mask, valid_mask), -1) ).view(-1, 4) - if score_factors is not None: + if not (score_factors.numel() == 0): scores = scores * score_factors[:, None] scores = torch.masked_select(scores, valid_mask) - labels = valid_mask.nonzero(as_tuple=False)[:, 1] + + # for scripting + labels = torch.tensor(0).to(valid_mask.device).long() + torch.nonzero(valid_mask, out=labels) + # labels = valid_mask.nonzero(as_tuple=False)#[:, 1] + labels = labels[:, 1] if bboxes.numel() == 0: bboxes = multi_bboxes.new_zeros((0, 5)) labels = multi_bboxes.new_zeros((0,), dtype=torch.long) - - if torch.onnx.is_in_onnx_export(): - raise RuntimeError( - "[ONNX Error] Can not record NMS " - "as it has not been executed this time" - ) return bboxes, labels dets, keep = batched_nms(bboxes, scores, labels, nms_cfg) @@ -65,7 +70,7 @@ def multiclass_nms( return dets, labels[keep] -def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): +def batched_nms(boxes, scores, idxs, nms_cfg: Dict[str, float], class_agnostic: bool = False): """Performs non-maximum suppression in a batched fashion. Modified from https://github.com/pytorch/vision/blob /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. @@ -94,27 +99,32 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): tuple: kept dets and indice. 
""" nms_cfg_ = nms_cfg.copy() - class_agnostic = nms_cfg_.pop("class_agnostic", class_agnostic) if class_agnostic: boxes_for_nms = boxes else: max_coordinate = boxes.max() offsets = idxs.to(boxes) * (max_coordinate + 1) boxes_for_nms = boxes + offsets[:, None] - nms_cfg_.pop("type", "nms") - split_thr = nms_cfg_.pop("split_thr", 10000) - if len(boxes_for_nms) < split_thr: - keep = nms(boxes_for_nms, scores, **nms_cfg_) + split_thr = nms_cfg_.pop("split_thr", 10000.0) + if boxes_for_nms.shape[0] < split_thr: + keep = nms(boxes_for_nms, scores, nms_cfg_["iou_threshold"]) boxes = boxes[keep] scores = scores[keep] else: total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) for id in torch.unique(idxs): - mask = (idxs == id).nonzero(as_tuple=False).view(-1) - keep = nms(boxes_for_nms[mask], scores[mask], **nms_cfg_) + mask = (idxs == id) + mask_out = torch.tensor(0).to(mask.device).long() + torch.nonzero(mask, out=mask_out) + mask = mask_out.view(-1) + # mask = (idxs == id).nonzero(as_tuple=False).view(-1) + keep = nms(boxes_for_nms[mask], scores[mask], nms_cfg_["iou_threshold"]) total_mask[mask[keep]] = True - keep = total_mask.nonzero(as_tuple=False).view(-1) + keep_out = torch.tensor(0).to(total_mask.device).long() + torch.nonzero(total_mask, out=keep_out) + keep = keep_out.view(-1) + # keep = total_mask.nonzero(as_tuple=False).view(-1) keep = keep[scores[keep].argsort(descending=True)] boxes = boxes[keep] scores = scores[keep] diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/transformer.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/transformer.py index 24e2de458b..c44788d32e 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/transformer.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/transformer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # 
limitations under the License. +import torch.jit import torch.nn as nn from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.activation import act_layers @@ -128,6 +129,7 @@ def __init__( ] self.encoders = nn.Sequential(*encoders) + @torch.jit.unused def forward(self, x, pos_embed): b, _, h, w = x.shape x = self.conv(x) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py index d2939d22e1..46e638433e 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py @@ -25,7 +25,7 @@ from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.batch_process import stack_batch_img from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util\ - import convert_avg_params, gather_results, mkdir + import convert_avg_params, gather_results, mkdir, rank_filter from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.check_point import save_model_state from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.weight_averager import build_weight_averager @@ -69,12 +69,17 @@ def forward(self, x): def predict(self, batch, batch_idx=None, dataloader_idx=None): batch = self._preprocess_batch_input(batch) preds = self.forward(batch["img"]) - results = self.model.head.post_process(preds, batch) + results = self.model.head.post_process(preds, batch, "eval") return results + @rank_filter + def _save_current_model(self, path, logger): + save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, logger=logger) + def save_current_model(self, path, logger): save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, logger=logger) + @torch.jit.unused def training_step(self, batch, batch_idx): batch = 
self._preprocess_batch_input(batch) preds, loss, loss_states = self.model.forward_train(batch) @@ -109,7 +114,7 @@ def training_epoch_end(self, outputs: List[Any]) -> None: # save models in schedule epoches if self.current_epoch % self.cfg.schedule.val_intervals == 0: checkpoint_save_path = os.path.join(self.cfg.save_dir, "checkpoints") - mkdir(checkpoint_save_path) + mkdir(self.local_rank, checkpoint_save_path) print("===" * 10) print("checkpoint_save_path: {} \n epoch: {}".format(checkpoint_save_path, self.current_epoch)) print("===" * 10) @@ -142,7 +147,7 @@ def validation_step(self, batch, batch_idx): if self.logger: self.logger.info(log_msg) - dets = self.model.head.post_process(preds, batch) + dets = self.model.head.post_process(preds, batch, "eval") return dets def validation_epoch_end(self, validation_step_outputs): @@ -170,11 +175,12 @@ def validation_epoch_end(self, validation_step_outputs): if metric > self.save_flag: self.save_flag = metric best_save_path = os.path.join(self.cfg.save_dir, "model_best") - mkdir(best_save_path) + mkdir(self.local_rank, best_save_path) self.trainer.save_checkpoint( os.path.join(best_save_path, "model_best.ckpt") ) - self.save_current_model(os.path.join(best_save_path, "nanodet_model_best.pth"), logger=self.logger) + self._save_current_model(self.local_rank, os.path.join(best_save_path, "nanodet_model_state_best.pth"), + logger=self.logger) txt_path = os.path.join(best_save_path, "eval_results.txt") with open(txt_path, "a") as f: f.write("Epoch:{}\n".format(self.current_epoch + 1)) @@ -187,9 +193,8 @@ def validation_epoch_end(self, validation_step_outputs): if self.logger: self.logger.log_metrics(eval_results, self.current_epoch + 1) else: - # self.logger.info("Skip val on rank {}".format(self.local_rank)) if self.logger: - self.logger.info("Skip val ") + self.logger.info("Skip val on rank {}".format(self.local_rank)) def test_step(self, batch, batch_idx): dets = self.predict(batch, batch_idx) @@ -207,7 +212,8 @@ def 
test_epoch_end(self, test_step_outputs): if all_results: if self.cfg.test_mode == "val": eval_results = self.evaluator.evaluate( - all_results, self.cfg.save_dir) + all_results, self.cfg.save_dir, rank=self.local_rank + ) txt_path = os.path.join(self.cfg.save_dir, "eval_results.txt") with open(txt_path, "a") as f: for k, v in eval_results.items(): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/box_transform.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/box_transform.py index 4b82a8c19f..b954a165db 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/box_transform.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/box_transform.py @@ -1,7 +1,8 @@ import torch +from typing import Tuple -def distance2bbox(points, distance, max_shape=None): +def distance2bbox(points, distance, max_shape: Tuple[int, int]=(0, 0)): """Decode distance prediction to bounding box. Args: @@ -17,7 +18,7 @@ def distance2bbox(points, distance, max_shape=None): y1 = points[..., 1] - distance[..., 1] x2 = points[..., 0] + distance[..., 2] y2 = points[..., 1] + distance[..., 3] - if max_shape is not None: + if max_shape != (0, 0): x1 = x1.clamp(min=0, max=max_shape[1]) y1 = y1.clamp(min=0, max=max_shape[0]) x2 = x2.clamp(min=0, max=max_shape[1]) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py index 2ac516167a..ea31d43d3b 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py @@ -13,7 +13,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
- +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.rank_filter import rank_filter from typing import Any, Dict import torch @@ -59,8 +59,7 @@ def load_model_weight(model, checkpoint, logger=None): return model -# @rank_zero_only -# @rank_filter +@rank_filter def save_model(model, path, epoch, iter, optimizer=None): model_state_dict = ( model.module.state_dict() if hasattr(model, "module") else model.state_dict() @@ -72,8 +71,6 @@ def save_model(model, path, epoch, iter, optimizer=None): torch.save(data, path) -# @rank_zero_only -# @rank_filter def save_model_state(path, model, weight_averager=None, logger=None): if logger: logger.info("Saving model to {}".format(path)) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/logger.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/logger.py index b883d8f336..bbe5f59c47 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/logger.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/logger.py @@ -22,15 +22,15 @@ from pytorch_lightning.utilities import rank_zero_only from pytorch_lightning.utilities.cloud_io import get_filesystem + from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.path import mkdir class Logger: def __init__(self, local_rank, save_dir="./", use_tensorboard=True): - # mkdir(local_rank, save_dir) - mkdir(save_dir) + mkdir(local_rank, save_dir) self.rank = local_rank - fmt = ("[%(name)s] [%(asctime)s] %(levelname)s: %(message)s") + fmt = "[%(name)s] [%(asctime)s] %(levelname)s: %(message)s" logging.basicConfig( level=logging.INFO, filename=os.path.join(save_dir, "logs.txt"), @@ -61,6 +61,10 @@ def log(self, string): if self.rank < 1: logging.info(string) + def info(self, string): + if self.rank < 1: + logging.info(string) + def scalar_summary(self, tag, phase, value, step): if self.rank < 1: self.writer.add_scalars(tag, {phase: value}, 
step) @@ -173,8 +177,7 @@ def _init_logger(self): ch = logging.StreamHandler() ch.setLevel(logging.INFO) # set console formatter - - c_fmt = ("[%(name)s] [%(asctime)s] %(levelname)s: %(message)s") + c_fmt = "[%(name)s] [%(asctime)s] %(levelname)s: %(message)s" console_formatter = logging.Formatter(c_fmt, datefmt="%m-%d %H:%M:%S") ch.setFormatter(console_formatter) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/path.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/path.py index b0887d41a6..6f101ece69 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/path.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/path.py @@ -14,10 +14,10 @@ import os -# from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.rank_filter import rank_filter +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.rank_filter import rank_filter -# @rank_filter +@rank_filter def mkdir(path): if not os.path.exists(path): os.makedirs(path) diff --git a/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini b/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini index a3105f4496..fa1c05ba26 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini +++ b/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini @@ -1,21 +1,18 @@ [runtime] # 'python' key expects a value using the Python requirements file format # https://pip.pypa.io/en/stable/reference/pip_install/#requirements-file-format -python=torch>=1.7 +python=torch>=1.9.0 pytorch-lightning==1.2.3 omegaconf>=2.0.1 torchvision opencv-python pycocotools Cython - matplotlib numpy onnx - onnx-simplifier pyaml tabulate tensorboard torchmetrics - tqdm opendr=opendr-toolkit-engine diff --git a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py 
b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py index be505ee6e3..9588e63d47 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py +++ b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py @@ -44,6 +44,8 @@ from opendr.engine.learners import Learner from urllib.request import urlretrieve +import onnxruntime as ort + _MODEL_NAMES = {"EfficientNet_Lite0_320", "EfficientNet_Lite1_416", "EfficientNet_Lite2_512", "RepVGG_A0_416", "t", "g", "m", "m_416", "m_0.5x", "m_1.5x", "m_1.5x_416", "plus_m_320", "plus_m_1.5x_320", "plus_m_416", "plus_m_1.5x_416", "custom"} @@ -82,6 +84,12 @@ def __init__(self, model_to_use="plus_m_1.5x_416", iters=None, lr=None, batch_si checkpoint_load_iter=self.checkpoint_load_iter, temp_path=self.temp_path, device=self.device) + self.ort_session = None + self.jit_model = None + self.predictor = None + + self.pipeline = None + self.dummy_input = None self.model = build_model(self.cfg.model) self.logger = None self.task = None @@ -176,23 +184,32 @@ def save(self, path=None, verbose=True): :param verbose: whether to print a success message or not, defaults to False :type verbose: bool, optional """ + path = path if path is not None else self.cfg.save_dir model = self.cfg.check_point_name + if verbose and not self.logger: + self.logger = Logger(-1, path, False) + os.makedirs(path, exist_ok=True) + if self.ort_session: + self._save_onnx(path, verbose=verbose) + return True + if self.jit_model: + self._save_jit(path, verbose=verbose) + return True + metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", "has_data": False, "inference_params": {}, "optimized": False, "optimizer_info": {}, "classes": self.classes} - param_filepath = "nanodet_{}.pth".format(model) - metadata["model_paths"].append(param_filepath) + metadata["model_paths"].append("nanodet_{}.pth".format(model)) - logger = self.logger if verbose else None if self.task is None: print("You do not have call a 
task yet, only the state of the loaded or initialized model will be saved") - save_model_state(os.path.join(path, metadata["model_paths"][0]), self.model, None, logger) + save_model_state(os.path.join(path, metadata["model_paths"][0]), self.model, None, self.logger) else: - self.task.save_current_model(os.path.join(path, metadata["model_paths"][0]), logger) + self.task.save_current_model(os.path.join(path, metadata["model_paths"][0]), self.logger) with open(os.path.join(path, "nanodet_{}.json".format(model)), 'w', encoding='utf-8') as f: json.dump(metadata, f, ensure_ascii=False, indent=4) @@ -208,21 +225,33 @@ def load(self, path=None, verbose=True): :param verbose: whether to print a success message or not, defaults to False :type verbose: bool, optional """ + path = path if path is not None else self.cfg.save_dir + + if verbose and not self.logger: + self.logger = Logger(-1, path, False) + model = self.cfg.check_point_name if verbose: - print("Model name:", model, "-->", os.path.join(path, model + ".json")) + print("Model name:", model, "-->", os.path.join(path, "nanodet_" + model + ".json")) with open(os.path.join(path, "nanodet_{}.json".format(model))) as f: metadata = json.load(f) - logger = Logger(-1, path, False) if verbose else None - ckpt = torch.load(os.path.join(path, metadata["model_paths"][0]), map_location=torch.device(self.device)) - self.model = load_model_weight(self.model, ckpt, logger) + if metadata['optimized']: + if metadata['format'] == "onnx": + self._load_onnx(os.path.join(path, metadata["model_paths"][0]), verbose=verbose) + print("Loaded ONNX model.") + else: + self._load_jit(os.path.join(path, metadata["model_paths"][0]), verbose=verbose) + print("Loaded Jit model.") + else: + ckpt = torch.load(os.path.join(path, metadata["model_paths"][0]), map_location=torch.device(self.device)) + self.model = load_model_weight(self.model, ckpt, self.logger) if verbose: - logger.log("Loaded model weight from {}".format(path)) + self.logger.log("Loaded 
model weight from {}".format(path)) pass - def download(self, path=None, mode="pretrained", verbose=False, + def download(self, path=None, mode="pretrained", verbose=True, url=OPENDR_SERVER_URL + "/perception/object_detection_2d/nanodet/"): """ @@ -234,8 +263,6 @@ def download(self, path=None, mode="pretrained", verbose=False, network depending on the network choosed in config file, "images" downloads example inference data, and "test_data" downloads additional image,annotation file and pretrained network for training and testing :type mode: str, optional - :param model: the specific name of the model to download, all pre-configured configs files have their pretrained - model and can be selected, if None self.cfg.check_point_name will be used :param verbose: if True, additional information is printed on stdout :type verbose: bool, optional :param url: URL to file location on FTP server @@ -289,7 +316,7 @@ def download(self, path=None, mode="pretrained", verbose=False, except: print("Pretrain weights for this model are not provided!!! 
\n" - "Only the hole ckeckpoint will be download") + "Only the hole checkpoint will be download") if verbose: print("Making metadata...") @@ -333,11 +360,147 @@ def reset(self): """This method is not used in this implementation.""" return NotImplementedError - def optimize(self): - """This method is not used in this implementation.""" - return NotImplementedError + def _save_onnx(self, onnx_path, img=None, do_constant_folding=False, verbose=True): + if self.jit_model: + print("Warning: A jit model has already initialized, inference will run in onnx mode by default!!!") + if not self.predictor: + self.predictor = Predictor(self.cfg, self.model, device=self.device) + + os.makedirs(onnx_path, exist_ok=True) + export_path = os.path.join(onnx_path, "nanodet_{}.onnx".format(self.cfg.check_point_name)) + + if self.dummy_input is None: + assert img is not None,\ + "When use optimize or _save_jit is called for the first time, it must have and opendr Image input" + if not isinstance(img, Image): + img = Image(img) + img = img.opencv() + if not self.dummy_input: + self.dummy_input = self.predictor.preprocessing(img) + + torch.onnx.export( + self.predictor, + self.dummy_input, + export_path, + verbose=verbose, + keep_initializers_as_inputs=True, + do_constant_folding=do_constant_folding, + opset_version=11, + input_names=['data'], + output_names=['output'], + ) + + metadata = {"model_paths": ["nanodet_{}.onnx".format(self.cfg.check_point_name)], "framework": "pytorch", + "format": "onnx", "has_data": False, "inference_params": {}, "optimized": True, + "optimizer_info": {}, "classes": self.classes} + + with open(os.path.join(onnx_path, "nanodet_{}.json".format(self.cfg.check_point_name)), + 'w', encoding='utf-8') as f: + json.dump(metadata, f, ensure_ascii=False, indent=4) + + if verbose: + print("finished exporting onxx") + self.logger.log("finished exporting onxx") + + try: + import onnxsim + import onnx + if verbose: + print("start simplifying onnx") + self.logger.log("start 
simplifying onnx") + input_data = {"data": self.dummy_input.detach().cpu().numpy()} + model_sim, flag = onnxsim.simplify(export_path, input_data=input_data) + if flag: + onnx.save(model_sim, export_path) + if verbose: + self.logger.log("simplify onnx successfully") + else: + if verbose: + self.logger.log("simplify onnx failed") + except: + print("For compression in optimized models, install the onnxsim dependencies and rerun optimize") - def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123): + def _load_onnx(self, onnx_path, verbose=True): + if verbose: + print("Loading ONNX runtime inference session from {}".format(onnx_path)) + self.logger.log("Loading ONNX runtime inference session from {}".format(onnx_path)) + + self.ort_session = ort.InferenceSession(onnx_path) + + def _save_jit(self, jit_path, img=None, verbose=True): + if self.ort_session: + print("Warning: An onnx model has already initialized, inference will run in onnx mode by default!!!") + if not self.predictor: + self.predictor = Predictor(self.cfg, self.model, device=self.device) + + os.makedirs(jit_path, exist_ok=True) + + if not self.dummy_input: + assert img, \ + "When use optimize or _save_jit is called for the first time, it must have and opendr Image input" + if not isinstance(img, Image): + img = Image(img) + img = img.opencv() + if not self.dummy_input: + self.dummy_input = self.predictor.preprocessing(img) + + with torch.no_grad(): + export_path = os.path.join(jit_path, "nanodet_{}.pth".format(self.cfg.check_point_name)) + self.predictor.trace_model(self.dummy_input) + model_traced = torch.jit.script(self.predictor) + + metadata = {"model_paths": ["nanodet_{}.pth".format(self.cfg.check_point_name)], "framework": "pytorch", + "format": "pth", "has_data": False, "inference_params": {}, "optimized": True, + "optimizer_info": {}, "classes": self.classes} + model_traced.save(export_path) + + with open(os.path.join(jit_path, 
"nanodet_{}.json".format(self.cfg.check_point_name)), + 'w', encoding='utf-8') as f: + json.dump(metadata, f, ensure_ascii=False, indent=4) + + if verbose: + print("Finished export to TorchScript") + self.logger.log("Finished export to TorchScript") + + def _load_jit(self, jit_path, verbose=True): + if verbose: + print("Loading Jit model from {}".format(jit_path)) + self.logger.log("Loading Jit model from {}".format(jit_path)) + + self.jit_model = torch.jit.load(jit_path, map_location=self.device) + + def optimize(self, export_path, initial_img=None, verbose=True, optimization="jit"): + """ + Method for optimizing the model with onnx or jit. + :param export_path: the path to the folder that the model is or will be after optimization + :type export_path: str + :param initial_img: if optimize is called for the first time is needed a dummy input of opendr Image + :type initial_img: Image + :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, + defaults to True + :type verbose: bool, optional + :param optimization: the kind of optimization you want to perform [jit, onnx] + :type optimization: str + """ + if verbose and not self.logger: + self.logger = Logger(-1, self.cfg.save_dir, False) + if not os.path.exists(export_path): + if optimization == "jit": + self._save_jit(export_path, initial_img, verbose=verbose) + elif optimization == "onnx": + self._save_onnx(export_path, initial_img, verbose=verbose) + else: + assert NotImplementedError + with open(os.path.join(export_path, "nanodet_{}.json".format(self.cfg.check_point_name))) as f: + metadata = json.load(f) + if optimization == "jit": + self._load_jit(os.path.join(export_path, metadata["model_paths"][0]), verbose) + elif optimization == "onnx": + self._load_onnx(os.path.join(export_path, metadata["model_paths"][0]), verbose) + else: + assert NotImplementedError + + def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123, local_rank=1): """ This 
method is used to train the detector on the COCO dataset. Validation is performed in a val_dataset if provided, else validation is performed in training dataset. @@ -354,9 +517,11 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 :type verbose: bool :param seed: seed for reproducibility :type seed: int + :param local_rank: for distribution learning + :type local_rank: int """ - mkdir(self.cfg.save_dir) + mkdir(local_rank, self.cfg.save_dir) if verbose: self.logger = NanoDetLightningLogger(self.temp_path + "/" + logging_path) @@ -405,10 +570,9 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 self.logger.info("Creating task...") self.task = TrainingTask(self.cfg, self.model, evaluator) - if self.device == "cpu": - gpu_ids = None - accelerator = None - elif self.device == "cuda": + gpu_ids = None + accelerator = None + if self.device == "cuda": gpu_ids = self.cfg.device.gpu_ids accelerator = None if len(gpu_ids) <= 1 else "ddp" @@ -421,7 +585,7 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 log_every_n_steps=self.cfg.log.interval, num_sanity_val_steps=0, resume_from_checkpoint=model_resume_path, - callbacks=[ProgressBar(refresh_rate=0)], # disable tqdm bar + callbacks=[ProgressBar(refresh_rate=0)], logger=self.logger, benchmark=True, gradient_clip_val=self.cfg.get("grad_clip", 0.0), @@ -429,7 +593,7 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 trainer.fit(self.task, train_dataloader, val_dataloader) - def eval(self, dataset, verbose=True): + def eval(self, dataset, verbose=True, local_rank=1): """ This method performs evaluation on a given dataset and returns a dictionary with the evaluation results. 
:param dataset: dataset object, to perform evaluation on @@ -437,18 +601,20 @@ def eval(self, dataset, verbose=True): :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, defaults to True :type verbose: bool + :param local_rank: for distribution learning + :type local_rank: int """ timestr = datetime.datetime.now().__format__("%Y_%m_%d_%H:%M:%S") save_dir = os.path.join(self.cfg.save_dir, timestr) - mkdir(save_dir) + mkdir(local_rank, save_dir) if verbose: self.logger = NanoDetLightningLogger(save_dir) self.cfg.update({"test_mode": "val"}) - if verbose: + if self.logger: self.logger.info("Setting up data...") val_dataset = build_dataset(self.cfg.data.val, dataset, self.cfg.class_names, "val") @@ -464,14 +630,13 @@ def eval(self, dataset, verbose=True): ) evaluator = build_evaluator(self.cfg.evaluator, val_dataset) - if verbose: + if self.logger: self.logger.info("Creating task...") self.task = TrainingTask(self.cfg, self.model, evaluator) - if self.device == "cpu": - gpu_ids = None - accelerator = None - elif self.device == "cuda": + gpu_ids = None + accelerator = None + if self.device == "cuda": gpu_ids = self.cfg.device.gpu_ids accelerator = None if len(gpu_ids) <= 1 else "ddp" @@ -483,7 +648,7 @@ def eval(self, dataset, verbose=True): num_sanity_val_steps=0, logger=self.logger, ) - if verbose: + if self.logger: self.logger.info("Starting testing...") return trainer.test(self.task, val_dataloader, verbose=verbose) @@ -491,7 +656,7 @@ def infer(self, input, threshold=0.35, verbose=True): """ Performs inference :param input: input can be an Image type image to perform inference - :type input: str, optional + :type input: Image :param threshold: confidence threshold :type threshold: float, optional :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, @@ -500,18 +665,27 @@ def infer(self, input, threshold=0.35, verbose=True): :return: list of bounding boxes of last image of 
input or last frame of the video :rtype: BoundingBoxList """ + if verbose and not self.logger: + self.logger = Logger(-1, "./last_infer", False) + if not self.predictor: + self.predictor = Predictor(self.cfg, self.model, device=self.device) - if verbose: - self.logger = Logger(0, use_tensorboard=False) - predictor = Predictor(self.cfg, self.model, device=self.device) if not isinstance(input, Image): input = Image(input) _input = input.opencv() - meta, res = predictor.inference(_input, verbose) - bounding_boxes = BoundingBoxList([]) - for label in res[0]: - for box in res[0][label]: + (_input, _height, _width, _warp_matrix) = self.predictor.preprocessing(_input) + if self.ort_session: + res = self.ort_session.run(['output'], {'data': _input.cpu().detach().numpy()}) + res = self.predictor.postprocessing(torch.from_numpy(res[0]), _input, _height, _width, _warp_matrix) + elif self.jit_model: + res = self.jit_model(_input, _height, _width, _warp_matrix).cpu() + else: + res = self.predictor(_input, _height, _width, _warp_matrix) + + bounding_boxes = [] + for label in range(len(res)): + for box in res[label]: score = box[-1] if score > threshold: bbox = BoundingBox(left=box[0], top=box[1], @@ -519,7 +693,8 @@ def infer(self, input, threshold=0.35, verbose=True): height=box[3] - box[1], name=label, score=score) - bounding_boxes.data.append(bbox) + bounding_boxes.append(bbox) + bounding_boxes = BoundingBoxList(bounding_boxes) bounding_boxes.data.sort(key=lambda v: v.confidence) return bounding_boxes diff --git a/tests/Makefile b/tests/Makefile index b5e23d8a6c..9abc7c5671 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -46,6 +46,11 @@ download: $(MV) opendrdata.csd.auth.gr/perception/face_recognition/test_data/images $(DATA_DIR)/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/optimized_model/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/optimized_model $(DATA_DIR)/optimized_model; \ + $(MKDIR_P) $(DATA_DIR)/nanodet; \ + 
$(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/nanodet/database; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/nanodet/optimized_model; \ $(RM) -r opendrdata.csd.auth.gr; \ fi; @@ -57,6 +62,10 @@ $(BUILD_DIR)/test_face_recognition: @+echo "Building face recognition test..." $(CC) $(CFLAGS) -o $(BUILD_DIR)/test_face_recognition sources/c_api/test_face_recognition.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) +$(BUILD_DIR)/test_nanodet: + @+echo "Building Nanodet object detection test..." + $(CC) $(CFLAGS) -o $(BUILD_DIR)/test_nanodet sources/c_api/test_nanodet.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + FMP_INC = -I$(OPENDR_HOME)/projects/python/perception/slam/full_map_posterior_gmapping/src/openslam_gmapping/include $(BUILD_DIR)/test_fmp_gmapping: @+echo "Building Full-Map-Posterior GMapping test..." @@ -66,6 +75,7 @@ tests: $(BUILD_DIR)/test_opendr_utils $(BUILD_DIR)/test_face_recognition $(BUILD runtests: download tests @+$(LD_RUN) $(BUILD_DIR)/test_opendr_utils 2>/dev/null + @+$(LD_RUN) $(BUILD_DIR)/test_nanodet 2>/dev/null @+$(LD_RUN) $(BUILD_DIR)/test_face_recognition 2>/dev/null @+$(LD_RUN) $(BUILD_DIR)/test_fmp_gmapping @@ -73,6 +83,7 @@ clean: @+echo "Cleaning C tests binaries and temporary files..." @+$(RM) $(BUILD_DIR)/test_opendr_utils @+$(RM) $(BUILD_DIR)/test_face_recognition + @+$(RM) $(BUILD_DIR)/test_nanodet @+$(RM) $(BUILD_DIR)/test_fmp_gmapping @+$(RM) -rf $(DATA_DIR) @+echo "Done!" 
diff --git a/tests/sources/c_api/test_nanodet.c b/tests/sources/c_api/test_nanodet.c new file mode 100644 index 0000000000..b9526a25eb --- /dev/null +++ b/tests/sources/c_api/test_nanodet.c @@ -0,0 +1,86 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include "nanodet_c.h" +#include "opendr_utils.h" + +START_TEST(model_creation_test) { + // Create a nanodet libtorch model + nanodet_model_t model; + // Load a pretrained model + load_nanodet_model("data/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + + ck_assert_msg(model.net != 0, "net is NULL"); + + // Release the resources + free_nanodet_model(&model); + + // Check if memory steel exist + ck_assert_msg(model.net, "net is NULL"); +} +END_TEST + +START_TEST(inference_creation_test) { + // Create a nanodet model + nanodet_model_t model; + + // Load a pretrained model + load_nanodet_model("data/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + + // Load an image and performance inference + opendr_image_t image; + load_image("data/nanodet/database/000000000036.jpg", &image); + opendr_detection_target_list_t res = infer_nanodet(&image, &model); + free_image(&image); + + ck_assert(res.size != 0); + + // Free the model resources + free_detections(&res); + free_nanodet_model(&model); +} +END_TEST + +Suite *nanodet_suite(void) { + Suite *s; + TCase *tc_core; + + s = 
suite_create("Nanodet"); + tc_core = tcase_create("Core"); + + tcase_add_test(tc_core, model_creation_test); + tcase_add_test(tc_core, inference_creation_test); + suite_add_tcase(s, tc_core); + + return s; +} + +int main() { + int no_failed = 0; + Suite *s; + SRunner *runner; + + s = nanodet_suite(); + runner = srunner_create(s); + + srunner_run_all(runner, CK_NORMAL); + no_failed = srunner_ntests_failed(runner); + srunner_free(runner); + return (no_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py b/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py index 583404d933..48690ceda8 100644 --- a/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py +++ b/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py @@ -17,13 +17,15 @@ import gc import shutil import os +import warnings +from torch.jit import TracerWarning import numpy as np from opendr.perception.object_detection_2d import NanodetLearner from opendr.engine.datasets import ExternalDataset device = os.getenv('TEST_DEVICE') if os.getenv('TEST_DEVICE') else 'cpu' -_DEFAULT_MODEL = "plus_m_416" +_DEFAULT_MODEL = "m" def rmfile(path): @@ -47,8 +49,7 @@ def setUpClass(cls): print("\n\n**********************************\nTEST Nanodet Learner\n" "**********************************") - cls.temp_dir = os.path.join(".", "tests", "sources", "tools", "perception", "object_detection_2d", - "nanodet", "nanodet_temp") + cls.temp_dir = os.path.join(".", "nanodet_temp") cls.detector = NanodetLearner(model_to_use=_DEFAULT_MODEL, device=device, temp_path=cls.temp_dir, batch_size=1, iters=1, checkpoint_after_iter=2, lr=1e-4) # Download all required files for testing @@ -111,6 +112,8 @@ def test_infer(self): def test_save_load(self): print('Starting save/load test for Nanodet...') + self.detector.ort_session = None + self.detector.jit_model = None 
self.detector.save(path=os.path.join(self.temp_dir, "test_model"), verbose=False) starting_param_1 = list(self.detector._model.parameters())[0].detach().clone().to(device) self.detector.model = None @@ -120,12 +123,42 @@ def test_save_load(self): new_param = list(detector2._model.parameters())[0].detach().clone().to(device) self.assertTrue(starting_param_1.allclose(new_param)) + del starting_param_1, new_param # Cleanup rmfile(os.path.join(self.temp_dir, "test_model", "nanodet_{}.json".format(_DEFAULT_MODEL))) rmfile(os.path.join(self.temp_dir, "test_model", "nanodet_{}.pth".format(_DEFAULT_MODEL))) rmdir(os.path.join(self.temp_dir, "test_model")) print('Finished save/load test for Nanodet...') + def test_optimize(self): + # Tracing will issue TracerWarnings, but these can be ignored safely + # because we use this function to create tensors out of constant + # variables that are the same every time we call this function. + warnings.simplefilter("ignore", TracerWarning) + warnings.simplefilter("ignore", RuntimeWarning) + + self.detector.ort_session = None + self.detector.jit_model = None + + img = cv2.imread(os.path.join(self.temp_dir, "000000000036.jpg")) + + self.detector.optimize(os.path.join(self.temp_dir, "onnx"), initial_img=img, verbose=False, optimization="onnx") + self.assertIsNotNone(self.detector.ort_session) + + self.detector.optimize(os.path.join(self.temp_dir, "jit"), initial_img=img, verbose=False, optimization="jit") + self.assertIsNotNone(self.detector.jit_model) + + # Cleanup + rmfile(os.path.join(self.temp_dir, "onnx", "nanodet_{}.onnx".format(_DEFAULT_MODEL))) + rmfile(os.path.join(self.temp_dir, "onnx", "nanodet_{}.json".format(_DEFAULT_MODEL))) + rmfile(os.path.join(self.temp_dir, "jit", "nanodet_{}.pth".format(_DEFAULT_MODEL))) + rmfile(os.path.join(self.temp_dir, "jit", "nanodet_{}.json".format(_DEFAULT_MODEL))) + rmdir(os.path.join(self.temp_dir, "onnx")) + rmdir(os.path.join(self.temp_dir, "jit")) + + warnings.simplefilter("default", 
TracerWarning) + warnings.simplefilter("default", RuntimeWarning) + if __name__ == "__main__": unittest.main() From a402954ee37d9db2ff0ba1e4b340b2b7fd19f18a Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 22 Nov 2022 18:47:14 +0200 Subject: [PATCH 02/87] Implementation of libtorch and torch vision installation as part of tool installation. --- Makefile | 1 + dependencies/install_torch_c_api.sh | 53 +++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) create mode 100755 dependencies/install_torch_c_api.sh diff --git a/Makefile b/Makefile index 7c4ca70dc7..f49924f862 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,7 @@ install_compilation_dependencies: @+echo "#"; echo "# * Install Compilation Dependencies *"; echo "#" @+cd dependencies; ./install.sh compilation @+cd dependencies; ./install_onnx.sh + @+cd dependencies; ./install_torch.sh @+make --silent -C src/opendr/control/mobile_manipulation $(TARGET) OPENDR_HOME="$(OPENDR_HOME)"; @+make --silent -C src/opendr/control/single_demo_grasp $(TARGET) OPENDR_HOME="$(OPENDR_HOME)"; diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh new file mode 100755 index 0000000000..4db566432b --- /dev/null +++ b/dependencies/install_torch_c_api.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +OPENDR_DEVICE="gpu" + +if [ ! -f /usr/local/lib/libtorchvision.so ]; then + CUDA_VERSION="116" + TORCH_VERSION="1.13.0" + TORCH_DIRECTORY="/usr/local/libtorch" + + VISION_VERSION="0.14.0" + if [[ "$OPENDR_DEVICE" == "gpu" ]] + then + echo "Downloading and installing libtorch and torchvsion (gpu support) ..." + GPU="on" + DEVICE="cu"${CUDA_VERSION} + CUDA_COMPILER="/usr/local/cuda/bin/nvcc" + else + echo "Downloading and installing libtorch and torchvsion (cpu-only) ..." 
+ GPU="off" + DEVICE="cpu" + fi + + # TORCH INSTALLATION + wget https://download.pytorch.org/libtorch/${DEVICE}/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2B${DEVICE}.zip --quiet + unzip -qq libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+${DEVICE}.zip + cd libtorch + + sudo mkdir -p ${TORCH_DIRECTORY} + sudo cp -r ./* ${TORCH_DIRECTORY} + cd .. + + # TORCH VISION INSTALLATION + wget https://github.com/pytorch/vision/archive/refs/tags/v${VISION_VERSION}.tar.gz --quiet + tar zxf v${VISION_VERSION}.tar.gz + cd vision-${VISION_VERSION} + sudo mkdir -p build + cd build + sudo cmake .. -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_PREFIX_PATH=${TORCH_DIRECTORY} -DWITH_CUDA=${GPU} + sudo make + sudo make install + cd ../.. + + # CLEAN + sudo rm -rf libtorch + sudo rm -rf libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+${DEVICE}.zip + + sudo rm -rf vision-${VISION_VERSION} + sudo rm -rf v${VISION_VERSION}.tar.gz + + sudo ldconfig + +fi + From cbf2a9ab5bf0145f5ac333a463ead8f3eca20b14 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 22 Nov 2022 22:39:52 +0200 Subject: [PATCH 03/87] add licence and file name corection --- Makefile | 2 +- .../c_api/samples/nanodet/nanodet_jit_demo.c | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index f49924f862..9559547f6b 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ install_compilation_dependencies: @+echo "#"; echo "# * Install Compilation Dependencies *"; echo "#" @+cd dependencies; ./install.sh compilation @+cd dependencies; ./install_onnx.sh - @+cd dependencies; ./install_torch.sh + @+cd dependencies; ./install_torch_c_api.sh @+make --silent -C src/opendr/control/mobile_manipulation $(TARGET) OPENDR_HOME="$(OPENDR_HOME)"; @+make --silent -C src/opendr/control/single_demo_grasp $(TARGET) OPENDR_HOME="$(OPENDR_HOME)"; diff --git a/projects/c_api/samples/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/nanodet/nanodet_jit_demo.c index 
9b733bf4ff..d5bca5e198 100644 --- a/projects/c_api/samples/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/nanodet/nanodet_jit_demo.c @@ -1,3 +1,19 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + #include #include #include "nanodet_c.h" From cce417f1a104a1457d7260ae688967e27b4ecab2 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Wed, 23 Nov 2022 12:36:19 +0200 Subject: [PATCH 04/87] style, inconclusive cppcheck fixes. --- src/c_api/nanodet_libtorch.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/c_api/nanodet_libtorch.cpp b/src/c_api/nanodet_libtorch.cpp index 9f8cdee17e..17dd393025 100644 --- a/src/c_api/nanodet_libtorch.cpp +++ b/src/c_api/nanodet_libtorch.cpp @@ -239,7 +239,6 @@ opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_mode if (!opencv_image) { std::cerr << "Cannot load image for inference." 
<< std::endl; - load_detections(&detections, dets.data(), (int)dets.size()); return detections; } From a1a4ac23f260c6653e251908d443c1c4f4fa0114 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 24 Nov 2022 13:51:50 +0200 Subject: [PATCH 05/87] Fixes for C API initilization errors --- include/opendr_utils.h | 6 +++++ src/c_api/nanodet_libtorch.cpp | 45 ++++++++++++++++++++-------------- src/c_api/opendr_utils.cpp | 14 +++++++++++ 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/include/opendr_utils.h b/include/opendr_utils.h index 961b4a6cb4..f4da3f3f30 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -37,6 +37,12 @@ void load_image(const char *path, opendr_image_t *image); */ void free_image(opendr_image_t *image); +/** + * Initialize an empty detection list to be used in C API + * @param detections OpenDR detection_target_list structure to be initialized + */ +void initialize_detections(opendr_detection_target_list_t *detections); + /** * Loads an OpenDR detection target list to be used in C api * @param detections OpenDR detection_target_list structure to be loaded diff --git a/src/c_api/nanodet_libtorch.cpp b/src/c_api/nanodet_libtorch.cpp index 17dd393025..ef7b57ddd2 100644 --- a/src/c_api/nanodet_libtorch.cpp +++ b/src/c_api/nanodet_libtorch.cpp @@ -239,6 +239,8 @@ opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_mode if (!opencv_image) { std::cerr << "Cannot load image for inference." << std::endl; + // Initialize an empty detection to return. 
+ initialize_detections(&detections); return detections; } @@ -275,7 +277,11 @@ opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_mode } // Put vector detection as C pointer and size - load_detections(&detections, dets.data(), (int)dets.size()); + if ((int)dets.size() > 0) + load_detections(&detections, dets.data(), (int)dets.size()); + else + initialize_detections(&detections); + return detections; } @@ -306,27 +312,30 @@ void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_det cv::Mat image = (*opencv_image).clone(); for (size_t i = 0; i < detections->size; i++) { - const opendr_detection_target bbox = (detections->starting_pointer)[i]; - cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); - cv::rectangle( - image, cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), color); - - char text[256]; float score = bbox.score > 1 ? 1 : bbox.score; - sprintf(text, "%s %.1f%%", (classNames)[bbox.name].c_str(), score * 100); + if (score > model->scoreThreshold) { + const opendr_detection_target bbox = (detections->starting_pointer)[i]; + cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); + cv::rectangle( + image, cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), color); - int baseLine = 0; - cv::Size labelSize = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + char text[256]; - int x = (int)bbox.left; - int y = (int)bbox.top; - if (y < 0) - y = 0; - if (x + labelSize.width > image.cols) - x = image.cols - labelSize.width; + sprintf(text, "%s %.1f%%", (classNames)[bbox.name].c_str(), score * 100); - cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), color, -1); - cv::putText(image, text, cv::Point(x, y + labelSize.height), 
cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); + int baseLine = 0; + cv::Size labelSize = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + + int x = (int)bbox.left; + int y = (int)bbox.top; + if (y < 0) + y = 0; + if (x + labelSize.width > image.cols) + x = image.cols - labelSize.width; + + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), color, -1); + cv::putText(image, text, cv::Point(x, y + labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); + } } cv::imshow("image", image); diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index a44ed1da39..35d5b68143 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -36,6 +36,20 @@ void free_image(opendr_image_t *image) { } } +void initialize_detections(opendr_detection_target_list_t *detections) { + std::vector dets; + opendr_detection_target_t det; + det.name = -1; + det.left = 0.0; + det.top = 0.0; + det.width = 0.0; + det.height = 0.0; + det.score = 0.0; + dets.push_back(det); + + load_detections(&detections, dets.data(), (int)dets.size()); +} + void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize) { detections->size = vectorSize; int sizeOfOutput = (vectorSize) * sizeof(opendr_detection_target_t); From b15de60fa99be40b576e64a3b114e358fdcb6ed0 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 24 Nov 2022 13:52:22 +0200 Subject: [PATCH 06/87] Deleting no used code --- .../object_detection_2d/nanodet/export_torchscript.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py index 1bfa2c1482..ce559cc748 100644 --- a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py +++ 
b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py @@ -25,7 +25,6 @@ args = parser.parse_args() nanodet = NanodetLearner(model_to_use=args.model, device=args.device) - nanodet.download("./predefined_examples", mode="pretrained") nanodet.load(f"./predefined_examples/nanodet_{args.model}", verbose=True) @@ -33,7 +32,7 @@ # First read an openDR image from your dataset and run the optimizer: img = Image.open("./predefined_examples/000000000036.jpg") nanodet.optimize(f"./jit/nanodet_{args.model}", img, optimization="jit") - img = Image.open("./predefined_examples/000000000036.jpg") + boxes = nanodet.infer(input=img) draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) From 42815d3c1cef4f63a72b96264ff140d90436a3bf Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 24 Nov 2022 15:03:09 +0200 Subject: [PATCH 07/87] macos cppcheck -> C style fixes --- src/c_api/nanodet_libtorch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/nanodet_libtorch.cpp b/src/c_api/nanodet_libtorch.cpp index ef7b57ddd2..c71b16fc9c 100644 --- a/src/c_api/nanodet_libtorch.cpp +++ b/src/c_api/nanodet_libtorch.cpp @@ -227,7 +227,7 @@ void load_nanodet_model(char *model_path, char *device, int height, int width, f NanoDet *detector = new NanoDet(net, meanTensor, stdValues, torch_device, labels); - model->net = (void *)detector; + model->net = static_castdetector; } opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model) { From 08ed2c1db38f562da858b6f66e2f1cf36285259b Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 24 Nov 2022 15:43:06 +0200 Subject: [PATCH 08/87] macos cppcheck -c style fixes --- src/c_api/nanodet_libtorch.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/nanodet_libtorch.cpp b/src/c_api/nanodet_libtorch.cpp index c71b16fc9c..7fdf68ba58 100644 --- a/src/c_api/nanodet_libtorch.cpp +++ b/src/c_api/nanodet_libtorch.cpp @@ 
-227,7 +227,7 @@ void load_nanodet_model(char *model_path, char *device, int height, int width, f NanoDet *detector = new NanoDet(net, meanTensor, stdValues, torch_device, labels); - model->net = static_castdetector; + model->net = static_cast detector; } opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model) { From 75deceba10ae57192e58e2b6a860395d867a5919 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 24 Nov 2022 16:06:32 +0200 Subject: [PATCH 09/87] c-style pointer delceration fixes --- src/c_api/opendr_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index 35d5b68143..5bd736f765 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -53,7 +53,7 @@ void initialize_detections(opendr_detection_target_list_t *detections) { void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize) { detections->size = vectorSize; int sizeOfOutput = (vectorSize) * sizeof(opendr_detection_target_t); - detections->starting_pointer = (opendr_detection_target_t *)malloc(sizeOfOutput); + detections->starting_pointer = static_castmalloc(sizeOfOutput); std::memcpy(detections->starting_pointer, vectorDataPtr, sizeOfOutput); } From 0d4b1be9f2d4d1740ce912ad5b28118a7782b6eb Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 24 Nov 2022 16:37:05 +0200 Subject: [PATCH 10/87] c-style pointer delceration fixes --- src/c_api/opendr_utils.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index 5bd736f765..6a88206a46 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -53,7 +53,7 @@ void initialize_detections(opendr_detection_target_list_t *detections) { void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize) { 
detections->size = vectorSize; int sizeOfOutput = (vectorSize) * sizeof(opendr_detection_target_t); - detections->starting_pointer = static_castmalloc(sizeOfOutput); + detections->starting_pointer = static_cast malloc(sizeOfOutput); std::memcpy(detections->starting_pointer, vectorDataPtr, sizeOfOutput); } From 477f842e783cd24a58e1b100aa9308704f7e3e4a Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 24 Nov 2022 17:32:20 +0200 Subject: [PATCH 11/87] cpp casting casting fixes --- src/c_api/nanodet_libtorch.cpp | 4 ++-- src/c_api/opendr_utils.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/c_api/nanodet_libtorch.cpp b/src/c_api/nanodet_libtorch.cpp index 7fdf68ba58..51f2cbefad 100644 --- a/src/c_api/nanodet_libtorch.cpp +++ b/src/c_api/nanodet_libtorch.cpp @@ -227,7 +227,7 @@ void load_nanodet_model(char *model_path, char *device, int height, int width, f NanoDet *detector = new NanoDet(net, meanTensor, stdValues, torch_device, labels); - model->net = static_cast detector; + model->net = static_cast(detector); } opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model) { @@ -312,9 +312,9 @@ void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_det cv::Mat image = (*opencv_image).clone(); for (size_t i = 0; i < detections->size; i++) { + const opendr_detection_target bbox = (detections->starting_pointer)[i]; float score = bbox.score > 1 ? 
1 : bbox.score; if (score > model->scoreThreshold) { - const opendr_detection_target bbox = (detections->starting_pointer)[i]; cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); cv::rectangle( image, cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), color); diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index 6a88206a46..8eddbed500 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -47,13 +47,13 @@ void initialize_detections(opendr_detection_target_list_t *detections) { det.score = 0.0; dets.push_back(det); - load_detections(&detections, dets.data(), (int)dets.size()); + load_detections(detections, dets.data(), (int)dets.size()); } void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize) { detections->size = vectorSize; int sizeOfOutput = (vectorSize) * sizeof(opendr_detection_target_t); - detections->starting_pointer = static_cast malloc(sizeOfOutput); + detections->starting_pointer = static_cast(malloc(sizeOfOutput)); std::memcpy(detections->starting_pointer, vectorDataPtr, sizeOfOutput); } From f1fed96e0e9d4545932e04cbce6140ccacb5ccae Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 24 Nov 2022 18:45:30 +0200 Subject: [PATCH 12/87] bug fixes --- .../object_detection_2d/nanodet/eval_demo.py | 10 +++++----- .../algorithm/nanodet/evaluator/coco_detection.py | 7 ++++++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py index ac5de4e8d5..dba2a9d844 100644 --- a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py @@ -25,11 +25,11 @@ parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, 
default="cuda", choices=["cuda", "cpu"]) args = parser.parse_args() + data_root = "/home/manos/data/coco2017" - val_dataset = ExternalDataset(args.data_root, 'coco') + val_dataset = ExternalDataset(data_root, 'coco') nanodet = NanodetLearner(model_to_use=args.model, device=args.device) - nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) - nanodet.eval(val_dataset) - nanodet.save("./temp") + nanodet.download("./predefined_examples", mode="pretrained", verbose=False) + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=False) + nanodet.eval(val_dataset, verbose=False) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py index 764da3fa01..797a5e7cbb 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py @@ -26,6 +26,7 @@ from pycocotools.cocoeval import COCOeval from tabulate import tabulate +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util import mkdir logger = logging.getLogger("NanoDet") @@ -91,7 +92,11 @@ def evaluate(self, results, save_dir, rank=-1): json_path = os.path.join(save_dir, "results{}.json".format(rank)) else: json_path = os.path.join(save_dir, "results.json") - json.dump(results_json, open(json_path, "w")) + + mkdir(rank, save_dir) + with open(json_path, 'w') as f: + json.dump(results_json, f) + coco_dets = self.coco_api.loadRes(json_path) coco_eval = COCOeval( copy.deepcopy(self.coco_api), copy.deepcopy(coco_dets), "bbox" From e26e265c3db438ae305a7c05455db08f60c5df5d Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:32:46 +0200 
Subject: [PATCH 13/87] Update projects/python/perception/object_detection_2d/nanodet/export_torchscript.py Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- .../object_detection_2d/nanodet/export_torchscript.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py index ce559cc748..27a2990187 100644 --- a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py +++ b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py @@ -13,9 +13,8 @@ # limitations under the License. import argparse -from opendr.perception.object_detection_2d import NanodetLearner from opendr.engine.data import Image -from opendr.perception.object_detection_2d import draw_bounding_boxes +from opendr.perception.object_detection_2d import NanodetLearner, draw_bounding_boxes if __name__ == '__main__': From 8fbf4360da27ddfeacf062b8c9bfaedeb1fc66bc Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:34:08 +0200 Subject: [PATCH 14/87] Update install_torch_c_api.sh --- dependencies/install_torch_c_api.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh index 4db566432b..4dddfdc061 100755 --- a/dependencies/install_torch_c_api.sh +++ b/dependencies/install_torch_c_api.sh @@ -1,7 +1,5 @@ #!/bin/bash -OPENDR_DEVICE="gpu" - if [ ! 
-f /usr/local/lib/libtorchvision.so ]; then CUDA_VERSION="116" TORCH_VERSION="1.13.0" From 882d0a1f4d2aac342d8a3d81be8630c251e55aa3 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:36:10 +0200 Subject: [PATCH 15/87] Update dependencies/install_torch_c_api.sh Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- dependencies/install_torch_c_api.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh index 4dddfdc061..b861d0d6bc 100755 --- a/dependencies/install_torch_c_api.sh +++ b/dependencies/install_torch_c_api.sh @@ -8,7 +8,7 @@ if [ ! -f /usr/local/lib/libtorchvision.so ]; then VISION_VERSION="0.14.0" if [[ "$OPENDR_DEVICE" == "gpu" ]] then - echo "Downloading and installing libtorch and torchvsion (gpu support) ..." + echo "Downloading and installing libtorch and torchvision (gpu support) ..." GPU="on" DEVICE="cu"${CUDA_VERSION} CUDA_COMPILER="/usr/local/cuda/bin/nvcc" From e3e1eea6c67d0a85b5cb3656f3a880b4ac9f6688 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:36:51 +0200 Subject: [PATCH 16/87] Update include/nanodet_c.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- include/nanodet_c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nanodet_c.h b/include/nanodet_c.h index 614112901f..71d56aba12 100644 --- a/include/nanodet_c.h +++ b/include/nanodet_c.h @@ -49,7 +49,7 @@ typedef struct nanodet_model nanodet_model_t; * @param height the height of model input * @param width the width of model input * @param scoreThreshold a threshold for score to be infered - * @param model the loaded model + * @param model the model to be loaded */ void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); From 
83bdfbdd187061bc1661b55426771d2848ffdf7a Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:37:24 +0200 Subject: [PATCH 17/87] Update include/nanodet_c.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- include/nanodet_c.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/nanodet_c.h b/include/nanodet_c.h index 71d56aba12..b3d88e2848 100644 --- a/include/nanodet_c.h +++ b/include/nanodet_c.h @@ -54,10 +54,10 @@ typedef struct nanodet_model nanodet_model_t; void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); /** - * This function perform inference using a nanodet object detection model and an input image + * This function performs inference using a nanodet object detection model and an input image * @param model nanodet model to be used for inference * @param image OpenDR image - * @return vecter of OpenDR bounding box target containing the bounding boxes of the detected objectes + * @return vecter of OpenDR bounding box target containing the bounding boxes of the detected objects */ opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model); From c7376e2dad842b9663f4c183ae7b8904c2308342 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:37:42 +0200 Subject: [PATCH 18/87] Update include/nanodet_c.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- include/nanodet_c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nanodet_c.h b/include/nanodet_c.h index b3d88e2848..26f4402c90 100644 --- a/include/nanodet_c.h +++ b/include/nanodet_c.h @@ -68,7 +68,7 @@ opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_mode void free_nanodet_model(nanodet_model_t *model); /** - * draw the bounding boxes from 
detections in given image + * draw the bounding boxes from detections in the given image * @param opendr_image image that has been used for inference and wanted to be printed * @param model nanodet model that has been used for inference * @param detections output of the inference From fb4e6b686fffb07498193e7aac2a0a29c3dc16cf Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:38:09 +0200 Subject: [PATCH 19/87] Update include/nanodet_c.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- include/nanodet_c.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/nanodet_c.h b/include/nanodet_c.h index 26f4402c90..41ba71bedc 100644 --- a/include/nanodet_c.h +++ b/include/nanodet_c.h @@ -69,7 +69,7 @@ void free_nanodet_model(nanodet_model_t *model); /** * draw the bounding boxes from detections in the given image - * @param opendr_image image that has been used for inference and wanted to be printed + * @param opendr_image image that has been used for inference * @param model nanodet model that has been used for inference * @param detections output of the inference */ From 8f7fb24f6900caa3803c44f5990dd382f599febc Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:39:05 +0200 Subject: [PATCH 20/87] Update include/opendr_utils.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- include/opendr_utils.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/opendr_utils.h b/include/opendr_utils.h index f4da3f3f30..d15f13312b 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -44,7 +44,7 @@ void free_image(opendr_image_t *image); void initialize_detections(opendr_detection_target_list_t *detections); /** - * Loads an OpenDR detection target list to be used in C api + * Loads an OpenDR detection target list to be used in C API * @param 
detections OpenDR detection_target_list structure to be loaded * @param vectorDataPtr the pointer of the first OpenDR detection target in a vector * @param vectorSize the size of the vector From 61412b31a6cf06395300d0ebc55acb2de53339f1 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:40:38 +0200 Subject: [PATCH 21/87] Update include/target.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- include/target.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/target.h b/include/target.h index 7652cd8238..1baf6b55f5 100644 --- a/include/target.h +++ b/include/target.h @@ -43,7 +43,7 @@ struct opendr_detection_target { typedef struct opendr_detection_target opendr_detection_target_t; /*** - * OpenDR data type for representing a structure of detections targets + * OpenDR data type for representing a structure of detection targets */ struct opendr_detection_target_list { opendr_detection_target_t *starting_pointer; From 16e61c2e03de9e5bc95bcf3adc6277aac883cecf Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:41:04 +0200 Subject: [PATCH 22/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 36d1319bf7..3b010cd711 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -72,7 +72,7 @@ Parameters: - **seed** : *int, default=123*\ Seed for repeatability. - **local_rank** : *int, default=1*\ - Is needed if train to multiple machines is wanted. + Needed if training on multiple machines. 
#### `NanodetLearner.eval` ```python From f9a0bf46892aa8f87eadc2a7b21c507af68cd196 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:41:33 +0200 Subject: [PATCH 23/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 3b010cd711..3b03fdcf31 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -114,7 +114,7 @@ Parameters: NanodetLearner.optimize(self, export_path, initial_img=None, verbose=True, optimization="jit") ``` -This method is used to perform jir or onnx optimizations and save a trained model with its metadata. +This method is used to perform Jit or Onnx optimizations and save a trained model with its metadata. If a models is already saves in export_path, the model will be loaded instead. Provided with the "export_path", it creates the "export_path" directory, if it does already exist it try to load the optimized model in the path. 
Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* for Jit models or *"nanodet_{model_name}.onnx"* for ONNX From 2bca0530438fcaec99b0b47539ecb39be1c4e351 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:44:45 +0200 Subject: [PATCH 24/87] Update nanodet.md --- docs/reference/nanodet.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 3b03fdcf31..e0604fa9c2 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -76,7 +76,8 @@ Parameters: #### `NanodetLearner.eval` ```python -NanodetLearner.eval(self, dataset, verbose, local_rank) +NanodetLearner.eval(self, dataset, verbose, +) ``` This method is used to evaluate a trained model on an evaluation dataset. @@ -89,7 +90,7 @@ Parameters: - **verbose**: *bool, default=True*\ Enables the maximum verbosity and logger. - **local_rank** : *int, default=1*\ - Is needed if evaluation to multiple machines is wanted. + Needed if evaluating on multiple machines. 
#### `NanodetLearner.infer` ```python @@ -359,4 +360,4 @@ Furthermore, demos on performing [training](../../projects/perception/object_det boxes = nanodet.infer(input=img) draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) - ``` \ No newline at end of file + ``` From 85496898e90780b9ea200d7d3fd71214ca0952b0 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:45:52 +0200 Subject: [PATCH 25/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index e0604fa9c2..923dbd4340 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -116,8 +116,8 @@ NanodetLearner.optimize(self, export_path, initial_img=None, verbose=True, optim ``` This method is used to perform Jit or Onnx optimizations and save a trained model with its metadata. -If a models is already saves in export_path, the model will be loaded instead. Provided with the "export_path", it creates -the "export_path" directory, if it does already exist it try to load the optimized model in the path. +If a model is not present in the location specified by "export_path", the optimizer will save it there. +If a model is already present, it will load it instead. Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* for Jit models or *"nanodet_{model_name}.onnx"* for ONNX and a metadata file *"nanodet_{model_name}.json"*. 
From 730d192e58e24ab6a1fc9b15e161ec72173e256b Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:46:42 +0200 Subject: [PATCH 26/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 923dbd4340..9b71d706d5 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -118,8 +118,7 @@ NanodetLearner.optimize(self, export_path, initial_img=None, verbose=True, optim This method is used to perform Jit or Onnx optimizations and save a trained model with its metadata. If a model is not present in the location specified by "export_path", the optimizer will save it there. If a model is already present, it will load it instead. -Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* for Jit models or *"nanodet_{model_name}.onnx"* for ONNX -and a metadata file *"nanodet_{model_name}.json"*. +Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* for Jit models or *"nanodet_{model_name}.onnx"* for ONNX and a metadata file *"nanodet_{model_name}.json"*. Note: Onnx optimization, optimize and saves only the actual model inference. This is important if the user wants to use the model for C API. 
It will be needed to make a preproccess and postproccess that will work exactly the same as our python From ef23d94e6c1677c906f8582bd71b2dfa5b73d429 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:47:34 +0200 Subject: [PATCH 27/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 9b71d706d5..7279740d1b 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -145,7 +145,7 @@ This method is used to save a trained model with its metadata. Provided with the path, it creates the "path" directory, if it does not already exist. Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* and a metadata file *"nanodet_{model_name}.json"*. If the directory already exists, the *"nanodet_{model_name}.pth"* and *"nanodet_{model_name}.json"* files are overwritten. -If optimization is performed the optimized model is saved instead. +If optimization is performed, the optimized model is saved instead. Parameters: From c41d578050a4617d25d92a26489e5141fb73969d Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:47:46 +0200 Subject: [PATCH 28/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 7279740d1b..6f7615d2c6 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -161,7 +161,7 @@ NanodetLearner.load(self, path, verbose) This method is used to load a previously saved model from its saved folder. 
Loads the model from inside the directory of the path provided, using the metadata .json file included. -If optimization is performed the optimized model is loaded instead. +If optimization is performed, the optimized model is loaded instead. Parameters: From 96e4c0eadc1b6ad15704ba2617c485c4c623b7b9 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 10:48:32 +0200 Subject: [PATCH 29/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 6f7615d2c6..2a4941603d 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -280,12 +280,8 @@ Furthermore, demos on performing [training](../../projects/perception/object_det args = parser.parse_args() - if args.dataset == 'voc': - dataset = ExternalDataset(args.data_root, 'voc') - val_dataset = ExternalDataset(args.data_root, 'voc') - elif args.dataset == 'coco': - dataset = ExternalDataset(args.data_root, 'coco') - val_dataset = ExternalDataset(args.data_root, 'coco') + dataset = ExternalDataset(args.data_root, args.dataset) + val_dataset = ExternalDataset(args.data_root, args.dataset) nanodet = NanodetLearner(model_to_use=args.model, iters=args.n_epochs, lr=args.lr, batch_size=args.batch_size, checkpoint_after_iter=args.checkpoint_freq, checkpoint_load_iter=args.resume_from, From bda772f2272651856e4b958e504c20957971c74f Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:01:05 +0200 Subject: [PATCH 30/87] Update nanodet.md --- docs/reference/nanodet.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 2a4941603d..395334dd59 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -298,7 
+298,6 @@ Furthermore, demos on performing [training](../../projects/perception/object_det This example shows how to perform inference on an image and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. In this example first is downloaded a pre-trained model as in training example and then an image to be inference. - With the *path* parameter you can choose an image file to be used as inference. ```python import argparse From 0feb2f27262fde248b57a709e029effe0ef1c411 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:03:06 +0200 Subject: [PATCH 31/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 395334dd59..c7b897eebc 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -323,11 +323,9 @@ Furthermore, demos on performing [training](../../projects/perception/object_det * **Optimization framework with Inference and result drawing example on a test image.** - This example shows how to perform optimization on a pretrained model and then inference and draw the resulting - bounding boxes using a nanodet model that is pretrained on the COCO dataset. In this example first is loaded a - pretrained model and then an opendr Image is used to perform optimization, in this exampel we use onnx optimization but - with `--optimization` can be used one of `[jit, onnx]`. - With the *path* parameter you can choose an image file to be used as dummy input in optimization and after in inference. + This example shows how to perform optimization on a pretrained model, inference and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. 
+ In this example first a pretrained model is loaded and then an image is used to perform the optimization, in this example we use onnx optimization but Jit can also be used by passing `--optimization=jit`. + With the *path* parameter you can define the image file to be used as dummy input for the optimization and inference. The optimized model will be saves in `./optimization_models` folder ```python import argparse From 96978ce185d715ef769db59fa96618f436a6b778 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:05:14 +0200 Subject: [PATCH 32/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index c7b897eebc..eb4c519213 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -326,7 +326,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det This example shows how to perform optimization on a pretrained model, inference and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. In this example first a pretrained model is loaded and then an image is used to perform the optimization, in this example we use onnx optimization but Jit can also be used by passing `--optimization=jit`. With the *path* parameter you can define the image file to be used as dummy input for the optimization and inference. 
- The optimized model will be saves in `./optimization_models` folder + The optimized model will be saved in the `./optimization_models` folder ```python import argparse from opendr.perception.object_detection_2d import NanodetLearner From e242b78e4e386c48f5d07bbb4f56ac7a22b058e5 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:06:40 +0200 Subject: [PATCH 33/87] Update Makefile --- projects/c_api/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/c_api/Makefile b/projects/c_api/Makefile index 9a6b35453c..e6510e8050 100644 --- a/projects/c_api/Makefile +++ b/projects/c_api/Makefile @@ -12,7 +12,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -OPENDR_HOME= /home/manos/develop/final/opendr SHELL := /bin/bash CPP = g++ From 51f4684e0a5bb364f1e07703ccbf0ef410ab660a Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:07:38 +0200 Subject: [PATCH 34/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index eb4c519213..ec7b03859c 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -329,9 +329,8 @@ Furthermore, demos on performing [training](../../projects/perception/object_det The optimized model will be saved in the `./optimization_models` folder ```python import argparse - from opendr.perception.object_detection_2d import NanodetLearner from opendr.engine.data import Image - from opendr.perception.object_detection_2d import draw_bounding_boxes + from opendr.perception.object_detection_2d import NanodetLearner, draw_bounding_boxes if __name__ == '__main__': From 
a9537ecc3f827726853777fece898932fd9ae128 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:07:56 +0200 Subject: [PATCH 35/87] Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- docs/reference/nanodet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index ec7b03859c..5a10bd65f5 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -337,7 +337,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det parser = argparse.ArgumentParser() parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') - parser.add_argument("--optimization", help="Optimization framework that will be used", type=str, default='onnx') + parser.add_argument("--optimization", help="Optimization framework to be used", type=str, default='onnx', choices=['jit', 'onnx']) parser.add_argument("--path", help="Path to the dummy image that will be used for optimization and inference", type=str) args = parser.parse_args() From 55899664c1c7bd52a65ebfe027fede1db00f96b4 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:34:20 +0200 Subject: [PATCH 36/87] Update nanodet.md --- docs/reference/nanodet.md | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index 5a10bd65f5..fe8ad44545 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -298,7 +298,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det This example shows how to perform inference on an image and draw the resulting bounding boxes using a nanodet model that 
is pretrained on the COCO dataset. In this example first is downloaded a pre-trained model as in training example and then an image to be inference. - + With the *path* parameter you can define the image file to be used in inference. ```python import argparse from opendr.perception.object_detection_2d import NanodetLearner @@ -309,13 +309,15 @@ Furthermore, demos on performing [training](../../projects/perception/object_det parser = argparse.ArgumentParser() parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') + parser.add_argument("--path", help="Path to the image that will be used for inference", type=str, + default="./predefined_examples/000000000036.jpg") args = parser.parse_args() nanodet = NanodetLearner(model_to_use=args.model, device=args.device) nanodet.download("./predefined_examples", mode="pretrained") nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) nanodet.download("./predefined_examples", mode="images") - img = Image.open("./predefined_examples/000000000036.jpg") + img = Image.open(args.path) boxes = nanodet.infer(input=img) draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) @@ -338,7 +340,8 @@ Furthermore, demos on performing [training](../../projects/perception/object_det parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') parser.add_argument("--optimization", help="Optimization framework to be used", type=str, default='onnx', choices=['jit', 'onnx']) - parser.add_argument("--path", help="Path to the dummy image that will be used for optimization and inference", type=str) + parser.add_argument("--path", help="Path to the dummy image that will be used for optimization 
and inference", type=str, + default="./predefined_examples/000000000036.jpg") args = parser.parse_args() nanodet = NanodetLearner(model_to_use=args.model, device=args.device) @@ -346,7 +349,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det # First read an openDR image from your dataset and run the optimizer: img = Image.open(args.path) - nanodet.optimize("./optimization_models", img, optimization=args.optimization) + nanodet.optimize("./optimization_models/{}/nanodet_{}/".format(args.optimization, args.model), img, optimization=args.optimization) boxes = nanodet.infer(input=img) From e3f66160fb8ecd0572197a519a2f10c8be04e107 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:35:42 +0200 Subject: [PATCH 37/87] Update projects/c_api/Makefile Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- projects/c_api/Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/projects/c_api/Makefile b/projects/c_api/Makefile index e6510e8050..b8fb363fd3 100644 --- a/projects/c_api/Makefile +++ b/projects/c_api/Makefile @@ -34,7 +34,6 @@ OPENDR_LD = -L$(OPENDR_HOME)/lib -lopendr all: download demos demos: $(BUILD_DIR)/face_recognition_demo $(BUILD_DIR)/nanodet_demo - face: $(BUILD_DIR)/face_recognition_demo nano: $(BUILD_DIR)/nanodet_demo From aca73a24acadbdbe996f009dcd3a0aa87555d941 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 11:40:26 +0200 Subject: [PATCH 38/87] Update projects/python/perception/object_detection_2d/nanodet/README.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- .../python/perception/object_detection_2d/nanodet/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index 
721aceec80..6c8cea4915 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -20,7 +20,7 @@ provided by OpenDR. Specifically the following examples are provided: 4. export_onnx: Export the pretrained model into the onnx optimization format. Setting the config file for the specific model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. Notes! Onnx model is only run the inference of the actual model. If you want the preprocessing of the output you must use jit optimization. - It is recomended to install additional the onnxsim dependencies with `pip install onnxsim` for smaller and better optimized models. + It is recommended to additionally install the onnxsim dependencies with `pip install onnxsim` for smaller and better optimized models. 5. export_torchscript: Export the pretrained model into the Jit optimization format. Setting the config file for the specific model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. Note! 
If you're planning on using c api, Jit optimization is preferred, so it can be used for the same postprocessing of the output From bf61b2ca700ec172be634ba246ace7057fe6625f Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 12:35:58 +0200 Subject: [PATCH 39/87] Update eval_demo.py --- .../perception/object_detection_2d/nanodet/eval_demo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py index dba2a9d844..c8975ccb02 100644 --- a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py @@ -20,14 +20,14 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() + parser.add_argument("--dataset", help="Dataset to evaluate on", type=str, default="coco", choices=["voc", "coco"]) parser.add_argument("--data-root", help="Dataset root folder", type=str) parser.add_argument("--model", help="Model that config file will be used", type=str, default="m") parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) args = parser.parse_args() - data_root = "/home/manos/data/coco2017" - val_dataset = ExternalDataset(data_root, 'coco') + val_dataset = ExternalDataset(args.data_root, args.dataset) nanodet = NanodetLearner(model_to_use=args.model, device=args.device) nanodet.download("./predefined_examples", mode="pretrained", verbose=False) From 5f0c3640b4a7febfe111a0d72862dc70382b2bba Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 12:36:29 +0200 Subject: [PATCH 40/87] Update projects/python/perception/object_detection_2d/nanodet/export_onnx.py Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- 
.../perception/object_detection_2d/nanodet/export_onnx.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/export_onnx.py b/projects/python/perception/object_detection_2d/nanodet/export_onnx.py index 8054ccce97..eca0a504ca 100644 --- a/projects/python/perception/object_detection_2d/nanodet/export_onnx.py +++ b/projects/python/perception/object_detection_2d/nanodet/export_onnx.py @@ -13,9 +13,8 @@ # limitations under the License. import argparse -from opendr.perception.object_detection_2d import NanodetLearner from opendr.engine.data import Image -from opendr.perception.object_detection_2d import draw_bounding_boxes +from opendr.perception.object_detection_2d import NanodetLearner, draw_bounding_boxes if __name__ == '__main__': From 9128f7d577867f8051d8784b6f39bd9859052b8d Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 13:27:34 +0200 Subject: [PATCH 41/87] Update projects/python/perception/object_detection_2d/nanodet/README.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- .../python/perception/object_detection_2d/nanodet/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index 6c8cea4915..5a36ca4267 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -19,7 +19,7 @@ provided by OpenDR. Specifically the following examples are provided: 4. export_onnx: Export the pretrained model into the onnx optimization format. Setting the config file for the specific model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. - Notes! Onnx model is only run the inference of the actual model. 
If you want the preprocessing of the output you must use jit optimization. + Note: Onnx model only runs the inference of the actual model. If you want the preprocessing of the output you must use jit optimization. It is recommended to additionally install the onnxsim dependencies with `pip install onnxsim` for smaller and better optimized models. 5. export_torchscript: Export the pretrained model into the Jit optimization format. Setting the config file for the specific model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. From 3631fc4c5299779c02ae12d763fa229d58616a1e Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 25 Nov 2022 13:28:16 +0200 Subject: [PATCH 42/87] Update projects/python/perception/object_detection_2d/nanodet/README.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> --- .../python/perception/object_detection_2d/nanodet/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index 5a36ca4267..859e343542 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -23,6 +23,6 @@ provided by OpenDR. Specifically the following examples are provided: It is recommended to additionally install the onnxsim dependencies with `pip install onnxsim` for smaller and better optimized models. 5. export_torchscript: Export the pretrained model into the Jit optimization format. Setting the config file for the specific model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. - Note! 
If you're planning on using c api, Jit optimization is preferred, so it can be used for the same postprocessing of the output + Note: if you're planning on using C api, Jit optimization is preferred, so it can be used for the same postprocessing of the output and have same exact detection as the python api. 6. inference_tutorial: A simple tutorial in jupyter for easier use of Nanodet in inference. \ No newline at end of file From 5c1e458ab12753ba9d8d8abd6ae4acb61f3dde69 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Mon, 28 Nov 2022 17:24:33 +0200 Subject: [PATCH 43/87] installing libtorch and vision with respect to CUDA of the user --- dependencies/download_torch.py | 126 ++++++++++++++++++++++++++++ dependencies/install_torch_c_api.sh | 45 ++++++---- 2 files changed, 155 insertions(+), 16 deletions(-) create mode 100644 dependencies/download_torch.py diff --git a/dependencies/download_torch.py b/dependencies/download_torch.py new file mode 100644 index 0000000000..cc908f73ae --- /dev/null +++ b/dependencies/download_torch.py @@ -0,0 +1,126 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import argparse +import glob +from urllib.request import urlretrieve +import os +import warnings + + +def search_on_path(filenames): + for p in os.environ.get('PATH', '').split(os.pathsep): + for filename in filenames: + full = os.path.join(p, filename) + if os.path.exists(full): + return os.path.abspath(full) + return None + + +def get_cuda_path(): + nvcc_path = search_on_path(('nvcc', 'nvcc.exe')) + cuda_path_default = None + if nvcc_path is not None: + cuda_path_default = os.path.normpath(os.path.join(os.path.dirname(nvcc_path), '..', '..')) + if cuda_path_default is not None: + _cuda_path = cuda_path_default + elif os.path.exists('/usr/local/cuda'): + _cuda_path = '/usr/local/cuda' + else: + _cuda_path = None + + return _cuda_path + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--cuda_path", help="Path to installed cuda", type=str, default=None) + parser.add_argument("--opendr_device", help="OpenDR variable to install dependencies during installation", + type=str, default="gpu") + parser.add_argument("--torch_version", help="Version of Libtorch to be installed", type=str, default="1.9.0") + args = parser.parse_args() + + COMPATIBILITY_VERSIONS = { + "1.13.0": "0.14.0", + "1.12.0": "0.13.0", + "1.11.0": "0.12.0", + "1.10.2": "0.11.3", + "1.10.1": "0.11.2", + "1.10.0": "0.11.1", + "1.9.1": "0.10.1", + "1.9.0": "0.10.0", + } + + TORCH_VERSION = args.torch_version + VISION_VERSION = COMPATIBILITY_VERSIONS[TORCH_VERSION] + + CUDA_VERSION = None + DEVICE = None + # Find Device + if args.opendr_device == "gpu": + try: + if args.cuda_path is None: + CUDA_PATH = get_cuda_path() + else: + CUDA_PATH = args.cuda_path + version_file_type = glob.glob(f"{CUDA_PATH}/version*") + if version_file_type[0].endswith('.txt'): + version_file = open(f"{CUDA_PATH}/version.txt", mode='r') + version_line = version_file.readlines() + version_line = version_line[0].replace(".", "") + CUDA_VERSION = version_line[13:16] + elif 
version_file_type[0].endswith('.json'): + version_file = open(f"{CUDA_PATH}/version.json", mode='r') + version_dict = json.load(version_file) + CUDA_VERSION = version_dict["cuda"]["version"] + CUDA_VERSION = CUDA_VERSION.replace(".", "") + CUDA_VERSION = CUDA_VERSION[:3] + else: + warnings.warn("\033[93m Not cuda version file found. Please sent an Issue in our github") + DEVICE = f"cu{CUDA_VERSION}" + except: + warnings.warn("\033[93m No cuda found.\n" + "Please install cuda or specify cuda path with export CUDA_PATH=/path/to/your/cuda.") + else: + DEVICE = "cpu" + + # Download Libtorch + try: + file_url_libtorch = f"https://download.pytorch.org/libtorch/{DEVICE}/" \ + f"libtorch-cxx11-abi-shared-with-deps-{TORCH_VERSION}%2B{DEVICE}.zip" + + DOWNLOAD_DIRECTORY = "libtorch.zip" + + urlretrieve(file_url_libtorch, DOWNLOAD_DIRECTORY) + + except: + warnings.warn("\033[93m Not Libtorch found with your specific device and torch version.\n" + "Please choose another version of torch or install different CUDA.\n" + "Please reference https://download.pytorch.org/whl/torch_stable.html") + exit() + # Download Vision + try: + file_url_vision = f"https://github.com/pytorch/vision/archive/refs/tags/" \ + f"v{VISION_VERSION}.tar.gz" + DOWNLOAD_DIRECTORY = "vision.tar.gz" + urlretrieve(file_url_vision, DOWNLOAD_DIRECTORY) + except: + warnings.warn("\033[93m Not torchvision found with your specific torch version.\n" + "Please see the torchvision GitHub repository for more information.") + + # Send environment variables to be used with sudo privileges from bash script + os.environ["TORCH_VERSION"] = TORCH_VERSION + os.environ["VISION_VERSION"] = VISION_VERSION + os.environ["DEVICE"] = DEVICE diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh index b861d0d6bc..ddfe8f220c 100755 --- a/dependencies/install_torch_c_api.sh +++ b/dependencies/install_torch_c_api.sh @@ -1,26 +1,41 @@ #!/bin/bash +if [[ -z "$TORCH_VERSION" ]]; +then + echo "Specific Torch 
Version is not defined. Torch version 1.9.0 will be installed" + echo "For specific Torch Version plz defined variable TORCH_VERSION with export TORCH_VERSION=x.x.x." + TORCH_VERSION="1.9.0" +fi + if [ ! -f /usr/local/lib/libtorchvision.so ]; then - CUDA_VERSION="116" - TORCH_VERSION="1.13.0" TORCH_DIRECTORY="/usr/local/libtorch" VISION_VERSION="0.14.0" if [[ "$OPENDR_DEVICE" == "gpu" ]] then - echo "Downloading and installing libtorch and torchvision (gpu support) ..." - GPU="on" - DEVICE="cu"${CUDA_VERSION} - CUDA_COMPILER="/usr/local/cuda/bin/nvcc" + echo "Downloading and installing libtorch and torchvision (gpu support) ..." + GPU="on" + DEVICE="cu"${CUDA_VERSION} + CUDA_COMPILER="/usr/local/cuda/bin/nvcc" else - echo "Downloading and installing libtorch and torchvsion (cpu-only) ..." - GPU="off" - DEVICE="cpu" + echo "Downloading and installing libtorch and torchvsion (cpu-only) ..." + GPU="off" + DEVICE="cpu" fi + # Find CUDA version and download torch and vision + echo "Downloading Libtorch and torchvision ..." + # Make sure that we can download files + if [[ -z "$CUDA_PATH" ]]; + then + python3 ./download_torch.py --opendr_device "$OPENDR_DEVICE" --torch_version "$TORCH_VERSION" + else + python3 ./download_torch.py --opendr_device "$OPENDR_DEVICE" --torch_version "$TORCH_VERSION" --cuda_path "$CUDA_PATH" + fi + echo "Downloading Libtorch and torchvision ... FINIS" + # TORCH INSTALLATION - wget https://download.pytorch.org/libtorch/${DEVICE}/libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}%2B${DEVICE}.zip --quiet - unzip -qq libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+${DEVICE}.zip + unzip -qq libtorch.zip cd libtorch sudo mkdir -p ${TORCH_DIRECTORY} @@ -28,8 +43,7 @@ if [ ! -f /usr/local/lib/libtorchvision.so ]; then cd .. 
# TORCH VISION INSTALLATION - wget https://github.com/pytorch/vision/archive/refs/tags/v${VISION_VERSION}.tar.gz --quiet - tar zxf v${VISION_VERSION}.tar.gz + tar zxf vision.tar.gz cd vision-${VISION_VERSION} sudo mkdir -p build cd build @@ -40,12 +54,11 @@ if [ ! -f /usr/local/lib/libtorchvision.so ]; then # CLEAN sudo rm -rf libtorch - sudo rm -rf libtorch-cxx11-abi-shared-with-deps-${TORCH_VERSION}+${DEVICE}.zip + sudo rm -rf libtorch.zip sudo rm -rf vision-${VISION_VERSION} - sudo rm -rf v${VISION_VERSION}.tar.gz + sudo rm -rf vision.tar.gz sudo ldconfig fi - From c547fc27a4ed2141fdcdbf1db25ea030ba029608 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Mon, 28 Nov 2022 17:26:06 +0200 Subject: [PATCH 44/87] better explanations of variables in optimization runtimes --- docs/reference/nanodet.md | 12 ++++++------ projects/c_api/samples/nanodet/README.md | 9 +++++---- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index fe8ad44545..dcc36c178e 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -90,7 +90,7 @@ Parameters: - **verbose**: *bool, default=True*\ Enables the maximum verbosity and logger. - **local_rank** : *int, default=1*\ - Needed if evaluating on multiple machines. + Needed if evaluating on multiple machines. #### `NanodetLearner.infer` ```python @@ -120,10 +120,10 @@ If a model is not present in the location specified by "export_path", the optimi If a model is already present, it will load it instead. Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* for Jit models or *"nanodet_{model_name}.onnx"* for ONNX and a metadata file *"nanodet_{model_name}.json"*. -Note: Onnx optimization, optimize and saves only the actual model inference. This is important if the user wants to use -the model for C API. 
It will be needed to make a preproccess and postproccess that will work exactly the same as our python -implementation to have the exact same results. -For C API it is recomended the Jit optimization and the example that is provided in our [c_api](../../projects/c_api/samples/nanodet/nanodet_jit_demo.c) +Note: In Onnx optimization, the output model executes the original model's feed forward. +The user must create his or her own pre- and post-processes in order to use the Onnx model in the C API. +On the other side, in Jit optimization the output model does the feed forward and post-processing. +For C API it is recommended the Jit optimization and the example that is provided in our [c_api](../../projects/c_api/samples/nanodet/nanodet_jit_demo.c) Parameters: @@ -349,7 +349,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det # First read an openDR image from your dataset and run the optimizer: img = Image.open(args.path) - nanodet.optimize("./optimization_models/{}/nanodet_{}/".format(args.optimization, args.model), img, optimization=args.optimization) + nanodet.optimize("./{}/nanodet_{}/".format(args.optimization, args.model), img, optimization=args.optimization) boxes = nanodet.infer(input=img) diff --git a/projects/c_api/samples/nanodet/README.md b/projects/c_api/samples/nanodet/README.md index fb2329cb2c..8114150843 100644 --- a/projects/c_api/samples/nanodet/README.md +++ b/projects/c_api/samples/nanodet/README.md @@ -1,13 +1,14 @@ # OpenDR C API Nanodet Demo -C API implementation of nanodet models for inference. To use the models first must be exported with the optimization Jit from python. -After the installation can be run from projects/c_api directory with: +C API implementation of nanodet models for inference. 
+To use the demo, the downloaded model from installation can be used or it can be exported with the optimization Jit from our python implementation, see [Nanodet opimization](../../../../docs/reference/nanodet.md#nanodetlearneroptimize). + +After installation, the demo can be run from projects/c_api directory with: ```sh ./built/nanodet_libtorch_demo ./path/to/your/model.pth device_name{cpu, cuda} ./path/to/your/image.jpg height width ``` -After installation a temporal model and image are downloaded based on nanodet_m model from python. -You can run it as: +Or with the downloaded model and image with: ```sh ./built/nanodet_libtorch_demo ./data/nanodet/optimized_model/nanodet_m.pth cuda ./data/nanodet/database/000000000036.jpg 320 320 From 3d3a3a60d8c9767a2bdc580db979a9b63b12bd51 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Mon, 28 Nov 2022 17:28:44 +0200 Subject: [PATCH 45/87] small typing fixes --- .../c_api/samples/nanodet/nanodet_jit_demo.c | 70 +++++++++---------- .../nanodet/export_onnx.py | 2 +- .../nanodet/export_torchscript.py | 4 +- .../nanodet/inference_demo.py | 4 +- .../object_detection_2d/nanodet/train_demo.py | 8 +-- 5 files changed, 42 insertions(+), 46 deletions(-) diff --git a/projects/c_api/samples/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/nanodet/nanodet_jit_demo.c index d5bca5e198..aed0ac5d48 100644 --- a/projects/c_api/samples/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/nanodet/nanodet_jit_demo.c @@ -19,49 +19,47 @@ #include "nanodet_c.h" #include "opendr_utils.h" -int main(int argc, char** argv) -{ +int main(int argc, char **argv) { + if (argc != 6) { + fprintf(stderr, + "usage: %s [model_path] [device] [images_path] [input_sizes].\n" + "model_path = path/to/your/libtorch/model.pth \n device = cuda or cpu \n" + "images_path = \"xxx/xxx/*.jpg\" \n input_size = width height.\n", + argv[0]); + return -1; + } - if (argc != 6) - { - fprintf(stderr, "usage: %s [model_path] [device] [images_path] [input_sizes].\n" - 
"model_path = path/to/your/libtorch/model.pth \n device = cuda or cpu \n" - "images_path = \"xxx/xxx/*.jpg\" \n input_size = width height.\n", argv[0]); - return -1; - } + nanodet_model_t model; - nanodet_model_t model; + int height = atoi(argv[4]); + int width = atoi(argv[5]); + printf("start init model\n"); + load_nanodet_model(argv[1], argv[2], height, width, 0.35, &model); + printf("success\n"); - int height = atoi(argv[4]); - int width = atoi(argv[5]); - printf("start init model\n"); - load_nanodet_model(argv[1], argv[2], height, width, 0.35, &model); - printf("success\n"); + // Initialize opendr image + opendr_image_t image; - //Initialize opendr image - opendr_image_t image; + // Load opendr image + load_image(argv[3], &image); + if (!image.data) { + printf("Image not found!"); + return 1; + } - //Load opendr image - load_image(argv[3], &image); - if (!image.data) - { - printf("Image not found!"); - return 1; - } + // Initialize opendr detection target list; + opendr_detection_target_list_t results; - //Initialize opendr detection target list; - opendr_detection_target_list_t results; + // Infer nanodet model + results = infer_nanodet(&image, &model); - //Infer nanodet model - results = infer_nanodet(&image, &model); + // Draw the results + drawBboxes(&image, &model, &results); - //Draw the results - drawBboxes(&image, &model, &results); + // Free the memory + free_detections(&results); + free_image(&image); + free_nanodet_model(&model); - //Free the memory - free_detections(&results); - free_image(&image); - free_nanodet_model(&model); - - return 0; + return 0; } diff --git a/projects/python/perception/object_detection_2d/nanodet/export_onnx.py b/projects/python/perception/object_detection_2d/nanodet/export_onnx.py index eca0a504ca..ac1a2239fd 100644 --- a/projects/python/perception/object_detection_2d/nanodet/export_onnx.py +++ b/projects/python/perception/object_detection_2d/nanodet/export_onnx.py @@ -30,7 +30,7 @@ 
nanodet.download("./predefined_examples", mode="images") # First read an openDR image from your dataset and run the optimizer: img = Image.open("./predefined_examples/000000000036.jpg") - nanodet.optimize("./onnx", img, optimization="onnx") + nanodet.optimize("./onnx/nanodet_{}".format(args.model), img, optimization="onnx") boxes = nanodet.infer(input=img) diff --git a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py index 27a2990187..aac86c7a24 100644 --- a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py +++ b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py @@ -25,12 +25,12 @@ nanodet = NanodetLearner(model_to_use=args.model, device=args.device) nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load(f"./predefined_examples/nanodet_{args.model}", verbose=True) + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) nanodet.download("./predefined_examples", mode="images") # First read an openDR image from your dataset and run the optimizer: img = Image.open("./predefined_examples/000000000036.jpg") - nanodet.optimize(f"./jit/nanodet_{args.model}", img, optimization="jit") + nanodet.optimize("./jit/nanodet_{}".format(args.model), img, optimization="jit") boxes = nanodet.infer(input=img) diff --git a/projects/python/perception/object_detection_2d/nanodet/inference_demo.py b/projects/python/perception/object_detection_2d/nanodet/inference_demo.py index 71e95b15fb..048a5b8d0f 100644 --- a/projects/python/perception/object_detection_2d/nanodet/inference_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/inference_demo.py @@ -22,13 +22,15 @@ parser = argparse.ArgumentParser() parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--model", help="Model that config 
file will be used", type=str, default='m') + parser.add_argument("--path", help="Path to the image that want to infer", type=str, + default="./predefined_examples/000000000036.jpg") args = parser.parse_args() nanodet = NanodetLearner(model_to_use=args.model, device=args.device) nanodet.download("./predefined_examples", mode="pretrained") nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) nanodet.download("./predefined_examples", mode="images") - img = Image.open("./predefined_examples/000000000036.jpg") + img = Image.open(args.path) boxes = nanodet.infer(input=img) draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) diff --git a/projects/python/perception/object_detection_2d/nanodet/train_demo.py b/projects/python/perception/object_detection_2d/nanodet/train_demo.py index 8fda02650c..bae2941259 100644 --- a/projects/python/perception/object_detection_2d/nanodet/train_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/train_demo.py @@ -34,12 +34,8 @@ args = parser.parse_args() - if args.dataset == 'voc': - dataset = ExternalDataset(args.data_root, 'voc') - val_dataset = ExternalDataset(args.data_root, 'voc') - elif args.dataset == 'coco': - dataset = ExternalDataset(args.data_root, 'coco') - val_dataset = ExternalDataset(args.data_root, 'coco') + dataset = ExternalDataset(args.data_root, args.dataset) + val_dataset = ExternalDataset(args.data_root, args.dataset) nanodet = NanodetLearner(model_to_use=args.model, iters=args.n_epochs, lr=args.lr, batch_size=args.batch_size, checkpoint_after_iter=args.checkpoint_freq, checkpoint_load_iter=args.resume_from, From 704952f561553af8021a767f7e009f6f94534b76 Mon Sep 17 00:00:00 2001 From: ad-daniel Date: Tue, 6 Dec 2022 15:13:58 +0100 Subject: [PATCH 46/87] Fix dependency --- src/opendr/utils/hyperparameter_tuner/dependencies.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/opendr/utils/hyperparameter_tuner/dependencies.ini 
b/src/opendr/utils/hyperparameter_tuner/dependencies.ini index 90177a32c9..7bffaadd8c 100644 --- a/src/opendr/utils/hyperparameter_tuner/dependencies.ini +++ b/src/opendr/utils/hyperparameter_tuner/dependencies.ini @@ -5,7 +5,7 @@ python=optuna tabulate torch plotly - sklearn + scikit-learn wheel opendr=opendr-toolkit-engine From e649c60e1cffc9bc21b4e5bbbb4e111ab474d57f Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 9 Dec 2022 16:40:24 +0200 Subject: [PATCH 47/87] Update dependencies.ini deleting torchmetrics --- .../perception/object_detection_2d/nanodet/dependencies.ini | 1 - 1 file changed, 1 deletion(-) diff --git a/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini b/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini index fa1c05ba26..6bc6e8d861 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini +++ b/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini @@ -13,6 +13,5 @@ python=torch>=1.9.0 pyaml tabulate tensorboard - torchmetrics opendr=opendr-toolkit-engine From 467c1f1e661462de46dd88a1762b23cbfaf7f4c8 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 00:57:51 +0200 Subject: [PATCH 48/87] update docs for new data structures and utilities of c api --- docs/reference/c-data-h.md | 101 +++++++++++++++++++++++++ docs/reference/c-face-recognition-h.md | 20 ++--- docs/reference/c-opendr-utils-h.md | 76 ++++++++++++++++++- docs/reference/c-target-h.md | 71 ++++++++++++++++- 4 files changed, 255 insertions(+), 13 deletions(-) diff --git a/docs/reference/c-data-h.md b/docs/reference/c-data-h.md index 20b5e27b8e..046010a769 100644 --- a/docs/reference/c-data-h.md +++ b/docs/reference/c-data-h.md @@ -24,3 +24,104 @@ A pointer where image data are stored. *opendr_image_t* is using internally OpenCV images (*cv::Mat*) for storing images. 
Therefore, only a pointer to the memory location of the corresponding *cv::Mat* is stored. Please note that the user is not expected to directly manipulate these data without first converting them into OpenCV data type or using the corresponding functions provided in *opendr_utils.h*. + +### struct *opendr_tensor_t* +```C +struct opendr_tensor { + int batch_size; + int frames; + int channels; + int width; + int height; + + float *data; +}; +typedef struct opendr_tensor opendr_tensor_t; +``` + + +The *opendr_tensor_t* structure provides a data structure for storing OpenDR tensors. +Every function in the C API receiving and returning tensors is expected to use this structure. +Helper functions that directly map data into this format are provided in *opendr_utils.h*. + +The *opendr_tensor_t* structure has the following fields: + +#### `int batch_size` field + +An integer that represents the batch size of the tensor. + +#### `int frames` field + +An integer that represents the number of frames in the tensor. + +#### `int channels` field + +An integer that represents the number of channels in the tensor. + +#### `int width` field + +An integer that represents the width of the tensor. + +#### `int height` field + +An integer that represents the height of the tensor. + +#### `float *data` field + +A pointer where data are stored. +*opendr_tensor_t* internally uses a pointer and the corresponding sizes to copy the data into the memory of `float *data`. +Therefore, only a pointer to the memory location of the corresponding data is stored. +Please note that the user is not expected to directly manipulate these data without first converting them into OpenCV or another form of data type or using the corresponding functions provided in *opendr_utils.h*.
+ +### struct *opendr_tensor_vector_t* +```C +struct opendr_tensor_vector { + int n_tensors; + int *batch_sizes; + int *frames; + int *channels; + int *widths; + int *heights; + + float **memories; +}; +typedef struct opendr_tensor_vector opendr_tensor_vector_t; +``` + + +The *opendr_tensor_vector_t* structure provides a data structure for storing OpenDR tensor structures. +Every function in the C API receiving and returning multiple tensors is expected to use this structure. +Helper functions that directly map data into this format are provided in *opendr_utils.h*. + +The *opendr_tensor_vector_t* structure has the following fields: + +#### `int n_tensors` field + +An integer that represents the number of tensors in the tensor vector. + +#### `int *batch_sizes` field + +A pointer to integers that represent the batch size of each tensor. + +#### `int *frames` field + +A pointer to integers that represent the number of frames in each tensor. + +#### `int *channels` field + +A pointer to integers that represent the number of channels in each tensor. + +#### `int *widths` field + +A pointer to integers that represent the width of each tensor. + +#### `int *heights` field + +A pointer to integers that represent the height of each tensor. + +#### `float **memories` field + +A pointer to where the data of each *opendr_tensor_t.data* in the vector are stored. +*opendr_tensor_vector_t* internally uses pointers and the corresponding sizes to copy the data into the memory of *memories* for each tensor that is provided. +Therefore, only a pointer to the memory location of the corresponding data is stored. +Please note that the user is not expected to directly manipulate these data without first converting them into OpenCV or another form of data type or using the corresponding functions provided in *opendr_utils.h*.
diff --git a/docs/reference/c-face-recognition-h.md b/docs/reference/c-face-recognition-h.md index 1ea2e5822d..8b635c27b0 100644 --- a/docs/reference/c-face-recognition-h.md +++ b/docs/reference/c-face-recognition-h.md @@ -11,14 +11,14 @@ struct face_recognition_model { }; typedef struct face_recognition_model face_recognition_model_t; ``` -The *face_recognition_model_t* structure keeps all the neccesary information that are required by the OpenDR face recognition tool (e.g., model weights, normalization information, database for person recognition, ONNX session information, etc.). +The *face_recognition_model_t* structure keeps all the necessary information that are required by the OpenDR face recognition tool (e.g., model weights, normalization information, database for person recognition, ONNX session information, etc.). ### Function *load_face_recognition_model()* ```C -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); +void load_face_recognition_model(const char *modelPath, face_recognition_model_t *model); ``` - Loads a face recognition model saved in the local filesystem (*model path*) in OpenDR format. + Loads a face recognition model saved in the local filesystem (*modelPath*) in OpenDR format. This function also initializes a CPU-based ONNX session for performing inference using this model. The pre-trained models should follow the OpenDR conventions. The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. @@ -40,25 +40,25 @@ The function returns an OpenDR category structure with the inference results. 
### Function *decode_category_face_recognition()* ```C -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); +void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *personName); ``` Returns the name of a recognized person by decoding the category id into a string (this function uses the information from the built person database). ### Function *build_database_face_recognition()* ```C -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model); +void build_database_face_recognition(const char *databaseFolder, const char *outputPath, face_recognition_model_t *model); ``` Build a face recognition database (containing images for persons to be recognized). -This function expects the *database_folder* to have the same format as the main Python toolkit. -The function calculates the features of the person that are contained in the database and it stores it into a binary file that can be then loaded to perform inference (*output_path*). -A loaded face recongition model should be provided (*model*), since this model will be used for the feature extraction process. +This function expects the *databaseFolder* to have the same format as the main Python toolkit. +The function calculates the features of the person that are contained in the database and it stores it into a binary file that can be then loaded to perform inference (*outputPath*). +A loaded face recognition model should be provided (*model*), since this model will be used for the feature extraction process. 
### Function *load_database_face_recognition()* ```C -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); +void load_database_face_recognition(const char *databasePath, face_recognition_model_t *model); ``` -Loads an already built database (*database_path) into a face recognition model (*model*). +Loads an already built database (*databasePath*) into a face recognition model (*model*). After this step, the model can be used for performing inference. diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index 4e76a24258..dfe9a67013 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -11,8 +11,6 @@ The *load_image()* function allows for reading an images from the local file sys A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. This function allocates memory during each function call, so be sure to use the *free_image()* function to release the allocated resources, when the corresponding image is no longer needed. - - ### Function *free_image()* ```C void free_image(opendr_image_t *image); @@ -20,3 +18,77 @@ void free_image(opendr_image_t *image); The *free_image()* function releases the memory allocated for an OpenDR image structure (*image*). A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. +### Function *initialize_detections_vector()* +```C +void initialize_detections_vector(opendr_detection_vector_target_t *detection_vector); +``` +The *initialize_detections_vector()* function initializes the data of an OpenDR detection vector structure (*detection_vector*) with zero values. +A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided.
+ +### Function *load_detections_vector()* +```C +void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, + int vector_size); +``` +The *load_detections_vector()* function allows for storing OpenDR detection target structures into the memory allocated for multiple OpenDR detection structures (*detection*). +A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. + +### Function *free_detections_vector()* +```C +void free_detections_vector(opendr_detection_vector_target_t *detection_vector); +``` +The *free_detections_vector()* function releases the memory allocated for an OpenDR detection vector structure (*detection_vector*). +A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. + +### Function *initialize_tensor()* +```C +void initialize_tensor(opendr_tensor_t *opendr_tensor); +``` +The *initialize_tensor()* function initializes the data of an OpenDR tensor (*opendr_tensor*) with zero values. +A pointer (*opendr_tensor*) to an OpenDR *opendr_tensor_t* should be provided. + +### Function *load_tensor()* +```C +void load_tensor(opendr_tensor_t *opendr_tensor, void *tensor_data, int batch_size, int frames, int channels, int width, + int height); +``` +The *load_tensor()* function allows for storing tensor data into the memory allocated for, and pointed to by, the OpenDR tensor structure (*opendr_tensor*). +A pointer (*opendr_tensor*) to an OpenDR *opendr_tensor_t* along with the pointer into the memory (*tensor_data*) and the (*batch_size*), (*frames*), (*channels*), (*width*) and (*height*) of the tensor should be provided. +All integers must have a minimum value of *1*. + +### Function *free_tensor()* +```C +void free_tensor(opendr_tensor_t *opendr_tensor); +``` +The *free_tensor()* function releases the memory allocated for an OpenDR tensor structure (*opendr_tensor*).
+A pointer (*opendr_tensor*) to an OpenDR *opendr_tensor_t* should be provided. + +### Function *initialize_tensor_vector()* +```C +void initialize_tensor_vector(opendr_tensor_vector_t *tensor_vector); +``` +The *initialize_tensor_vector()* function initializes the data of an OpenDR tensor vector (*tensor_vector*) with zero values. +A pointer (*tensor_vector*) to an OpenDR *opendr_tensor_vector_t* should be provided. + +### Function *load_tensor_vector()* +```C +void load_tensor_vector(opendr_tensor_vector_t *tensor_vector, opendr_tensor_t *tensor, int number_of_tensors); +``` +The *load_tensor_vector()* function allows for storing multiple OpenDR tensor structures into the memory allocated for, and pointed to by, the OpenDR tensor vector structure (*tensor_vector*). +A pointer (*tensor_vector*) to an OpenDR *opendr_tensor_vector_t* along with the pointer into the memory of a vector or array of OpenDR tensor structures (*tensor*) should be provided. +Moreover, the number of tensors (*number_of_tensors*) should be included, and it must be at least *1*. + +### Function *free_tensor_vector()* +```C +void free_tensor_vector(opendr_tensor_vector_t *tensor_vector); +``` +The *free_tensor_vector()* function releases the memory allocated for an OpenDR tensor vector structure (*tensor_vector*). +A pointer (*tensor_vector*) to an OpenDR *opendr_tensor_vector_t* should be provided. + +### Function *iter_tensor_vector()* +```C +void iter_tensor_vector(opendr_tensor_t *output, opendr_tensor_vector_t *tensor_vector, int index); +``` +The *iter_tensor_vector()* function is used to help the user iterate over the OpenDR tensor vector. +A single OpenDR tensor (*output*) is loaded with the values of the indexed (*index*) tensor of the vector (*tensor_vector*). +A pointer (*tensor_vector*) to an OpenDR *opendr_tensor_vector_t* and an index (*index*) along with a pointer (*output*) to an OpenDR *opendr_tensor_t* should be provided.
diff --git a/docs/reference/c-target-h.md b/docs/reference/c-target-h.md index 6f748759e8..7eae5bd704 100644 --- a/docs/reference/c-target-h.md +++ b/docs/reference/c-target-h.md @@ -13,7 +13,7 @@ typedef struct opendr_category_target opendr_category_target_t; ``` -The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. +The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. Every function in the C API that outputs a classification decision is expected to use this structure. The *opendr_category_target_t* structure has the following field: @@ -25,3 +25,72 @@ A numerical id of the category to which the input objects belongs to. #### `float confidence` field The decision confidence (a value between 0 and 1). + + +### struct *opendr_detection_target_t* +```C +struct opendr_detection_target { + int name; + float left; + float top; + float width; + float height; + float score; +}; +typedef struct opendr_detection_target opendr_detection_target_t; +``` + + +The *opendr_detection_target_t* structure provides a data structure for storing inference outputs of detection models. +Every function in the C API that outputs a detection decision is expected to use this structure or a vector of this structure. + +The *opendr_detection_target_t* structure has the following fields: + +#### `int name` field + +A numerical id of the category to which the detected object belongs. + +#### `float left` field + +A numerical value that corresponds to the X value of the top-left point of a detection. + +#### `float top` field + +A numerical value that corresponds to the Y value of the top-left point of a detection. + +#### `float width` field + +A numerical value that corresponds to the width of a detection. + +#### `float height` field + +A numerical value that corresponds to the height of a detection.
+ +#### `float score` field + +The decision score (a value between 0 and 1). + + + +### struct *opendr_detection_vector_target_t* +```C +struct opendr_detection_vector_target { + opendr_detection_target_t *starting_pointer; + int size; +}; +typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; +``` + + +The *opendr_detection_vector_target_t* structure provides a data structure for storing multiple inference outputs of detection models. +Every function in the C API that outputs a detection decision is expected to use this or an *opendr_detection_target_t* structure. + +The *opendr_detection_vector_target_t* structure has the following fields: + +#### `opendr_detection_target_t *starting_pointer` field + +A pointer to the memory holding multiple OpenDR detection targets. + +#### `int size` field + +A numerical value that represents the number of OpenDR detection structures that are stored. From 617572d5d6d3e5d02004ece522f7d224d050bd61 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 01:04:04 +0200 Subject: [PATCH 49/87] new source, samples, utilities and test for onnx C API. The new scripts are implemente the feed forward of almost all the onnx models that python api is provided.
--- include/activity_recognition_x3d.h | 80 ++++++ include/data.h | 29 ++ include/face_recognition.h | 18 +- include/lightweight_open_pose.h | 91 ++++++ include/object_detection_2d_detr.h | 81 ++++++ ..._c.h => object_detection_2d_nanodet_jit.h} | 22 +- include/object_tracking_2d_deep_sort.h | 79 +++++ include/object_tracking_2d_fair_mot.h | 72 +++++ include/opendr_utils.h | 72 ++++- .../skeleton_based_action_recognition_pst.h | 75 +++++ include/target.h | 4 +- projects/c_api/Makefile | 63 +++- projects/c_api/README.md | 9 +- .../activity_recognition/x3d/x3d_demo.c | 44 +++ .../lightweight_open_pose_demo.c | 46 +++ .../object_detection_2d/detr/detr_demo.c | 43 +++ .../nanodet/README.md | 2 +- .../nanodet/nanodet_jit_demo.c | 12 +- .../deep_sort/deep_sort_demo.c | 43 +++ .../progressive_spatio_temporal_gcn_demo.c | 44 +++ src/c_api/Makefile | 10 +- src/c_api/activity_recognition_x3d.cpp | 209 ++++++++++++++ src/c_api/face_recognition.cpp | 269 +++++++++--------- src/c_api/lightweight_open_pose.cpp | 264 +++++++++++++++++ src/c_api/object_detection_2d_detr.cpp | 269 ++++++++++++++++++ ...pp => object_detection_2d_nanodet_jit.cpp} | 245 ++++++++-------- src/c_api/object_tracking_2d_deep_sort.cpp | 203 +++++++++++++ src/c_api/opendr_utils.cpp | 152 ++++++++-- .../skeleton_based_action_recognition_pst.cpp | 167 +++++++++++ .../c_api/test_activity_recognition_x3d.c | 93 ++++++ tests/sources/c_api/test_deep_sort.c | 95 +++++++ tests/sources/c_api/test_detr.c | 104 +++++++ .../c_api/test_lightweight_open_pose.c | 92 ++++++ tests/sources/c_api/test_nanodet.c | 16 +- tests/sources/c_api/test_pst_gcn.c | 94 ++++++ 35 files changed, 2876 insertions(+), 335 deletions(-) create mode 100644 include/activity_recognition_x3d.h create mode 100644 include/lightweight_open_pose.h create mode 100644 include/object_detection_2d_detr.h rename include/{nanodet_c.h => object_detection_2d_nanodet_jit.h} (70%) create mode 100644 include/object_tracking_2d_deep_sort.h create mode 100644 
include/object_tracking_2d_fair_mot.h create mode 100644 include/skeleton_based_action_recognition_pst.h create mode 100644 projects/c_api/samples/activity_recognition/x3d/x3d_demo.c create mode 100644 projects/c_api/samples/lightweight_open_pose/lightweight_open_pose_demo.c create mode 100644 projects/c_api/samples/object_detection_2d/detr/detr_demo.c rename projects/c_api/samples/{ => object_detection_2d}/nanodet/README.md (77%) rename projects/c_api/samples/{ => object_detection_2d}/nanodet/nanodet_jit_demo.c (81%) create mode 100644 projects/c_api/samples/object_tracking_2d/deep_sort/deep_sort_demo.c create mode 100644 projects/c_api/samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c create mode 100644 src/c_api/activity_recognition_x3d.cpp create mode 100644 src/c_api/lightweight_open_pose.cpp create mode 100644 src/c_api/object_detection_2d_detr.cpp rename src/c_api/{nanodet_libtorch.cpp => object_detection_2d_nanodet_jit.cpp} (53%) create mode 100644 src/c_api/object_tracking_2d_deep_sort.cpp create mode 100644 src/c_api/skeleton_based_action_recognition_pst.cpp create mode 100644 tests/sources/c_api/test_activity_recognition_x3d.c create mode 100644 tests/sources/c_api/test_deep_sort.c create mode 100644 tests/sources/c_api/test_detr.c create mode 100644 tests/sources/c_api/test_lightweight_open_pose.c create mode 100644 tests/sources/c_api/test_pst_gcn.c diff --git a/include/activity_recognition_x3d.h b/include/activity_recognition_x3d.h new file mode 100644 index 0000000000..8544adff1f --- /dev/null +++ b/include/activity_recognition_x3d.h @@ -0,0 +1,80 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef C_API_X3D_ACTIVITY_RECOGNITION_H +#define C_API_X3D_ACTIVITY_RECOGNITION_H + +#include "opendr_utils.h" +#include "target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct x3d_model { + // ONNX session objects + void *onnx_session; + void *env; + void *session_options; + + // Sizes for resizing an input image + int model_size; + int frames_per_clip; + int in_channels; + int batch_size; + + // Statistics for normalization + float mean_value; + float img_scale; + + // Feature dimension + int features; +}; +typedef struct x3d_model x3d_model_t; + +/** + * Loads a x3d activity recognition model saved in OpenDR format + * @param modelPath path to the OpenDR x3d model (as exported using OpenDR library) + * @param model the loaded model + */ +void load_x3d_model(const char *modelPath, char *mode, x3d_model_t *model); + +/** + * This function perform feed forward of x3d activity recognition model + * @param model x3d object detection model to be used for inference + * @param inputTensorValues input of the model as OpenCV mat + * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward + */ +void forward_x3d(x3d_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); + +/** + * Releases the memory allocated for a x3d activity recognition model + * @param model model to be de-allocated + */ +void free_x3d_model(x3d_model_t *model); + +/** + * initialize a tensor with random values for testing the forward pass of the model + * @param inputTensorValues opendr tensor to 
be loaded with random values + * @param model model to be used for hyper parameters + */ +void init_random_opendr_tensor_x3d(opendr_tensor_t *inputTensorValues, x3d_model_t *model); + +#ifdef __cplusplus +} +#endif + +#endif // C_API_X3D_ACTIVITY_RECOGNITION_H diff --git a/include/data.h b/include/data.h index 0b0018b941..9ec84913b3 100644 --- a/include/data.h +++ b/include/data.h @@ -29,6 +29,35 @@ struct opendr_image { }; typedef struct opendr_image opendr_image_t; +/*** + * Opendr data type for representing tensors + */ +struct opendr_tensor { + int batch_size; + int frames; + int channels; + int width; + int height; + + float *data; +}; +typedef struct opendr_tensor opendr_tensor_t; + +/*** + * Opendr data type for representing tensor vectors + */ +struct opendr_tensor_vector { + int n_tensors; + int *batch_sizes; + int *frames; + int *channels; + int *widths; + int *heights; + + float **memories; +}; +typedef struct opendr_tensor_vector opendr_tensor_vector_t; + #ifdef __cplusplus } #endif diff --git a/include/face_recognition.h b/include/face_recognition.h index ff2774aab2..551a7d6f19 100644 --- a/include/face_recognition.h +++ b/include/face_recognition.h @@ -58,10 +58,10 @@ typedef struct face_recognition_model face_recognition_model_t; /** * Loads a face recognition model saved in OpenDR format - * @param model_path path to the OpenDR face recongition model (as exported using OpenDR library) + * @param modelPath path to the OpenDR face recognition model (as exported using OpenDR library) * @param model the loaded model */ -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); +void load_face_recognition_model(const char *modelPath, face_recognition_model_t *model); /** * This function perform inference using a face recognition model and an input image @@ -74,28 +74,28 @@ opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, /** * Builds a face recognition database (containing images for 
persons to be recognized). This function expects the * database_folder to have the same format as the main Python toolkit. - * @param database_folder folder containing the database - * @param output_path output path to store the binary database. This file should be loaded along with the face + * @param databaseFolder folder containing the database + * @param outputPath output path to store the binary database. This file should be loaded along with the face * recognition model before performing inference. * @param model the face recognition model to be used for extracting the database features */ -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model); +void build_database_face_recognition(const char *databaseFolder, const char *outputPath, face_recognition_model_t *model); /** * Loads an already built database into the face recognition model. After this step, the model can be used for * performing inference. - * @param database_path path to the database file + * @param databasePath path to the database file * @param model the face recognition model to be used for inference */ -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); +void load_database_face_recognition(const char *databasePath, face_recognition_model_t *model); /** * Returns the name of a recognition person by decoding the category id into a string * @param model the face recognition model to be used for inference * @param category the predicted category - * @param person_name buffer to store the person name + * @param personName buffer to store the person name */ -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); +void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *personName); /** * Releases the memory allocated for a face recognition model diff --git 
a/include/lightweight_open_pose.h b/include/lightweight_open_pose.h new file mode 100644 index 0000000000..6888c4d6d6 --- /dev/null +++ b/include/lightweight_open_pose.h @@ -0,0 +1,91 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef C_API_LIGHTWEIGHT_OPEN_POSE_H +#define C_API_LIGHTWEIGHT_OPEN_POSE_H + +#include "opendr_utils.h" +#include "target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct open_pose_model { + // ONNX session objects + void *onnx_session; + void *env; + void *session_options; + + // Sizes for resizing an input image + int model_size; + + // Statistics for normalization + float mean_value; + float img_scale; + + // Model output parameters + int num_refinement_stages; + int output_size; + int stride; + + int even_channel_output; + int odd_channel_output; + int batch_size; + int width_output; + int height_output; +}; +typedef struct open_pose_model open_pose_model_t; + +/** + * Loads a lightweight open pose model saved in OpenDR format + * @param modelPath path to the OpenDR open_pose model (as exported using OpenDR library) + * @param model the loaded model + */ +void load_open_pose_model(const char *modelPath, open_pose_model_t *model); + +/** + * This function perform feed forward of open pose model + * @param model open pose model to be used for inference + * @param inputTensorValues OpenDR tensor structure as input of the model + * @param tensorVector OpenDR 
tensor vector structure to save the output of the feed forward + */ +void forward_open_pose(open_pose_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); + +/** + * Releases the memory allocated for a open pose model + * @param model model to be de-allocated + */ +void free_open_pose_model(open_pose_model_t *model); + +/** + * initialize a tensor with values from an opendr image for testing the forward pass of the model + * @param inputTensorValues opendr tensor to be loaded with random values + * @param model model to be used for hyper parameters + */ +void init_opendr_tensor_from_img_op(opendr_image_t *image, opendr_tensor_t *inputTensorValues, open_pose_model_t *model); + +/** + * initialize a tensor with random values for testing the forward pass of the model + * @param inputTensorValues opendr tensor to be loaded with random values + * @param model model to be used for hyper parameters + */ +void init_random_opendr_tensor_op(opendr_tensor_t *inputTensorValues, open_pose_model_t *model); +#ifdef __cplusplus +} +#endif + +#endif // C_API_LIGHTWEIGHT_OPEN_POSE_H diff --git a/include/object_detection_2d_detr.h b/include/object_detection_2d_detr.h new file mode 100644 index 0000000000..8424fd1188 --- /dev/null +++ b/include/object_detection_2d_detr.h @@ -0,0 +1,81 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef C_API_OBJECT_DETECTION_2D_DETR_H +#define C_API_OBJECT_DETECTION_2D_DETR_H + +#include "opendr_utils.h" +#include "target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct detr_model { + // ONNX session objects + void *onnx_session; + void *env; + void *session_options; + + // Sizes for resizing an input image + int model_size; + + // Statistics for normalization + float mean_value[3]; + float std_value[3]; + + // Recognition threshold + float threshold; + + // Feature dimension + int features; + int output_sizes[2]; +}; +typedef struct detr_model detr_model_t; + +/** + * Loads a detr object detection model saved in OpenDR format + * @param modelPath path to the OpenDR detr model (as exported using OpenDR library) + * @param model the loaded model + */ +void load_detr_model(const char *modelPath, detr_model_t *model); + +/** + * This function perform feed forward of detr object detection model + * @param model detr object detection model to be used for inference + * @param inputTensorValues input of the model as OpenCV mat + * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward + */ +void forward_detr(detr_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); + +/** + * Releases the memory allocated for a detr object detection model + * @param model model to be de-allocated + */ +void free_detr_model(detr_model_t *model); + +/** + * initialize a tensor with random values for testing the forward pass of the model + * @param inputTensorValues opendr tensor to be loaded with random values + * @param model model to be used for hyper parameters + */ +void init_random_opendr_tensor_detr(opendr_tensor_t *inputTensorValues, detr_model_t *model); + +#ifdef __cplusplus +} +#endif + +#endif // C_API_OBJECT_DETECTION_2D_DETR_H diff --git a/include/nanodet_c.h b/include/object_detection_2d_nanodet_jit.h similarity index 70% rename from include/nanodet_c.h rename to 
include/object_detection_2d_nanodet_jit.h index 41ba71bedc..e0e59954ce 100644 --- a/include/nanodet_c.h +++ b/include/object_detection_2d_nanodet_jit.h @@ -26,16 +26,16 @@ extern "C" { struct nanodet_model { // Jit cpp class holder - void *net; + void *network; // Device to be used char *device; // Recognition threshold - float scoreThreshold; + float score_threshold; // Model input size - int inputSize[2]; + int input_size[2]; // Keep ratio flag int keep_ratio; @@ -44,22 +44,22 @@ typedef struct nanodet_model nanodet_model_t; /** * Loads a nanodet object detection model saved in libtorch format - * @param model_path path to the libtorch nanodet model (as exported using OpenDR library) + * @param modelPath path to the libtorch nanodet model (as exported using OpenDR library) * @param device the device that will be used for the inference * @param height the height of model input * @param width the width of model input - * @param scoreThreshold a threshold for score to be infered + * @param scoreThreshold a threshold for score to be inferred * @param model the model to be loaded */ -void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); +void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); /** * This function performs inference using a nanodet object detection model and an input image * @param model nanodet model to be used for inference * @param image OpenDR image - * @return vecter of OpenDR bounding box target containing the bounding boxes of the detected objects + * @return OpenDR detection vector target containing the detections of the recognized objects */ -opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model); +opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); /** * Releases the memory allocated for a nanodet object detection model @@ 
-69,11 +69,11 @@ void free_nanodet_model(nanodet_model_t *model); /** * draw the bounding boxes from detections in the given image - * @param opendr_image image that has been used for inference + * @param image image that has been used for inference * @param model nanodet model that has been used for inference - * @param detections output of the inference + * @param detectionsVector output of the inference */ -void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_detection_target_list_t *detections); +void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); #ifdef __cplusplus } diff --git a/include/object_tracking_2d_deep_sort.h b/include/object_tracking_2d_deep_sort.h new file mode 100644 index 0000000000..92783daad4 --- /dev/null +++ b/include/object_tracking_2d_deep_sort.h @@ -0,0 +1,79 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef C_API_DEEP_SORT_OBJECT_TRACKING_2D_H +#define C_API_DEEP_SORT_OBJECT_TRACKING_2D_H + +#include "opendr_utils.h" +#include "target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct deep_sort_model { + // ONNX session objects + void *onnx_session; + void *env; + void *session_options; + + // Sizes for resizing an input image + int model_size[2]; + int in_channels; + int batch_size; + + // Statistics for normalization + float mean_value[3]; + float std_value[3]; + + // Feature dimension + int features; +}; +typedef struct deep_sort_model deep_sort_model_t; + +/** + * Loads a deep_sort object tracking 2d model saved in OpenDR format + * @param modelPath path to the OpenDR deep_sort model (as exported using OpenDR library) + * @param model the loaded model + */ +void load_deep_sort_model(const char *modelPath, deep_sort_model_t *model); + +/** + * This function perform feed forward of deep_sort object tracking 2d model + * @param model deep_sort object detection model to be used for inference + * @param inputTensorValues input of the model as OpenCV mat + * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward + */ +void forward_deep_sort(deep_sort_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); + +/** + * Releases the memory allocated for a deep_sort object tracking 2d model + * @param model model to be de-allocated + */ +void free_deep_sort_model(deep_sort_model_t *model); + +/** + * initialize a tensor with random values for testing the forward pass of the model + * @param inputTensorValues opendr tensor to be loaded with random values + * @param model model to be used for hyper parameters + */ +void init_random_opendr_tensor_ds(opendr_tensor_t *inputTensorValues, deep_sort_model_t *model); + +#ifdef __cplusplus +} +#endif + +#endif // C_API_DEEP_SORT_OBJECT_TRACKING_2D_H diff --git a/include/object_tracking_2d_fair_mot.h b/include/object_tracking_2d_fair_mot.h new 
file mode 100644 index 0000000000..f3beb26efa --- /dev/null +++ b/include/object_tracking_2d_fair_mot.h @@ -0,0 +1,72 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef C_API_FAIR_MOT_OBJECT_TRACKING_2D_H +#define C_API_FAIR_MOT_OBJECT_TRACKING_2D_H + +#include "opendr_utils.h" +#include "target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct fair_mot_model { + // ONNX session objects + void *onnx_session; + void *env; + void *session_options; + + // Sizes for resizing an input image + int model_size[2]; + int in_channels; + int batch_size; + + // Statistics for normalization + float mean_value[3]; + float std_value[3]; + + // Feature dimension + int features; +}; +typedef struct fair_mot_model fair_mot_model_t; + +/** + * Loads a fair_mot object tracking 2d model saved in OpenDR format + * @param model_path path to the OpenDR fair_mot model (as exported using OpenDR library) + * @param model the loaded model + */ +void load_fair_mot_model(const char *model_path, fair_mot_model_t *model); + +/** + * This function perform feed forward of fair_mot object tracking 2d model + * @param model fair_mot object detection model to be used for inference + * @param input_tensor_values input of the model as OpenCV mat + * @param tensor_vector OpenDR tensor vector structure to save the output of the feed forward + */ +void forward_fair_mot(fair_mot_model_t *model, opendr_tensor_t 
*input_tensor_values, opendr_tensor_vector_t *tensor_vector); + +/** + * Releases the memory allocated for a fair_mot object tracking 2d model + * @param model model to be de-allocated + */ +void free_fair_mot_model(fair_mot_model_t *model); + +#ifdef __cplusplus +} +#endif + +#endif // C_API_FAIR_MOT_OBJECT_TRACKING_2D_H diff --git a/include/opendr_utils.h b/include/opendr_utils.h index d15f13312b..3706283c6c 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -38,24 +38,78 @@ void load_image(const char *path, opendr_image_t *image); void free_image(opendr_image_t *image); /** - * Initialize an empty detection list to be used in C API - * @param detections OpenDR detection_target_list structure to be initialized + * Initialize an empty Opendr detection vector target to be used in C API + * @param detection_vector OpenDR detection_target_list structure to be initialized */ -void initialize_detections(opendr_detection_target_list_t *detections); +void initialize_detections_vector(opendr_detection_vector_target_t *detection_vector); /** * Loads an OpenDR detection target list to be used in C API - * @param detections OpenDR detection_target_list structure to be loaded - * @param vectorDataPtr the pointer of the first OpenDR detection target in a vector - * @param vectorSize the size of the vector + * @param detection_vector OpenDR detection_target_list structure to be loaded + * @param detection the pointer of the first OpenDR detection target in a vector + * @param vector_size the size of the vector */ -void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize); +void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, + int vector_size); /** * Releases the memory allocated for a detection list structure - * @param detections OpenDR detection_target_list structure to release + * @param detection_vector OpenDR detection vector 
target structure to release */ -void free_detections(opendr_detection_target_list_t *detections); +void free_detections_vector(opendr_detection_vector_target_t *detection_vector); + +/** + * Initialize an empty OpenDR tensor to be used in C API + * @param opendr_tensor OpenDR tensor to initialize + */ +void initialize_tensor(opendr_tensor_t *opendr_tensor); + +/** + * Load tensor values into an OpenDR tensor to be used in C API. + * @param opendr_tensor OpenDR tensor structure to be loaded + * @param tensor_data pointer to the data to be loaded into the tensor + * @param batch_size batch size of the tensor + * @param frames number of frames of the tensor + * @param channels number of channels of the tensor + * @param width width of the tensor + * @param height height of the tensor + */ +void load_tensor(opendr_tensor_t *opendr_tensor, void *tensor_data, int batch_size, int frames, int channels, int width, + int height); + +/** + * Releases the memory allocated for an OpenDR tensor structure + * @param opendr_tensor OpenDR tensor structure to release + */ +void free_tensor(opendr_tensor_t *opendr_tensor); + +/** + * Initialize an empty OpenDR tensor vector to be used in C API + * @param tensor_vector OpenDR tensor vector to initialize + */ +void initialize_tensor_vector(opendr_tensor_vector_t *tensor_vector); + +/** + * Load a vector of tensors values to be used in C. 
+ * @param tensor_vector OpenDR vector of tensors structure to be loaded + * @param tensor pointer to the first OpenDR tensor of the array to be loaded + * @param number_of_tensors the number of tensors that we want to load into the structure + */ +void load_tensor_vector(opendr_tensor_vector_t *tensor_vector, opendr_tensor_t *tensor, int number_of_tensors); + +/** + * Releases the memory allocated for an OpenDR tensor vector structure + * @param tensor_vector OpenDR tensor vector structure to release + */ +void free_tensor_vector(opendr_tensor_vector_t *tensor_vector); + +/** + * Helper function to store a tensor from an OpenDR tensor vector structure into an OpenDR tensor. + * @param output OpenDR tensor in which the requested tensor is stored + * @param tensor_vector OpenDR tensor vector from which tensor is wanted + * @param index index of the wanted tensor in the OpenDR tensor vector + */ +void iter_tensor_vector(opendr_tensor_t *output, opendr_tensor_vector_t *tensor_vector, int index); #ifdef __cplusplus } diff --git a/include/skeleton_based_action_recognition_pst.h b/include/skeleton_based_action_recognition_pst.h new file mode 100644 index 0000000000..0d8257c3e5 --- /dev/null +++ b/include/skeleton_based_action_recognition_pst.h @@ -0,0 +1,75 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef C_API_PST_SKELETON_BASED_ACTION_RECOGNITION_H +#define C_API_PST_SKELETON_BASED_ACTION_RECOGNITION_H + +#include "opendr_utils.h" +#include "target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct pst_model { + // ONNX session objects + void *onnx_session; + void *env; + void *session_options; + + // Sizes for input and output tensor + int batch_size; + int in_channels; + int features; + int num_point; + int num_person; + int num_classes; +}; +typedef struct pst_model pst_model_t; + +/** + * Loads a pst skeleton based action recognition model saved in OpenDR format + * @param modelPath path to the OpenDR pst model (as exported using OpenDR library) + * @param model the loaded model + */ +void load_pst_model(const char *modelPath, pst_model_t *model); + +/** + * This function perform feed forward of pst skeleton based action recognition model + * @param model pst object detection model to be used for inference + * @param inputTensorValues input of the model as OpenCV mat + * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward + */ +void forward_pst(pst_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); + +/** + * Releases the memory allocated for a pst skeleton based action recognition model + * @param model model to be de-allocated + */ +void free_pst_model(pst_model_t *model); + +/** + * initialize a tensor with random values for testing the forward pass of the model + * @param inputTensorValues opendr tensor to be loaded with random values + * @param model model to be used for hyper parameters + */ +void init_random_opendr_tensor_pst(opendr_tensor_t *inputTensorValues, pst_model_t *model); + +#ifdef __cplusplus +} +#endif + +#endif // C_API_PST_SKELETON_BASED_ACTION_RECOGNITION_H diff --git a/include/target.h b/include/target.h index 1baf6b55f5..894bc03d2f 100644 --- a/include/target.h +++ b/include/target.h @@ -45,11 +45,11 @@ typedef struct 
opendr_detection_target opendr_detection_target_t; /*** * OpenDR data type for representing a structure of detection targets */ -struct opendr_detection_target_list { +struct opendr_detection_vector_target { opendr_detection_target_t *starting_pointer; int size; }; -typedef struct opendr_detection_target_list opendr_detection_target_list_t; +typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; #ifdef __cplusplus } diff --git a/projects/c_api/Makefile b/projects/c_api/Makefile index b8fb363fd3..34231684bd 100644 --- a/projects/c_api/Makefile +++ b/projects/c_api/Makefile @@ -33,9 +33,15 @@ OPENDR_LD = -L$(OPENDR_HOME)/lib -lopendr all: download demos -demos: $(BUILD_DIR)/face_recognition_demo $(BUILD_DIR)/nanodet_demo -face: $(BUILD_DIR)/face_recognition_demo -nano: $(BUILD_DIR)/nanodet_demo +demos: x3d face_recognition open_pose detr nanodet deep_sort skeleton_based_action_recognition_pst + +x3d: $(BUILD_DIR)/activity_recognition/x3d_demo +face_recognition: $(BUILD_DIR)/face_recognition_demo +open_pose: $(BUILD_DIR)/lightweight_open_pose_demo +detr: $(BUILD_DIR)/object_detection_2d/detr_demo +nanodet: $(BUILD_DIR)/object_detection_2d/nanodet_jit_demo +deep_sort: $(BUILD_DIR)/object_tracking_2d/deep_sort_demo +skeleton_based_action_recognition_pst: $(BUILD_DIR)/skeleton_based_action_recognition/pst_demo download: @+if [ -a $(DATA_DIR) ] ; \ @@ -44,32 +50,71 @@ download: else \ echo Downloading resources for C API...; \ $(RM) -r opendrdata.csd.auth.gr; \ + $(MKDIR_P) $(DATA_DIR)/activity_recognition/x3d; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/activity_recognition/weights/optimized_model/*; \ + $(MV) opendrdata.csd.auth.gr/perception/activity_recognition/weights/optimized_model $(DATA_DIR)/activity_recognition/x3d/optimized_model; \ $(MKDIR_P) $(DATA_DIR); \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/test_data/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/test_data/images 
$(DATA_DIR)/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/optimized_model/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/optimized_model $(DATA_DIR)/optimized_model; \ - $(MKDIR_P) $(DATA_DIR)/nanodet; \ + $(MKDIR_P) $(DATA_DIR)/lightweight_open_pose; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/pose_estimation/lightweight_open_pose/optimized_model/*; \ + $(MV) opendrdata.csd.auth.gr/perception/pose_estimation/lightweight_open_pose/optimized_model $(DATA_DIR)/lightweight_open_pose/optimized_model; \ + $(MKDIR_P) $(DATA_DIR)/object_detection_2d/detr; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/detr/optimized_model/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/detr/optimized_model $(DATA_DIR)/object_detection_2d/detr/optimized_model; \ + $(MKDIR_P) $(DATA_DIR)/object_detection_2d/nanodet; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/nanodet/database; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/object_detection_2d/nanodet/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/nanodet/optimized_model; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/object_detection_2d/nanodet/optimized_model; \ + $(MKDIR_P) $(DATA_DIR)/object_tracking_2d/deep_sort; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_tracking_2d/deep_sort/optimized_model/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_tracking_2d/deep_sort/optimized_model $(DATA_DIR)/object_tracking_2d/deep_sort/optimized_model; \ + $(MKDIR_P) $(DATA_DIR)/skeleton_based_action_recognition/progressive_spatiotemporal_gcn; \ + 
$(WGET) ftp://opendrdata.csd.auth.gr/perception/skeleton_based_action_recognition/pretrained_models/optimized_data/*; \ + $(MV) opendrdata.csd.auth.gr/perception/skeleton_based_action_recognition/pretrained_models/optimized_data $(DATA_DIR)/skeleton_based_action_recognition/progressive_spatiotemporal_gcn/optimized_model; \ $(RM) -r opendrdata.csd.auth.gr; \ fi; +$(BUILD_DIR)/activity_recognition/x3d_demo: + $(MKDIR_P) $(BUILD_DIR)/activity_recognition + @+echo "Building activity recognition x3d demo..." + $(CC) $(CFLAGS) -o $(BUILD_DIR)/activity_recognition/x3d_demo samples/activity_recognition/x3d/x3d_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + $(BUILD_DIR)/face_recognition_demo: $(MKDIR_P) $(BUILD_DIR) @+echo "Building face recognition demo..." $(CC) $(CFLAGS) -o $(BUILD_DIR)/face_recognition_demo samples/face_recognition/face_recognition_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) -$(BUILD_DIR)/nanodet_demo: +$(BUILD_DIR)/lightweight_open_pose_demo: $(MKDIR_P) $(BUILD_DIR) + @+echo "Building lightweight open pose demo..." + $(CC) $(CFLAGS) -o $(BUILD_DIR)/lightweight_open_pose_demo samples/lightweight_open_pose/lightweight_open_pose_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + +$(BUILD_DIR)/object_detection_2d/detr_demo: + $(MKDIR_P) $(BUILD_DIR)/object_detection_2d + @+echo "Building detr object detection demo..." + $(CC) $(CFLAGS) -o $(BUILD_DIR)/object_detection_2d/detr_demo samples/object_detection_2d/detr/detr_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + +$(BUILD_DIR)/object_detection_2d/nanodet_jit_demo: + $(MKDIR_P) $(BUILD_DIR)/object_detection_2d @+echo "Building nanodet object detection demo..." 
- $(CC) $(CFLAGS) -o $(BUILD_DIR)/nanodet_libtorch_demo samples/nanodet/nanodet_jit_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + $(CC) $(CFLAGS) -o $(BUILD_DIR)/object_detection_2d/nanodet_jit_demo samples/object_detection_2d/nanodet/nanodet_jit_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + +$(BUILD_DIR)/object_tracking_2d/deep_sort_demo: + $(MKDIR_P) $(BUILD_DIR)/object_tracking_2d + @+echo "Building object tracking 2d deep sort demo..." + $(CC) $(CFLAGS) -o $(BUILD_DIR)/object_tracking_2d/deep_sort_demo samples/object_tracking_2d/deep_sort/deep_sort_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) +$(BUILD_DIR)/skeleton_based_action_recognition/pst_demo: + $(MKDIR_P) $(BUILD_DIR)/skeleton_based_action_recognition + @+echo "Building skeleton based action recognition progressive spatiotemporal demo..." + $(CC) $(CFLAGS) -o $(BUILD_DIR)/skeleton_based_action_recognition/pst_demo samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) clean: @+echo "Cleaning C API demo binaries and temporary files..." - @+$(RM) $(BUILD_DIR)/* + @+$(RM) -rf $(BUILD_DIR)/* @+$(RM) -rf $(DATA_DIR) @+echo "Done!" diff --git a/projects/c_api/README.md b/projects/c_api/README.md index ebaccc45ea..6031f157ce 100644 --- a/projects/c_api/README.md +++ b/projects/c_api/README.md @@ -9,6 +9,11 @@ Make sure that you have downloaded the necessary resources before running the de ## Supported tools Currently, the following tools are exposing a C API: -1. Face recognition -2. Nanodet Jit module +1. Activity recognition x3d model +2. Face recognition +3. Lightweight open pose +4. Object detection 2d Detr +5. Object detection 2d Nanodet Jit module +6. Object tracking 2d Deep sort +7. 
Skeleton based action recognition with Progressive spatiotemporal gcn diff --git a/projects/c_api/samples/activity_recognition/x3d/x3d_demo.c b/projects/c_api/samples/activity_recognition/x3d/x3d_demo.c new file mode 100644 index 0000000000..f38fb837ff --- /dev/null +++ b/projects/c_api/samples/activity_recognition/x3d/x3d_demo.c @@ -0,0 +1,44 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "activity_recognition_x3d.h" +#include "opendr_utils.h" + +int main(int argc, char **argv) { + x3d_model_t model; + char *mode = "l"; + + printf("start init model\n"); + load_x3d_model("data/activity_recognition/x3d/optimized_model/x3d_l.onnx", mode, &model); + printf("success\n"); + + // Initialize opendr tensor for input + opendr_tensor_t input_tensor; + init_random_opendr_tensor_x3d(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_x3d(&model, &input_tensor, &output_tensor_vector); + + // Free the memory + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + free_x3d_model(&model); + + return 0; +} diff --git a/projects/c_api/samples/lightweight_open_pose/lightweight_open_pose_demo.c b/projects/c_api/samples/lightweight_open_pose/lightweight_open_pose_demo.c new file mode 100644 index 0000000000..a684309c17 --- /dev/null +++ 
b/projects/c_api/samples/lightweight_open_pose/lightweight_open_pose_demo.c @@ -0,0 +1,46 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "lightweight_open_pose.h" +#include "opendr_utils.h" + +int main(int argc, char **argv) { + open_pose_model_t model; + + printf("start init model\n"); + load_open_pose_model("data/lightweight_open_pose/optimized_model/onnx_model.onnx", &model); + printf("success\n"); + + // Initialize opendr tensor for input + opendr_tensor_t input_tensor; + init_random_opendr_tensor_op(&input_tensor, &model); + + /* If user wants to import an Opendr Image, first must load the image and then use this function */ + // init_opendr_tensor_from_img_op(&image, &input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_open_pose(&model, &input_tensor, &output_tensor_vector); + + // Free the memory + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + free_open_pose_model(&model); + + return 0; +} diff --git a/projects/c_api/samples/object_detection_2d/detr/detr_demo.c b/projects/c_api/samples/object_detection_2d/detr/detr_demo.c new file mode 100644 index 0000000000..a1478093d8 --- /dev/null +++ b/projects/c_api/samples/object_detection_2d/detr/detr_demo.c @@ -0,0 +1,43 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache 
License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "object_detection_2d_detr.h" +#include "opendr_utils.h" + +int main(int argc, char **argv) { + detr_model_t model; + + printf("start init model\n"); + load_detr_model("data/object_detection_2d/detr/optimized_model", &model); + printf("success\n"); + + // Initialize opendr tensor for input + opendr_tensor_t input_tensor; + init_random_opendr_tensor_detr(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_detr(&model, &input_tensor, &output_tensor_vector); + + // Free the memory + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + free_detr_model(&model); + + return 0; +} diff --git a/projects/c_api/samples/nanodet/README.md b/projects/c_api/samples/object_detection_2d/nanodet/README.md similarity index 77% rename from projects/c_api/samples/nanodet/README.md rename to projects/c_api/samples/object_detection_2d/nanodet/README.md index 8114150843..31d3e90496 100644 --- a/projects/c_api/samples/nanodet/README.md +++ b/projects/c_api/samples/object_detection_2d/nanodet/README.md @@ -11,5 +11,5 @@ After installation, the demo can be run from projects/c_api directory with: Or with the downloaded model and image with: ```sh -./built/nanodet_libtorch_demo ./data/nanodet/optimized_model/nanodet_m.pth cuda ./data/nanodet/database/000000000036.jpg 320 320 +./built/nanodet_libtorch_demo 
./data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth cuda ./data/object_detection_2d/nanodet/database/000000000036.jpg 320 320 ``` diff --git a/projects/c_api/samples/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c similarity index 81% rename from projects/c_api/samples/nanodet/nanodet_jit_demo.c rename to projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c index aed0ac5d48..4d6b4ea717 100644 --- a/projects/c_api/samples/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c @@ -16,15 +16,15 @@ #include #include -#include "nanodet_c.h" +#include "object_detection_2d_nanodet_jit.h" #include "opendr_utils.h" int main(int argc, char **argv) { if (argc != 6) { fprintf(stderr, "usage: %s [model_path] [device] [images_path] [input_sizes].\n" - "model_path = path/to/your/libtorch/model.pth \n device = cuda or cpu \n" - "images_path = \"xxx/xxx/*.jpg\" \n input_size = width height.\n", + "model_path = path/to/your/libtorch/model.pth \ndevice = cuda or cpu \n" + "images_path = \"xxx/xxx/*.jpg\" \ninput_size = width height.\n", argv[0]); return -1; } @@ -48,16 +48,16 @@ int main(int argc, char **argv) { } // Initialize opendr detection target list; - opendr_detection_target_list_t results; + opendr_detection_vector_target_t results; // Infer nanodet model - results = infer_nanodet(&image, &model); + results = infer_nanodet(&model, &image); // Draw the results drawBboxes(&image, &model, &results); // Free the memory - free_detections(&results); + free_detections_vector(&results); free_image(&image); free_nanodet_model(&model); diff --git a/projects/c_api/samples/object_tracking_2d/deep_sort/deep_sort_demo.c b/projects/c_api/samples/object_tracking_2d/deep_sort/deep_sort_demo.c new file mode 100644 index 0000000000..55441a934e --- /dev/null +++ b/projects/c_api/samples/object_tracking_2d/deep_sort/deep_sort_demo.c @@ -0,0 +1,43 @@ +/* + * Copyright 
2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "object_tracking_2d_deep_sort.h" +#include "opendr_utils.h" + +int main(int argc, char **argv) { + deep_sort_model_t model; + + printf("start init model\n"); + load_deep_sort_model("data/object_tracking_2d/deep_sort/optimized_model/onnx_model.onnx", &model); + printf("success\n"); + + // Initialize opendr tensor for input + opendr_tensor_t input_tensor; + init_random_opendr_tensor_ds(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_deep_sort(&model, &input_tensor, &output_tensor_vector); + + // Free the memory + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + free_deep_sort_model(&model); + + return 0; +} diff --git a/projects/c_api/samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c b/projects/c_api/samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c new file mode 100644 index 0000000000..bc82a5a2ed --- /dev/null +++ b/projects/c_api/samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c @@ -0,0 +1,44 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include "opendr_utils.h" +#include "skeleton_based_action_recognition_pst.h" + +int main(int argc, char **argv) { + pst_model_t model; + + printf("start init model\n"); + load_pst_model("data/skeleton_based_action_recognition/progressive_spatiotemporal_gcn/optimized_model/onnx_model.onnx", + &model); + printf("success\n"); + + // Initialize opendr tensor for input + opendr_tensor_t input_tensor; + init_random_opendr_tensor_pst(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_pst(&model, &input_tensor, &output_tensor_vector); + + // Free the memory + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + free_pst_model(&model); + + return 0; +} diff --git a/src/c_api/Makefile b/src/c_api/Makefile index ed9bbeb80c..755f61a7e4 100644 --- a/src/c_api/Makefile +++ b/src/c_api/Makefile @@ -42,10 +42,16 @@ $(OPENDR_HOME)/lib/libopendr.so: @+echo "Building C API..." 
@+$(MKDIR_P) $(BUILD_DIR) $(CPP) $(CFLAGS) -c opendr_utils.cpp -o $(BUILD_DIR)/opendr_utils.o $(INC) $(OPENDR_INC) + $(CPP) $(CFLAGS) -c activity_recognition_x3d.cpp -o $(BUILD_DIR)/opendr_x3d.o $(INC) $(OPENDR_INC) $(CPP) $(CFLAGS) -c face_recognition.cpp -o $(BUILD_DIR)/opendr_face_recognition.o $(INC) $(OPENDR_INC) - $(CPP) $(CFLAGS) -c nanodet_libtorch.cpp -o $(BUILD_DIR)/opendr_nanodet_libtorch.o $(INC) $(OPENDR_INC) $(TORCHSCRIPT_INC) + $(CPP) $(CFLAGS) -c lightweight_open_pose.cpp -o $(BUILD_DIR)/opendr_open_pose.o $(INC) $(OPENDR_INC) + $(CPP) $(CFLAGS) -c object_detection_2d_detr.cpp -o $(BUILD_DIR)/opendr_detr.o $(INC) $(OPENDR_INC) + $(CPP) $(CFLAGS) -c object_detection_2d_nanodet_jit.cpp -o $(BUILD_DIR)/opendr_nanodet_jit.o $(INC) $(OPENDR_INC) $(TORCHSCRIPT_INC) + $(CPP) $(CFLAGS) -c object_tracking_2d_deep_sort.cpp -o $(BUILD_DIR)/opendr_deep_sort.o $(INC) $(OPENDR_INC) + $(CPP) $(CFLAGS) -c skeleton_based_action_recognition_pst.cpp -o $(BUILD_DIR)/opendr_pst.o $(INC) $(OPENDR_INC) + @$(MKDIR_P) $(LIB_DIR) - $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(BUILD_DIR)/opendr_nanodet_libtorch.o $(LD) $(TORCHSCRIPT_LD) --shared + $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(BUILD_DIR)/opendr_detr.o $(BUILD_DIR)/opendr_open_pose.o $(BUILD_DIR)/opendr_x3d.o $(BUILD_DIR)/opendr_pst.o $(BUILD_DIR)/opendr_deep_sort.o $(BUILD_DIR)/opendr_nanodet_jit.o $(LD) $(TORCHSCRIPT_LD) --shared clean: @+echo "Cleaning C API binaries and temporary files..." 
@+$(RM) $(BUILD_DIR)/* diff --git a/src/c_api/activity_recognition_x3d.cpp b/src/c_api/activity_recognition_x3d.cpp new file mode 100644 index 0000000000..fee11831cb --- /dev/null +++ b/src/c_api/activity_recognition_x3d.cpp @@ -0,0 +1,209 @@ +// Copyright 2020-2022 OpenDR European Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "activity_recognition_x3d.h" +#include "target.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "opencv2/core/core_c.h" + +/** + * Helper function for preprocessing images before feeding them into the x3d activity recognition model. + * This function follows the OpenDR's x3d pre-processing pipeline, which includes the following: + * a) converting the image from BGR to RGB, b) resizing it so that its height becomes modelInputSize pixels while keeping + * the original aspect ratio, and c) converting it to 32-bit floats scaled by imageScale and shifted by meanValue. 
+ * @param image image to be preprocesses + * @param normalizedImage pre-processed data in a matrix + * @param modelInputSize size of the center crop (equals the size that the DL model expects) + * @param meanValue value used for centering the input image + * @param imageScale value used for scaling the input image + */ +void preprocess_x3d(cv::Mat *image, cv::Mat *normalizedImage, int modelInputSize, float meanValue, float imageScale) { + // Convert to RGB + cv::Mat imageRgb; + cv::cvtColor(*image, imageRgb, cv::COLOR_BGR2RGB); + + // Resize with ratio + double scale = (static_cast(modelInputSize) / static_cast(imageRgb.rows)); + cv::resize(imageRgb, imageRgb, cv::Size(), scale, scale); + + // Convert to 32f and normalize + imageRgb.convertTo(*normalizedImage, CV_32FC3, imageScale, meanValue); +} + +void load_x3d_model(const char *modelPath, char *mode, x3d_model_t *model) { + // Initialize model + model->onnx_session = model->env = model->session_options = NULL; + + Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); + Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; + sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::Session *session = new Ort::Session(*env, modelPath, *sessionOptions); + model->env = env; + model->onnx_session = session; + model->session_options = sessionOptions; + + // Should we pass these parameters through the model json file? 
+ model->mean_value = -128.0f / 255.0f; + model->img_scale = (1.0f / 255.0f); + + // std::string model_name = "l"; + std::string modeName = mode; + if (modeName == "l") { + model->model_size = 312; + model->frames_per_clip = 16; + } else if (modeName == "m") { + model->model_size = 224; + model->frames_per_clip = 16; + } else if (modeName == "s") { + model->model_size = 160; + model->frames_per_clip = 13; + } else { + model->model_size = 160; + model->frames_per_clip = 4; + } + + model->batch_size = 1; + model->in_channels = 3; + + model->features = 400; +} + +void free_x3d_model(x3d_model_t *model) { + if (model->onnx_session) { + Ort::Session *session = static_cast(model->onnx_session); + delete session; + } + + if (model->session_options) { + Ort::SessionOptions *sessionOptions = static_cast(model->session_options); + delete sessionOptions; + } + + if (model->env) { + Ort::Env *env = static_cast(model->env); + delete env; + } +} + +void ff_x3d(x3d_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { + Ort::Session *session = static_cast(model->onnx_session); + + if (!session) { + std::cerr << "ONNX session not initialized." 
<< std::endl; + return; + } + + // Prepare the input dimensions + // Dims of input data + size_t inputTensorSize = + model->batch_size * model->in_channels * model->frames_per_clip * model->model_size * model->model_size; + + // Dims of input of model + std::vector inputNodeDims = {model->batch_size, model->in_channels, model->frames_per_clip, model->model_size, + model->model_size}; + + // Setup input/output names + Ort::AllocatorWithDefaultOptions allocator; + std::vector inputNodeNames = {"video"}; + std::vector outputNodeNames = {"classes"}; + + // Set up the input tensor + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value inputTensor = + Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 5); + assert(inputTensor.IsTensor()); + + // Feed-forward the model + auto outputTensors = + session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); + assert(outputTensors.size() == 1); + + // Get the results back + float *tensorData = outputTensors.front().GetTensorMutableData(); + + int tensorSizes[5] = {1, 1, 1, model->batch_size, model->features}; + + cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensorData)); + outputTensorValues->push_back(outputMat); +} + +void init_random_opendr_tensor_x3d(opendr_tensor_t *inputTensorValues, x3d_model_t *model) { + // Prepare the input dimensions + // Dims of input data + int inputTensorSize = model->batch_size * model->frames_per_clip * model->in_channels * model->model_size * model->model_size; + + float *data = static_cast(malloc(inputTensorSize * sizeof(float))); + // change data structure so channels are the last iterable dimension + for (unsigned int j = 0; j < inputTensorSize; ++j) { + data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; + } + + // Dims of input of model + load_tensor(inputTensorValues, static_cast(data), model->batch_size, model->frames_per_clip, 
model->in_channels, + model->model_size, model->model_size); + free(data); +} + +void forward_x3d(x3d_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector) { + // Get the feature vector for the current image + std::vector outputTensorValues; + ff_x3d(model, inputTensorValues, &outputTensorValues); + + int nTensors = static_cast(outputTensorValues.size()); + if (nTensors > 0) { + int batchSizes[nTensors]; + int frames[nTensors]; + int channels[nTensors]; + int widths[nTensors]; + int heights[nTensors]; + + std::vector tempTensorsVector; + opendr_tensor_t tempTensors[nTensors]; + + for (int i = 0; i < nTensors; i++) { + batchSizes[i] = 1; + frames[i] = 1; + channels[i] = 1; + widths[i] = model->batch_size; + heights[i] = model->features; + + load_tensor(&(tempTensors[i]), outputTensorValues[i].ptr(0), batchSizes[i], frames[i], channels[i], widths[i], + heights[i]); + tempTensorsVector.push_back(tempTensors[i]); + } + load_tensor_vector(tensorVector, tempTensorsVector.data(), nTensors); + for (int i = 0; i < nTensors; i++) { + free_tensor(&(tempTensors[i])); + } + + } else { + initialize_tensor_vector(tensorVector); + } +} diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index 9ed33b2ba1..9226e8e394 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -36,40 +36,40 @@ /** * Helper function for preprocessing images before feeding them into the face recognition model. 
* This function follows the OpenDR's face recognition pre-processing pipeline, which includes the following: - * a) resizing the image into resize_target x resize_target pixels and then taking a center crop of size model_input_size, - * and b) normalizing the resulting values using mean_value and std_value + * a) resizing the image into resizeTarget x resizeTarget pixels and then taking a center crop of size modelInputSize, + * and b) normalizing the resulting values using meanValue and stdValue * @param image image to be preprocesses * @param data pre-processed data in a flattened vector - * @param resize_target target size for resizing - * @param model_input_size size of the center crop (equals the size that the DL model expects) - * @param mean_value value used for centering the input image - * @param std_value value used for scaling the input image + * @param resizeTarget target size for resizing + * @param modelInputSize size of the center crop (equals the size that the DL model expects) + * @param meanValue value used for centering the input image + * @param stdValue value used for scaling the input image */ -void preprocess_face_recognition(cv::Mat *image, std::vector &data, int resize_target = 128, int model_input_size = 112, - float mean_value = 0.5, float std_value = 0.5) { +void preprocess_face_recognition(cv::Mat *image, std::vector &data, int resizeTarget = 128, int modelInputSize = 112, + float meanValue = 0.5, float stdValue = 0.5) { // Convert to RGB - cv::Mat img; - cv::cvtColor(*image, img, cv::COLOR_BGR2RGB); + cv::Mat normalizedImage; + cv::cvtColor(*image, normalizedImage, cv::COLOR_BGR2RGB); // Resize and then get a center crop - cv::resize(img, img, cv::Size(resize_target, resize_target)); - int stride = (resize_target - model_input_size) / 2; - cv::Rect myROI(stride, stride, resize_target - stride, resize_target - stride); - img = img(myROI); + cv::resize(normalizedImage, normalizedImage, cv::Size(resizeTarget, resizeTarget)); + int stride = 
(resizeTarget - modelInputSize) / 2; + cv::Rect myRoi(stride, stride, resizeTarget - stride, resizeTarget - stride); + normalizedImage = normalizedImage(myRoi); // Scale to 0...1 - cv::Mat out_img; - img.convertTo(out_img, CV_32FC3, 1 / 255.0); + cv::Mat outputImage; + normalizedImage.convertTo(outputImage, CV_32FC3, 1 / 255.0); // Unfold the image into the appropriate format // This is certainly not the most efficient way to do this... // ... and is probably constantly leading to cache misses // ... but it works for now. - for (unsigned int j = 0; j < model_input_size; ++j) { - for (unsigned int k = 0; k < model_input_size; ++k) { - cv::Vec3f cur_pixel = out_img.at(j, k); - data[0 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[0] - mean_value) / std_value; - data[1 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[1] - mean_value) / std_value; - data[2 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[2] - mean_value) / std_value; + for (unsigned int j = 0; j < modelInputSize; ++j) { + for (unsigned int k = 0; k < modelInputSize; ++k) { + cv::Vec3f currentPixel = outputImage.at(j, k); + data[0 * modelInputSize * modelInputSize + j * modelInputSize + k] = (currentPixel[0] - meanValue) / stdValue; + data[1 * modelInputSize * modelInputSize + j * modelInputSize + k] = (currentPixel[1] - meanValue) / stdValue; + data[2 * modelInputSize * modelInputSize + j * modelInputSize + k] = (currentPixel[2] - meanValue) / stdValue; } } } @@ -79,16 +79,16 @@ void preprocess_face_recognition(cv::Mat *image, std::vector &data, int r * In the future this can be done at library level using a JSON-parser */ std::string json_get_key_string(std::string json, const std::string &key) { - std::size_t start_idx = json.find(key); - std::string value = json.substr(start_idx); + std::size_t startIdx = json.find(key); + std::string value = json.substr(startIdx); value = value.substr(value.find(":") + 1); - 
value.resize(value.find(",")); + value = value.substr(0, value.find(",")); value = value.substr(value.find("\"") + 1); - value.resize(value.find("\"")); + value = value.substr(0, value.find("\"")); return value; } -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model) { +void load_face_recognition_model(const char *modelPath, face_recognition_model_t *model) { // Initialize model model->onnx_session = model->env = model->session_options = NULL; model->database = model->database_ids = NULL; @@ -96,55 +96,55 @@ void load_face_recognition_model(const char *model_path, face_recognition_model_ model->threshold = 1; // Parse the model JSON file - std::string model_json_path(model_path); - std::size_t split_pos = model_json_path.find_last_of("/"); - split_pos = split_pos > 0 ? split_pos + 1 : 0; - model_json_path = model_json_path + "/" + model_json_path.substr(split_pos) + ".json"; + std::string modelJsonPath(modelPath); + std::size_t splitPosition = modelJsonPath.find_last_of("/"); + splitPosition = splitPosition > 0 ? splitPosition + 1 : 0; + modelJsonPath = modelJsonPath + "/" + modelJsonPath.substr(splitPosition) + ".json"; - std::ifstream in_stream(model_json_path); - if (!in_stream.is_open()) { + std::ifstream inStream(modelJsonPath); + if (!inStream.is_open()) { std::cerr << "Cannot open JSON model file" << std::endl; return; } std::string str; - in_stream.seekg(0, std::ios::end); - str.reserve(in_stream.tellg()); - in_stream.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(in_stream)), std::istreambuf_iterator()); + inStream.seekg(0, std::ios::end); + str.reserve(inStream.tellg()); + inStream.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(inStream)), std::istreambuf_iterator()); - std::string basepath = model_json_path.substr(0, split_pos); - split_pos = basepath.find_last_of("/"); - split_pos = split_pos > 0 ? 
split_pos + 1 : 0; - basepath.resize(split_pos); + std::string basePath = modelJsonPath.substr(0, splitPosition); + splitPosition = basePath.find_last_of("/"); + splitPosition = splitPosition > 0 ? splitPosition + 1 : 0; + basePath = basePath.substr(0, splitPosition); // Parse JSON - std::string onnx_model_path = basepath + json_get_key_string(str, "model_paths"); - std::string model_format = json_get_key_string(str, "format"); + std::string onnxModelPath = basePath + json_get_key_string(str, "model_paths"); + std::string modelFormat = json_get_key_string(str, "format"); // Parse inference params std::string threshold = json_get_key_string(str, "threshold"); - ; + if (!threshold.empty()) { model->threshold = std::stof(threshold); } // Proceed only if the model is in onnx format - if (model_format != "onnx") { + if (modelFormat != "onnx") { std::cerr << "Model not in ONNX format." << std::endl; return; } Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions *session_options = new Ort::SessionOptions; - session_options->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; + sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, onnx_model_path.c_str(), *session_options); + Ort::Session *session = new Ort::Session(*env, onnxModelPath.c_str(), *sessionOptions); model->env = env; model->onnx_session = session; - model->session_options = session_options; + model->session_options = sessionOptions; // Should we pass these parameters through the model json file? 
model->model_size = 112; @@ -161,8 +161,8 @@ void free_face_recognition_model(face_recognition_model_t *model) { } if (model->session_options) { - Ort::SessionOptions *session_options = static_cast(model->session_options); - delete session_options; + Ort::SessionOptions *sessionOptions = static_cast(model->session_options); + delete sessionOptions; } if (model->env) { @@ -194,164 +194,165 @@ void ff_face_recognition(face_recognition_model_t *model, opendr_image_t *image, } // Prepare the input dimensions - std::vector input_node_dims = {1, 3, model->model_size, model->model_size}; - size_t input_tensor_size = model->model_size * model->model_size * 3; + std::vector inputNodeDims = {1, 3, model->model_size, model->model_size}; + size_t inputTensorSize = model->model_size * model->model_size * 3; // Get the input image and pre-process it - std::vector input_tensor_values(input_tensor_size); - cv::Mat *opencv_image = static_cast(image->data); - if (!opencv_image) { + std::vector inputTensorValues(inputTensorSize); + cv::Mat *opencvImage = static_cast(image->data); + if (!opencvImage) { std::cerr << "Cannot load image for inference." 
<< std::endl; return; } - preprocess_face_recognition(opencv_image, input_tensor_values, model->resize_size, model->model_size, model->mean_value, + preprocess_face_recognition(opencvImage, inputTensorValues, model->resize_size, model->model_size, model->mean_value, model->std_value); // Setup input/output names Ort::AllocatorWithDefaultOptions allocator; - std::vector input_node_names = {"data"}; - std::vector output_node_names = {"features"}; + std::vector inputNodeNames = {"data"}; + std::vector outputNodeNames = {"features"}; - // Setup the input tensor - auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value input_tensor = - Ort::Value::CreateTensor(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4); - assert(input_tensor.IsTensor()); + // Set up the input tensor + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value inputTensor = + Ort::Value::CreateTensor(memoryInfo, inputTensorValues.data(), inputTensorSize, inputNodeDims.data(), 4); + assert(inputTensor.IsTensor()); // Feed-forward the model - auto output_tensors = - session->Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1); - assert(output_tensors.size() == 1 && output_tensors.front().IsTensor()); + auto outputTensors = + session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); + assert(outputTensors.size() == 1 && outputTensors.front().IsTensor()); // Get the results back - float *floatarr = output_tensors.front().GetTensorMutableData(); - cv::Mat cur_features(cv::Size(model->output_size, 1), CV_32F, floatarr); + float *tensorData = outputTensors.front().GetTensorMutableData(); + cv::Mat currentFeatures(cv::Size(model->output_size, 1), CV_32F, tensorData); // Perform l2 normalizaton - cv::Mat features_square = cur_features.mul(cur_features); - float norm = 
sqrt(cv::sum(features_square)[0]); - cur_features = cur_features / norm; - memcpy(features->data, cur_features.data, sizeof(float) * model->output_size); + cv::Mat featuresSquare = currentFeatures.mul(currentFeatures); + float normalizationValue = sqrt(cv::sum(featuresSquare)[0]); + currentFeatures = currentFeatures / normalizationValue; + memcpy(features->data, currentFeatures.data, sizeof(float) * model->output_size); } -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model) { +void build_database_face_recognition(const char *databaseFolder, const char *outputPath, face_recognition_model_t *model) { using namespace boost::filesystem; - std::vector person_names; - std::vector database_ids; + std::vector personNames; + std::vector databaseIds; cv::Mat database(cv::Size(model->output_size, 0), CV_32F); - path root_path(database_folder); - if (!exists(root_path)) { + path rootPath(databaseFolder); + if (!exists(rootPath)) { std::cerr << "Database path does not exist." 
<< std::endl; return; } - int current_id = 0; - for (auto person_path = directory_iterator(root_path); person_path != directory_iterator(); person_path++) { + int currentId = 0; + for (auto personPath = directory_iterator(rootPath); personPath != directory_iterator(); personPath++) { // For each person in the database - if (is_directory(person_path->path())) { - path cur_person_path(person_path->path()); - person_names.push_back(person_path->path().filename().string()); + if (is_directory(personPath->path())) { + path currentPersonPath(personPath->path()); + personNames.push_back(personPath->path().filename().string()); - for (auto cur_img_path = directory_iterator(cur_person_path); cur_img_path != directory_iterator(); cur_img_path++) { + for (auto currentImagePath = directory_iterator(currentPersonPath); currentImagePath != directory_iterator(); + currentImagePath++) { opendr_image_t image; - load_image(cur_img_path->path().string().c_str(), &image); + load_image(currentImagePath->path().string().c_str(), &image); cv::Mat features(cv::Size(model->output_size, 1), CV_32F); ff_face_recognition(model, &image, &features); free_image(&image); database.push_back(features.clone()); - database_ids.push_back(current_id); + databaseIds.push_back(currentId); } - current_id++; + currentId++; } else { continue; } } - if (current_id == 0) { + if (currentId == 0) { std::cerr << "Cannot open database files." << std::endl; return; } // Make the array continuous - cv::Mat database_out = database.clone(); + cv::Mat databaseOutput = database.clone(); - std::ofstream fout(output_path, std::ios::out | std::ios::binary); + std::ofstream fout(outputPath, std::ios::out | std::ios::binary); if (!fout.is_open()) { std::cerr << "Cannot open database file for writting." 
<< std::endl; return; } // Write number of persons - int n = person_names.size(); + int n = personNames.size(); fout.write(reinterpret_cast(&n), sizeof(int)); for (int i = 0; i < n; i++) { // Write the name of the person (along with its size) - int name_length = person_names[i].size() + 1; - fout.write(reinterpret_cast(&name_length), sizeof(int)); - fout.write(person_names[i].c_str(), name_length); + int nameLength = personNames[i].size() + 1; + fout.write(reinterpret_cast(&nameLength), sizeof(int)); + fout.write(personNames[i].c_str(), nameLength); } - cv::Size s = database_out.size(); + cv::Size s = databaseOutput.size(); fout.write(reinterpret_cast(&s.height), sizeof(int)); fout.write(reinterpret_cast(&s.width), sizeof(int)); - fout.write(reinterpret_cast(database_out.data), sizeof(float) * s.height * s.width); - fout.write(reinterpret_cast(&database_ids[0]), sizeof(int) * s.height); + fout.write(reinterpret_cast(databaseOutput.data), sizeof(float) * s.height * s.width); + fout.write(reinterpret_cast(&databaseIds[0]), sizeof(int) * s.height); fout.flush(); fout.close(); } -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model) { +void load_database_face_recognition(const char *databasePath, face_recognition_model_t *model) { model->database = NULL; model->database_ids = NULL; - std::ifstream fin(database_path, std::ios::out | std::ios::binary); + std::ifstream fin(databasePath, std::ios::out | std::ios::binary); if (!fin.is_open()) { std::cerr << "Cannot load database file (check that file exists and you have created the database)." 
<< std::endl; return; } - int n; - fin.read(reinterpret_cast(&n), sizeof(int)); - char **person_names = new char *[n]; + int nPerson; + fin.read(reinterpret_cast(&nPerson), sizeof(int)); + char **personNames = new char *[nPerson]; - for (int i = 0; i < n; i++) { - person_names[i] = new char[512]; + for (int i = 0; i < nPerson; i++) { + personNames[i] = new char[512]; // Read person name - int name_length; - fin.read(reinterpret_cast(&name_length), sizeof(int)); - if (name_length > 512) { + int nameLength; + fin.read(reinterpret_cast(&nameLength), sizeof(int)); + if (nameLength > 512) { std::cerr << "Person name exceeds max number of characters (512)" << std::endl; return; } - fin.read(person_names[i], name_length); + fin.read(personNames[i], nameLength); } int height, width; fin.read(reinterpret_cast(&height), sizeof(int)); fin.read(reinterpret_cast(&width), sizeof(int)); - float *database_buff = new float[height * width]; - int *features_ids = new int[height]; - fin.read(reinterpret_cast(database_buff), sizeof(float) * height * width); - fin.read(reinterpret_cast(features_ids), sizeof(int) * height); + float *databaseBuff = new float[height * width]; + int *featuresIds = new int[height]; + fin.read(reinterpret_cast(databaseBuff), sizeof(float) * height * width); + fin.read(reinterpret_cast(featuresIds), sizeof(int) * height); fin.close(); cv::Mat *database = new cv::Mat(cv::Size(width, height), CV_32F); - memcpy(database->data, database_buff, sizeof(float) * width * height); - delete[] database_buff; + memcpy(database->data, databaseBuff, sizeof(float) * width * height); + delete[] databaseBuff; model->database = database; - model->database_ids = features_ids; - model->person_names = person_names; - model->n_persons = n; + model->database_ids = featuresIds; + model->person_names = personNames; + model->n_persons = nPerson; model->n_features = height; } @@ -370,27 +371,27 @@ opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, } cv::Mat 
*database = static_cast(model->database); // Calculate the distance between the extracted feature vector and database features - cv::Mat features_repeated; - cv::repeat(features, model->n_features, 1, features_repeated); - cv::Mat diff = features_repeated - *database; - diff = diff.mul(diff); - cv::Mat sq_dists; - cv::reduce(diff, sq_dists, 1, CV_REDUCE_SUM, CV_32F); - cv::Mat dists; - cv::sqrt(sq_dists, dists); - - double min_dist, max_dist; - cv::Point min_loc, max_loc; - cv::minMaxLoc(dists, &min_dist, &max_dist, &min_loc, &max_loc); - - target.data = model->database_ids[min_loc.y]; - target.confidence = 1 - (min_dist / model->threshold); + cv::Mat featuresRepeated; + cv::repeat(features, model->n_features, 1, featuresRepeated); + cv::Mat differences = featuresRepeated - *database; + differences = differences.mul(differences); + cv::Mat squareRootDistances; + cv::reduce(differences, squareRootDistances, 1, CV_REDUCE_SUM, CV_32F); + cv::Mat distances; + cv::sqrt(squareRootDistances, distances); + + double minDistance, maxDistance; + cv::Point minLoc, maxLoc; + cv::minMaxLoc(distances, &minDistance, &maxDistance, &minLoc, &maxLoc); + + target.data = model->database_ids[minLoc.y]; + target.confidence = 1 - (minDistance / model->threshold); return target; } -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name) { +void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *personName) { if (category.data >= model->n_persons) return; - strcpy(person_name, model->person_names[category.data]); + strcpy(personName, model->person_names[category.data]); } diff --git a/src/c_api/lightweight_open_pose.cpp b/src/c_api/lightweight_open_pose.cpp new file mode 100644 index 0000000000..343efa1f10 --- /dev/null +++ b/src/c_api/lightweight_open_pose.cpp @@ -0,0 +1,264 @@ +// Copyright 2020-2022 OpenDR European Project +// +// Licensed under the Apache License, 
Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lightweight_open_pose.h" +#include "target.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "opencv2/core/core_c.h" + +/** + * Helper function for preprocessing images before feeding them into the lightweight open pose estimator model. + * This function follows the OpenDR's lightweight open pose pre-processing pipeline, which includes the following: + * a) resizing the image into modelInputSize x modelInputSize pixels relative to the original ratio, + * b) normalizing the resulting values using meanValue and c) padding image into a standard size. 
+ * @param image image to be preprocesses + * @param preprocessedImage opencv Mat that pre-processed data will be saved + * @param modelInputSize size of the center crop (equals the size that the DL model expects) + * @param meanValue value used for centering the input image + * @param imgScale value used for scaling the input image + */ +void preprocess_open_pose(cv::Mat *image, cv::Mat *preprocessedImage, int modelInputSize, float meanValue, float imgScale) { + // Convert to RGB + cv::Mat resizedImage; + cv::cvtColor(*image, resizedImage, cv::COLOR_BGR2RGB); + cv::cvtColor(resizedImage, resizedImage, cv::COLOR_RGB2BGR); + + // Resize and then get a center crop + double scale = (static_cast(modelInputSize) / static_cast(resizedImage.rows)); + cv::resize(resizedImage, resizedImage, cv::Size(), scale, scale); + + // Convert to float32 and normalize + cv::Mat normalizedImage; + resizedImage.convertTo(normalizedImage, CV_32FC3, imgScale, meanValue); + + // Padding + int h = normalizedImage.rows; + int w = normalizedImage.cols; + + const float stride = 8.0f; + int maxWidth = std::max(modelInputSize, w); + cv::Size minDims = cv::Size(maxWidth, modelInputSize); + + h = std::min(h, minDims.height); + minDims.height = ceil((minDims.height / stride)) * stride; + + minDims.width = std::max(minDims.width, w); + minDims.width = ceil((minDims.width / stride)) * stride; + + int pad[4]; + pad[0] = static_cast((minDims.height - h) / 2); + pad[1] = static_cast((minDims.width - w) / 2); + pad[2] = minDims.height - h - pad[0]; + pad[3] = minDims.width - w - pad[1]; + + cv::Scalar padValue(0, 0, 0); + cv::copyMakeBorder(normalizedImage, *preprocessedImage, pad[0], pad[2], pad[1], pad[3], cv::BORDER_CONSTANT, padValue); +} + +void load_open_pose_model(const char *modelPath, open_pose_model_t *model) { + // Initialize model + model->onnx_session = model->env = model->session_options = NULL; + + Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); + Ort::SessionOptions 
*sessionOptions = new Ort::SessionOptions; + sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::Session *session = new Ort::Session(*env, modelPath, *sessionOptions); + model->env = env; + model->onnx_session = session; + model->session_options = sessionOptions; + + // Should we pass these parameters through the model json file? + model->mean_value = -128.0f / 256.0f; + model->img_scale = (1.0f / 256.0f); + model->model_size = 256; + + model->num_refinement_stages = 2; + model->output_size = (model->num_refinement_stages + 1) * 2; + + model->even_channel_output = 38; + model->odd_channel_output = 19; + model->stride = 0; + model->batch_size = 1; + if (model->stride == 0) { + model->width_output = 32; + model->height_output = 49; + } else { + model->width_output = 16; + model->height_output = 35; + } +} + +void free_open_pose_model(open_pose_model_t *model) { + if (model->onnx_session) { + Ort::Session *session = static_cast(model->onnx_session); + delete session; + } + + if (model->session_options) { + Ort::SessionOptions *sessionOptions = static_cast(model->session_options); + delete sessionOptions; + } + + if (model->env) { + Ort::Env *env = static_cast(model->env); + delete env; + } +} + +void ff_open_pose(open_pose_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { + Ort::Session *session = static_cast(model->onnx_session); + + if (!session) { + std::cerr << "ONNX session not initialized." 
<< std::endl; + return; + } + + // Prepare the input dimensions + // Dims of input data + size_t inputTensorSize = model->model_size * model->model_size * 3; + + // Dims of input of model + std::vector inputNodeDims = {inputTensorValues->batch_size, inputTensorValues->channels, inputTensorValues->width, + inputTensorValues->height}; + + // Setup input/output names + Ort::AllocatorWithDefaultOptions allocator; + std::vector inputNodeNames = {"data"}; + std::vector outputNodeNames = {"stage_0_output_1_heatmaps", "stage_0_output_0_pafs"}; + if (model->num_refinement_stages == 2) { + outputNodeNames.push_back("stage_1_output_1_heatmaps"); + outputNodeNames.push_back("stage_1_output_0_pafs"); + outputNodeNames.push_back("stage_2_output_1_heatmaps"); + outputNodeNames.push_back("stage_2_output_0_pafs"); + } + // Set up the input tensor + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value inputTensor = + Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 4); + assert(inputTensor.IsTensor()); + + // Feed-forward the model + auto outputTensors = + session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), model->output_size); + assert(outputTensors.size() == model->output_size); + + // Get the results back + for (int i = 0; i < outputTensors.size(); i++) { + float *tensor_data = outputTensors[i].GetTensorMutableData(); + + int channelDim; + if ((i % 2) == 0) { + channelDim = model->even_channel_output; + } else { + channelDim = model->odd_channel_output; + } + + int tensorSizes[5] = {1, model->batch_size, channelDim, model->width_output, model->height_output}; + + cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensor_data)); + outputTensorValues->push_back(outputMat); + } +} + +void init_random_opendr_tensor_op(opendr_tensor_t *inputTensorValues, open_pose_model_t *model) { + int inputTensorSize = model->model_size * 
model->model_size * 3; + + float *data = static_cast(malloc(inputTensorSize * sizeof(float))); + + for (unsigned int j = 0; j < inputTensorSize; ++j) { + data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; + } + + load_tensor(inputTensorValues, static_cast(data), 1, 1, 3, model->model_size, model->model_size); + free(data); +} + +void init_opendr_tensor_from_img_op(opendr_image_t *image, opendr_tensor_t *inputTensorValues, open_pose_model_t *model) { + int inputTensorSize = model->model_size * model->model_size * 3; + + cv::Mat *opencvImage = (static_cast(image->data)); + cv::Mat normImage; + preprocess_open_pose(opencvImage, &normImage, model->model_size, model->mean_value, model->img_scale); + + float *data = static_cast(malloc(inputTensorSize * sizeof(float))); + for (unsigned int j = 0; j < model->model_size; ++j) { + for (unsigned int k = 0; k < model->model_size; ++k) { + cv::Vec3f currentPixel = normImage.at(j, k); + data[0 * model->model_size * model->model_size + j * model->model_size + k] = currentPixel[0]; + data[1 * model->model_size * model->model_size + j * model->model_size + k] = currentPixel[1]; + data[2 * model->model_size * model->model_size + j * model->model_size + k] = currentPixel[2]; + } + } + + load_tensor(inputTensorValues, static_cast(data), 1, 1, 3, model->model_size, model->model_size); + free(data); +} + +void forward_open_pose(open_pose_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector) { + // Get the feature vector for the current image + std::vector outputTensorValues; + ff_open_pose(model, inputTensorValues, &outputTensorValues); + + int nTensors = static_cast(outputTensorValues.size()); + if (nTensors > 0) { + int batchSizes[nTensors]; + int frames[nTensors]; + int channels[nTensors]; + int widths[nTensors]; + int heights[nTensors]; + + std::vector tempTensorsVector; + opendr_tensor_t tempTensors[nTensors]; + + for (int i = 0; i < nTensors; i++) { + batchSizes[i] = 1; + frames[i] = 1; + if ((i 
% 2) == 0) { + channels[i] = model->even_channel_output; + } else { + channels[i] = model->odd_channel_output; + } + widths[i] = model->width_output; + heights[i] = model->height_output; + + load_tensor(&(tempTensors[i]), outputTensorValues[i].ptr(0), batchSizes[i], frames[i], channels[i], widths[i], + heights[i]); + tempTensorsVector.push_back(tempTensors[i]); + } + load_tensor_vector(tensorVector, tempTensorsVector.data(), nTensors); + for (int i = 0; i < nTensors; i++) { + free_tensor(&(tempTensors[i])); + } + } else { + initialize_tensor_vector(tensorVector); + } +} diff --git a/src/c_api/object_detection_2d_detr.cpp b/src/c_api/object_detection_2d_detr.cpp new file mode 100644 index 0000000000..8652fa47d9 --- /dev/null +++ b/src/c_api/object_detection_2d_detr.cpp @@ -0,0 +1,269 @@ +// Copyright 2020-2022 OpenDR European Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "object_detection_2d_detr.h" +#include "target.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "opencv2/core/core_c.h" + +/** + * Helper function for preprocessing images before feeding them into the detr object detection model. 
+ * This function follows the OpenDR's object detection detr pre-processing pipeline, which includes the following: + * a) resizing the image into modelInputSize x modelInputSize pixels and b) normalizing the resulting values using + * meanValue and stdValue + * @param image image to be preprocesses + * @param data pre-processed data in a flattened vector + * @param modelInputSize size of the center crop (equals the size that the DL model expects) + * @param meanValue values used for centering the input image + * @param stdValues values used for scaling the input image + */ +void preprocess_detr(cv::Mat *image, cv::Mat *normalizedImage, int modelInputSize, float meanValues[3], float stdValues[3]) { + // Convert to RGB + cv::Mat resizedImage; + cv::cvtColor(*image, resizedImage, cv::COLOR_BGR2RGB); + + // Resize and then get a center crop + cv::resize(resizedImage, resizedImage, cv::Size(modelInputSize, modelInputSize)); + + // Scale to 0...1 + resizedImage.convertTo(*normalizedImage, CV_32FC3, (1 / 255.0)); + + cv::Scalar meanValue(meanValues[0], meanValues[1], meanValues[2]); + cv::Scalar stdValue(stdValues[0], stdValues[1], stdValues[2]); + + cv::add(*normalizedImage, meanValue, *normalizedImage); + cv::multiply(*normalizedImage, stdValue, *normalizedImage); +} + +/** + * Very simple helper function to parse OpenDR model files for object detection detr + * In the future this can be done at library level using a JSON-parser + */ +std::string json_get_key_string_detr(std::string json, const std::string &key) { + std::size_t startIdx = json.find(key); + std::string value = json.substr(startIdx); + value = value.substr(value.find(":") + 1); + value = value.substr(0, value.find(",")); + value = value.substr(value.find("\"") + 1); + value = value.substr(0, value.find("\"")); + return value; +} + +/** + * Very simple helper function to parse dictionaries OpenDR model files for object detection detr + * In the future this can be done at library level using a JSON-parser 
+ */ +std::string json_get_key_string_detr_in_dict(std::string json, const std::string &key) { + std::size_t startIdx = json.find(key); + std::string value = json.substr(startIdx); + value = value.substr(value.find(":") + 1); + value = value.substr(0, value.find(",")); + value = value.substr(value.find("\"") + 1); + value = value.substr(0, value.find("}")); + return value; +} + +void load_detr_model(const char *modelPath, detr_model_t *model) { + // Initialize model + model->onnx_session = model->env = model->session_options = NULL; + model->threshold = 0; + + // Parse the model JSON file + std::string basePath(modelPath); + std::size_t splitPosition = basePath.find_last_of("/"); + splitPosition = splitPosition > 0 ? splitPosition + 1 : 0; + std::string modelJsonPath = basePath + "/" + basePath.substr(splitPosition) + ".json"; + std::ifstream inStream(modelJsonPath); + if (!inStream.is_open()) { + std::cerr << "Cannot open JSON model file" << std::endl; + return; + } + + std::string str; + inStream.seekg(0, std::ios::end); + str.reserve(inStream.tellg()); + inStream.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(inStream)), std::istreambuf_iterator()); + + // Parse JSON + std::string onnxModelPath = basePath + "/" + json_get_key_string_detr(str, "model_paths"); + + std::string modelFormat = json_get_key_string_detr(str, "format"); + + // Parse inference params + std::string threshold = json_get_key_string_detr_in_dict(str, "threshold"); + + if (!threshold.empty()) { + model->threshold = std::stof(threshold); + } + + // Proceed only if the model is in onnx format + if (modelFormat != "onnx") { + std::cerr << "Model not in ONNX format." 
<< std::endl; + return; + } + + Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); + Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; + sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::Session *session = new Ort::Session(*env, onnxModelPath.c_str(), *sessionOptions); + model->env = env; + model->onnx_session = session; + model->session_options = sessionOptions; + + // Should we pass these parameters through the model json file? + model->mean_value[0] = -0.485f; + model->mean_value[1] = -0.456f; + model->mean_value[2] = -0.406f; + + model->std_value[0] = 0.229f; + model->std_value[1] = 0.224f; + model->std_value[2] = 0.225f; + + model->model_size = 800; + + model->features = 100; + model->output_sizes[0] = 92; + model->output_sizes[1] = 4; +} + +void free_detr_model(detr_model_t *model) { + if (model->onnx_session) { + Ort::Session *session = static_cast(model->onnx_session); + delete session; + } + + if (model->session_options) { + Ort::SessionOptions *sessionOptions = static_cast(model->session_options); + delete sessionOptions; + } + + if (model->env) { + Ort::Env *env = static_cast(model->env); + delete env; + } +} + +void ff_detr(detr_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { + Ort::Session *session = static_cast(model->onnx_session); + + if (!session) { + std::cerr << "ONNX session not initialized." 
<< std::endl; + return; + } + + // Prepare the input dimensions + // Dims of input data for preprocessing + size_t inputTensorSize = model->model_size * model->model_size * 3; + + // Dims of input of model + std::vector inputNodeDims = {inputTensorValues->batch_size, inputTensorValues->channels, inputTensorValues->width, + inputTensorValues->height}; + + // Setup input/output names + Ort::AllocatorWithDefaultOptions allocator; + std::vector inputNodeNames = {"data"}; + std::vector outputNodeNames = {"pred_logits", "pred_boxes"}; + + // Set up the input tensor + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value inputTensor = + Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 4); + assert(inputTensor.IsTensor()); + + // Feed-forward the model + auto outputTensors = + session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 2); + assert(outputTensors.size() == 2); + + // Get the results back + for (int i = 0; i < outputTensors.size(); i++) { + float *tensorData = outputTensors[i].GetTensorMutableData(); + + int tensorSizes[5] = {1, 1, 1, model->features, model->output_sizes[i]}; + + cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensorData)); + outputTensorValues->push_back(outputMat); + } +} + +void init_random_opendr_tensor_detr(opendr_tensor_t *inputTensorValues, detr_model_t *model) { + // Prepare the input data with random values + int inputTensorSize = model->model_size * model->model_size * 3; + + float *data = static_cast(malloc(inputTensorSize * sizeof(float))); + // change data structure so channels are the last iterable dimension + for (unsigned int j = 0; j < inputTensorSize; ++j) { + data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; + } + + load_tensor(inputTensorValues, static_cast(data), 1, 1, 3, model->model_size, model->model_size); + free(data); +} + +void forward_detr(detr_model_t *model, 
opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector) { + // Get the feature vector for the current image + std::vector outputTensorValues; + ff_detr(model, inputTensorValues, &outputTensorValues); + + int nTensors = static_cast(outputTensorValues.size()); + if (nTensors > 0) { + int batchSizes[nTensors]; + int frames[nTensors]; + int channels[nTensors]; + int widths[nTensors]; + int heights[nTensors]; + + std::vector tempTensorsVector; + opendr_tensor_t tempTensors[nTensors]; + + for (int i = 0; i < nTensors; i++) { + batchSizes[i] = 1; + frames[i] = 1; + channels[i] = 1; + widths[i] = 1; + if (i == 0) { + heights[i] = model->output_sizes[0]; + } else { + heights[i] = model->output_sizes[1]; + } + load_tensor(&(tempTensors[i]), outputTensorValues[i].ptr(0), batchSizes[i], frames[i], channels[i], widths[i], + heights[i]); + tempTensorsVector.push_back(tempTensors[i]); + } + load_tensor_vector(tensorVector, tempTensorsVector.data(), nTensors); + for (int i = 0; i < nTensors; i++) { + free_tensor(&(tempTensors[i])); + } + + } else { + initialize_tensor_vector(tensorVector); + } +} diff --git a/src/c_api/nanodet_libtorch.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp similarity index 53% rename from src/c_api/nanodet_libtorch.cpp rename to src/c_api/object_detection_2d_nanodet_jit.cpp index 51f2cbefad..cfaf6293ca 100644 --- a/src/c_api/nanodet_libtorch.cpp +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -12,44 +12,45 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "object_detection_2d_nanodet_jit.h" + #include #include #include #include #include -#include "nanodet_c.h" /** * Helper class holder of c++ values and jit model. 
*/ class NanoDet { private: - torch::DeviceType device; - torch::jit::script::Module network; - torch::Tensor meanTensor; - torch::Tensor stdTensor; - std::vector labels; + torch::DeviceType mDevice; + torch::jit::script::Module mNetwork; + torch::Tensor mMeanTensor; + torch::Tensor mStdTensor; + std::vector mLabels; public: - NanoDet(torch::jit::script::Module net, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, + NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, std::vector labels); ~NanoDet(); - torch::Tensor mPreProcess(cv::Mat *image); - torch::jit::script::Module net() const; - torch::Tensor meanValues() const; - torch::Tensor stdValues() const; - std::vector classes() const; + torch::Tensor preProcess(cv::Mat *image); + torch::jit::script::Module network() const; + torch::Tensor meanTensor() const; + torch::Tensor stdTensor() const; + std::vector labels() const; std::vector outputs; }; -NanoDet::NanoDet(torch::jit::script::Module net, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, - const std::vector labels) { - this->device = device; - this->network = net; - this->meanTensor = meanValues.clone().to(device); - this->stdTensor = stdValues.clone().to(device); - this->labels = labels; +NanoDet::NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, + torch::DeviceType device, const std::vector labels) { + this->mDevice = device; + this->mNetwork = network; + this->mMeanTensor = meanValues.clone().to(device); + this->mStdTensor = stdValues.clone().to(device); + this->mLabels = labels; } NanoDet::~NanoDet() { @@ -61,13 +62,13 @@ NanoDet::~NanoDet() { * Mean and Standard deviation are already part of NanoDet class when is initialized. 
* @param image, image to be preprocesses */ -torch::Tensor NanoDet::mPreProcess(cv::Mat *image) { +torch::Tensor NanoDet::preProcess(cv::Mat *image) { torch::Tensor tensorImage = torch::from_blob(image->data, {image->rows, image->cols, 3}, torch::kByte); tensorImage = tensorImage.toType(torch::kFloat); - tensorImage = tensorImage.to(this->device); + tensorImage = tensorImage.to(this->mDevice); tensorImage = tensorImage.permute({2, 0, 1}); - tensorImage = tensorImage.add(this->meanTensor); - tensorImage = tensorImage.mul(this->stdTensor); + tensorImage = tensorImage.add(this->mMeanTensor); + tensorImage = tensorImage.mul(this->mStdTensor); return tensorImage; } @@ -75,39 +76,39 @@ torch::Tensor NanoDet::mPreProcess(cv::Mat *image) { /** * Getter for jit model */ -torch::jit::script::Module NanoDet::net() const { - return this->network; +torch::jit::script::Module NanoDet::network() const { + return this->mNetwork; } /** * Getter for tensor with the mean values */ -torch::Tensor NanoDet::meanValues() const { - return this->meanTensor; +torch::Tensor NanoDet::meanTensor() const { + return this->mMeanTensor; } /** * Getter for tensor with the standard deviation values */ -torch::Tensor NanoDet::stdValues() const { - return this->stdTensor; +torch::Tensor NanoDet::stdTensor() const { + return this->mStdTensor; } /** - * Getter of labels for printing + * Getter of labels */ -std::vector NanoDet::classes() const { - return labels; +std::vector NanoDet::labels() const { + return this->mLabels; } /** * Helper function to calculate the final shape of the model input relative to size ratio of input image. 
*/ -void get_minimum_dst_shape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { +void get_minimum_dstShape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { float ratio; - float src_ratio = ((float)srcSize->width / (float)srcSize->height); - float dst_ratio = ((float)dstSize->width / (float)dstSize->height); - if (src_ratio < dst_ratio) + float srcRatio = ((float)srcSize->width / (float)srcSize->height); + float dstRatio = ((float)dstSize->width / (float)dstSize->height); + if (srcRatio < dstRatio) ratio = ((float)dstSize->height / (float)srcSize->height); else ratio = ((float)dstSize->width / (float)srcSize->width); @@ -124,32 +125,32 @@ void get_minimum_dst_shape(cv::Size *srcSize, cv::Size *dstSize, float divisible /** * Helper function to calculate the warp matrix for resizing. */ -void get_resize_matrix(cv::Size *src_shape, cv::Size *dst_shape, cv::Mat *Rs, int keep_ratio) { - if (keep_ratio == 1) { +void get_resize_matrix(cv::Size *srcShape, cv::Size *dstShape, cv::Mat *Rs, int keepRatio) { + if (keepRatio == 1) { float ratio; cv::Mat C = cv::Mat::eye(3, 3, CV_32FC1); - C.at(0, 2) = -src_shape->width / 2.0; - C.at(1, 2) = -src_shape->height / 2.0; - float src_ratio = ((float)src_shape->width / (float)src_shape->height); - float dst_ratio = ((float)dst_shape->width / (float)dst_shape->height); - if (src_ratio < dst_ratio) { - ratio = ((float)dst_shape->height / (float)src_shape->height); + C.at(0, 2) = -srcShape->width / 2.0; + C.at(1, 2) = -srcShape->height / 2.0; + float srcRatio = ((float)srcShape->width / (float)srcShape->height); + float dstRatio = ((float)dstShape->width / (float)dstShape->height); + if (srcRatio < dstRatio) { + ratio = ((float)dstShape->height / (float)srcShape->height); } else { - ratio = ((float)dst_shape->width / (float)src_shape->width); + ratio = ((float)dstShape->width / (float)srcShape->width); } Rs->at(0, 0) *= ratio; Rs->at(1, 1) *= ratio; cv::Mat T = cv::Mat::eye(3, 3, CV_32FC1); - T.at(0, 2) = 0.5 * 
dst_shape->width; - T.at(1, 2) = 0.5 * dst_shape->height; + T.at(0, 2) = 0.5 * dstShape->width; + T.at(1, 2) = 0.5 * dstShape->height; *Rs = T * (*Rs) * C; } else { - Rs->at(0, 0) *= (float)dst_shape->width / (float)src_shape->width; - Rs->at(1, 1) *= (float)dst_shape->height / (float)src_shape->height; + Rs->at(0, 0) *= (float)dstShape->width / (float)srcShape->width; + Rs->at(1, 1) *= (float)dstShape->height / (float)srcShape->height; } } @@ -161,28 +162,28 @@ void get_resize_matrix(cv::Size *src_shape, cv::Size *dst_shape, cv::Mat *Rs, in * @param src, image to be preprocesses * @param dst, output image to be used as model input * @param dstSize, final size of the dst - * @param Rs, matrix to be used for warp perspective - * @param keep_ratio, flag for targeting the resized image size relative to input image ratio + * @param warpMatrix, matrix to be used for warp perspective + * @param keepRatio, flag for targeting the resized image size relative to input image ratio */ -void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warp_matrix, int keep_ratio) { +void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warpMatrix, int keepRatio) { cv::Size srcSize = cv::Size(src->cols, src->rows); const float divisible = 0.0; // Get new destination size if keep ratio is wanted - if (keep_ratio == 1) { - get_minimum_dst_shape(&srcSize, dstSize, divisible); + if (keepRatio == 1) { + get_minimum_dstShape(&srcSize, dstSize, divisible); } - get_resize_matrix(&srcSize, dstSize, warp_matrix, keep_ratio); - cv::warpPerspective(*src, *dst, *warp_matrix, *dstSize); + get_resize_matrix(&srcSize, dstSize, warpMatrix, keepRatio); + cv::warpPerspective(*src, *dst, *warpMatrix, *dstSize); } /** * Helper function to determine the device of jit model and tensors. 
*/ -torch::DeviceType torchDevice(char *device_name, int verbose = 0) { +torch::DeviceType torchDevice(char *deviceName, int verbose = 0) { torch::DeviceType device; - if (std::string(device_name) == "cuda") { + if (std::string(deviceName) == "cuda") { if (verbose == 1) printf("to cuda\n"); device = torch::kCUDA; @@ -194,12 +195,12 @@ torch::DeviceType torchDevice(char *device_name, int verbose = 0) { return device; } -void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model) { +void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model) { // Initialize model - model->inputSize[0] = width; - model->inputSize[1] = height; + model->input_size[0] = width; + model->input_size[1] = height; - model->scoreThreshold = scoreThreshold; + model->score_threshold = scoreThreshold; model->keep_ratio = 1; const std::vector labels{ @@ -221,71 +222,78 @@ void load_nanodet_model(char *model_path, char *device, int height, int width, f torch::Tensor stdValues = torch::tensor({{{0.017429f}}, {{0.017507f}}, {{0.017125f}}}); // initialization of jit model and class as holder of c++ values. 
- torch::DeviceType torch_device = torchDevice(device, 1); - torch::jit::script::Module net = torch::jit::load(model_path, torch_device); - net.eval(); + torch::DeviceType initDevice = torchDevice(device, 1); + torch::jit::script::Module network = torch::jit::load(modelPath, initDevice); + network.eval(); + + NanoDet *detector = new NanoDet(network, meanTensor, stdValues, initDevice, labels); + + model->network = static_cast(detector); +} - NanoDet *detector = new NanoDet(net, meanTensor, stdValues, torch_device, labels); +void ff_nanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, + torch::Tensor *outputs) { + // Make all the inputs as tensors to use in jit model + torch::Tensor srcHeight = torch::tensor(originalSize->width); + torch::Tensor srcWidth = torch::tensor(originalSize->height); + torch::Tensor warpMat = torch::from_blob(warpMatrix->data, {3, 3}); - model->net = static_cast(detector); + // Model inference + *outputs = (model->network()).forward({*inputTensor, srcHeight, srcWidth, warpMat}).toTensor(); + *outputs = outputs->to(torch::Device(torch::kCPU, 0)); } -opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model) { - NanoDet *networkPTR = static_cast(model->net); - opendr_detection_target_list_t detections; +opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image) { + NanoDet *networkPTR = static_cast(model->network); + opendr_detection_vector_target_t detectionsVector; - std::vector dets; - cv::Mat *opencv_image = static_cast(image->data); - if (!opencv_image) { + cv::Mat *opencvImage = static_cast(image->data); + if (!opencvImage) { std::cerr << "Cannot load image for inference." << std::endl; // Initialize an empty detection to return. 
- initialize_detections(&detections); - return detections; + initialize_detections_vector(&detectionsVector); + return detectionsVector; } // Preprocess image and keep values as input in jit model cv::Mat resizedImg; - cv::Size dstSize = cv::Size(model->inputSize[0], model->inputSize[1]); - cv::Mat warp_matrix = cv::Mat::eye(3, 3, CV_32FC1); - preprocess(opencv_image, &resizedImg, &dstSize, &warp_matrix, model->keep_ratio); - torch::Tensor input = networkPTR->mPreProcess(&resizedImg); + cv::Size dstSize = cv::Size(model->input_size[0], model->input_size[1]); + cv::Mat warpMatrix = cv::Mat::eye(3, 3, CV_32FC1); + preprocess(opencvImage, &resizedImg, &dstSize, &warpMatrix, model->keep_ratio); + torch::Tensor input = networkPTR->preProcess(&resizedImg); + cv::Size originalSize(opencvImage->cols, opencvImage->rows); - // Make all the inputs as tensors to use in jit model - torch::Tensor srcHeight = torch::tensor(opencv_image->rows); - torch::Tensor srcWidth = torch::tensor(opencv_image->cols); - torch::Tensor warpMatrix = torch::from_blob(warp_matrix.data, {3, 3}); - - // Model inference - torch::Tensor outputs = (networkPTR->net()).forward({input, srcHeight, srcWidth, warpMatrix}).toTensor(); - outputs = outputs.to(torch::Device(torch::kCPU, 0)); + torch::Tensor outputs; + ff_nanodet(networkPTR, &input, &warpMatrix, &originalSize, &outputs); + std::vector detections; // Postprocessing, find which outputs have better score than threshold and keep them. 
for (int label = 0; label < outputs.size(0); label++) { for (int box = 0; box < outputs.size(1); box++) { - if (outputs[label][box][4].item() > model->scoreThreshold) { - opendr_detection_target_t det; - det.name = label; - det.left = outputs[label][box][0].item(); - det.top = outputs[label][box][1].item(); - det.width = outputs[label][box][2].item() - outputs[label][box][0].item(); - det.height = outputs[label][box][3].item() - outputs[label][box][1].item(); - det.score = outputs[label][box][4].item(); - dets.push_back(det); + if (outputs[label][box][4].item() > model->score_threshold) { + opendr_detection_target_t detection; + detection.name = label; + detection.left = outputs[label][box][0].item(); + detection.top = outputs[label][box][1].item(); + detection.width = outputs[label][box][2].item() - outputs[label][box][0].item(); + detection.height = outputs[label][box][3].item() - outputs[label][box][1].item(); + detection.score = outputs[label][box][4].item(); + detections.push_back(detection); } } } // Put vector detection as C pointer and size - if ((int)dets.size() > 0) - load_detections(&detections, dets.data(), (int)dets.size()); + if (static_cast(detections.size()) > 0) + load_detections_vector(&detectionsVector, detections.data(), static_cast(detections.size())); else - initialize_detections(&detections); + initialize_detections_vector(&detectionsVector); - return detections; + return detectionsVector; } -void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_detection_target_list_t *detections) { +void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector) { const int colorList[80][3] = { //{255 ,255 ,255}, //bg {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, @@ -302,22 +310,23 @@ void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_det {0, 113, 188}, {80, 182, 188}, {127, 127, 0}, }; - 
std::vector classNames = (static_cast(model->net))->classes(); + std::vector classNames = (static_cast(model->network))->labels(); - cv::Mat *opencv_image = static_cast(opendr_image->data); - if (!opencv_image) { + cv::Mat *opencvImage = static_cast(image->data); + if (!opencvImage) { std::cerr << "Cannot load image for inference." << std::endl; return; } - cv::Mat image = (*opencv_image).clone(); - for (size_t i = 0; i < detections->size; i++) { - const opendr_detection_target bbox = (detections->starting_pointer)[i]; + cv::Mat imageWithDetections = (*opencvImage).clone(); + for (size_t i = 0; i < detectionsVector->size; i++) { + const opendr_detection_target bbox = (detectionsVector->starting_pointer)[i]; float score = bbox.score > 1 ? 1 : bbox.score; - if (score > model->scoreThreshold) { + if (score > model->score_threshold) { cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); - cv::rectangle( - image, cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), color); + cv::rectangle(imageWithDetections, + cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), + color); char text[256]; @@ -330,21 +339,23 @@ void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_det int y = (int)bbox.top; if (y < 0) y = 0; - if (x + labelSize.width > image.cols) - x = image.cols - labelSize.width; + if (x + labelSize.width > imageWithDetections.cols) + x = imageWithDetections.cols - labelSize.width; - cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), color, -1); - cv::putText(image, text, cv::Point(x, y + labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); + cv::rectangle(imageWithDetections, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), + color, -1); + cv::putText(imageWithDetections, text, 
cv::Point(x, y + labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, + cv::Scalar(255, 255, 255)); } } - cv::imshow("image", image); + cv::imshow("image", imageWithDetections); cv::waitKey(0); } void free_nanodet_model(nanodet_model_t *model) { - if (model->net) { - NanoDet *networkPTR = static_cast(model->net); + if (model->network) { + NanoDet *networkPTR = static_cast(model->network); delete networkPTR; } } diff --git a/src/c_api/object_tracking_2d_deep_sort.cpp b/src/c_api/object_tracking_2d_deep_sort.cpp new file mode 100644 index 0000000000..2783a47c01 --- /dev/null +++ b/src/c_api/object_tracking_2d_deep_sort.cpp @@ -0,0 +1,203 @@ +// Copyright 2020-2022 OpenDR European Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "object_tracking_2d_deep_sort.h" +#include "target.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "opencv2/core/core_c.h" + +/** + * Helper function for preprocessing images before feeding them into the deep sort object tracking 2d model. 
+ * This function follows the OpenDR's object tracking 2d deep sort pre-processing pipeline, which includes the following: + * a) resizing the image into modelInputSizes[1] x modelInputSizes[0] pixels and b) normalizing the resulting values using + * meanValues and stdValues + * @param image image to be preprocesses + * @param normalizedImg pre-processed data in a flattened vector + * @param modelInputSizes size of the center crop (equals the size that the DL model expects) + * @param meanValues value used for centering the input image + * @param stdValues value used for scaling the input image + */ +void preprocess_deep_sort(cv::Mat *image, cv::Mat *normalizedImg, int modelInputSizes[2], float meanValues[3], + float stdValues[3]) { + // Convert to RGB + cv::Mat resizedImage; + cv::cvtColor(*image, resizedImage, cv::COLOR_BGR2RGB); + + // Resize + cv::resize(resizedImage, resizedImage, cv::Size(modelInputSizes[1], modelInputSizes[0])); + + // Unfold the image into the appropriate format + // Scale to 0...1 + resizedImage.convertTo(*normalizedImg, CV_32FC3, (1 / 255.0)); + + // Normalize + cv::Scalar meanValue(meanValues[0], meanValues[1], meanValues[2]); + cv::Scalar stdValue(stdValues[0], stdValues[1], stdValues[2]); + + cv::add(*normalizedImg, meanValue, *normalizedImg); + cv::multiply(*normalizedImg, stdValue, *normalizedImg); +} + +void load_deep_sort_model(const char *modelPath, deep_sort_model_t *model) { + // Initialize model + model->onnx_session = model->env = model->session_options = NULL; + + Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); + Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; + sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::Session *session = new Ort::Session(*env, modelPath, *sessionOptions); + model->env = env; + model->onnx_session = session; + model->session_options = sessionOptions; + + // Should we pass these parameters through the model json file? 
+ model->mean_value[0] = -0.485f; + model->mean_value[1] = -0.456f; + model->mean_value[2] = -0.406f; + + model->std_value[0] = (1.0f / 0.229f); + model->std_value[1] = (1.0f / 0.224f); + model->std_value[2] = (1.0f / 0.225f); + + model->model_size[0] = 64; + model->model_size[1] = 128; + + model->batch_size = 1; + model->in_channels = 3; + + model->features = 512; +} + +void free_deep_sort_model(deep_sort_model_t *model) { + if (model->onnx_session) { + Ort::Session *session = static_cast(model->onnx_session); + delete session; + } + + if (model->session_options) { + Ort::SessionOptions *sessionOptions = static_cast(model->session_options); + delete sessionOptions; + } + + if (model->env) { + Ort::Env *env = static_cast(model->env); + delete env; + } +} + +void ff_deep_sort(deep_sort_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { + Ort::Session *session = static_cast(model->onnx_session); + + if (!session) { + std::cerr << "ONNX session not initialized." 
<< std::endl; + return; + } + + // Prepare the input dimensions + // Dims of input data + size_t inputTensorSize = model->batch_size * model->in_channels * model->model_size[1] * model->model_size[0]; + + // Dims of input of model + std::vector inputNodeDims = {model->batch_size, model->in_channels, model->model_size[1], model->model_size[0]}; + + // Setup input/output names + Ort::AllocatorWithDefaultOptions allocator; + std::vector inputNodeNames = {"data"}; + std::vector outputNodeNames = {"output"}; + + // Set up the input tensor + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value inputTensor = + Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 4); + assert(inputTensor.IsTensor()); + + // Feed-forward the model + auto outputTensors = + session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); + assert(outputTensors.size() == 1); + + // Get the results back + for (int i = 0; i < outputTensors.size(); i++) { + float *tensorData = outputTensors[i].GetTensorMutableData(); + + int tensorSizes[5] = {1, 1, 1, model->batch_size, model->features}; + cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensorData)); + outputTensorValues->push_back(outputMat); + } +} + +void init_random_opendr_tensor_ds(opendr_tensor_t *inputTensorValues, deep_sort_model_t *model) { + int inputTensorSize = 1 * model->batch_size * model->in_channels * model->model_size[1] * model->model_size[0]; + float *data = static_cast(malloc(inputTensorSize * sizeof(float))); + for (unsigned int j = 0; j < inputTensorSize; ++j) { + data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; + } + + // Dims of input of model + load_tensor(inputTensorValues, static_cast(data), 1, model->batch_size, model->in_channels, model->model_size[1], + model->model_size[0]); + free(data); +} + +void forward_deep_sort(deep_sort_model_t *model, opendr_tensor_t *inputTensorValues, 
opendr_tensor_vector_t *tensorVector) { + // Get the feature vector for the current image + std::vector outputTensorValues; + ff_deep_sort(model, inputTensorValues, &outputTensorValues); + + int nTensors = static_cast(outputTensorValues.size()); + if (nTensors > 0) { + int batch_sizes[nTensors]; + int frames[nTensors]; + int channels[nTensors]; + int widths[nTensors]; + int heights[nTensors]; + + std::vector temp_tensors; + opendr_tensor_t temp_tensor[nTensors]; + + for (int i = 0; i < nTensors; i++) { + batch_sizes[i] = 1; + frames[i] = 1; + channels[i] = 1; + widths[i] = model->batch_size; + heights[i] = model->features; + + load_tensor(&(temp_tensor[i]), outputTensorValues[i].ptr(0), batch_sizes[i], frames[i], channels[i], widths[i], + heights[i]); + temp_tensors.push_back(temp_tensor[i]); + } + load_tensor_vector(tensorVector, temp_tensors.data(), nTensors); + for (int i = 0; i < nTensors; i++) { + free_tensor(&(temp_tensor[i])); + } + } else { + initialize_tensor_vector(tensorVector); + } +} diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index 8eddbed500..fde313aeb9 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -36,28 +36,130 @@ void free_image(opendr_image_t *image) { } } -void initialize_detections(opendr_detection_target_list_t *detections) { - std::vector dets; - opendr_detection_target_t det; - det.name = -1; - det.left = 0.0; - det.top = 0.0; - det.width = 0.0; - det.height = 0.0; - det.score = 0.0; - dets.push_back(det); - - load_detections(detections, dets.data(), (int)dets.size()); -} - -void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize) { - detections->size = vectorSize; - int sizeOfOutput = (vectorSize) * sizeof(opendr_detection_target_t); - detections->starting_pointer = static_cast(malloc(sizeOfOutput)); - std::memcpy(detections->starting_pointer, vectorDataPtr, sizeOfOutput); -} - -void 
free_detections(opendr_detection_target_list_t *detections) { - if (detections->starting_pointer != NULL) - free(detections->starting_pointer); -} \ No newline at end of file +void initialize_detections_vector(opendr_detection_vector_target_t *detection_vector) { + std::vector detections; + + opendr_detection_target_t detection; + detection.name = -1; + detection.left = 0.0; + detection.top = 0.0; + detection.width = 0.0; + detection.height = 0.0; + detection.score = 0.0; + + detections.push_back(detection); + + load_detections_vector(detection_vector, detections.data(), static_cast(detections.size())); +} + +void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, + int vector_size) { + detection_vector->size = vector_size; + int size_of_output = (vector_size) * sizeof(opendr_detection_target_t); + detection_vector->starting_pointer = static_cast(malloc(size_of_output)); + std::memcpy(detection_vector->starting_pointer, detection, size_of_output); +} + +void free_detections_vector(opendr_detection_vector_target_t *detection_vector) { + if (detection_vector->starting_pointer != NULL) + delete detection_vector->starting_pointer; +} + +void initialize_tensor(opendr_tensor_t *opendr_tensor) { + opendr_tensor->batch_size = 0; + opendr_tensor->frames = 0; + opendr_tensor->channels = 0; + opendr_tensor->width = 0; + opendr_tensor->height = 0; + opendr_tensor->data = NULL; +} + +void load_tensor(opendr_tensor_t *opendr_tensor, void *tensor_data, int batch_size, int frames, int channels, int width, + int height) { + opendr_tensor->batch_size = batch_size; + opendr_tensor->frames = frames; + opendr_tensor->channels = channels; + opendr_tensor->width = width; + opendr_tensor->height = height; + + int size_of_data = (batch_size * frames * channels * width * height) * sizeof(float); + opendr_tensor->data = static_cast(malloc(size_of_data)); + std::memcpy(opendr_tensor->data, tensor_data, size_of_data); +} + +void 
free_tensor(opendr_tensor_t *opendr_tensor) { + if (opendr_tensor->data != NULL) + delete opendr_tensor->data; +} + +void initialize_tensor_vector(opendr_tensor_vector_t *tensor_vector) { + tensor_vector->n_tensors = 0; + tensor_vector->batch_sizes = NULL; + tensor_vector->frames = NULL; + tensor_vector->channels = NULL; + tensor_vector->widths = NULL; + tensor_vector->heights = NULL; + tensor_vector->memories = 0; +} + +void load_tensor_vector(opendr_tensor_vector_t *tensor_vector, opendr_tensor_t *tensor, int number_of_tensors) { + tensor_vector->n_tensors = number_of_tensors; + int size_of_shape_data = number_of_tensors * sizeof(int); + /* initialize arrays to hold size values for each tensor */ + tensor_vector->batch_sizes = static_cast(malloc(size_of_shape_data)); + tensor_vector->frames = static_cast(malloc(size_of_shape_data)); + tensor_vector->channels = static_cast(malloc(size_of_shape_data)); + tensor_vector->widths = static_cast(malloc(size_of_shape_data)); + tensor_vector->heights = static_cast(malloc(size_of_shape_data)); + + /* initialize array to hold data values for all tensors */ + tensor_vector->memories = static_cast(malloc(number_of_tensors * sizeof(float *))); + + /* copy size values */ + for (int i = 0; i < number_of_tensors; i++) { + (tensor_vector->batch_sizes)[i] = tensor[i].batch_size; + (tensor_vector->frames)[i] = tensor[i].frames; + (tensor_vector->channels)[i] = tensor[i].channels; + (tensor_vector->widths)[i] = tensor[i].width; + (tensor_vector->heights)[i] = tensor[i].height; + + /* copy data values by, + * initialize a data pointer into a tensor, + * copy the values, + * set tensor data pointer to watch the memory pointer*/ + int size_of_data = ((tensor[i].batch_size) * (tensor[i].frames) * (tensor[i].channels) * (tensor[i].width) * + (tensor[i].height) * sizeof(float)); + float *memory_of_data_tensor = static_cast(malloc(size_of_data)); + std::memcpy(memory_of_data_tensor, tensor[i].data, size_of_data); + 
(tensor_vector->memories)[i] = memory_of_data_tensor; + } +} + +void free_tensor_vector(opendr_tensor_vector_t *tensor_vector) { + // free vector pointers + if (tensor_vector->batch_sizes != NULL) + delete tensor_vector->batch_sizes; + if (tensor_vector->frames != NULL) + delete tensor_vector->frames; + if (tensor_vector->channels != NULL) + delete tensor_vector->channels; + if (tensor_vector->widths != NULL) + delete tensor_vector->widths; + if (tensor_vector->heights != NULL) + delete tensor_vector->heights; + + // free tensors data and vector memory + if (tensor_vector->memories != NULL) { + for (int i = 0; i < (tensor_vector->n_tensors); i++) { + if ((tensor_vector->memories)[i] != NULL) + delete (tensor_vector->memories)[i]; + } + delete tensor_vector->memories; + } +} + +void iter_tensor_vector(opendr_tensor_t *output, opendr_tensor_vector_t *tensor_vector, int index) { + load_tensor(output, static_cast((tensor_vector->memories)[index]), (tensor_vector->batch_sizes)[index], + (tensor_vector->frames)[index], (tensor_vector->channels)[index], (tensor_vector->widths)[index], + (tensor_vector->heights)[index]); +} diff --git a/src/c_api/skeleton_based_action_recognition_pst.cpp b/src/c_api/skeleton_based_action_recognition_pst.cpp new file mode 100644 index 0000000000..b8bf90815a --- /dev/null +++ b/src/c_api/skeleton_based_action_recognition_pst.cpp @@ -0,0 +1,167 @@ +// Copyright 2020-2022 OpenDR European Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include "skeleton_based_action_recognition_pst.h" +#include "target.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include "opencv2/core/core_c.h" + +void load_pst_model(const char *modelPath, pst_model_t *model) { + // Initialize model + model->onnx_session = model->env = model->session_options = NULL; + + Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); + Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; + sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::Session *session = new Ort::Session(*env, modelPath, *sessionOptions); + model->env = env; + model->onnx_session = session; + model->session_options = sessionOptions; + + // Should we pass these parameters through the model json file? + model->batch_size = 128; + model->in_channels = 2; + model->features = 300; + model->num_point = 18; // same as the output of openpose + model->num_person = 2; + + model->num_classes = 60; +} + +void free_pst_model(pst_model_t *model) { + if (model->onnx_session) { + Ort::Session *session = static_cast(model->onnx_session); + delete session; + } + + if (model->session_options) { + Ort::SessionOptions *sessionOptions = static_cast(model->session_options); + delete sessionOptions; + } + + if (model->env) { + Ort::Env *env = static_cast(model->env); + delete env; + } +} + +void ff_pst(pst_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { + Ort::Session *session = static_cast(model->onnx_session); + + if (!session) { + std::cerr << "ONNX session not initialized." 
<< std::endl; + return; + } + + // Prepare the input dimensions + // Dims of input data + size_t inputTensorSize = model->batch_size * model->in_channels * model->features * model->num_point * model->num_person; + + // Dims of input of model + std::vector inputNodeDims = {model->batch_size, model->in_channels, model->features, model->num_point, + model->num_person}; + + // Setup input/output names + Ort::AllocatorWithDefaultOptions allocator; + std::vector inputNodeNames = {"onnx_input"}; + std::vector outputNodeNames = {"onnx_output"}; + + // Set up the input tensor + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value inputTensor = + Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 5); + assert(inputTensor.IsTensor()); + + // Feed-forward the model + auto outputTensors = + session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); + assert(outputTensors.size() == 1); + + // Get the results back + for (int i = 0; i < outputTensors.size(); i++) { + float *tensorData = outputTensors[i].GetTensorMutableData(); + + int tensorSizes[5] = {1, 1, 1, model->batch_size, model->num_classes}; + + cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensorData)); + outputTensorValues->push_back(outputMat); + } +} + +void init_random_opendr_tensor_pst(opendr_tensor_t *inputTensorValues, pst_model_t *model) { + int inputTensorSize = model->batch_size * model->in_channels * model->features * model->num_point * model->num_person; + + float *data = static_cast(malloc(inputTensorSize * sizeof(float))); + for (unsigned int j = 0; j < inputTensorSize; ++j) { + data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; + } + + load_tensor(inputTensorValues, static_cast(data), model->batch_size, model->in_channels, model->features, + model->num_point, model->num_person); + free(data); +} + +void forward_pst(pst_model_t *model, opendr_tensor_t 
*inputTensorValues, opendr_tensor_vector_t *tensorVector) { + // Get the feature vector for the current image + std::vector outputTensorValues; + ff_pst(model, inputTensorValues, &outputTensorValues); + + int nTensors = static_cast(outputTensorValues.size()); + if (nTensors > 0) { + int batch_sizes[nTensors]; + int frames[nTensors]; + int channels[nTensors]; + int widths[nTensors]; + int heights[nTensors]; + + std::vector tempTensorsVector; + opendr_tensor_t tempTensors[nTensors]; + + for (int i = 0; i < nTensors; i++) { + batch_sizes[i] = 1; + frames[i] = 1; + channels[i] = 1; + widths[i] = model->batch_size; + heights[i] = model->num_classes; + + load_tensor(&(tempTensors[i]), outputTensorValues[i].ptr(0), batch_sizes[i], frames[i], channels[i], widths[i], + heights[i]); + tempTensorsVector.push_back(tempTensors[i]); + } + load_tensor_vector(tensorVector, tempTensorsVector.data(), nTensors); + for (int i = 0; i < nTensors; i++) { + free_tensor(&(tempTensors[i])); + } + } else { + initialize_tensor_vector(tensorVector); + } +} diff --git a/tests/sources/c_api/test_activity_recognition_x3d.c b/tests/sources/c_api/test_activity_recognition_x3d.c new file mode 100644 index 0000000000..81fbc325f1 --- /dev/null +++ b/tests/sources/c_api/test_activity_recognition_x3d.c @@ -0,0 +1,93 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include "activity_recognition_x3d.h" +#include "opendr_utils.h" + +START_TEST(model_creation_test) { + // Create a face recognition model + x3d_model_t model; + + // Load a pretrained model + load_x3d_model("data/activity_recognition/x3d/optimized_model/x3d_l.onnx", "l", &model); + ck_assert(model.onnx_session); + ck_assert(model.env); + ck_assert(model.session_options); + // Release the resources + free_x3d_model(&model); +} +END_TEST + +START_TEST(forward_pass_creation_test) { + // Create a x3d model + x3d_model_t model; + // Load a pretrained model (see instructions for downloading the data) + load_x3d_model("data/activity_recognition/x3d/optimized_model/x3d_l.onnx", "l", &model); + + // Load a random tensor and perform forward pass + opendr_tensor_t input_tensor; + init_random_opendr_tensor_x3d(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_x3d(&model, &input_tensor, &output_tensor_vector); + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Load another tensor + init_random_opendr_tensor_x3d(&input_tensor, &model); + forward_x3d(&model, &input_tensor, &output_tensor_vector); + + ck_assert(output_tensor_vector.n_tensors == 1); + + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Free the model resources + free_x3d_model(&model); +} +END_TEST + +Suite *x3d_suite(void) { + Suite *s; + TCase *tc_core; + + s = suite_create("X3d"); + tc_core = tcase_create("Core"); + + tcase_add_test(tc_core, model_creation_test); + tcase_add_test(tc_core, forward_pass_creation_test); + suite_add_tcase(s, tc_core); + + return s; +} + +int main() { + int no_failed = 0; + Suite *s; + SRunner *runner; + + s = x3d_suite(); + runner = srunner_create(s); + + srunner_run_all(runner, CK_NORMAL); + no_failed = srunner_ntests_failed(runner); + srunner_free(runner); + return (no_failed == 0) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/sources/c_api/test_deep_sort.c b/tests/sources/c_api/test_deep_sort.c new file mode 100644 index 0000000000..8f29690436 --- /dev/null +++ b/tests/sources/c_api/test_deep_sort.c @@ -0,0 +1,95 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include "object_tracking_2d_deep_sort.h" +#include "opendr_utils.h" + +START_TEST(model_creation_test) { + // Create a face recognition model + deep_sort_model_t model; + + // Load a pretrained model + load_deep_sort_model("data/object_tracking_2d/deep_sort/optimized_model/onnx_model.onnx", &model); + + ck_assert(model.onnx_session); + ck_assert(model.env); + ck_assert(model.session_options); + + // Release the resources + free_deep_sort_model(&model); +} +END_TEST + +START_TEST(forward_pass_creation_test) { + // Create a face recognition model + deep_sort_model_t model; + // Load a pretrained model (see instructions for downloading the data) + load_deep_sort_model("data/object_tracking_2d/deep_sort/optimized_model/onnx_model.onnx", &model); + + // Load a random tensor and perform forward pass + opendr_tensor_t input_tensor; + init_random_opendr_tensor_ds(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_deep_sort(&model, &input_tensor, &output_tensor_vector); + free_tensor(&input_tensor); + 
free_tensor_vector(&output_tensor_vector); + + // Load another tensor + init_random_opendr_tensor_ds(&input_tensor, &model); + forward_deep_sort(&model, &input_tensor, &output_tensor_vector); + + ck_assert(output_tensor_vector.n_tensors == 1); + + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Free the model resources + free_deep_sort_model(&model); +} +END_TEST + +Suite *deep_sort_suite(void) { + Suite *s; + TCase *tc_core; + + s = suite_create("Deep Sort"); + tc_core = tcase_create("Core"); + + tcase_add_test(tc_core, model_creation_test); + tcase_add_test(tc_core, forward_pass_creation_test); + suite_add_tcase(s, tc_core); + + return s; +} + +int main() { + int no_failed = 0; + Suite *s; + SRunner *runner; + + s = deep_sort_suite(); + runner = srunner_create(s); + + srunner_run_all(runner, CK_NORMAL); + no_failed = srunner_ntests_failed(runner); + srunner_free(runner); + return (no_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/sources/c_api/test_detr.c b/tests/sources/c_api/test_detr.c new file mode 100644 index 0000000000..049632c373 --- /dev/null +++ b/tests/sources/c_api/test_detr.c @@ -0,0 +1,104 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include "object_detection_2d_detr.h" +#include "opendr_utils.h" + +START_TEST(model_creation_test) { + // Create a detr model + detr_model_t model; + + // Load a pretrained model + load_detr_model("data/object_detection_2d/detr/optimized_model", &model); + + ck_assert(model.onnx_session); + ck_assert(model.env); + ck_assert(model.session_options); + + // Release the resources + free_detr_model(&model); + + // Load a model that does not exist + load_detr_model("data/optimized_model_not_existant", &model); + ck_assert(!model.onnx_session); + ck_assert(!model.env); + ck_assert(!model.session_options); + + // Release the resources + free_detr_model(&model); +} +END_TEST + +START_TEST(forward_pass_creation_test) { + // Create a detr model + detr_model_t model; + // Load a pretrained model (see instructions for downloading the data) + load_detr_model("data/object_detection_2d/detr/optimized_model", &model); + + // Load a random tensor and perform forward pass + opendr_tensor_t input_tensor; + init_random_opendr_tensor_detr(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_detr(&model, &input_tensor, &output_tensor_vector); + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Load another tensor + init_random_opendr_tensor_detr(&input_tensor, &model); + forward_detr(&model, &input_tensor, &output_tensor_vector); + + ck_assert(output_tensor_vector.n_tensors == 2); + + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Free the model resources + free_detr_model(&model); +} +END_TEST + +Suite *detr_suite(void) { + Suite *s; + TCase *tc_core; + + s = suite_create("Detr"); + tc_core = tcase_create("Core"); + + tcase_add_test(tc_core, model_creation_test); + tcase_add_test(tc_core, forward_pass_creation_test); + suite_add_tcase(s, tc_core); + + return s; +} + +int main() { + int no_failed = 0; + 
Suite *s; + SRunner *runner; + + s = detr_suite(); + runner = srunner_create(s); + + srunner_run_all(runner, CK_NORMAL); + no_failed = srunner_ntests_failed(runner); + srunner_free(runner); + return (no_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/sources/c_api/test_lightweight_open_pose.c b/tests/sources/c_api/test_lightweight_open_pose.c new file mode 100644 index 0000000000..4f8fa27b90 --- /dev/null +++ b/tests/sources/c_api/test_lightweight_open_pose.c @@ -0,0 +1,92 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include "lightweight_open_pose.h" +#include "opendr_utils.h" + +START_TEST(model_creation_test) { + // Create a face recognition model + open_pose_model_t model; + + // Load a pretrained model + load_open_pose_model("data/lightweight_open_pose/optimized_model/onnx_model.onnx", &model); + + ck_assert(model.onnx_session); + ck_assert(model.env); + ck_assert(model.session_options); +} +END_TEST + +START_TEST(forward_pass_creation_test) { + // Create a x3d model + open_pose_model_t model; + // Load a pretrained model (see instructions for downloading the data) + load_open_pose_model("data/lightweight_open_pose/optimized_model/onnx_model.onnx", &model); + + // Load a random tensor and perform forward pass + opendr_tensor_t input_tensor; + init_random_opendr_tensor_op(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_open_pose(&model, &input_tensor, &output_tensor_vector); + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Load another tensor + init_random_opendr_tensor_op(&input_tensor, &model); + forward_open_pose(&model, &input_tensor, &output_tensor_vector); + + ck_assert(output_tensor_vector.n_tensors == model.output_size); + + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Free the model resources + free_open_pose_model(&model); +} +END_TEST + +Suite *open_pose_suite(void) { + Suite *s; + TCase *tc_core; + + s = suite_create("Open Pose"); + tc_core = tcase_create("Core"); + + tcase_add_test(tc_core, model_creation_test); + tcase_add_test(tc_core, forward_pass_creation_test); + suite_add_tcase(s, tc_core); + + return s; +} + +int main() { + int no_failed = 0; + Suite *s; + SRunner *runner; + + s = open_pose_suite(); + runner = srunner_create(s); + + srunner_run_all(runner, CK_NORMAL); + no_failed = srunner_ntests_failed(runner); + srunner_free(runner); + return (no_failed == 0) 
? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/sources/c_api/test_nanodet.c b/tests/sources/c_api/test_nanodet.c index b9526a25eb..66ec6a1856 100644 --- a/tests/sources/c_api/test_nanodet.c +++ b/tests/sources/c_api/test_nanodet.c @@ -17,22 +17,22 @@ #include #include #include -#include "nanodet_c.h" +#include "object_detection_2d_nanodet_jit.h" #include "opendr_utils.h" START_TEST(model_creation_test) { // Create a nanodet libtorch model nanodet_model_t model; // Load a pretrained model - load_nanodet_model("data/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); - ck_assert_msg(model.net != 0, "net is NULL"); + ck_assert_msg(model.network != 0, "net is NULL"); // Release the resources free_nanodet_model(&model); // Check if memory steel exist - ck_assert_msg(model.net, "net is NULL"); + ck_assert_msg(model.network, "net is NULL"); } END_TEST @@ -41,18 +41,18 @@ START_TEST(inference_creation_test) { nanodet_model_t model; // Load a pretrained model - load_nanodet_model("data/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); // Load an image and performance inference opendr_image_t image; - load_image("data/nanodet/database/000000000036.jpg", &image); - opendr_detection_target_list_t res = infer_nanodet(&image, &model); + load_image("data/object_detection_2d/nanodet/database/000000000036.jpg", &image); + opendr_detection_vector_target_t res = infer_nanodet(&model, &image); free_image(&image); ck_assert(res.size != 0); // Free the model resources - free_detections(&res); + free_detections_vector(&res); free_nanodet_model(&model); } END_TEST diff --git a/tests/sources/c_api/test_pst_gcn.c b/tests/sources/c_api/test_pst_gcn.c new file mode 100644 index 0000000000..cec6c24841 --- 
/dev/null +++ b/tests/sources/c_api/test_pst_gcn.c @@ -0,0 +1,94 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include "opendr_utils.h" +#include "skeleton_based_action_recognition_pst.h" + +START_TEST(model_creation_test) { + // Create a skeleton based action recognition pst model + pst_model_t model; + + // Load a pretrained model + load_pst_model("data/skeleton_based_action_recognition/progressive_spatiotemporal_gcn/optimized_model/onnx_model.onnx", + &model); + + ck_assert(model.onnx_session); + ck_assert(model.env); + ck_assert(model.session_options); +} +END_TEST + +START_TEST(forward_pass_creation_test) { + // Create a skeleton based action recognition pst model + pst_model_t model; + // Load a pretrained model (see instructions for downloading the data) + load_pst_model("data/skeleton_based_action_recognition/progressive_spatiotemporal_gcn/optimized_model/onnx_model.onnx", + &model); + + // Load a random tensor and perform forward pass + opendr_tensor_t input_tensor; + init_random_opendr_tensor_pst(&input_tensor, &model); + + // Initialize opendr tensor vector for output + opendr_tensor_vector_t output_tensor_vector; + forward_pst(&model, &input_tensor, &output_tensor_vector); + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Load another tensor + init_random_opendr_tensor_pst(&input_tensor, &model); + 
forward_pst(&model, &input_tensor, &output_tensor_vector); + + ck_assert(output_tensor_vector.n_tensors == 1); + + free_tensor(&input_tensor); + free_tensor_vector(&output_tensor_vector); + + // Free the model resources + free_pst_model(&model); +} +END_TEST + +Suite *pst_gcn_suite(void) { + Suite *s; + TCase *tc_core; + + s = suite_create("Pst Gcn"); + tc_core = tcase_create("Core"); + + tcase_add_test(tc_core, model_creation_test); + tcase_add_test(tc_core, forward_pass_creation_test); + suite_add_tcase(s, tc_core); + + return s; +} + +int main() { + int no_failed = 0; + Suite *s; + SRunner *runner; + + s = pst_gcn_suite(); + runner = srunner_create(s); + + srunner_run_all(runner, CK_NORMAL); + no_failed = srunner_ntests_failed(runner); + srunner_free(runner); + return (no_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; +} From fcd4f9c3a09d70deb51eec83ad1d469c7290ae27 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 01:05:17 +0200 Subject: [PATCH 50/87] bug fixes in onnx exporting. 
--- .../fair_mot/object_tracking_2d_fair_mot_learner.py | 4 ++-- .../lightweight_open_pose/lightweight_open_pose_learner.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py b/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py index 02cdcdcbaf..7de05811d3 100644 --- a/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py +++ b/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py @@ -463,12 +463,12 @@ def optimize(self, do_constant_folding=False, img_size=(1088, 608), optimizable_ except FileNotFoundError: # Create temp directory os.makedirs(self.temp_path, exist_ok=True) - self.__convert_rpn_to_onnx( + self.__convert_to_onnx( input_shape, os.path.join(self.temp_path, "onnx_model_temp.onnx"), do_constant_folding ) - self.__load_rpn_from_onnx(os.path.join(self.temp_path, "onnx_model_rpn_temp.onnx")) + self.__load_from_onnx(os.path.join(self.temp_path, "onnx_model_rpn_temp.onnx")) @staticmethod def download(model_name, path, server_url=None): diff --git a/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py b/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py index 2305154c4b..b9f6ae8656 100644 --- a/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py +++ b/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py @@ -869,7 +869,8 @@ def __convert_to_onnx(self, output_name, do_constant_folding=False, verbose=Fals input_names = ['data'] if self.num_refinement_stages == 2: output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs', - 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs'] + 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs', + 'stage_2_output_1_heatmaps', 
'stage_2_output_0_pafs'] else: output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs'] From eaa9a0d6c1e4005bdba3002b5c7c0081f2b0817a Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 16:31:05 +0200 Subject: [PATCH 51/87] Revert "update docs for new data structures and utilities of c api" This reverts commit 467c1f1e661462de46dd88a1762b23cbfaf7f4c8. --- docs/reference/c-data-h.md | 101 ------------------------- docs/reference/c-face-recognition-h.md | 20 ++--- docs/reference/c-opendr-utils-h.md | 76 +------------------ docs/reference/c-target-h.md | 71 +---------------- 4 files changed, 13 insertions(+), 255 deletions(-) diff --git a/docs/reference/c-data-h.md b/docs/reference/c-data-h.md index 046010a769..20b5e27b8e 100644 --- a/docs/reference/c-data-h.md +++ b/docs/reference/c-data-h.md @@ -24,104 +24,3 @@ A pointer where image data are stored. *opendr_image_t* is using internally OpenCV images (*cv::Mat*) for storing images. Therefore, only a pointer to the memory location of the corresponding *cv::Mat* is stored. Please note that the user is not expected to directly manipulate these data without first converting them into OpenCV data type or using the corresponding functions provided in *opendr_utils.h*. - -### struct *opendr_tensor_t* -```C -struct opendr_tensor { - int batch_size; - int frames; - int channels; - int width; - int height; - - float *data; -}; -typedef struct opendr_tensor opendr_tensor_t; -``` - - -The *opendr_tensor_t* structure provides a data structure for storing OpenDR structures. -Every function in the C API receiving and return tensors is expected to use this structure. -Helper functions that directly maps data into this format are provided in *opendr_utils.h*. - -The *opendr_tensor_t* structure has the following field: - -#### `int batch_size` field - -An integer that represent the number of batch size in the tensor. - -#### `int frames` field - -An integer that represent the number of frames in the tensor. 
- -#### `int channels` field - -An integer that represent the number of channels in the tensor. - -#### `int width` field - -An integer that represent the width of the tensor. - -#### `int height` field - -An integer that represent the height of the tensor. - -#### `float *data` field - -A pointer where data are stored. -*opendr_tensor_t* is using internally a pointer and corresponding sizes to copy the data into the memory of float *data. -Therefore, only a pointer to the memory location of the corresponding data is stored. -Please note that the user is not expected to directly manipulate these data without first converting them into OpenCV or other form of data type or using the corresponding functions provided in *opendr_utils.h*. - -### struct *opendr_tensor_vector_t* -```C -struct opendr_tensor_vector { - int n_tensors; - int *batch_sizes; - int *frames; - int *channels; - int *widths; - int *heights; - - float **memories; -}; -typedef struct opendr_tensor_vector opendr_tensor_vector_t; -``` - - -The *opendr_tensor_vector_t* structure provides a data structure for storing OpenDR tensor structures. -Every function in the C API receiving and returning multiple tensors is expected to use this structure. -Helper functions that directly maps data into this format are provided in *opendr_utils.h*. - -The *opendr_tensor_vector_t* structure has the following field: - -#### `int n_tensors` field - -An integer that represent the number of tensor in the tensor vector. - -#### `int *batch_sizes` field - -A pointer of integers that represent the number of batch size in each tensor. - -#### `int *frames` field - -A pointer of integers that represent the number of frames in each tensor. - -#### `int *channels` field - -A pointer of integers that represent the number of channels in each tensor. - -#### `int *widths` field - -A pointer of integers that represent the width of each tensor. 
- -#### `int *heights` field - -A pointer of integers that represent the height of each tensor. - -#### `float **memories` field - -A pointer where stores the data of each *opendr_tensor_t.data* stored in the vector. -*opendr_tensor_vector_t* is using internally pointers and corresponding sizes to copy the data into the memory of *memories* for each tensor that is provided. -Therefore, only a pointer to the memory location of the corresponding data is stored. -Please note that the user is not expected to directly manipulate these data without first converting them into OpenCV or other form of data type or using the corresponding functions provided in *opendr_utils.h*. diff --git a/docs/reference/c-face-recognition-h.md b/docs/reference/c-face-recognition-h.md index 8b635c27b0..1ea2e5822d 100644 --- a/docs/reference/c-face-recognition-h.md +++ b/docs/reference/c-face-recognition-h.md @@ -11,14 +11,14 @@ struct face_recognition_model { }; typedef struct face_recognition_model face_recognition_model_t; ``` -The *face_recognition_model_t* structure keeps all the necessary information that are required by the OpenDR face recognition tool (e.g., model weights, normalization information, database for person recognition, ONNX session information, etc.). +The *face_recognition_model_t* structure keeps all the neccesary information that are required by the OpenDR face recognition tool (e.g., model weights, normalization information, database for person recognition, ONNX session information, etc.). ### Function *load_face_recognition_model()* ```C -void load_face_recognition_model(const char *modelPath, face_recognition_model_t *model); +void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); ``` - Loads a face recognition model saved in the local filesystem (*modelPath*) in OpenDR format. + Loads a face recognition model saved in the local filesystem (*model path*) in OpenDR format. 
This function also initializes a CPU-based ONNX session for performing inference using this model. The pre-trained models should follow the OpenDR conventions. The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. @@ -40,25 +40,25 @@ The function returns an OpenDR category structure with the inference results. ### Function *decode_category_face_recognition()* ```C -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *personName); +void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); ``` Returns the name of a recognized person by decoding the category id into a string (this function uses the information from the built person database). ### Function *build_database_face_recognition()* ```C -void build_database_face_recognition(const char *databaseFolder, const char *outputPath, face_recognition_model_t *model); +void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model); ``` Build a face recognition database (containing images for persons to be recognized). -This function expects the *databaseFolder* to have the same format as the main Python toolkit. -The function calculates the features of the person that are contained in the database and it stores it into a binary file that can be then loaded to perform inference (*outputPath*). -A loaded face recognition model should be provided (*model*), since this model will be used for the feature extraction process. +This function expects the *database_folder* to have the same format as the main Python toolkit. +The function calculates the features of the person that are contained in the database and it stores it into a binary file that can be then loaded to perform inference (*output_path*). 
+A loaded face recongition model should be provided (*model*), since this model will be used for the feature extraction process. ### Function *load_database_face_recognition()* ```C -void load_database_face_recognition(const char *databasePath, face_recognition_model_t *model); +void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); ``` -Loads an already built database (*databasePath*) into a face recognition model (*model*). +Loads an already built database (*database_path) into a face recognition model (*model*). After this step, the model can be used for performing inference. diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index dfe9a67013..4e76a24258 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -11,6 +11,8 @@ The *load_image()* function allows for reading an images from the local file sys A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. This function allocates memory during each function call, so be sure to use the *free_image()* function to release the allocated resources, when the corresponding image is no longer needed. + + ### Function *free_image()* ```C void free_image(opendr_image_t *image); @@ -18,77 +20,3 @@ void free_image(opendr_image_t *image); The *free_image()* function releases the memory allocated for an OpenDR image structure (*image*). A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. -### Function *initialize_detections_vector()* -```C -void initialize_detections_vector(opendr_detection_vector_target_t *detection_vector); -``` -The *initialize_detections_vector()* function initialize the data of an OpenDR detection vector structure (*detection_vector*) with zero values. -A pointer (*detection_vector*) to an OpenDR *detection_vector_target_t* should be provided. 
- -### Function *load_detections_vector()* -```C -void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, - int vector_size); -``` -The *load_detections_vector()* function allows for storing OpenDR detection target structures in to the memory allocated for multiple OpenDR detections structures (*detection*). -A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. - -### Function *free_detections_vector()* -```C -void free_detections_vector(opendr_detection_vector_target_t *detection_vector); -``` -The *free_detections_vector()* function releases the memory allocated for an OpenDR detection vector structure (*detection_vector*). -A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. - -### Function *initialize_tensor()* -```C -void initialize_tensor(opendr_tensor_t *opendr_tensor); -``` -The *initialize_tensor()* function initialize the data of an OpenDR tensor (*opendr_tensor*) with zero values. -A pointer (*opendr_tensor*) to an OpenDR *opendr_tensor_t* should be provided. - -### Function *load_tensor()* -```C -void load_tensor(opendr_tensor_t *opendr_tensor, void *tensor_data, int batch_size, int frames, int channels, int width, - int height); -``` -The *load_tensor()* function allows for storing OpenDR tensor structures in to the memory allocated into a pointer into the OpenDR tensor structure (*opendr_tensor*). -A pointer (*opendr_tensor*) to an OpenDR *opendr_tensor_t* along with the pointer into the memory (*tensor_data*) and the (*batch_size*), (*frames*), (*channels*), (*width*) and (*height*) of tensor should be provided. -All integers must have a minimum value of *1*. - -### Function *free_tensor()* -```C -void free_tensor(opendr_tensor_t *opendr_tensor); -``` -The *free_tensor()* function releases the memory allocated for an OpenDR tensor structure (*opendr_tensor*). 
-A pointer (*opendr_tensor*) to an OpenDR *opendr_tensor_t* should be provided. - -### Function *initialize_tensor_vector()* -```C -void initialize_tensor_vector(opendr_tensor_vector_t *tensor_vector); -``` -The *initialize_tensor_vector()* function initialize the data of an OpenDR tensor vector (*tensor_vector*) with zero values. -A pointer (*tensor_vector*) to an OpenDR *opendr_tensor_vector_t* should be provided. - -### Function *load_tensor_vector()* -```C -void load_tensor_vector(opendr_tensor_vector_t *tensor_vector, opendr_tensor_t *tensor, int number_of_tensors); -``` -The *load_tensor_vector()* function allows for storing multiple OpenDR tensor structures in to the memory allocated into pointers into the OpenDR tensor vector structure (*tensor_vector*). -A pointer (*tensor_vector*) to an OpenDR *opendr_tensor_vector_t* along with the pointer into the memory of a vector or array of OpenDR tensors structure (*tensor*) should be provided. -Moreover the number of tensors (*number_of_tensors*) should be included, and it must be better than *1*. - -### Function *free_tensor_vector()* -```C -void free_tensor_vector(opendr_tensor_vector_t *tensor_vector); -``` -The *free_tensor_vector()* function releases the memory allocated for an OpenDR tensor vector structure (*opendr_tensor_vector_t*). -A pointer (*tensor_vector*) to an OpenDR *opendr_tensor_vector_t* should be provided. - -### Function *iter_tensor_vector()* -```C -void iter_tensor_vector(opendr_tensor_t *output, opendr_tensor_vector_t *tensor_vector, int index); -``` -The *iter_tensor_vector()* function is used to help the user to iterate the OpenDR tensor vector. -A single OpenDR tensor (*output*) is loaded with the values of the indexed (*index*) tensor of the vector (*tensor_vector*). -A pointer (*tensor_vector*) to an OpenDR *opendr_tensor_vector_t* and an (*index*) along with a pointer (*output*) to an OpenDR *opendr_tensor_t* should be provided. 
diff --git a/docs/reference/c-target-h.md b/docs/reference/c-target-h.md index 7eae5bd704..6f748759e8 100644 --- a/docs/reference/c-target-h.md +++ b/docs/reference/c-target-h.md @@ -13,7 +13,7 @@ typedef struct opendr_category_target opendr_category_target_t; ``` -The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. +The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. Every function in the C API that outputs a classification decision is expected to use this structure. The *opendr_category_target_t* structure has the following field: @@ -25,72 +25,3 @@ A numerical id of the category to which the input objects belongs to. #### `float confidence` field The decision confidence (a value between 0 and 1). - - -### struct *opendr_detection_target_t* -```C -struct opendr_detection_target { - int name; - float left; - float top; - float width; - float height; - float score; -}; -typedef struct opendr_detection_target opendr_detection_target_t; -``` - - -The *opendr_detection_target_t* structure provides a data structure for storing inference outputs of detection models. -Every function in the C API that outputs a detection decision is expected to use this structure or a vector of this structure. - -The *opendr_detection_target_t* structure has the following field: - -#### `int name` field - -A numerical id of the category to which the input objects belongs to. - -#### `float left` field - -A numerical value that corresponds to the X value of the top,left point of a detection. - -#### `float top` field - -A numerical value that corresponds to the Y value of the top,left point of a detection. - -#### `float width` field - -A numerical value that corresponds to the width of a detection. - -#### `float height` field - -A numerical value that corresponds to the height of a detection. 
- -#### `float score` field - -The decision score (a value between 0 and 1). - - - -### struct *opendr_detection_vector_target_t* -```C -struct opendr_detection_vector_target { - opendr_detection_target_t *starting_pointer; - int size; -}; -typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; -``` - - -The *opendr_detection_vector_target_t* structure provides a data structure for storing multiple inference outputs of detection models. -Every function in the C API that outputs a detection decision is expected to use this or a *detection_target_t* structure. - -The *opendr_detection_vector_target_t* structure has the following field: - -#### `opendr_detection_target_t starting_pointer` field - -A pointer to a memory of multiple OpenDR detection targets. - -#### `int size` field - -A numerical value that represents the number of OpenDR detection structures that are stored. From 953a193aa9a99522ac6069814a4b50f6837c0fac Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 16:34:02 +0200 Subject: [PATCH 52/87] Revert "bug fixes in onnx exporting." This reverts commit fcd4f9c3a09d70deb51eec83ad1d469c7290ae27. 
--- .../fair_mot/object_tracking_2d_fair_mot_learner.py | 4 ++-- .../lightweight_open_pose/lightweight_open_pose_learner.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py b/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py index 7de05811d3..02cdcdcbaf 100644 --- a/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py +++ b/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py @@ -463,12 +463,12 @@ def optimize(self, do_constant_folding=False, img_size=(1088, 608), optimizable_ except FileNotFoundError: # Create temp directory os.makedirs(self.temp_path, exist_ok=True) - self.__convert_to_onnx( + self.__convert_rpn_to_onnx( input_shape, os.path.join(self.temp_path, "onnx_model_temp.onnx"), do_constant_folding ) - self.__load_from_onnx(os.path.join(self.temp_path, "onnx_model_rpn_temp.onnx")) + self.__load_rpn_from_onnx(os.path.join(self.temp_path, "onnx_model_rpn_temp.onnx")) @staticmethod def download(model_name, path, server_url=None): diff --git a/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py b/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py index b9f6ae8656..2305154c4b 100644 --- a/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py +++ b/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py @@ -869,8 +869,7 @@ def __convert_to_onnx(self, output_name, do_constant_folding=False, verbose=Fals input_names = ['data'] if self.num_refinement_stages == 2: output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs', - 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs', - 'stage_2_output_1_heatmaps', 'stage_2_output_0_pafs'] + 'stage_1_output_1_heatmaps', 
'stage_1_output_0_pafs'] else: output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs'] From bd6fb6d3015c620b362cc1a7a236ea441dbe74de Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 16:35:05 +0200 Subject: [PATCH 53/87] Revert "Revert "bug fixes in onnx exporting."" This reverts commit 953a193aa9a99522ac6069814a4b50f6837c0fac. --- .../fair_mot/object_tracking_2d_fair_mot_learner.py | 4 ++-- .../lightweight_open_pose/lightweight_open_pose_learner.py | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py b/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py index 02cdcdcbaf..7de05811d3 100644 --- a/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py +++ b/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py @@ -463,12 +463,12 @@ def optimize(self, do_constant_folding=False, img_size=(1088, 608), optimizable_ except FileNotFoundError: # Create temp directory os.makedirs(self.temp_path, exist_ok=True) - self.__convert_rpn_to_onnx( + self.__convert_to_onnx( input_shape, os.path.join(self.temp_path, "onnx_model_temp.onnx"), do_constant_folding ) - self.__load_rpn_from_onnx(os.path.join(self.temp_path, "onnx_model_rpn_temp.onnx")) + self.__load_from_onnx(os.path.join(self.temp_path, "onnx_model_rpn_temp.onnx")) @staticmethod def download(model_name, path, server_url=None): diff --git a/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py b/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py index 2305154c4b..b9f6ae8656 100644 --- a/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py +++ b/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py @@ -869,7 +869,8 @@ def 
__convert_to_onnx(self, output_name, do_constant_folding=False, verbose=Fals input_names = ['data'] if self.num_refinement_stages == 2: output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs', - 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs'] + 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs', + 'stage_2_output_1_heatmaps', 'stage_2_output_0_pafs'] else: output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs'] From 83df9199f733c6fbf5b6525642f52dcb9e905957 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 16:35:52 +0200 Subject: [PATCH 54/87] Revert "new source, samples, utilities and test for onnx C API. The new scripts are implemente the feed forward of almost all the onnx models that python api is provided." This reverts commit 617572d5d6d3e5d02004ece522f7d224d050bd61. --- include/activity_recognition_x3d.h | 80 ------ include/data.h | 29 -- include/face_recognition.h | 18 +- include/lightweight_open_pose.h | 91 ------ ...detection_2d_nanodet_jit.h => nanodet_c.h} | 22 +- include/object_detection_2d_detr.h | 81 ------ include/object_tracking_2d_deep_sort.h | 79 ----- include/object_tracking_2d_fair_mot.h | 72 ----- include/opendr_utils.h | 72 +---- .../skeleton_based_action_recognition_pst.h | 75 ----- include/target.h | 4 +- projects/c_api/Makefile | 63 +--- projects/c_api/README.md | 9 +- .../activity_recognition/x3d/x3d_demo.c | 44 --- .../lightweight_open_pose_demo.c | 46 --- .../nanodet/README.md | 2 +- .../nanodet/nanodet_jit_demo.c | 12 +- .../object_detection_2d/detr/detr_demo.c | 43 --- .../deep_sort/deep_sort_demo.c | 43 --- .../progressive_spatio_temporal_gcn_demo.c | 44 --- src/c_api/Makefile | 10 +- src/c_api/activity_recognition_x3d.cpp | 209 -------------- src/c_api/face_recognition.cpp | 269 +++++++++--------- src/c_api/lightweight_open_pose.cpp | 264 ----------------- ...d_nanodet_jit.cpp => nanodet_libtorch.cpp} | 245 ++++++++-------- src/c_api/object_detection_2d_detr.cpp | 269 
------------------ src/c_api/object_tracking_2d_deep_sort.cpp | 203 ------------- src/c_api/opendr_utils.cpp | 152 ++-------- .../skeleton_based_action_recognition_pst.cpp | 167 ----------- .../c_api/test_activity_recognition_x3d.c | 93 ------ tests/sources/c_api/test_deep_sort.c | 95 ------- tests/sources/c_api/test_detr.c | 104 ------- .../c_api/test_lightweight_open_pose.c | 92 ------ tests/sources/c_api/test_nanodet.c | 16 +- tests/sources/c_api/test_pst_gcn.c | 94 ------ 35 files changed, 335 insertions(+), 2876 deletions(-) delete mode 100644 include/activity_recognition_x3d.h delete mode 100644 include/lightweight_open_pose.h rename include/{object_detection_2d_nanodet_jit.h => nanodet_c.h} (70%) delete mode 100644 include/object_detection_2d_detr.h delete mode 100644 include/object_tracking_2d_deep_sort.h delete mode 100644 include/object_tracking_2d_fair_mot.h delete mode 100644 include/skeleton_based_action_recognition_pst.h delete mode 100644 projects/c_api/samples/activity_recognition/x3d/x3d_demo.c delete mode 100644 projects/c_api/samples/lightweight_open_pose/lightweight_open_pose_demo.c rename projects/c_api/samples/{object_detection_2d => }/nanodet/README.md (77%) rename projects/c_api/samples/{object_detection_2d => }/nanodet/nanodet_jit_demo.c (81%) delete mode 100644 projects/c_api/samples/object_detection_2d/detr/detr_demo.c delete mode 100644 projects/c_api/samples/object_tracking_2d/deep_sort/deep_sort_demo.c delete mode 100644 projects/c_api/samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c delete mode 100644 src/c_api/activity_recognition_x3d.cpp delete mode 100644 src/c_api/lightweight_open_pose.cpp rename src/c_api/{object_detection_2d_nanodet_jit.cpp => nanodet_libtorch.cpp} (53%) delete mode 100644 src/c_api/object_detection_2d_detr.cpp delete mode 100644 src/c_api/object_tracking_2d_deep_sort.cpp delete mode 100644 src/c_api/skeleton_based_action_recognition_pst.cpp delete mode 100644 
tests/sources/c_api/test_activity_recognition_x3d.c delete mode 100644 tests/sources/c_api/test_deep_sort.c delete mode 100644 tests/sources/c_api/test_detr.c delete mode 100644 tests/sources/c_api/test_lightweight_open_pose.c delete mode 100644 tests/sources/c_api/test_pst_gcn.c diff --git a/include/activity_recognition_x3d.h b/include/activity_recognition_x3d.h deleted file mode 100644 index 8544adff1f..0000000000 --- a/include/activity_recognition_x3d.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef C_API_X3D_ACTIVITY_RECOGNITION_H -#define C_API_X3D_ACTIVITY_RECOGNITION_H - -#include "opendr_utils.h" -#include "target.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct x3d_model { - // ONNX session objects - void *onnx_session; - void *env; - void *session_options; - - // Sizes for resizing an input image - int model_size; - int frames_per_clip; - int in_channels; - int batch_size; - - // Statistics for normalization - float mean_value; - float img_scale; - - // Feature dimension - int features; -}; -typedef struct x3d_model x3d_model_t; - -/** - * Loads a x3d activity recognition model saved in OpenDR format - * @param modelPath path to the OpenDR x3d model (as exported using OpenDR library) - * @param model the loaded model - */ -void load_x3d_model(const char *modelPath, char *mode, x3d_model_t *model); - -/** - * This function perform feed forward of x3d activity recognition model - * @param model x3d object detection model to be used for inference - * @param inputTensorValues input of the model as OpenCV mat - * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward - */ -void forward_x3d(x3d_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); - -/** - * Releases the memory allocated for a x3d activity recognition model - * @param model model to be de-allocated - */ -void free_x3d_model(x3d_model_t *model); - -/** - * initialize a tensor with random values for testing the forward pass of the model - * @param inputTensorValues opendr tensor to be loaded with random values - * @param model model to be used for hyper parameters - */ -void init_random_opendr_tensor_x3d(opendr_tensor_t *inputTensorValues, x3d_model_t *model); - -#ifdef __cplusplus -} -#endif - -#endif // C_API_X3D_ACTIVITY_RECOGNITION_H diff --git a/include/data.h b/include/data.h index 9ec84913b3..0b0018b941 100644 --- a/include/data.h +++ b/include/data.h @@ -29,35 +29,6 @@ struct opendr_image { 
}; typedef struct opendr_image opendr_image_t; -/*** - * Opendr data type for representing tensors - */ -struct opendr_tensor { - int batch_size; - int frames; - int channels; - int width; - int height; - - float *data; -}; -typedef struct opendr_tensor opendr_tensor_t; - -/*** - * Opendr data type for representing tensor vectors - */ -struct opendr_tensor_vector { - int n_tensors; - int *batch_sizes; - int *frames; - int *channels; - int *widths; - int *heights; - - float **memories; -}; -typedef struct opendr_tensor_vector opendr_tensor_vector_t; - #ifdef __cplusplus } #endif diff --git a/include/face_recognition.h b/include/face_recognition.h index 551a7d6f19..ff2774aab2 100644 --- a/include/face_recognition.h +++ b/include/face_recognition.h @@ -58,10 +58,10 @@ typedef struct face_recognition_model face_recognition_model_t; /** * Loads a face recognition model saved in OpenDR format - * @param modelPath path to the OpenDR face recognition model (as exported using OpenDR library) + * @param model_path path to the OpenDR face recongition model (as exported using OpenDR library) * @param model the loaded model */ -void load_face_recognition_model(const char *modelPath, face_recognition_model_t *model); +void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); /** * This function perform inference using a face recognition model and an input image @@ -74,28 +74,28 @@ opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, /** * Builds a face recognition database (containing images for persons to be recognized). This function expects the * database_folder to have the same format as the main Python toolkit. - * @param databaseFolder folder containing the database - * @param outputPath output path to store the binary database. This file should be loaded along with the face + * @param database_folder folder containing the database + * @param output_path output path to store the binary database. 
This file should be loaded along with the face * recognition model before performing inference. * @param model the face recognition model to be used for extracting the database features */ -void build_database_face_recognition(const char *databaseFolder, const char *outputPath, face_recognition_model_t *model); +void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model); /** * Loads an already built database into the face recognition model. After this step, the model can be used for * performing inference. - * @param databasePath path to the database file + * @param database_path path to the database file * @param model the face recognition model to be used for inference */ -void load_database_face_recognition(const char *databasePath, face_recognition_model_t *model); +void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); /** * Returns the name of a recognition person by decoding the category id into a string * @param model the face recognition model to be used for inference * @param category the predicted category - * @param personName buffer to store the person name + * @param person_name buffer to store the person name */ -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *personName); +void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); /** * Releases the memory allocated for a face recognition model diff --git a/include/lightweight_open_pose.h b/include/lightweight_open_pose.h deleted file mode 100644 index 6888c4d6d6..0000000000 --- a/include/lightweight_open_pose.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef C_API_LIGHTWEIGHT_OPEN_POSE_H -#define C_API_LIGHTWEIGHT_OPEN_POSE_H - -#include "opendr_utils.h" -#include "target.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct open_pose_model { - // ONNX session objects - void *onnx_session; - void *env; - void *session_options; - - // Sizes for resizing an input image - int model_size; - - // Statistics for normalization - float mean_value; - float img_scale; - - // Model output parameters - int num_refinement_stages; - int output_size; - int stride; - - int even_channel_output; - int odd_channel_output; - int batch_size; - int width_output; - int height_output; -}; -typedef struct open_pose_model open_pose_model_t; - -/** - * Loads a lightweight open pose model saved in OpenDR format - * @param modelPath path to the OpenDR open_pose model (as exported using OpenDR library) - * @param model the loaded model - */ -void load_open_pose_model(const char *modelPath, open_pose_model_t *model); - -/** - * This function perform feed forward of open pose model - * @param model open pose model to be used for inference - * @param inputTensorValues OpenDR tensor structure as input of the model - * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward - */ -void forward_open_pose(open_pose_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); - -/** - * Releases the memory allocated for a open pose model - * @param model model to be de-allocated - */ -void free_open_pose_model(open_pose_model_t *model); - -/** - * initialize 
a tensor with values from an opendr image for testing the forward pass of the model - * @param inputTensorValues opendr tensor to be loaded with random values - * @param model model to be used for hyper parameters - */ -void init_opendr_tensor_from_img_op(opendr_image_t *image, opendr_tensor_t *inputTensorValues, open_pose_model_t *model); - -/** - * initialize a tensor with random values for testing the forward pass of the model - * @param inputTensorValues opendr tensor to be loaded with random values - * @param model model to be used for hyper parameters - */ -void init_random_opendr_tensor_op(opendr_tensor_t *inputTensorValues, open_pose_model_t *model); -#ifdef __cplusplus -} -#endif - -#endif // C_API_LIGHTWEIGHT_OPEN_POSE_H diff --git a/include/object_detection_2d_nanodet_jit.h b/include/nanodet_c.h similarity index 70% rename from include/object_detection_2d_nanodet_jit.h rename to include/nanodet_c.h index e0e59954ce..41ba71bedc 100644 --- a/include/object_detection_2d_nanodet_jit.h +++ b/include/nanodet_c.h @@ -26,16 +26,16 @@ extern "C" { struct nanodet_model { // Jit cpp class holder - void *network; + void *net; // Device to be used char *device; // Recognition threshold - float score_threshold; + float scoreThreshold; // Model input size - int input_size[2]; + int inputSize[2]; // Keep ratio flag int keep_ratio; @@ -44,22 +44,22 @@ typedef struct nanodet_model nanodet_model_t; /** * Loads a nanodet object detection model saved in libtorch format - * @param modelPath path to the libtorch nanodet model (as exported using OpenDR library) + * @param model_path path to the libtorch nanodet model (as exported using OpenDR library) * @param device the device that will be used for the inference * @param height the height of model input * @param width the width of model input - * @param scoreThreshold a threshold for score to be inferred + * @param scoreThreshold a threshold for score to be infered * @param model the model to be loaded */ -void 
load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); +void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); /** * This function performs inference using a nanodet object detection model and an input image * @param model nanodet model to be used for inference * @param image OpenDR image - * @return OpenDR detection vector target containing the detections of the recognized objects + * @return vecter of OpenDR bounding box target containing the bounding boxes of the detected objects */ -opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); +opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model); /** * Releases the memory allocated for a nanodet object detection model @@ -69,11 +69,11 @@ void free_nanodet_model(nanodet_model_t *model); /** * draw the bounding boxes from detections in the given image - * @param image image that has been used for inference + * @param opendr_image image that has been used for inference * @param model nanodet model that has been used for inference - * @param detectionsVector output of the inference + * @param detections output of the inference */ -void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); +void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_detection_target_list_t *detections); #ifdef __cplusplus } diff --git a/include/object_detection_2d_detr.h b/include/object_detection_2d_detr.h deleted file mode 100644 index 8424fd1188..0000000000 --- a/include/object_detection_2d_detr.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef C_API_OBJECT_DETECTION_2D_DETR_H -#define C_API_OBJECT_DETECTION_2D_DETR_H - -#include "opendr_utils.h" -#include "target.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct detr_model { - // ONNX session objects - void *onnx_session; - void *env; - void *session_options; - - // Sizes for resizing an input image - int model_size; - - // Statistics for normalization - float mean_value[3]; - float std_value[3]; - - // Recognition threshold - float threshold; - - // Feature dimension - int features; - int output_sizes[2]; -}; -typedef struct detr_model detr_model_t; - -/** - * Loads a detr object detection model saved in OpenDR format - * @param modelPath path to the OpenDR detr model (as exported using OpenDR library) - * @param model the loaded model - */ -void load_detr_model(const char *modelPath, detr_model_t *model); - -/** - * This function perform feed forward of detr object detection model - * @param model detr object detection model to be used for inference - * @param inputTensorValues input of the model as OpenCV mat - * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward - */ -void forward_detr(detr_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); - -/** - * Releases the memory allocated for a detr object detection model - * @param model model to be de-allocated - */ -void free_detr_model(detr_model_t *model); - -/** - * initialize a tensor with random values for testing the forward pass of the model - * @param inputTensorValues opendr 
tensor to be loaded with random values - * @param model model to be used for hyper parameters - */ -void init_random_opendr_tensor_detr(opendr_tensor_t *inputTensorValues, detr_model_t *model); - -#ifdef __cplusplus -} -#endif - -#endif // C_API_OBJECT_DETECTION_2D_DETR_H diff --git a/include/object_tracking_2d_deep_sort.h b/include/object_tracking_2d_deep_sort.h deleted file mode 100644 index 92783daad4..0000000000 --- a/include/object_tracking_2d_deep_sort.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef C_API_DEEP_SORT_OBJECT_TRACKING_2D_H -#define C_API_DEEP_SORT_OBJECT_TRACKING_2D_H - -#include "opendr_utils.h" -#include "target.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct deep_sort_model { - // ONNX session objects - void *onnx_session; - void *env; - void *session_options; - - // Sizes for resizing an input image - int model_size[2]; - int in_channels; - int batch_size; - - // Statistics for normalization - float mean_value[3]; - float std_value[3]; - - // Feature dimension - int features; -}; -typedef struct deep_sort_model deep_sort_model_t; - -/** - * Loads a deep_sort object tracking 2d model saved in OpenDR format - * @param modelPath path to the OpenDR deep_sort model (as exported using OpenDR library) - * @param model the loaded model - */ -void load_deep_sort_model(const char *modelPath, deep_sort_model_t *model); - -/** - * This function perform feed forward of deep_sort object tracking 2d model - * @param model deep_sort object detection model to be used for inference - * @param inputTensorValues input of the model as OpenCV mat - * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward - */ -void forward_deep_sort(deep_sort_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); - -/** - * Releases the memory allocated for a deep_sort object tracking 2d model - * @param model model to be de-allocated - */ -void free_deep_sort_model(deep_sort_model_t *model); - -/** - * initialize a tensor with random values for testing the forward pass of the model - * @param inputTensorValues opendr tensor to be loaded with random values - * @param model model to be used for hyper parameters - */ -void init_random_opendr_tensor_ds(opendr_tensor_t *inputTensorValues, deep_sort_model_t *model); - -#ifdef __cplusplus -} -#endif - -#endif // C_API_DEEP_SORT_OBJECT_TRACKING_2D_H diff --git a/include/object_tracking_2d_fair_mot.h b/include/object_tracking_2d_fair_mot.h 
deleted file mode 100644 index f3beb26efa..0000000000 --- a/include/object_tracking_2d_fair_mot.h +++ /dev/null @@ -1,72 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef C_API_FAIR_MOT_OBJECT_TRACKING_2D_H -#define C_API_FAIR_MOT_OBJECT_TRACKING_2D_H - -#include "opendr_utils.h" -#include "target.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct fair_mot_model { - // ONNX session objects - void *onnx_session; - void *env; - void *session_options; - - // Sizes for resizing an input image - int model_size[2]; - int in_channels; - int batch_size; - - // Statistics for normalization - float mean_value[3]; - float std_value[3]; - - // Feature dimension - int features; -}; -typedef struct fair_mot_model fair_mot_model_t; - -/** - * Loads a fair_mot object tracking 2d model saved in OpenDR format - * @param model_path path to the OpenDR fair_mot model (as exported using OpenDR library) - * @param model the loaded model - */ -void load_fair_mot_model(const char *model_path, fair_mot_model_t *model); - -/** - * This function perform feed forward of fair_mot object tracking 2d model - * @param model fair_mot object detection model to be used for inference - * @param input_tensor_values input of the model as OpenCV mat - * @param tensor_vector OpenDR tensor vector structure to save the output of the feed forward - */ -void forward_fair_mot(fair_mot_model_t *model, opendr_tensor_t 
*input_tensor_values, opendr_tensor_vector_t *tensor_vector); - -/** - * Releases the memory allocated for a fair_mot object tracking 2d model - * @param model model to be de-allocated - */ -void free_fair_mot_model(fair_mot_model_t *model); - -#ifdef __cplusplus -} -#endif - -#endif // C_API_FAIR_MOT_OBJECT_TRACKING_2D_H diff --git a/include/opendr_utils.h b/include/opendr_utils.h index 3706283c6c..d15f13312b 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -38,78 +38,24 @@ void load_image(const char *path, opendr_image_t *image); void free_image(opendr_image_t *image); /** - * Initialize an empty Opendr detection vector target to be used in C API - * @param detection_vector OpenDR detection_target_list structure to be initialized + * Initialize an empty detection list to be used in C API + * @param detections OpenDR detection_target_list structure to be initialized */ -void initialize_detections_vector(opendr_detection_vector_target_t *detection_vector); +void initialize_detections(opendr_detection_target_list_t *detections); /** * Loads an OpenDR detection target list to be used in C API - * @param detection_vector OpenDR detection_target_list structure to be loaded - * @param detection the pointer of the first OpenDR detection target in a vector - * @param vector_size the size of the vector + * @param detections OpenDR detection_target_list structure to be loaded + * @param vectorDataPtr the pointer of the first OpenDR detection target in a vector + * @param vectorSize the size of the vector */ -void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, - int vector_size); +void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize); /** * Releases the memory allocated for a detection list structure - * @param detection_vector OpenDR detection vector target structure to release + * @param detections OpenDR 
detection_target_list structure to release */ -void free_detections_vector(opendr_detection_vector_target_t *detection_vector); - -/** - * Initialize an empty OpenDR tensor to be used in C API - * @param tensor OpenDR tensor to initialize - */ -void initialize_tensor(opendr_tensor_t *opendr_tensor); - -/** - * Load a tensor values to be used in C. - * @param tensor Opendr tensor structure to be loaded - * @param tensor_data vector of cv Mat that holds tensors data to be used - * @param batch_size batch size for each OpenDR mat in an array of integers - * @param frames frames size for each OpenDR mat in an array of integers - * @param channels channels size for each OpenDR mat in an array of integers - * @param width width size for each OpenDR mat in an array of integers - * @param height height size for each OpenDR mat in an array of integers - */ -void load_tensor(opendr_tensor_t *opendr_tensor, void *tensor_data, int batch_size, int frames, int channels, int width, - int height); - -/** - * Releases the memory allocated for an OpenDR tensor structure - * @param opendr_tensor OpenDR tensor structure to release - */ -void free_tensor(opendr_tensor_t *opendr_tensor); - -/** - * Initialize an empty OpenDR tensor vector to be used in C API - * @param tensor_vector - */ -void initialize_tensor_vector(opendr_tensor_vector_t *tensor_vector); - -/** - * Load a vector of tensors values to be used in C. 
- * @param tensor_vector OpenDR vector of tensors structure to be loaded - * @param tensor data in vector of OpenDR tensors structure - * @param number_of_tensors the number of tensors that we want to load into the structure - */ -void load_tensor_vector(opendr_tensor_vector_t *tensor_vector, opendr_tensor_t *tensor, int number_of_tensors); - -/** - * Releases the memory allocated for an OpenDR tensor vector structure - * @param tensor_vector OpenDR tensor vector structure to release - */ -void free_tensor_vector(opendr_tensor_vector_t *tensor_vector); - -/** - * Helper function to store a tensor from OpenDR tensor vector structure into an OpenCV Mat. - * @param tensor OpenDR tensor to store the tensor - * @param tensor_vector OpenDR tensor vector from which tensor is wanted - * @param index the tensor is wanted from Opendr tensor vector - */ -void iter_tensor_vector(opendr_tensor_t *output, opendr_tensor_vector_t *tensor_vector, int index); +void free_detections(opendr_detection_target_list_t *detections); #ifdef __cplusplus } diff --git a/include/skeleton_based_action_recognition_pst.h b/include/skeleton_based_action_recognition_pst.h deleted file mode 100644 index 0d8257c3e5..0000000000 --- a/include/skeleton_based_action_recognition_pst.h +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#ifndef C_API_PST_SKELETON_BASED_ACTION_RECOGNITION_H -#define C_API_PST_SKELETON_BASED_ACTION_RECOGNITION_H - -#include "opendr_utils.h" -#include "target.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct pst_model { - // ONNX session objects - void *onnx_session; - void *env; - void *session_options; - - // Sizes for input and output tensor - int batch_size; - int in_channels; - int features; - int num_point; - int num_person; - int num_classes; -}; -typedef struct pst_model pst_model_t; - -/** - * Loads a pst skeleton based action recognition model saved in OpenDR format - * @param modelPath path to the OpenDR pst model (as exported using OpenDR library) - * @param model the loaded model - */ -void load_pst_model(const char *modelPath, pst_model_t *model); - -/** - * This function perform feed forward of pst skeleton based action recognition model - * @param model pst object detection model to be used for inference - * @param inputTensorValues input of the model as OpenCV mat - * @param tensorVector OpenDR tensor vector structure to save the output of the feed forward - */ -void forward_pst(pst_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector); - -/** - * Releases the memory allocated for a pst skeleton based action recognition model - * @param model model to be de-allocated - */ -void free_pst_model(pst_model_t *model); - -/** - * initialize a tensor with random values for testing the forward pass of the model - * @param inputTensorValues opendr tensor to be loaded with random values - * @param model model to be used for hyper parameters - */ -void init_random_opendr_tensor_pst(opendr_tensor_t *inputTensorValues, pst_model_t *model); - -#ifdef __cplusplus -} -#endif - -#endif // C_API_PST_SKELETON_BASED_ACTION_RECOGNITION_H diff --git a/include/target.h b/include/target.h index 894bc03d2f..1baf6b55f5 100644 --- a/include/target.h +++ b/include/target.h @@ -45,11 +45,11 @@ typedef struct 
opendr_detection_target opendr_detection_target_t; /*** * OpenDR data type for representing a structure of detection targets */ -struct opendr_detection_vector_target { +struct opendr_detection_target_list { opendr_detection_target_t *starting_pointer; int size; }; -typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; +typedef struct opendr_detection_target_list opendr_detection_target_list_t; #ifdef __cplusplus } diff --git a/projects/c_api/Makefile b/projects/c_api/Makefile index 34231684bd..b8fb363fd3 100644 --- a/projects/c_api/Makefile +++ b/projects/c_api/Makefile @@ -33,15 +33,9 @@ OPENDR_LD = -L$(OPENDR_HOME)/lib -lopendr all: download demos -demos: x3d face_recognition open_pose detr nanodet deep_sort skeleton_based_action_recognition_pst - -x3d: $(BUILD_DIR)/activity_recognition/x3d_demo -face_recognition: $(BUILD_DIR)/face_recognition_demo -open_pose: $(BUILD_DIR)/lightweight_open_pose_demo -detr: $(BUILD_DIR)/object_detection_2d/detr_demo -nanodet: $(BUILD_DIR)/object_detection_2d/nanodet_jit_demo -deep_sort: $(BUILD_DIR)/object_tracking_2d/deep_sort_demo -skeleton_based_action_recognition_pst: $(BUILD_DIR)/skeleton_based_action_recognition/pst_demo +demos: $(BUILD_DIR)/face_recognition_demo $(BUILD_DIR)/nanodet_demo +face: $(BUILD_DIR)/face_recognition_demo +nano: $(BUILD_DIR)/nanodet_demo download: @+if [ -a $(DATA_DIR) ] ; \ @@ -50,71 +44,32 @@ download: else \ echo Downloading resources for C API...; \ $(RM) -r opendrdata.csd.auth.gr; \ - $(MKDIR_P) $(DATA_DIR)/activity_recognition/x3d; \ - $(WGET) ftp://opendrdata.csd.auth.gr/perception/activity_recognition/weights/optimized_model/*; \ - $(MV) opendrdata.csd.auth.gr/perception/activity_recognition/weights/optimized_model $(DATA_DIR)/activity_recognition/x3d/optimized_model; \ $(MKDIR_P) $(DATA_DIR); \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/test_data/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/test_data/images 
$(DATA_DIR)/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/optimized_model/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/optimized_model $(DATA_DIR)/optimized_model; \ - $(MKDIR_P) $(DATA_DIR)/lightweight_open_pose; \ - $(WGET) ftp://opendrdata.csd.auth.gr/perception/pose_estimation/lightweight_open_pose/optimized_model/*; \ - $(MV) opendrdata.csd.auth.gr/perception/pose_estimation/lightweight_open_pose/optimized_model $(DATA_DIR)/lightweight_open_pose/optimized_model; \ - $(MKDIR_P) $(DATA_DIR)/object_detection_2d/detr; \ - $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/detr/optimized_model/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/detr/optimized_model $(DATA_DIR)/object_detection_2d/detr/optimized_model; \ - $(MKDIR_P) $(DATA_DIR)/object_detection_2d/nanodet; \ + $(MKDIR_P) $(DATA_DIR)/nanodet; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/object_detection_2d/nanodet/database; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/nanodet/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/object_detection_2d/nanodet/optimized_model; \ - $(MKDIR_P) $(DATA_DIR)/object_tracking_2d/deep_sort; \ - $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_tracking_2d/deep_sort/optimized_model/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_tracking_2d/deep_sort/optimized_model $(DATA_DIR)/object_tracking_2d/deep_sort/optimized_model; \ - $(MKDIR_P) $(DATA_DIR)/skeleton_based_action_recognition/progressive_spatiotemporal_gcn; \ - $(WGET) ftp://opendrdata.csd.auth.gr/perception/skeleton_based_action_recognition/pretrained_models/optimized_data/*; \ - $(MV) 
opendrdata.csd.auth.gr/perception/skeleton_based_action_recognition/pretrained_models/optimized_data $(DATA_DIR)/skeleton_based_action_recognition/progressive_spatiotemporal_gcn/optimized_model; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/nanodet/optimized_model; \ $(RM) -r opendrdata.csd.auth.gr; \ fi; -$(BUILD_DIR)/activity_recognition/x3d_demo: - $(MKDIR_P) $(BUILD_DIR)/activity_recognition - @+echo "Building activity recognition x3d demo..." - $(CC) $(CFLAGS) -o $(BUILD_DIR)/activity_recognition/x3d_demo samples/activity_recognition/x3d/x3d_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) - $(BUILD_DIR)/face_recognition_demo: $(MKDIR_P) $(BUILD_DIR) @+echo "Building face recognition demo..." $(CC) $(CFLAGS) -o $(BUILD_DIR)/face_recognition_demo samples/face_recognition/face_recognition_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) -$(BUILD_DIR)/lightweight_open_pose_demo: +$(BUILD_DIR)/nanodet_demo: $(MKDIR_P) $(BUILD_DIR) - @+echo "Building lightweight open pose demo..." - $(CC) $(CFLAGS) -o $(BUILD_DIR)/lightweigth_open_pose_demo samples/lightweight_open_pose/lightweight_open_pose_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) - -$(BUILD_DIR)/object_detection_2d/detr_demo: - $(MKDIR_P) $(BUILD_DIR)/object_detection_2d - @+echo "Building detr object detection demo..." - $(CC) $(CFLAGS) -o $(BUILD_DIR)/object_detection_2d/detr_demo samples/object_detection_2d/detr/detr_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) - -$(BUILD_DIR)/object_detection_2d/nanodet_jit_demo: - $(MKDIR_P) $(BUILD_DIR)/object_detection_2d @+echo "Building nanodet object detection demo..." - $(CC) $(CFLAGS) -o $(BUILD_DIR)/object_detection_2d/nanodet_jit_demo samples/object_detection_2d/nanodet/nanodet_jit_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) - -$(BUILD_DIR)/object_tracking_2d/deep_sort_demo: - $(MKDIR_P) $(BUILD_DIR)/object_tracking_2d - @+echo "Building object tracking 2d deep sort demo..." 
- $(CC) $(CFLAGS) -o $(BUILD_DIR)/object_tracking_2d/deep_sort_demo samples/object_tracking_2d/deep_sort/deep_sort_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + $(CC) $(CFLAGS) -o $(BUILD_DIR)/nanodet_libtorch_demo samples/nanodet/nanodet_jit_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) -$(BUILD_DIR)/skeleton_based_action_recognition/pst_demo: - $(MKDIR_P) $(BUILD_DIR)/skeleton_based_action_recognition - @+echo "Building skeleton based action recognition progressive spatiotemporal demo..." - $(CC) $(CFLAGS) -o $(BUILD_DIR)/skeleton_based_action_recognition/pst_demo samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) clean: @+echo "Cleaning C API demo binaries and temporary files..." - @+$(RM) -rf $(BUILD_DIR)/* + @+$(RM) $(BUILD_DIR)/* @+$(RM) -rf $(DATA_DIR) @+echo "Done!" diff --git a/projects/c_api/README.md b/projects/c_api/README.md index 6031f157ce..ebaccc45ea 100644 --- a/projects/c_api/README.md +++ b/projects/c_api/README.md @@ -9,11 +9,6 @@ Make sure that you have downloaded the necessary resources before running the de ## Supported tools Currently, the following tools are exposing a C API: -1. Activity recognition x3d model -2. Face recognition -3. Lightweight open pose -4. Object detection 2d Detr -5. Object detection 2d Nanodet Jit module -6. Object tracking 2d Deep sort -7. Skeleton based action recognition with Progressive spatiotemporal gcn +1. Face recognition +2. Nanodet Jit module diff --git a/projects/c_api/samples/activity_recognition/x3d/x3d_demo.c b/projects/c_api/samples/activity_recognition/x3d/x3d_demo.c deleted file mode 100644 index f38fb837ff..0000000000 --- a/projects/c_api/samples/activity_recognition/x3d/x3d_demo.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "activity_recognition_x3d.h" -#include "opendr_utils.h" - -int main(int argc, char **argv) { - x3d_model_t model; - char *mode = "l"; - - printf("start init model\n"); - load_x3d_model("data/activity_recognition/x3d/optimized_model/x3d_l.onnx", mode, &model); - printf("success\n"); - - // Initialize opendr tensor for input - opendr_tensor_t input_tensor; - init_random_opendr_tensor_x3d(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_x3d(&model, &input_tensor, &output_tensor_vector); - - // Free the memory - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - free_x3d_model(&model); - - return 0; -} diff --git a/projects/c_api/samples/lightweight_open_pose/lightweight_open_pose_demo.c b/projects/c_api/samples/lightweight_open_pose/lightweight_open_pose_demo.c deleted file mode 100644 index a684309c17..0000000000 --- a/projects/c_api/samples/lightweight_open_pose/lightweight_open_pose_demo.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "lightweight_open_pose.h" -#include "opendr_utils.h" - -int main(int argc, char **argv) { - open_pose_model_t model; - - printf("start init model\n"); - load_open_pose_model("data/lightweight_open_pose/optimized_model/onnx_model.onnx", &model); - printf("success\n"); - - // Initialize opendr tensor for input - opendr_tensor_t input_tensor; - init_random_opendr_tensor_op(&input_tensor, &model); - - /* If user wants to import an Opendr Image, first must load the image and then use this function */ - // init_opendr_tensor_from_img_op(&image, &input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_open_pose(&model, &input_tensor, &output_tensor_vector); - - // Free the memory - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - free_open_pose_model(&model); - - return 0; -} diff --git a/projects/c_api/samples/object_detection_2d/nanodet/README.md b/projects/c_api/samples/nanodet/README.md similarity index 77% rename from projects/c_api/samples/object_detection_2d/nanodet/README.md rename to projects/c_api/samples/nanodet/README.md index 31d3e90496..8114150843 100644 --- a/projects/c_api/samples/object_detection_2d/nanodet/README.md +++ b/projects/c_api/samples/nanodet/README.md @@ -11,5 +11,5 @@ After installation, the demo can be run from projects/c_api directory with: Or with the downloaded model and image with: ```sh -./built/nanodet_libtorch_demo ./data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth cuda 
./data/object_detection_2d/nanodet/database/000000000036.jpg 320 320 +./built/nanodet_libtorch_demo ./data/nanodet/optimized_model/nanodet_m.pth cuda ./data/nanodet/database/000000000036.jpg 320 320 ``` diff --git a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/nanodet/nanodet_jit_demo.c similarity index 81% rename from projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c rename to projects/c_api/samples/nanodet/nanodet_jit_demo.c index 4d6b4ea717..aed0ac5d48 100644 --- a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/nanodet/nanodet_jit_demo.c @@ -16,15 +16,15 @@ #include #include -#include "object_detection_2d_nanodet_jit.h" +#include "nanodet_c.h" #include "opendr_utils.h" int main(int argc, char **argv) { if (argc != 6) { fprintf(stderr, "usage: %s [model_path] [device] [images_path] [input_sizes].\n" - "model_path = path/to/your/libtorch/model.pth \ndevice = cuda or cpu \n" - "images_path = \"xxx/xxx/*.jpg\" \ninput_size = width height.\n", + "model_path = path/to/your/libtorch/model.pth \n device = cuda or cpu \n" + "images_path = \"xxx/xxx/*.jpg\" \n input_size = width height.\n", argv[0]); return -1; } @@ -48,16 +48,16 @@ int main(int argc, char **argv) { } // Initialize opendr detection target list; - opendr_detection_vector_target_t results; + opendr_detection_target_list_t results; // Infer nanodet model - results = infer_nanodet(&model, &image); + results = infer_nanodet(&image, &model); // Draw the results drawBboxes(&image, &model, &results); // Free the memory - free_detections_vector(&results); + free_detections(&results); free_image(&image); free_nanodet_model(&model); diff --git a/projects/c_api/samples/object_detection_2d/detr/detr_demo.c b/projects/c_api/samples/object_detection_2d/detr/detr_demo.c deleted file mode 100644 index a1478093d8..0000000000 --- a/projects/c_api/samples/object_detection_2d/detr/detr_demo.c +++ /dev/null 
@@ -1,43 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "object_detection_2d_detr.h" -#include "opendr_utils.h" - -int main(int argc, char **argv) { - detr_model_t model; - - printf("start init model\n"); - load_detr_model("data/object_detection_2d/detr/optimized_model", &model); - printf("success\n"); - - // Initialize opendr tensor for input - opendr_tensor_t input_tensor; - init_random_opendr_tensor_detr(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_detr(&model, &input_tensor, &output_tensor_vector); - - // Free the memory - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - free_detr_model(&model); - - return 0; -} diff --git a/projects/c_api/samples/object_tracking_2d/deep_sort/deep_sort_demo.c b/projects/c_api/samples/object_tracking_2d/deep_sort/deep_sort_demo.c deleted file mode 100644 index 55441a934e..0000000000 --- a/projects/c_api/samples/object_tracking_2d/deep_sort/deep_sort_demo.c +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "object_tracking_2d_deep_sort.h" -#include "opendr_utils.h" - -int main(int argc, char **argv) { - deep_sort_model_t model; - - printf("start init model\n"); - load_deep_sort_model("data/object_tracking_2d/deep_sort/optimized_model/onnx_model.onnx", &model); - printf("success\n"); - - // Initialize opendr tensor for input - opendr_tensor_t input_tensor; - init_random_opendr_tensor_ds(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_deep_sort(&model, &input_tensor, &output_tensor_vector); - - // Free the memory - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - free_deep_sort_model(&model); - - return 0; -} diff --git a/projects/c_api/samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c b/projects/c_api/samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c deleted file mode 100644 index bc82a5a2ed..0000000000 --- a/projects/c_api/samples/skeleton_based_action_recognition/progressive_spatio_temporal_gcn_demo.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include "opendr_utils.h" -#include "skeleton_based_action_recognition_pst.h" - -int main(int argc, char **argv) { - pst_model_t model; - - printf("start init model\n"); - load_pst_model("data/skeleton_based_action_recognition/progressive_spatiotemporal_gcn/optimized_model/onnx_model.onnx", - &model); - printf("success\n"); - - // Initialize opendr tensor for input - opendr_tensor_t input_tensor; - init_random_opendr_tensor_pst(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_pst(&model, &input_tensor, &output_tensor_vector); - - // Free the memory - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - free_pst_model(&model); - - return 0; -} diff --git a/src/c_api/Makefile b/src/c_api/Makefile index 755f61a7e4..ed9bbeb80c 100644 --- a/src/c_api/Makefile +++ b/src/c_api/Makefile @@ -42,16 +42,10 @@ $(OPENDR_HOME)/lib/libopendr.so: @+echo "Building C API..." 
@+$(MKDIR_P) $(BUILD_DIR) $(CPP) $(CFLAGS) -c opendr_utils.cpp -o $(BUILD_DIR)/opendr_utils.o $(INC) $(OPENDR_INC) - $(CPP) $(CFLAGS) -c activity_recognition_x3d.cpp -o $(BUILD_DIR)/opendr_x3d.o $(INC) $(OPENDR_INC) $(CPP) $(CFLAGS) -c face_recognition.cpp -o $(BUILD_DIR)/opendr_face_recognition.o $(INC) $(OPENDR_INC) - $(CPP) $(CFLAGS) -c lightweight_open_pose.cpp -o $(BUILD_DIR)/opendr_open_pose.o $(INC) $(OPENDR_INC) - $(CPP) $(CFLAGS) -c object_detection_2d_detr.cpp -o $(BUILD_DIR)/opendr_detr.o $(INC) $(OPENDR_INC) - $(CPP) $(CFLAGS) -c object_detection_2d_nanodet_jit.cpp -o $(BUILD_DIR)/opendr_nanodet_jit.o $(INC) $(OPENDR_INC) $(TORCHSCRIPT_INC) - $(CPP) $(CFLAGS) -c object_tracking_2d_deep_sort.cpp -o $(BUILD_DIR)/opendr_deep_sort.o $(INC) $(OPENDR_INC) - $(CPP) $(CFLAGS) -c skeleton_based_action_recognition_pst.cpp -o $(BUILD_DIR)/opendr_pst.o $(INC) $(OPENDR_INC) - + $(CPP) $(CFLAGS) -c nanodet_libtorch.cpp -o $(BUILD_DIR)/opendr_nanodet_libtorch.o $(INC) $(OPENDR_INC) $(TORCHSCRIPT_INC) @$(MKDIR_P) $(LIB_DIR) - $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(BUILD_DIR)/opendr_detr.o $(BUILD_DIR)/opendr_open_pose.o $(BUILD_DIR)/opendr_x3d.o $(BUILD_DIR)/opendr_pst.o $(BUILD_DIR)/opendr_deep_sort.o $(BUILD_DIR)/opendr_nanodet_jit.o $(LD) $(TORCHSCRIPT_LD) --shared + $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(BUILD_DIR)/opendr_nanodet_libtorch.o $(LD) $(TORCHSCRIPT_LD) --shared clean: @+echo "Cleaning C API binaries and temporary files..." 
@+$(RM) $(BUILD_DIR)/* diff --git a/src/c_api/activity_recognition_x3d.cpp b/src/c_api/activity_recognition_x3d.cpp deleted file mode 100644 index fee11831cb..0000000000 --- a/src/c_api/activity_recognition_x3d.cpp +++ /dev/null @@ -1,209 +0,0 @@ -// Copyright 2020-2022 OpenDR European Project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "activity_recognition_x3d.h" -#include "target.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include "opencv2/core/core_c.h" - -/** - * Helper function for preprocessing images before feeding them into the lightweight open pose estimator model. - * This function follows the OpenDR's lightweight open pose pre-processing pipeline, which includes the following: - * a) resizing the image into modelInputSize x modelInputSize pixels relative to the original ratio, - * b) normalizing the resulting values using meanValue and c) padding image into a standard size. 
- * @param image image to be preprocesses - * @param normalizedImage pre-processed data in a matrix - * @param modelInputSize size of the center crop (equals the size that the DL model expects) - * @param meanValue value used for centering the input image - * @param imageScale value used for scaling the input image - */ -void preprocess_x3d(cv::Mat *image, cv::Mat *normalizedImage, int modelInputSize, float meanValue, float imageScale) { - // Convert to RGB - cv::Mat imageRgb; - cv::cvtColor(*image, imageRgb, cv::COLOR_BGR2RGB); - - // Resize with ratio - double scale = (static_cast(modelInputSize) / static_cast(imageRgb.rows)); - cv::resize(imageRgb, imageRgb, cv::Size(), scale, scale); - - // Convert to 32f and normalize - imageRgb.convertTo(*normalizedImage, CV_32FC3, imageScale, meanValue); -} - -void load_x3d_model(const char *modelPath, char *mode, x3d_model_t *model) { - // Initialize model - model->onnx_session = model->env = model->session_options = NULL; - - Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; - sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, modelPath, *sessionOptions); - model->env = env; - model->onnx_session = session; - model->session_options = sessionOptions; - - // Should we pass these parameters through the model json file? 
- model->mean_value = -128.0f / 255.0f; - model->img_scale = (1.0f / 255.0f); - - // std::string model_name = "l"; - std::string modeName = mode; - if (modeName == "l") { - model->model_size = 312; - model->frames_per_clip = 16; - } else if (modeName == "m") { - model->model_size = 224; - model->frames_per_clip = 16; - } else if (modeName == "s") { - model->model_size = 160; - model->frames_per_clip = 13; - } else { - model->model_size = 160; - model->frames_per_clip = 4; - } - - model->batch_size = 1; - model->in_channels = 3; - - model->features = 400; -} - -void free_x3d_model(x3d_model_t *model) { - if (model->onnx_session) { - Ort::Session *session = static_cast(model->onnx_session); - delete session; - } - - if (model->session_options) { - Ort::SessionOptions *sessionOptions = static_cast(model->session_options); - delete sessionOptions; - } - - if (model->env) { - Ort::Env *env = static_cast(model->env); - delete env; - } -} - -void ff_x3d(x3d_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { - Ort::Session *session = static_cast(model->onnx_session); - - if (!session) { - std::cerr << "ONNX session not initialized." 
<< std::endl; - return; - } - - // Prepare the input dimensions - // Dims of input data - size_t inputTensorSize = - model->batch_size * model->in_channels * model->frames_per_clip * model->model_size * model->model_size; - - // Dims of input of model - std::vector inputNodeDims = {model->batch_size, model->in_channels, model->frames_per_clip, model->model_size, - model->model_size}; - - // Setup input/output names - Ort::AllocatorWithDefaultOptions allocator; - std::vector inputNodeNames = {"video"}; - std::vector outputNodeNames = {"classes"}; - - // Set up the input tensor - auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value inputTensor = - Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 5); - assert(inputTensor.IsTensor()); - - // Feed-forward the model - auto outputTensors = - session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); - assert(outputTensors.size() == 1); - - // Get the results back - float *tensorData = outputTensors.front().GetTensorMutableData(); - - int tensorSizes[5] = {1, 1, 1, model->batch_size, model->features}; - - cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensorData)); - outputTensorValues->push_back(outputMat); -} - -void init_random_opendr_tensor_x3d(opendr_tensor_t *inputTensorValues, x3d_model_t *model) { - // Prepare the input dimensions - // Dims of input data - int inputTensorSize = model->batch_size * model->frames_per_clip * model->in_channels * model->model_size * model->model_size; - - float *data = static_cast(malloc(inputTensorSize * sizeof(float))); - // change data structure so channels are the last iterable dimension - for (unsigned int j = 0; j < inputTensorSize; ++j) { - data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; - } - - // Dims of input of model - load_tensor(inputTensorValues, static_cast(data), model->batch_size, model->frames_per_clip, 
model->in_channels, - model->model_size, model->model_size); - free(data); -} - -void forward_x3d(x3d_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector) { - // Get the feature vector for the current image - std::vector outputTensorValues; - ff_x3d(model, inputTensorValues, &outputTensorValues); - - int nTensors = static_cast(outputTensorValues.size()); - if (nTensors > 0) { - int batchSizes[nTensors]; - int frames[nTensors]; - int channels[nTensors]; - int widths[nTensors]; - int heights[nTensors]; - - std::vector tempTensorsVector; - opendr_tensor_t tempTensors[nTensors]; - - for (int i = 0; i < nTensors; i++) { - batchSizes[i] = 1; - frames[i] = 1; - channels[i] = 1; - widths[i] = model->batch_size; - heights[i] = model->features; - - load_tensor(&(tempTensors[i]), outputTensorValues[i].ptr(0), batchSizes[i], frames[i], channels[i], widths[i], - heights[i]); - tempTensorsVector.push_back(tempTensors[i]); - } - load_tensor_vector(tensorVector, tempTensorsVector.data(), nTensors); - for (int i = 0; i < nTensors; i++) { - free_tensor(&(tempTensors[i])); - } - - } else { - initialize_tensor_vector(tensorVector); - } -} diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index 9226e8e394..9ed33b2ba1 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -36,40 +36,40 @@ /** * Helper function for preprocessing images before feeding them into the face recognition model. 
* This function follows the OpenDR's face recognition pre-processing pipeline, which includes the following: - * a) resizing the image into resizeTarget x resizeTarget pixels and then taking a center crop of size modelInputSize, - * and b) normalizing the resulting values using meanValue and stdValue + * a) resizing the image into resize_target x resize_target pixels and then taking a center crop of size model_input_size, + * and b) normalizing the resulting values using mean_value and std_value * @param image image to be preprocesses * @param data pre-processed data in a flattened vector - * @param resizeTarget target size for resizing - * @param modelInputSize size of the center crop (equals the size that the DL model expects) - * @param meanValue value used for centering the input image - * @param stdValue value used for scaling the input image + * @param resize_target target size for resizing + * @param model_input_size size of the center crop (equals the size that the DL model expects) + * @param mean_value value used for centering the input image + * @param std_value value used for scaling the input image */ -void preprocess_face_recognition(cv::Mat *image, std::vector &data, int resizeTarget = 128, int modelInputSize = 112, - float meanValue = 0.5, float stdValue = 0.5) { +void preprocess_face_recognition(cv::Mat *image, std::vector &data, int resize_target = 128, int model_input_size = 112, + float mean_value = 0.5, float std_value = 0.5) { // Convert to RGB - cv::Mat normalizedImage; - cv::cvtColor(*image, normalizedImage, cv::COLOR_BGR2RGB); + cv::Mat img; + cv::cvtColor(*image, img, cv::COLOR_BGR2RGB); // Resize and then get a center crop - cv::resize(normalizedImage, normalizedImage, cv::Size(resizeTarget, resizeTarget)); - int stride = (resizeTarget - modelInputSize) / 2; - cv::Rect myRoi(stride, stride, resizeTarget - stride, resizeTarget - stride); - normalizedImage = normalizedImage(myRoi); + cv::resize(img, img, cv::Size(resize_target, 
resize_target)); + int stride = (resize_target - model_input_size) / 2; + cv::Rect myROI(stride, stride, resize_target - stride, resize_target - stride); + img = img(myROI); // Scale to 0...1 - cv::Mat outputImage; - normalizedImage.convertTo(outputImage, CV_32FC3, 1 / 255.0); + cv::Mat out_img; + img.convertTo(out_img, CV_32FC3, 1 / 255.0); // Unfold the image into the appropriate format // This is certainly not the most efficient way to do this... // ... and is probably constantly leading to cache misses // ... but it works for now. - for (unsigned int j = 0; j < modelInputSize; ++j) { - for (unsigned int k = 0; k < modelInputSize; ++k) { - cv::Vec3f currentPixel = outputImage.at(j, k); - data[0 * modelInputSize * modelInputSize + j * modelInputSize + k] = (currentPixel[0] - meanValue) / stdValue; - data[1 * modelInputSize * modelInputSize + j * modelInputSize + k] = (currentPixel[1] - meanValue) / stdValue; - data[2 * modelInputSize * modelInputSize + j * modelInputSize + k] = (currentPixel[2] - meanValue) / stdValue; + for (unsigned int j = 0; j < model_input_size; ++j) { + for (unsigned int k = 0; k < model_input_size; ++k) { + cv::Vec3f cur_pixel = out_img.at(j, k); + data[0 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[0] - mean_value) / std_value; + data[1 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[1] - mean_value) / std_value; + data[2 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[2] - mean_value) / std_value; } } } @@ -79,16 +79,16 @@ void preprocess_face_recognition(cv::Mat *image, std::vector &data, int r * In the future this can be done at library level using a JSON-parser */ std::string json_get_key_string(std::string json, const std::string &key) { - std::size_t startIdx = json.find(key); - std::string value = json.substr(startIdx); + std::size_t start_idx = json.find(key); + std::string value = json.substr(start_idx); value = 
value.substr(value.find(":") + 1); - value = value.substr(0, value.find(",")); + value.resize(value.find(",")); value = value.substr(value.find("\"") + 1); - value = value.substr(0, value.find("\"")); + value.resize(value.find("\"")); return value; } -void load_face_recognition_model(const char *modelPath, face_recognition_model_t *model) { +void load_face_recognition_model(const char *model_path, face_recognition_model_t *model) { // Initialize model model->onnx_session = model->env = model->session_options = NULL; model->database = model->database_ids = NULL; @@ -96,55 +96,55 @@ void load_face_recognition_model(const char *modelPath, face_recognition_model_t model->threshold = 1; // Parse the model JSON file - std::string modelJsonPath(modelPath); - std::size_t splitPosition = modelJsonPath.find_last_of("/"); - splitPosition = splitPosition > 0 ? splitPosition + 1 : 0; - modelJsonPath = modelJsonPath + "/" + modelJsonPath.substr(splitPosition) + ".json"; + std::string model_json_path(model_path); + std::size_t split_pos = model_json_path.find_last_of("/"); + split_pos = split_pos > 0 ? split_pos + 1 : 0; + model_json_path = model_json_path + "/" + model_json_path.substr(split_pos) + ".json"; - std::ifstream inStream(modelJsonPath); - if (!inStream.is_open()) { + std::ifstream in_stream(model_json_path); + if (!in_stream.is_open()) { std::cerr << "Cannot open JSON model file" << std::endl; return; } std::string str; - inStream.seekg(0, std::ios::end); - str.reserve(inStream.tellg()); - inStream.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(inStream)), std::istreambuf_iterator()); + in_stream.seekg(0, std::ios::end); + str.reserve(in_stream.tellg()); + in_stream.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(in_stream)), std::istreambuf_iterator()); - std::string basePath = modelJsonPath.substr(0, splitPosition); - splitPosition = basePath.find_last_of("/"); - splitPosition = splitPosition > 0 ? 
splitPosition + 1 : 0; - basePath = basePath.substr(0, splitPosition); + std::string basepath = model_json_path.substr(0, split_pos); + split_pos = basepath.find_last_of("/"); + split_pos = split_pos > 0 ? split_pos + 1 : 0; + basepath.resize(split_pos); // Parse JSON - std::string onnxModelPath = basePath + json_get_key_string(str, "model_paths"); - std::string modelFormat = json_get_key_string(str, "format"); + std::string onnx_model_path = basepath + json_get_key_string(str, "model_paths"); + std::string model_format = json_get_key_string(str, "format"); // Parse inference params std::string threshold = json_get_key_string(str, "threshold"); - + ; if (!threshold.empty()) { model->threshold = std::stof(threshold); } // Proceed only if the model is in onnx format - if (modelFormat != "onnx") { + if (model_format != "onnx") { std::cerr << "Model not in ONNX format." << std::endl; return; } Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; - sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::SessionOptions *session_options = new Ort::SessionOptions; + session_options->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, onnxModelPath.c_str(), *sessionOptions); + Ort::Session *session = new Ort::Session(*env, onnx_model_path.c_str(), *session_options); model->env = env; model->onnx_session = session; - model->session_options = sessionOptions; + model->session_options = session_options; // Should we pass these parameters through the model json file? 
model->model_size = 112; @@ -161,8 +161,8 @@ void free_face_recognition_model(face_recognition_model_t *model) { } if (model->session_options) { - Ort::SessionOptions *sessionOptions = static_cast(model->session_options); - delete sessionOptions; + Ort::SessionOptions *session_options = static_cast(model->session_options); + delete session_options; } if (model->env) { @@ -194,165 +194,164 @@ void ff_face_recognition(face_recognition_model_t *model, opendr_image_t *image, } // Prepare the input dimensions - std::vector inputNodeDims = {1, 3, model->model_size, model->model_size}; - size_t inputTensorSize = model->model_size * model->model_size * 3; + std::vector input_node_dims = {1, 3, model->model_size, model->model_size}; + size_t input_tensor_size = model->model_size * model->model_size * 3; // Get the input image and pre-process it - std::vector inputTensorValues(inputTensorSize); - cv::Mat *opencvImage = static_cast(image->data); - if (!opencvImage) { + std::vector input_tensor_values(input_tensor_size); + cv::Mat *opencv_image = static_cast(image->data); + if (!opencv_image) { std::cerr << "Cannot load image for inference." 
<< std::endl; return; } - preprocess_face_recognition(opencvImage, inputTensorValues, model->resize_size, model->model_size, model->mean_value, + preprocess_face_recognition(opencv_image, input_tensor_values, model->resize_size, model->model_size, model->mean_value, model->std_value); // Setup input/output names Ort::AllocatorWithDefaultOptions allocator; - std::vector inputNodeNames = {"data"}; - std::vector outputNodeNames = {"features"}; + std::vector input_node_names = {"data"}; + std::vector output_node_names = {"features"}; - // Set up the input tensor - auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value inputTensor = - Ort::Value::CreateTensor(memoryInfo, inputTensorValues.data(), inputTensorSize, inputNodeDims.data(), 4); - assert(inputTensor.IsTensor()); + // Setup the input tensor + auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value input_tensor = + Ort::Value::CreateTensor(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4); + assert(input_tensor.IsTensor()); // Feed-forward the model - auto outputTensors = - session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); - assert(outputTensors.size() == 1 && outputTensors.front().IsTensor()); + auto output_tensors = + session->Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1); + assert(output_tensors.size() == 1 && output_tensors.front().IsTensor()); // Get the results back - float *tensorData = outputTensors.front().GetTensorMutableData(); - cv::Mat currentFeatures(cv::Size(model->output_size, 1), CV_32F, tensorData); + float *floatarr = output_tensors.front().GetTensorMutableData(); + cv::Mat cur_features(cv::Size(model->output_size, 1), CV_32F, floatarr); // Perform l2 normalizaton - cv::Mat featuresSquare = currentFeatures.mul(currentFeatures); - float normalizationValue = 
sqrt(cv::sum(featuresSquare)[0]); - currentFeatures = currentFeatures / normalizationValue; - memcpy(features->data, currentFeatures.data, sizeof(float) * model->output_size); + cv::Mat features_square = cur_features.mul(cur_features); + float norm = sqrt(cv::sum(features_square)[0]); + cur_features = cur_features / norm; + memcpy(features->data, cur_features.data, sizeof(float) * model->output_size); } -void build_database_face_recognition(const char *databaseFolder, const char *outputPath, face_recognition_model_t *model) { +void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model) { using namespace boost::filesystem; - std::vector personNames; - std::vector databaseIds; + std::vector person_names; + std::vector database_ids; cv::Mat database(cv::Size(model->output_size, 0), CV_32F); - path rootPath(databaseFolder); - if (!exists(rootPath)) { + path root_path(database_folder); + if (!exists(root_path)) { std::cerr << "Database path does not exist." 
<< std::endl; return; } - int currentId = 0; - for (auto personPath = directory_iterator(rootPath); personPath != directory_iterator(); personPath++) { + int current_id = 0; + for (auto person_path = directory_iterator(root_path); person_path != directory_iterator(); person_path++) { // For each person in the database - if (is_directory(personPath->path())) { - path currentPersonPath(personPath->path()); - personNames.push_back(personPath->path().filename().string()); + if (is_directory(person_path->path())) { + path cur_person_path(person_path->path()); + person_names.push_back(person_path->path().filename().string()); - for (auto currentImagePath = directory_iterator(currentPersonPath); currentImagePath != directory_iterator(); - currentImagePath++) { + for (auto cur_img_path = directory_iterator(cur_person_path); cur_img_path != directory_iterator(); cur_img_path++) { opendr_image_t image; - load_image(currentImagePath->path().string().c_str(), &image); + load_image(cur_img_path->path().string().c_str(), &image); cv::Mat features(cv::Size(model->output_size, 1), CV_32F); ff_face_recognition(model, &image, &features); free_image(&image); database.push_back(features.clone()); - databaseIds.push_back(currentId); + database_ids.push_back(current_id); } - currentId++; + current_id++; } else { continue; } } - if (currentId == 0) { + if (current_id == 0) { std::cerr << "Cannot open database files." << std::endl; return; } // Make the array continuous - cv::Mat databaseOutput = database.clone(); + cv::Mat database_out = database.clone(); - std::ofstream fout(outputPath, std::ios::out | std::ios::binary); + std::ofstream fout(output_path, std::ios::out | std::ios::binary); if (!fout.is_open()) { std::cerr << "Cannot open database file for writting." 
<< std::endl; return; } // Write number of persons - int n = personNames.size(); + int n = person_names.size(); fout.write(reinterpret_cast(&n), sizeof(int)); for (int i = 0; i < n; i++) { // Write the name of the person (along with its size) - int nameLength = personNames[i].size() + 1; - fout.write(reinterpret_cast(&nameLength), sizeof(int)); - fout.write(personNames[i].c_str(), nameLength); + int name_length = person_names[i].size() + 1; + fout.write(reinterpret_cast(&name_length), sizeof(int)); + fout.write(person_names[i].c_str(), name_length); } - cv::Size s = databaseOutput.size(); + cv::Size s = database_out.size(); fout.write(reinterpret_cast(&s.height), sizeof(int)); fout.write(reinterpret_cast(&s.width), sizeof(int)); - fout.write(reinterpret_cast(databaseOutput.data), sizeof(float) * s.height * s.width); - fout.write(reinterpret_cast(&databaseIds[0]), sizeof(int) * s.height); + fout.write(reinterpret_cast(database_out.data), sizeof(float) * s.height * s.width); + fout.write(reinterpret_cast(&database_ids[0]), sizeof(int) * s.height); fout.flush(); fout.close(); } -void load_database_face_recognition(const char *databasePath, face_recognition_model_t *model) { +void load_database_face_recognition(const char *database_path, face_recognition_model_t *model) { model->database = NULL; model->database_ids = NULL; - std::ifstream fin(databasePath, std::ios::out | std::ios::binary); + std::ifstream fin(database_path, std::ios::out | std::ios::binary); if (!fin.is_open()) { std::cerr << "Cannot load database file (check that file exists and you have created the database)." 
<< std::endl; return; } - int nPerson; - fin.read(reinterpret_cast(&nPerson), sizeof(int)); - char **personNames = new char *[nPerson]; + int n; + fin.read(reinterpret_cast(&n), sizeof(int)); + char **person_names = new char *[n]; - for (int i = 0; i < nPerson; i++) { - personNames[i] = new char[512]; + for (int i = 0; i < n; i++) { + person_names[i] = new char[512]; // Read person name - int nameLength; - fin.read(reinterpret_cast(&nameLength), sizeof(int)); - if (nameLength > 512) { + int name_length; + fin.read(reinterpret_cast(&name_length), sizeof(int)); + if (name_length > 512) { std::cerr << "Person name exceeds max number of characters (512)" << std::endl; return; } - fin.read(personNames[i], nameLength); + fin.read(person_names[i], name_length); } int height, width; fin.read(reinterpret_cast(&height), sizeof(int)); fin.read(reinterpret_cast(&width), sizeof(int)); - float *databaseBuff = new float[height * width]; - int *featuresIds = new int[height]; - fin.read(reinterpret_cast(databaseBuff), sizeof(float) * height * width); - fin.read(reinterpret_cast(featuresIds), sizeof(int) * height); + float *database_buff = new float[height * width]; + int *features_ids = new int[height]; + fin.read(reinterpret_cast(database_buff), sizeof(float) * height * width); + fin.read(reinterpret_cast(features_ids), sizeof(int) * height); fin.close(); cv::Mat *database = new cv::Mat(cv::Size(width, height), CV_32F); - memcpy(database->data, databaseBuff, sizeof(float) * width * height); - delete[] databaseBuff; + memcpy(database->data, database_buff, sizeof(float) * width * height); + delete[] database_buff; model->database = database; - model->database_ids = featuresIds; - model->person_names = personNames; - model->n_persons = nPerson; + model->database_ids = features_ids; + model->person_names = person_names; + model->n_persons = n; model->n_features = height; } @@ -371,27 +370,27 @@ opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, } cv::Mat 
*database = static_cast(model->database); // Calculate the distance between the extracted feature vector and database features - cv::Mat featuresRepeated; - cv::repeat(features, model->n_features, 1, featuresRepeated); - cv::Mat differences = featuresRepeated - *database; - differences = differences.mul(differences); - cv::Mat squareRootDistances; - cv::reduce(differences, squareRootDistances, 1, CV_REDUCE_SUM, CV_32F); - cv::Mat distances; - cv::sqrt(squareRootDistances, distances); - - double minDistance, maxDistance; - cv::Point minLoc, maxLoc; - cv::minMaxLoc(distances, &minDistance, &maxDistance, &minLoc, &maxLoc); - - target.data = model->database_ids[minLoc.y]; - target.confidence = 1 - (minDistance / model->threshold); + cv::Mat features_repeated; + cv::repeat(features, model->n_features, 1, features_repeated); + cv::Mat diff = features_repeated - *database; + diff = diff.mul(diff); + cv::Mat sq_dists; + cv::reduce(diff, sq_dists, 1, CV_REDUCE_SUM, CV_32F); + cv::Mat dists; + cv::sqrt(sq_dists, dists); + + double min_dist, max_dist; + cv::Point min_loc, max_loc; + cv::minMaxLoc(dists, &min_dist, &max_dist, &min_loc, &max_loc); + + target.data = model->database_ids[min_loc.y]; + target.confidence = 1 - (min_dist / model->threshold); return target; } -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *personName) { +void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name) { if (category.data >= model->n_persons) return; - strcpy(personName, model->person_names[category.data]); + strcpy(person_name, model->person_names[category.data]); } diff --git a/src/c_api/lightweight_open_pose.cpp b/src/c_api/lightweight_open_pose.cpp deleted file mode 100644 index 343efa1f10..0000000000 --- a/src/c_api/lightweight_open_pose.cpp +++ /dev/null @@ -1,264 +0,0 @@ -// Copyright 2020-2022 OpenDR European Project -// -// Licensed under the Apache 
License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "lightweight_open_pose.h" -#include "target.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include "opencv2/core/core_c.h" - -/** - * Helper function for preprocessing images before feeding them into the lightweight open pose estimator model. - * This function follows the OpenDR's lightweight open pose pre-processing pipeline, which includes the following: - * a) resizing the image into modelInputSize x modelInputSize pixels relative to the original ratio, - * b) normalizing the resulting values using meanValue and c) padding image into a standard size. 
- * @param image image to be preprocesses - * @param preprocessedImage opencv Mat that pre-processed data will be saved - * @param modelInputSize size of the center crop (equals the size that the DL model expects) - * @param meanValue value used for centering the input image - * @param imgScale value used for scaling the input image - */ -void preprocess_open_pose(cv::Mat *image, cv::Mat *preprocessedImage, int modelInputSize, float meanValue, float imgScale) { - // Convert to RGB - cv::Mat resizedImage; - cv::cvtColor(*image, resizedImage, cv::COLOR_BGR2RGB); - cv::cvtColor(resizedImage, resizedImage, cv::COLOR_RGB2BGR); - - // Resize and then get a center crop - double scale = (static_cast(modelInputSize) / static_cast(resizedImage.rows)); - cv::resize(resizedImage, resizedImage, cv::Size(), scale, scale); - - // Convert to float32 and normalize - cv::Mat normalizedImage; - resizedImage.convertTo(normalizedImage, CV_32FC3, imgScale, meanValue); - - // Padding - int h = normalizedImage.rows; - int w = normalizedImage.cols; - - const float stride = 8.0f; - int maxWidth = std::max(modelInputSize, w); - cv::Size minDims = cv::Size(maxWidth, modelInputSize); - - h = std::min(h, minDims.height); - minDims.height = ceil((minDims.height / stride)) * stride; - - minDims.width = std::max(minDims.width, w); - minDims.width = ceil((minDims.width / stride)) * stride; - - int pad[4]; - pad[0] = static_cast((minDims.height - h) / 2); - pad[1] = static_cast((minDims.width - w) / 2); - pad[2] = minDims.height - h - pad[0]; - pad[3] = minDims.width - w - pad[1]; - - cv::Scalar padValue(0, 0, 0); - cv::copyMakeBorder(normalizedImage, *preprocessedImage, pad[0], pad[2], pad[1], pad[3], cv::BORDER_CONSTANT, padValue); -} - -void load_open_pose_model(const char *modelPath, open_pose_model_t *model) { - // Initialize model - model->onnx_session = model->env = model->session_options = NULL; - - Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions 
*sessionOptions = new Ort::SessionOptions; - sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, modelPath, *sessionOptions); - model->env = env; - model->onnx_session = session; - model->session_options = sessionOptions; - - // Should we pass these parameters through the model json file? - model->mean_value = -128.0f / 256.0f; - model->img_scale = (1.0f / 256.0f); - model->model_size = 256; - - model->num_refinement_stages = 2; - model->output_size = (model->num_refinement_stages + 1) * 2; - - model->even_channel_output = 38; - model->odd_channel_output = 19; - model->stride = 0; - model->batch_size = 1; - if (model->stride == 0) { - model->width_output = 32; - model->height_output = 49; - } else { - model->width_output = 16; - model->height_output = 35; - } -} - -void free_open_pose_model(open_pose_model_t *model) { - if (model->onnx_session) { - Ort::Session *session = static_cast(model->onnx_session); - delete session; - } - - if (model->session_options) { - Ort::SessionOptions *sessionOptions = static_cast(model->session_options); - delete sessionOptions; - } - - if (model->env) { - Ort::Env *env = static_cast(model->env); - delete env; - } -} - -void ff_open_pose(open_pose_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { - Ort::Session *session = static_cast(model->onnx_session); - - if (!session) { - std::cerr << "ONNX session not initialized." 
<< std::endl; - return; - } - - // Prepare the input dimensions - // Dims of input data - size_t inputTensorSize = model->model_size * model->model_size * 3; - - // Dims of input of model - std::vector inputNodeDims = {inputTensorValues->batch_size, inputTensorValues->channels, inputTensorValues->width, - inputTensorValues->height}; - - // Setup input/output names - Ort::AllocatorWithDefaultOptions allocator; - std::vector inputNodeNames = {"data"}; - std::vector outputNodeNames = {"stage_0_output_1_heatmaps", "stage_0_output_0_pafs"}; - if (model->num_refinement_stages == 2) { - outputNodeNames.push_back("stage_1_output_1_heatmaps"); - outputNodeNames.push_back("stage_1_output_0_pafs"); - outputNodeNames.push_back("stage_2_output_1_heatmaps"); - outputNodeNames.push_back("stage_2_output_0_pafs"); - } - // Set up the input tensor - auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value inputTensor = - Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 4); - assert(inputTensor.IsTensor()); - - // Feed-forward the model - auto outputTensors = - session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), model->output_size); - assert(outputTensors.size() == model->output_size); - - // Get the results back - for (int i = 0; i < outputTensors.size(); i++) { - float *tensor_data = outputTensors[i].GetTensorMutableData(); - - int channelDim; - if ((i % 2) == 0) { - channelDim = model->even_channel_output; - } else { - channelDim = model->odd_channel_output; - } - - int tensorSizes[5] = {1, model->batch_size, channelDim, model->width_output, model->height_output}; - - cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensor_data)); - outputTensorValues->push_back(outputMat); - } -} - -void init_random_opendr_tensor_op(opendr_tensor_t *inputTensorValues, open_pose_model_t *model) { - int inputTensorSize = model->model_size * 
model->model_size * 3; - - float *data = static_cast(malloc(inputTensorSize * sizeof(float))); - - for (unsigned int j = 0; j < inputTensorSize; ++j) { - data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; - } - - load_tensor(inputTensorValues, static_cast(data), 1, 1, 3, model->model_size, model->model_size); - free(data); -} - -void init_opendr_tensor_from_img_op(opendr_image_t *image, opendr_tensor_t *inputTensorValues, open_pose_model_t *model) { - int inputTensorSize = model->model_size * model->model_size * 3; - - cv::Mat *opencvImage = (static_cast(image->data)); - cv::Mat normImage; - preprocess_open_pose(opencvImage, &normImage, model->model_size, model->mean_value, model->img_scale); - - float *data = static_cast(malloc(inputTensorSize * sizeof(float))); - for (unsigned int j = 0; j < model->model_size; ++j) { - for (unsigned int k = 0; k < model->model_size; ++k) { - cv::Vec3f currentPixel = normImage.at(j, k); - data[0 * model->model_size * model->model_size + j * model->model_size + k] = currentPixel[0]; - data[1 * model->model_size * model->model_size + j * model->model_size + k] = currentPixel[1]; - data[2 * model->model_size * model->model_size + j * model->model_size + k] = currentPixel[2]; - } - } - - load_tensor(inputTensorValues, static_cast(data), 1, 1, 3, model->model_size, model->model_size); - free(data); -} - -void forward_open_pose(open_pose_model_t *model, opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector) { - // Get the feature vector for the current image - std::vector outputTensorValues; - ff_open_pose(model, inputTensorValues, &outputTensorValues); - - int nTensors = static_cast(outputTensorValues.size()); - if (nTensors > 0) { - int batchSizes[nTensors]; - int frames[nTensors]; - int channels[nTensors]; - int widths[nTensors]; - int heights[nTensors]; - - std::vector tempTensorsVector; - opendr_tensor_t tempTensors[nTensors]; - - for (int i = 0; i < nTensors; i++) { - batchSizes[i] = 1; - frames[i] = 1; - if ((i 
% 2) == 0) { - channels[i] = model->even_channel_output; - } else { - channels[i] = model->odd_channel_output; - } - widths[i] = model->width_output; - heights[i] = model->height_output; - - load_tensor(&(tempTensors[i]), outputTensorValues[i].ptr(0), batchSizes[i], frames[i], channels[i], widths[i], - heights[i]); - tempTensorsVector.push_back(tempTensors[i]); - } - load_tensor_vector(tensorVector, tempTensorsVector.data(), nTensors); - for (int i = 0; i < nTensors; i++) { - free_tensor(&(tempTensors[i])); - } - } else { - initialize_tensor_vector(tensorVector); - } -} diff --git a/src/c_api/object_detection_2d_nanodet_jit.cpp b/src/c_api/nanodet_libtorch.cpp similarity index 53% rename from src/c_api/object_detection_2d_nanodet_jit.cpp rename to src/c_api/nanodet_libtorch.cpp index cfaf6293ca..51f2cbefad 100644 --- a/src/c_api/object_detection_2d_nanodet_jit.cpp +++ b/src/c_api/nanodet_libtorch.cpp @@ -12,45 +12,44 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "object_detection_2d_nanodet_jit.h" - #include #include #include #include #include +#include "nanodet_c.h" /** * Helper class holder of c++ values and jit model. 
*/ class NanoDet { private: - torch::DeviceType mDevice; - torch::jit::script::Module mNetwork; - torch::Tensor mMeanTensor; - torch::Tensor mStdTensor; - std::vector mLabels; + torch::DeviceType device; + torch::jit::script::Module network; + torch::Tensor meanTensor; + torch::Tensor stdTensor; + std::vector labels; public: - NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, + NanoDet(torch::jit::script::Module net, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, std::vector labels); ~NanoDet(); - torch::Tensor preProcess(cv::Mat *image); - torch::jit::script::Module network() const; - torch::Tensor meanTensor() const; - torch::Tensor stdTensor() const; - std::vector labels() const; + torch::Tensor mPreProcess(cv::Mat *image); + torch::jit::script::Module net() const; + torch::Tensor meanValues() const; + torch::Tensor stdValues() const; + std::vector classes() const; std::vector outputs; }; -NanoDet::NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, - torch::DeviceType device, const std::vector labels) { - this->mDevice = device; - this->mNetwork = network; - this->mMeanTensor = meanValues.clone().to(device); - this->mStdTensor = stdValues.clone().to(device); - this->mLabels = labels; +NanoDet::NanoDet(torch::jit::script::Module net, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, + const std::vector labels) { + this->device = device; + this->network = net; + this->meanTensor = meanValues.clone().to(device); + this->stdTensor = stdValues.clone().to(device); + this->labels = labels; } NanoDet::~NanoDet() { @@ -62,13 +61,13 @@ NanoDet::~NanoDet() { * Mean and Standard deviation are already part of NanoDet class when is initialized. 
* @param image, image to be preprocesses */ -torch::Tensor NanoDet::preProcess(cv::Mat *image) { +torch::Tensor NanoDet::mPreProcess(cv::Mat *image) { torch::Tensor tensorImage = torch::from_blob(image->data, {image->rows, image->cols, 3}, torch::kByte); tensorImage = tensorImage.toType(torch::kFloat); - tensorImage = tensorImage.to(this->mDevice); + tensorImage = tensorImage.to(this->device); tensorImage = tensorImage.permute({2, 0, 1}); - tensorImage = tensorImage.add(this->mMeanTensor); - tensorImage = tensorImage.mul(this->mStdTensor); + tensorImage = tensorImage.add(this->meanTensor); + tensorImage = tensorImage.mul(this->stdTensor); return tensorImage; } @@ -76,39 +75,39 @@ torch::Tensor NanoDet::preProcess(cv::Mat *image) { /** * Getter for jit model */ -torch::jit::script::Module NanoDet::network() const { - return this->mNetwork; +torch::jit::script::Module NanoDet::net() const { + return this->network; } /** * Getter for tensor with the mean values */ -torch::Tensor NanoDet::meanTensor() const { - return this->mMeanTensor; +torch::Tensor NanoDet::meanValues() const { + return this->meanTensor; } /** * Getter for tensor with the standard deviation values */ -torch::Tensor NanoDet::stdTensor() const { - return this->mStdTensor; +torch::Tensor NanoDet::stdValues() const { + return this->stdTensor; } /** - * Getter of labels + * Getter of labels for printing */ -std::vector NanoDet::labels() const { - return this->mLabels; +std::vector NanoDet::classes() const { + return labels; } /** * Helper function to calculate the final shape of the model input relative to size ratio of input image. 
*/ -void get_minimum_dstShape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { +void get_minimum_dst_shape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { float ratio; - float srcRatio = ((float)srcSize->width / (float)srcSize->height); - float dstRatio = ((float)dstSize->width / (float)dstSize->height); - if (srcRatio < dstRatio) + float src_ratio = ((float)srcSize->width / (float)srcSize->height); + float dst_ratio = ((float)dstSize->width / (float)dstSize->height); + if (src_ratio < dst_ratio) ratio = ((float)dstSize->height / (float)srcSize->height); else ratio = ((float)dstSize->width / (float)srcSize->width); @@ -125,32 +124,32 @@ void get_minimum_dstShape(cv::Size *srcSize, cv::Size *dstSize, float divisible) /** * Helper function to calculate the warp matrix for resizing. */ -void get_resize_matrix(cv::Size *srcShape, cv::Size *dstShape, cv::Mat *Rs, int keepRatio) { - if (keepRatio == 1) { +void get_resize_matrix(cv::Size *src_shape, cv::Size *dst_shape, cv::Mat *Rs, int keep_ratio) { + if (keep_ratio == 1) { float ratio; cv::Mat C = cv::Mat::eye(3, 3, CV_32FC1); - C.at(0, 2) = -srcShape->width / 2.0; - C.at(1, 2) = -srcShape->height / 2.0; - float srcRatio = ((float)srcShape->width / (float)srcShape->height); - float dstRatio = ((float)dstShape->width / (float)dstShape->height); - if (srcRatio < dstRatio) { - ratio = ((float)dstShape->height / (float)srcShape->height); + C.at(0, 2) = -src_shape->width / 2.0; + C.at(1, 2) = -src_shape->height / 2.0; + float src_ratio = ((float)src_shape->width / (float)src_shape->height); + float dst_ratio = ((float)dst_shape->width / (float)dst_shape->height); + if (src_ratio < dst_ratio) { + ratio = ((float)dst_shape->height / (float)src_shape->height); } else { - ratio = ((float)dstShape->width / (float)srcShape->width); + ratio = ((float)dst_shape->width / (float)src_shape->width); } Rs->at(0, 0) *= ratio; Rs->at(1, 1) *= ratio; cv::Mat T = cv::Mat::eye(3, 3, CV_32FC1); - T.at(0, 2) = 0.5 * 
dstShape->width; - T.at(1, 2) = 0.5 * dstShape->height; + T.at(0, 2) = 0.5 * dst_shape->width; + T.at(1, 2) = 0.5 * dst_shape->height; *Rs = T * (*Rs) * C; } else { - Rs->at(0, 0) *= (float)dstShape->width / (float)srcShape->width; - Rs->at(1, 1) *= (float)dstShape->height / (float)srcShape->height; + Rs->at(0, 0) *= (float)dst_shape->width / (float)src_shape->width; + Rs->at(1, 1) *= (float)dst_shape->height / (float)src_shape->height; } } @@ -162,28 +161,28 @@ void get_resize_matrix(cv::Size *srcShape, cv::Size *dstShape, cv::Mat *Rs, int * @param src, image to be preprocesses * @param dst, output image to be used as model input * @param dstSize, final size of the dst - * @param warpMatrix, matrix to be used for warp perspective - * @param keepRatio, flag for targeting the resized image size relative to input image ratio + * @param Rs, matrix to be used for warp perspective + * @param keep_ratio, flag for targeting the resized image size relative to input image ratio */ -void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warpMatrix, int keepRatio) { +void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warp_matrix, int keep_ratio) { cv::Size srcSize = cv::Size(src->cols, src->rows); const float divisible = 0.0; // Get new destination size if keep ratio is wanted - if (keepRatio == 1) { - get_minimum_dstShape(&srcSize, dstSize, divisible); + if (keep_ratio == 1) { + get_minimum_dst_shape(&srcSize, dstSize, divisible); } - get_resize_matrix(&srcSize, dstSize, warpMatrix, keepRatio); - cv::warpPerspective(*src, *dst, *warpMatrix, *dstSize); + get_resize_matrix(&srcSize, dstSize, warp_matrix, keep_ratio); + cv::warpPerspective(*src, *dst, *warp_matrix, *dstSize); } /** * Helper function to determine the device of jit model and tensors. 
*/ -torch::DeviceType torchDevice(char *deviceName, int verbose = 0) { +torch::DeviceType torchDevice(char *device_name, int verbose = 0) { torch::DeviceType device; - if (std::string(deviceName) == "cuda") { + if (std::string(device_name) == "cuda") { if (verbose == 1) printf("to cuda\n"); device = torch::kCUDA; @@ -195,12 +194,12 @@ torch::DeviceType torchDevice(char *deviceName, int verbose = 0) { return device; } -void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model) { +void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model) { // Initialize model - model->input_size[0] = width; - model->input_size[1] = height; + model->inputSize[0] = width; + model->inputSize[1] = height; - model->score_threshold = scoreThreshold; + model->scoreThreshold = scoreThreshold; model->keep_ratio = 1; const std::vector labels{ @@ -222,78 +221,71 @@ void load_nanodet_model(char *modelPath, char *device, int height, int width, fl torch::Tensor stdValues = torch::tensor({{{0.017429f}}, {{0.017507f}}, {{0.017125f}}}); // initialization of jit model and class as holder of c++ values. 
- torch::DeviceType initDevice = torchDevice(device, 1); - torch::jit::script::Module network = torch::jit::load(modelPath, initDevice); - network.eval(); - - NanoDet *detector = new NanoDet(network, meanTensor, stdValues, initDevice, labels); - - model->network = static_cast(detector); -} + torch::DeviceType torch_device = torchDevice(device, 1); + torch::jit::script::Module net = torch::jit::load(model_path, torch_device); + net.eval(); -void ff_nanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, - torch::Tensor *outputs) { - // Make all the inputs as tensors to use in jit model - torch::Tensor srcHeight = torch::tensor(originalSize->width); - torch::Tensor srcWidth = torch::tensor(originalSize->height); - torch::Tensor warpMat = torch::from_blob(warpMatrix->data, {3, 3}); + NanoDet *detector = new NanoDet(net, meanTensor, stdValues, torch_device, labels); - // Model inference - *outputs = (model->network()).forward({*inputTensor, srcHeight, srcWidth, warpMat}).toTensor(); - *outputs = outputs->to(torch::Device(torch::kCPU, 0)); + model->net = static_cast(detector); } -opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image) { - NanoDet *networkPTR = static_cast(model->network); - opendr_detection_vector_target_t detectionsVector; +opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model) { + NanoDet *networkPTR = static_cast(model->net); + opendr_detection_target_list_t detections; - cv::Mat *opencvImage = static_cast(image->data); - if (!opencvImage) { + std::vector dets; + cv::Mat *opencv_image = static_cast(image->data); + if (!opencv_image) { std::cerr << "Cannot load image for inference." << std::endl; // Initialize an empty detection to return. 
- initialize_detections_vector(&detectionsVector); - return detectionsVector; + initialize_detections(&detections); + return detections; } // Preprocess image and keep values as input in jit model cv::Mat resizedImg; - cv::Size dstSize = cv::Size(model->input_size[0], model->input_size[1]); - cv::Mat warpMatrix = cv::Mat::eye(3, 3, CV_32FC1); - preprocess(opencvImage, &resizedImg, &dstSize, &warpMatrix, model->keep_ratio); - torch::Tensor input = networkPTR->preProcess(&resizedImg); - cv::Size originalSize(opencvImage->cols, opencvImage->rows); + cv::Size dstSize = cv::Size(model->inputSize[0], model->inputSize[1]); + cv::Mat warp_matrix = cv::Mat::eye(3, 3, CV_32FC1); + preprocess(opencv_image, &resizedImg, &dstSize, &warp_matrix, model->keep_ratio); + torch::Tensor input = networkPTR->mPreProcess(&resizedImg); - torch::Tensor outputs; - ff_nanodet(networkPTR, &input, &warpMatrix, &originalSize, &outputs); + // Make all the inputs as tensors to use in jit model + torch::Tensor srcHeight = torch::tensor(opencv_image->rows); + torch::Tensor srcWidth = torch::tensor(opencv_image->cols); + torch::Tensor warpMatrix = torch::from_blob(warp_matrix.data, {3, 3}); + + // Model inference + torch::Tensor outputs = (networkPTR->net()).forward({input, srcHeight, srcWidth, warpMatrix}).toTensor(); + outputs = outputs.to(torch::Device(torch::kCPU, 0)); - std::vector detections; // Postprocessing, find which outputs have better score than threshold and keep them. 
for (int label = 0; label < outputs.size(0); label++) { for (int box = 0; box < outputs.size(1); box++) { - if (outputs[label][box][4].item() > model->score_threshold) { - opendr_detection_target_t detection; - detection.name = label; - detection.left = outputs[label][box][0].item(); - detection.top = outputs[label][box][1].item(); - detection.width = outputs[label][box][2].item() - outputs[label][box][0].item(); - detection.height = outputs[label][box][3].item() - outputs[label][box][1].item(); - detection.score = outputs[label][box][4].item(); - detections.push_back(detection); + if (outputs[label][box][4].item() > model->scoreThreshold) { + opendr_detection_target_t det; + det.name = label; + det.left = outputs[label][box][0].item(); + det.top = outputs[label][box][1].item(); + det.width = outputs[label][box][2].item() - outputs[label][box][0].item(); + det.height = outputs[label][box][3].item() - outputs[label][box][1].item(); + det.score = outputs[label][box][4].item(); + dets.push_back(det); } } } // Put vector detection as C pointer and size - if (static_cast(detections.size()) > 0) - load_detections_vector(&detectionsVector, detections.data(), static_cast(detections.size())); + if ((int)dets.size() > 0) + load_detections(&detections, dets.data(), (int)dets.size()); else - initialize_detections_vector(&detectionsVector); + initialize_detections(&detections); - return detectionsVector; + return detections; } -void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector) { +void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_detection_target_list_t *detections) { const int colorList[80][3] = { //{255 ,255 ,255}, //bg {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, @@ -310,23 +302,22 @@ void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_ {0, 113, 188}, {80, 182, 188}, {127, 127, 0}, }; - 
std::vector classNames = (static_cast(model->network))->labels(); + std::vector classNames = (static_cast(model->net))->classes(); - cv::Mat *opencvImage = static_cast(image->data); - if (!opencvImage) { + cv::Mat *opencv_image = static_cast(opendr_image->data); + if (!opencv_image) { std::cerr << "Cannot load image for inference." << std::endl; return; } - cv::Mat imageWithDetections = (*opencvImage).clone(); - for (size_t i = 0; i < detectionsVector->size; i++) { - const opendr_detection_target bbox = (detectionsVector->starting_pointer)[i]; + cv::Mat image = (*opencv_image).clone(); + for (size_t i = 0; i < detections->size; i++) { + const opendr_detection_target bbox = (detections->starting_pointer)[i]; float score = bbox.score > 1 ? 1 : bbox.score; - if (score > model->score_threshold) { + if (score > model->scoreThreshold) { cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); - cv::rectangle(imageWithDetections, - cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), - color); + cv::rectangle( + image, cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), color); char text[256]; @@ -339,23 +330,21 @@ void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_ int y = (int)bbox.top; if (y < 0) y = 0; - if (x + labelSize.width > imageWithDetections.cols) - x = imageWithDetections.cols - labelSize.width; + if (x + labelSize.width > image.cols) + x = image.cols - labelSize.width; - cv::rectangle(imageWithDetections, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), - color, -1); - cv::putText(imageWithDetections, text, cv::Point(x, y + labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, - cv::Scalar(255, 255, 255)); + cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), color, -1); + cv::putText(image, text, 
cv::Point(x, y + labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); } } - cv::imshow("image", imageWithDetections); + cv::imshow("image", image); cv::waitKey(0); } void free_nanodet_model(nanodet_model_t *model) { - if (model->network) { - NanoDet *networkPTR = static_cast(model->network); + if (model->net) { + NanoDet *networkPTR = static_cast(model->net); delete networkPTR; } } diff --git a/src/c_api/object_detection_2d_detr.cpp b/src/c_api/object_detection_2d_detr.cpp deleted file mode 100644 index 8652fa47d9..0000000000 --- a/src/c_api/object_detection_2d_detr.cpp +++ /dev/null @@ -1,269 +0,0 @@ -// Copyright 2020-2022 OpenDR European Project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "object_detection_2d_detr.h" -#include "target.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include "opencv2/core/core_c.h" - -/** - * Helper function for preprocessing images before feeding them into the detr object detection model. 
- * This function follows the OpenDR's object detection detr pre-processing pipeline, which includes the following: - * a) resizing the image into modelInputSize x modelInputSize pixels and b) normalizing the resulting values using - * meanValue and stdValue - * @param image image to be preprocesses - * @param data pre-processed data in a flattened vector - * @param modelInputSize size of the center crop (equals the size that the DL model expects) - * @param meanValue values used for centering the input image - * @param stdValues values used for scaling the input image - */ -void preprocess_detr(cv::Mat *image, cv::Mat *normalizedImage, int modelInputSize, float meanValues[3], float stdValues[3]) { - // Convert to RGB - cv::Mat resizedImage; - cv::cvtColor(*image, resizedImage, cv::COLOR_BGR2RGB); - - // Resize and then get a center crop - cv::resize(resizedImage, resizedImage, cv::Size(modelInputSize, modelInputSize)); - - // Scale to 0...1 - resizedImage.convertTo(*normalizedImage, CV_32FC3, (1 / 255.0)); - - cv::Scalar meanValue(meanValues[0], meanValues[1], meanValues[2]); - cv::Scalar stdValue(stdValues[0], stdValues[1], stdValues[2]); - - cv::add(*normalizedImage, meanValue, *normalizedImage); - cv::multiply(*normalizedImage, stdValue, *normalizedImage); -} - -/** - * Very simple helper function to parse OpenDR model files for object detection detr - * In the future this can be done at library level using a JSON-parser - */ -std::string json_get_key_string_detr(std::string json, const std::string &key) { - std::size_t startIdx = json.find(key); - std::string value = json.substr(startIdx); - value = value.substr(value.find(":") + 1); - value = value.substr(0, value.find(",")); - value = value.substr(value.find("\"") + 1); - value = value.substr(0, value.find("\"")); - return value; -} - -/** - * Very simple helper function to parse dictionaries OpenDR model files for object detection detr - * In the future this can be done at library level using a JSON-parser 
- */ -std::string json_get_key_string_detr_in_dict(std::string json, const std::string &key) { - std::size_t startIdx = json.find(key); - std::string value = json.substr(startIdx); - value = value.substr(value.find(":") + 1); - value = value.substr(0, value.find(",")); - value = value.substr(value.find("\"") + 1); - value = value.substr(0, value.find("}")); - return value; -} - -void load_detr_model(const char *modelPath, detr_model_t *model) { - // Initialize model - model->onnx_session = model->env = model->session_options = NULL; - model->threshold = 0; - - // Parse the model JSON file - std::string basePath(modelPath); - std::size_t splitPosition = basePath.find_last_of("/"); - splitPosition = splitPosition > 0 ? splitPosition + 1 : 0; - std::string modelJsonPath = basePath + "/" + basePath.substr(splitPosition) + ".json"; - std::ifstream inStream(modelJsonPath); - if (!inStream.is_open()) { - std::cerr << "Cannot open JSON model file" << std::endl; - return; - } - - std::string str; - inStream.seekg(0, std::ios::end); - str.reserve(inStream.tellg()); - inStream.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(inStream)), std::istreambuf_iterator()); - - // Parse JSON - std::string onnxModelPath = basePath + "/" + json_get_key_string_detr(str, "model_paths"); - - std::string modelFormat = json_get_key_string_detr(str, "format"); - - // Parse inference params - std::string threshold = json_get_key_string_detr_in_dict(str, "threshold"); - - if (!threshold.empty()) { - model->threshold = std::stof(threshold); - } - - // Proceed only if the model is in onnx format - if (modelFormat != "onnx") { - std::cerr << "Model not in ONNX format." 
<< std::endl; - return; - } - - Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; - sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, onnxModelPath.c_str(), *sessionOptions); - model->env = env; - model->onnx_session = session; - model->session_options = sessionOptions; - - // Should we pass these parameters through the model json file? - model->mean_value[0] = -0.485f; - model->mean_value[1] = -0.456f; - model->mean_value[2] = -0.406f; - - model->std_value[0] = 0.229f; - model->std_value[1] = 0.224f; - model->std_value[2] = 0.225f; - - model->model_size = 800; - - model->features = 100; - model->output_sizes[0] = 92; - model->output_sizes[1] = 4; -} - -void free_detr_model(detr_model_t *model) { - if (model->onnx_session) { - Ort::Session *session = static_cast(model->onnx_session); - delete session; - } - - if (model->session_options) { - Ort::SessionOptions *sessionOptions = static_cast(model->session_options); - delete sessionOptions; - } - - if (model->env) { - Ort::Env *env = static_cast(model->env); - delete env; - } -} - -void ff_detr(detr_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { - Ort::Session *session = static_cast(model->onnx_session); - - if (!session) { - std::cerr << "ONNX session not initialized." 
<< std::endl; - return; - } - - // Prepare the input dimensions - // Dims of input data for preprocessing - size_t inputTensorSize = model->model_size * model->model_size * 3; - - // Dims of input of model - std::vector inputNodeDims = {inputTensorValues->batch_size, inputTensorValues->channels, inputTensorValues->width, - inputTensorValues->height}; - - // Setup input/output names - Ort::AllocatorWithDefaultOptions allocator; - std::vector inputNodeNames = {"data"}; - std::vector outputNodeNames = {"pred_logits", "pred_boxes"}; - - // Set up the input tensor - auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value inputTensor = - Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 4); - assert(inputTensor.IsTensor()); - - // Feed-forward the model - auto outputTensors = - session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 2); - assert(outputTensors.size() == 2); - - // Get the results back - for (int i = 0; i < outputTensors.size(); i++) { - float *tensorData = outputTensors[i].GetTensorMutableData(); - - int tensorSizes[5] = {1, 1, 1, model->features, model->output_sizes[i]}; - - cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensorData)); - outputTensorValues->push_back(outputMat); - } -} - -void init_random_opendr_tensor_detr(opendr_tensor_t *inputTensorValues, detr_model_t *model) { - // Prepare the input data with random values - int inputTensorSize = model->model_size * model->model_size * 3; - - float *data = static_cast(malloc(inputTensorSize * sizeof(float))); - // change data structure so channels are the last iterable dimension - for (unsigned int j = 0; j < inputTensorSize; ++j) { - data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; - } - - load_tensor(inputTensorValues, static_cast(data), 1, 1, 3, model->model_size, model->model_size); - free(data); -} - -void forward_detr(detr_model_t *model, 
opendr_tensor_t *inputTensorValues, opendr_tensor_vector_t *tensorVector) { - // Get the feature vector for the current image - std::vector outputTensorValues; - ff_detr(model, inputTensorValues, &outputTensorValues); - - int nTensors = static_cast(outputTensorValues.size()); - if (nTensors > 0) { - int batchSizes[nTensors]; - int frames[nTensors]; - int channels[nTensors]; - int widths[nTensors]; - int heights[nTensors]; - - std::vector tempTensorsVector; - opendr_tensor_t tempTensors[nTensors]; - - for (int i = 0; i < nTensors; i++) { - batchSizes[i] = 1; - frames[i] = 1; - channels[i] = 1; - widths[i] = 1; - if (i == 0) { - heights[i] = model->output_sizes[0]; - } else { - heights[i] = model->output_sizes[1]; - } - load_tensor(&(tempTensors[i]), outputTensorValues[i].ptr(0), batchSizes[i], frames[i], channels[i], widths[i], - heights[i]); - tempTensorsVector.push_back(tempTensors[i]); - } - load_tensor_vector(tensorVector, tempTensorsVector.data(), nTensors); - for (int i = 0; i < nTensors; i++) { - free_tensor(&(tempTensors[i])); - } - - } else { - initialize_tensor_vector(tensorVector); - } -} diff --git a/src/c_api/object_tracking_2d_deep_sort.cpp b/src/c_api/object_tracking_2d_deep_sort.cpp deleted file mode 100644 index 2783a47c01..0000000000 --- a/src/c_api/object_tracking_2d_deep_sort.cpp +++ /dev/null @@ -1,203 +0,0 @@ -// Copyright 2020-2022 OpenDR European Project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "object_tracking_2d_deep_sort.h" -#include "target.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include "opencv2/core/core_c.h" - -/** - * Helper function for preprocessing images before feeding them into the deep sort object tracking 2d model. - * This function follows the OpenDR's object tracking 2d deep sort pre-processing pipeline, which includes the following: - * a) resizing the image into modelInputSizes[1] x modelInputSizes[0] pixels and b) normalizing the resulting values using - * meanValues and stdValues - * @param image image to be preprocesses - * @param normalizedImg pre-processed data in a flattened vector - * @param modelInputSizes size of the center crop (equals the size that the DL model expects) - * @param meanValues value used for centering the input image - * @param stdValues value used for scaling the input image - */ -void preprocess_deep_sort(cv::Mat *image, cv::Mat *normalizedImg, int modelInputSizes[2], float meanValues[3], - float stdValues[3]) { - // Convert to RGB - cv::Mat resizedImage; - cv::cvtColor(*image, resizedImage, cv::COLOR_BGR2RGB); - - // Resize - cv::resize(resizedImage, resizedImage, cv::Size(modelInputSizes[1], modelInputSizes[0])); - - // Unfold the image into the appropriate format - // Scale to 0...1 - resizedImage.convertTo(*normalizedImg, CV_32FC3, (1 / 255.0)); - - // Normalize - cv::Scalar meanValue(meanValues[0], meanValues[1], meanValues[2]); - cv::Scalar stdValue(stdValues[0], stdValues[1], stdValues[2]); - - cv::add(*normalizedImg, meanValue, *normalizedImg); - cv::multiply(*normalizedImg, stdValue, *normalizedImg); -} - -void load_deep_sort_model(const char *modelPath, deep_sort_model_t *model) { - // Initialize model - model->onnx_session = model->env = model->session_options = NULL; - - Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions 
*sessionOptions = new Ort::SessionOptions; - sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, modelPath, *sessionOptions); - model->env = env; - model->onnx_session = session; - model->session_options = sessionOptions; - - // Should we pass these parameters through the model json file? - model->mean_value[0] = -0.485f; - model->mean_value[1] = -0.456f; - model->mean_value[2] = -0.406f; - - model->std_value[0] = (1.0f / 0.229f); - model->std_value[1] = (1.0f / 0.224f); - model->std_value[2] = (1.0f / 0.225f); - - model->model_size[0] = 64; - model->model_size[1] = 128; - - model->batch_size = 1; - model->in_channels = 3; - - model->features = 512; -} - -void free_deep_sort_model(deep_sort_model_t *model) { - if (model->onnx_session) { - Ort::Session *session = static_cast(model->onnx_session); - delete session; - } - - if (model->session_options) { - Ort::SessionOptions *sessionOptions = static_cast(model->session_options); - delete sessionOptions; - } - - if (model->env) { - Ort::Env *env = static_cast(model->env); - delete env; - } -} - -void ff_deep_sort(deep_sort_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { - Ort::Session *session = static_cast(model->onnx_session); - - if (!session) { - std::cerr << "ONNX session not initialized." 
<< std::endl; - return; - } - - // Prepare the input dimensions - // Dims of input data - size_t inputTensorSize = model->batch_size * model->in_channels * model->model_size[1] * model->model_size[0]; - - // Dims of input of model - std::vector inputNodeDims = {model->batch_size, model->in_channels, model->model_size[1], model->model_size[0]}; - - // Setup input/output names - Ort::AllocatorWithDefaultOptions allocator; - std::vector inputNodeNames = {"data"}; - std::vector outputNodeNames = {"output"}; - - // Set up the input tensor - auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value inputTensor = - Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 4); - assert(inputTensor.IsTensor()); - - // Feed-forward the model - auto outputTensors = - session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); - assert(outputTensors.size() == 1); - - // Get the results back - for (int i = 0; i < outputTensors.size(); i++) { - float *tensorData = outputTensors[i].GetTensorMutableData(); - - int tensorSizes[5] = {1, 1, 1, model->batch_size, model->features}; - cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensorData)); - outputTensorValues->push_back(outputMat); - } -} - -void init_random_opendr_tensor_ds(opendr_tensor_t *inputTensorValues, deep_sort_model_t *model) { - int inputTensorSize = 1 * model->batch_size * model->in_channels * model->model_size[1] * model->model_size[0]; - float *data = static_cast(malloc(inputTensorSize * sizeof(float))); - for (unsigned int j = 0; j < inputTensorSize; ++j) { - data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; - } - - // Dims of input of model - load_tensor(inputTensorValues, static_cast(data), 1, model->batch_size, model->in_channels, model->model_size[1], - model->model_size[0]); - free(data); -} - -void forward_deep_sort(deep_sort_model_t *model, opendr_tensor_t *inputTensorValues, 
opendr_tensor_vector_t *tensorVector) { - // Get the feature vector for the current image - std::vector outputTensorValues; - ff_deep_sort(model, inputTensorValues, &outputTensorValues); - - int nTensors = static_cast(outputTensorValues.size()); - if (nTensors > 0) { - int batch_sizes[nTensors]; - int frames[nTensors]; - int channels[nTensors]; - int widths[nTensors]; - int heights[nTensors]; - - std::vector temp_tensors; - opendr_tensor_t temp_tensor[nTensors]; - - for (int i = 0; i < nTensors; i++) { - batch_sizes[i] = 1; - frames[i] = 1; - channels[i] = 1; - widths[i] = model->batch_size; - heights[i] = model->features; - - load_tensor(&(temp_tensor[i]), outputTensorValues[i].ptr(0), batch_sizes[i], frames[i], channels[i], widths[i], - heights[i]); - temp_tensors.push_back(temp_tensor[i]); - } - load_tensor_vector(tensorVector, temp_tensors.data(), nTensors); - for (int i = 0; i < nTensors; i++) { - free_tensor(&(temp_tensor[i])); - } - } else { - initialize_tensor_vector(tensorVector); - } -} diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index fde313aeb9..8eddbed500 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -36,130 +36,28 @@ void free_image(opendr_image_t *image) { } } -void initialize_detections_vector(opendr_detection_vector_target_t *detection_vector) { - std::vector detections; - - opendr_detection_target_t detection; - detection.name = -1; - detection.left = 0.0; - detection.top = 0.0; - detection.width = 0.0; - detection.height = 0.0; - detection.score = 0.0; - - detections.push_back(detection); - - load_detections_vector(detection_vector, detections.data(), static_cast(detections.size())); -} - -void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, - int vector_size) { - detection_vector->size = vector_size; - int size_of_output = (vector_size) * sizeof(opendr_detection_target_t); - detection_vector->starting_pointer = 
static_cast(malloc(size_of_output)); - std::memcpy(detection_vector->starting_pointer, detection, size_of_output); -} - -void free_detections_vector(opendr_detection_vector_target_t *detection_vector) { - if (detection_vector->starting_pointer != NULL) - delete detection_vector->starting_pointer; -} - -void initialize_tensor(opendr_tensor_t *opendr_tensor) { - opendr_tensor->batch_size = 0; - opendr_tensor->frames = 0; - opendr_tensor->channels = 0; - opendr_tensor->width = 0; - opendr_tensor->height = 0; - opendr_tensor->data = NULL; -} - -void load_tensor(opendr_tensor_t *opendr_tensor, void *tensor_data, int batch_size, int frames, int channels, int width, - int height) { - opendr_tensor->batch_size = batch_size; - opendr_tensor->frames = frames; - opendr_tensor->channels = channels; - opendr_tensor->width = width; - opendr_tensor->height = height; - - int size_of_data = (batch_size * frames * channels * width * height) * sizeof(float); - opendr_tensor->data = static_cast(malloc(size_of_data)); - std::memcpy(opendr_tensor->data, tensor_data, size_of_data); -} - -void free_tensor(opendr_tensor_t *opendr_tensor) { - if (opendr_tensor->data != NULL) - delete opendr_tensor->data; -} - -void initialize_tensor_vector(opendr_tensor_vector_t *tensor_vector) { - tensor_vector->n_tensors = 0; - tensor_vector->batch_sizes = NULL; - tensor_vector->frames = NULL; - tensor_vector->channels = NULL; - tensor_vector->widths = NULL; - tensor_vector->heights = NULL; - tensor_vector->memories = 0; -} - -void load_tensor_vector(opendr_tensor_vector_t *tensor_vector, opendr_tensor_t *tensor, int number_of_tensors) { - tensor_vector->n_tensors = number_of_tensors; - int size_of_shape_data = number_of_tensors * sizeof(int); - /* initialize arrays to hold size values for each tensor */ - tensor_vector->batch_sizes = static_cast(malloc(size_of_shape_data)); - tensor_vector->frames = static_cast(malloc(size_of_shape_data)); - tensor_vector->channels = 
static_cast(malloc(size_of_shape_data)); - tensor_vector->widths = static_cast(malloc(size_of_shape_data)); - tensor_vector->heights = static_cast(malloc(size_of_shape_data)); - - /* initialize array to hold data values for all tensors */ - tensor_vector->memories = static_cast(malloc(number_of_tensors * sizeof(float *))); - - /* copy size values */ - for (int i = 0; i < number_of_tensors; i++) { - (tensor_vector->batch_sizes)[i] = tensor[i].batch_size; - (tensor_vector->frames)[i] = tensor[i].frames; - (tensor_vector->channels)[i] = tensor[i].channels; - (tensor_vector->widths)[i] = tensor[i].width; - (tensor_vector->heights)[i] = tensor[i].height; - - /* copy data values by, - * initialize a data pointer into a tensor, - * copy the values, - * set tensor data pointer to watch the memory pointer*/ - int size_of_data = ((tensor[i].batch_size) * (tensor[i].frames) * (tensor[i].channels) * (tensor[i].width) * - (tensor[i].height) * sizeof(float)); - float *memory_of_data_tensor = static_cast(malloc(size_of_data)); - std::memcpy(memory_of_data_tensor, tensor[i].data, size_of_data); - (tensor_vector->memories)[i] = memory_of_data_tensor; - } -} - -void free_tensor_vector(opendr_tensor_vector_t *tensor_vector) { - // free vector pointers - if (tensor_vector->batch_sizes != NULL) - delete tensor_vector->batch_sizes; - if (tensor_vector->frames != NULL) - delete tensor_vector->frames; - if (tensor_vector->channels != NULL) - delete tensor_vector->channels; - if (tensor_vector->widths != NULL) - delete tensor_vector->widths; - if (tensor_vector->heights != NULL) - delete tensor_vector->heights; - - // free tensors data and vector memory - if (tensor_vector->memories != NULL) { - for (int i = 0; i < (tensor_vector->n_tensors); i++) { - if ((tensor_vector->memories)[i] != NULL) - delete (tensor_vector->memories)[i]; - } - delete tensor_vector->memories; - } -} - -void iter_tensor_vector(opendr_tensor_t *output, opendr_tensor_vector_t *tensor_vector, int index) { - 
load_tensor(output, static_cast((tensor_vector->memories)[index]), (tensor_vector->batch_sizes)[index], - (tensor_vector->frames)[index], (tensor_vector->channels)[index], (tensor_vector->widths)[index], - (tensor_vector->heights)[index]); -} +void initialize_detections(opendr_detection_target_list_t *detections) { + std::vector dets; + opendr_detection_target_t det; + det.name = -1; + det.left = 0.0; + det.top = 0.0; + det.width = 0.0; + det.height = 0.0; + det.score = 0.0; + dets.push_back(det); + + load_detections(detections, dets.data(), (int)dets.size()); +} + +void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize) { + detections->size = vectorSize; + int sizeOfOutput = (vectorSize) * sizeof(opendr_detection_target_t); + detections->starting_pointer = static_cast(malloc(sizeOfOutput)); + std::memcpy(detections->starting_pointer, vectorDataPtr, sizeOfOutput); +} + +void free_detections(opendr_detection_target_list_t *detections) { + if (detections->starting_pointer != NULL) + free(detections->starting_pointer); +} \ No newline at end of file diff --git a/src/c_api/skeleton_based_action_recognition_pst.cpp b/src/c_api/skeleton_based_action_recognition_pst.cpp deleted file mode 100644 index b8bf90815a..0000000000 --- a/src/c_api/skeleton_based_action_recognition_pst.cpp +++ /dev/null @@ -1,167 +0,0 @@ -// Copyright 2020-2022 OpenDR European Project -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -#include "skeleton_based_action_recognition_pst.h" -#include "target.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include "opencv2/core/core_c.h" - -void load_pst_model(const char *modelPath, pst_model_t *model) { - // Initialize model - model->onnx_session = model->env = model->session_options = NULL; - - Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; - sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, modelPath, *sessionOptions); - model->env = env; - model->onnx_session = session; - model->session_options = sessionOptions; - - // Should we pass these parameters through the model json file? - model->batch_size = 128; - model->in_channels = 2; - model->features = 300; - model->num_point = 18; // same as the output of openpose - model->num_person = 2; - - model->num_classes = 60; -} - -void free_pst_model(pst_model_t *model) { - if (model->onnx_session) { - Ort::Session *session = static_cast(model->onnx_session); - delete session; - } - - if (model->session_options) { - Ort::SessionOptions *sessionOptions = static_cast(model->session_options); - delete sessionOptions; - } - - if (model->env) { - Ort::Env *env = static_cast(model->env); - delete env; - } -} - -void ff_pst(pst_model_t *model, opendr_tensor_t *inputTensorValues, std::vector *outputTensorValues) { - Ort::Session *session = static_cast(model->onnx_session); - - if (!session) { - std::cerr << "ONNX session not initialized." 
<< std::endl; - return; - } - - // Prepare the input dimensions - // Dims of input data - size_t inputTensorSize = model->batch_size * model->in_channels * model->features * model->num_point * model->num_person; - - // Dims of input of model - std::vector inputNodeDims = {model->batch_size, model->in_channels, model->features, model->num_point, - model->num_person}; - - // Setup input/output names - Ort::AllocatorWithDefaultOptions allocator; - std::vector inputNodeNames = {"onnx_input"}; - std::vector outputNodeNames = {"onnx_output"}; - - // Set up the input tensor - auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value inputTensor = - Ort::Value::CreateTensor(memoryInfo, inputTensorValues->data, inputTensorSize, inputNodeDims.data(), 5); - assert(inputTensor.IsTensor()); - - // Feed-forward the model - auto outputTensors = - session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); - assert(outputTensors.size() == 1); - - // Get the results back - for (int i = 0; i < outputTensors.size(); i++) { - float *tensorData = outputTensors[i].GetTensorMutableData(); - - int tensorSizes[5] = {1, 1, 1, model->batch_size, model->num_classes}; - - cv::Mat outputMat(5, tensorSizes, CV_32F, static_cast(tensorData)); - outputTensorValues->push_back(outputMat); - } -} - -void init_random_opendr_tensor_pst(opendr_tensor_t *inputTensorValues, pst_model_t *model) { - int inputTensorSize = model->batch_size * model->in_channels * model->features * model->num_point * model->num_person; - - float *data = static_cast(malloc(inputTensorSize * sizeof(float))); - for (unsigned int j = 0; j < inputTensorSize; ++j) { - data[j] = (((float)rand() / (RAND_MAX)) * 2) - 1; - } - - load_tensor(inputTensorValues, static_cast(data), model->batch_size, model->in_channels, model->features, - model->num_point, model->num_person); - free(data); -} - -void forward_pst(pst_model_t *model, opendr_tensor_t 
*inputTensorValues, opendr_tensor_vector_t *tensorVector) { - // Get the feature vector for the current image - std::vector outputTensorValues; - ff_pst(model, inputTensorValues, &outputTensorValues); - - int nTensors = static_cast(outputTensorValues.size()); - if (nTensors > 0) { - int batch_sizes[nTensors]; - int frames[nTensors]; - int channels[nTensors]; - int widths[nTensors]; - int heights[nTensors]; - - std::vector tempTensorsVector; - opendr_tensor_t tempTensors[nTensors]; - - for (int i = 0; i < nTensors; i++) { - batch_sizes[i] = 1; - frames[i] = 1; - channels[i] = 1; - widths[i] = model->batch_size; - heights[i] = model->num_classes; - - load_tensor(&(tempTensors[i]), outputTensorValues[i].ptr(0), batch_sizes[i], frames[i], channels[i], widths[i], - heights[i]); - tempTensorsVector.push_back(tempTensors[i]); - } - load_tensor_vector(tensorVector, tempTensorsVector.data(), nTensors); - for (int i = 0; i < nTensors; i++) { - free_tensor(&(tempTensors[i])); - } - } else { - initialize_tensor_vector(tensorVector); - } -} diff --git a/tests/sources/c_api/test_activity_recognition_x3d.c b/tests/sources/c_api/test_activity_recognition_x3d.c deleted file mode 100644 index 81fbc325f1..0000000000 --- a/tests/sources/c_api/test_activity_recognition_x3d.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include "activity_recognition_x3d.h" -#include "opendr_utils.h" - -START_TEST(model_creation_test) { - // Create a face recognition model - x3d_model_t model; - - // Load a pretrained model - load_x3d_model("data/activity_recognition/x3d/optimized_model/x3d_l.onnx", "l", &model); - ck_assert(model.onnx_session); - ck_assert(model.env); - ck_assert(model.session_options); - // Release the resources - free_x3d_model(&model); -} -END_TEST - -START_TEST(forward_pass_creation_test) { - // Create a x3d model - x3d_model_t model; - // Load a pretrained model (see instructions for downloading the data) - load_x3d_model("data/activity_recognition/x3d/optimized_model/x3d_l.onnx", "l", &model); - - // Load a random tensor and perform forward pass - opendr_tensor_t input_tensor; - init_random_opendr_tensor_x3d(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_x3d(&model, &input_tensor, &output_tensor_vector); - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Load another tensor - init_random_opendr_tensor_x3d(&input_tensor, &model); - forward_x3d(&model, &input_tensor, &output_tensor_vector); - - ck_assert(output_tensor_vector.n_tensors == 1); - - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Free the model resources - free_x3d_model(&model); -} -END_TEST - -Suite *x3d_suite(void) { - Suite *s; - TCase *tc_core; - - s = suite_create("X3d"); - tc_core = tcase_create("Core"); - - tcase_add_test(tc_core, model_creation_test); - tcase_add_test(tc_core, forward_pass_creation_test); - suite_add_tcase(s, tc_core); - - return s; -} - -int main() { - int no_failed = 0; - Suite *s; - SRunner *runner; - - s = x3d_suite(); - runner = srunner_create(s); - - srunner_run_all(runner, CK_NORMAL); - no_failed = srunner_ntests_failed(runner); - srunner_free(runner); - return (no_failed == 0) ? 
EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/tests/sources/c_api/test_deep_sort.c b/tests/sources/c_api/test_deep_sort.c deleted file mode 100644 index 8f29690436..0000000000 --- a/tests/sources/c_api/test_deep_sort.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include "object_tracking_2d_deep_sort.h" -#include "opendr_utils.h" - -START_TEST(model_creation_test) { - // Create a face recognition model - deep_sort_model_t model; - - // Load a pretrained model - load_deep_sort_model("data/object_tracking_2d/deep_sort/optimized_model/onnx_model.onnx", &model); - - ck_assert(model.onnx_session); - ck_assert(model.env); - ck_assert(model.session_options); - - // Release the resources - free_deep_sort_model(&model); -} -END_TEST - -START_TEST(forward_pass_creation_test) { - // Create a face recognition model - deep_sort_model_t model; - // Load a pretrained model (see instructions for downloading the data) - load_deep_sort_model("data/object_tracking_2d/deep_sort/optimized_model/onnx_model.onnx", &model); - - // Load a random tensor and perform forward pass - opendr_tensor_t input_tensor; - init_random_opendr_tensor_ds(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_deep_sort(&model, &input_tensor, &output_tensor_vector); - free_tensor(&input_tensor); 
- free_tensor_vector(&output_tensor_vector); - - // Load another tensor - init_random_opendr_tensor_ds(&input_tensor, &model); - forward_deep_sort(&model, &input_tensor, &output_tensor_vector); - - ck_assert(output_tensor_vector.n_tensors == 1); - - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Free the model resources - free_deep_sort_model(&model); -} -END_TEST - -Suite *deep_sort_suite(void) { - Suite *s; - TCase *tc_core; - - s = suite_create("Deep Sort"); - tc_core = tcase_create("Core"); - - tcase_add_test(tc_core, model_creation_test); - tcase_add_test(tc_core, forward_pass_creation_test); - suite_add_tcase(s, tc_core); - - return s; -} - -int main() { - int no_failed = 0; - Suite *s; - SRunner *runner; - - s = deep_sort_suite(); - runner = srunner_create(s); - - srunner_run_all(runner, CK_NORMAL); - no_failed = srunner_ntests_failed(runner); - srunner_free(runner); - return (no_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/tests/sources/c_api/test_detr.c b/tests/sources/c_api/test_detr.c deleted file mode 100644 index 049632c373..0000000000 --- a/tests/sources/c_api/test_detr.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include "object_detection_2d_detr.h" -#include "opendr_utils.h" - -START_TEST(model_creation_test) { - // Create a detr model - detr_model_t model; - - // Load a pretrained model - load_detr_model("data/object_detection_2d/detr/optimized_model", &model); - - ck_assert(model.onnx_session); - ck_assert(model.env); - ck_assert(model.session_options); - - // Release the resources - free_detr_model(&model); - - // Load a model that does not exist - load_detr_model("data/optimized_model_not_existant", &model); - ck_assert(!model.onnx_session); - ck_assert(!model.env); - ck_assert(!model.session_options); - - // Release the resources - free_detr_model(&model); -} -END_TEST - -START_TEST(forward_pass_creation_test) { - // Create a detr model - detr_model_t model; - // Load a pretrained model (see instructions for downloading the data) - load_detr_model("data/object_detection_2d/detr/optimized_model", &model); - - // Load a random tensor and perform forward pass - opendr_tensor_t input_tensor; - init_random_opendr_tensor_detr(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_detr(&model, &input_tensor, &output_tensor_vector); - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Load another tensor - init_random_opendr_tensor_detr(&input_tensor, &model); - forward_detr(&model, &input_tensor, &output_tensor_vector); - - ck_assert(output_tensor_vector.n_tensors == 2); - - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Free the model resources - free_detr_model(&model); -} -END_TEST - -Suite *detr_suite(void) { - Suite *s; - TCase *tc_core; - - s = suite_create("Detr"); - tc_core = tcase_create("Core"); - - tcase_add_test(tc_core, model_creation_test); - tcase_add_test(tc_core, forward_pass_creation_test); - suite_add_tcase(s, tc_core); - - return s; -} - -int main() { - int no_failed = 0; - 
Suite *s; - SRunner *runner; - - s = detr_suite(); - runner = srunner_create(s); - - srunner_run_all(runner, CK_NORMAL); - no_failed = srunner_ntests_failed(runner); - srunner_free(runner); - return (no_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/tests/sources/c_api/test_lightweight_open_pose.c b/tests/sources/c_api/test_lightweight_open_pose.c deleted file mode 100644 index 4f8fa27b90..0000000000 --- a/tests/sources/c_api/test_lightweight_open_pose.c +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include -#include -#include -#include -#include "lightweight_open_pose.h" -#include "opendr_utils.h" - -START_TEST(model_creation_test) { - // Create a face recognition model - open_pose_model_t model; - - // Load a pretrained model - load_open_pose_model("data/lightweight_open_pose/optimized_model/onnx_model.onnx", &model); - - ck_assert(model.onnx_session); - ck_assert(model.env); - ck_assert(model.session_options); -} -END_TEST - -START_TEST(forward_pass_creation_test) { - // Create a x3d model - open_pose_model_t model; - // Load a pretrained model (see instructions for downloading the data) - load_open_pose_model("data/lightweight_open_pose/optimized_model/onnx_model.onnx", &model); - - // Load a random tensor and perform forward pass - opendr_tensor_t input_tensor; - init_random_opendr_tensor_op(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_open_pose(&model, &input_tensor, &output_tensor_vector); - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Load another tensor - init_random_opendr_tensor_op(&input_tensor, &model); - forward_open_pose(&model, &input_tensor, &output_tensor_vector); - - ck_assert(output_tensor_vector.n_tensors == model.output_size); - - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Free the model resources - free_open_pose_model(&model); -} -END_TEST - -Suite *open_pose_suite(void) { - Suite *s; - TCase *tc_core; - - s = suite_create("Open Pose"); - tc_core = tcase_create("Core"); - - tcase_add_test(tc_core, model_creation_test); - tcase_add_test(tc_core, forward_pass_creation_test); - suite_add_tcase(s, tc_core); - - return s; -} - -int main() { - int no_failed = 0; - Suite *s; - SRunner *runner; - - s = open_pose_suite(); - runner = srunner_create(s); - - srunner_run_all(runner, CK_NORMAL); - no_failed = srunner_ntests_failed(runner); - srunner_free(runner); - return (no_failed == 0) 
? EXIT_SUCCESS : EXIT_FAILURE; -} diff --git a/tests/sources/c_api/test_nanodet.c b/tests/sources/c_api/test_nanodet.c index 66ec6a1856..b9526a25eb 100644 --- a/tests/sources/c_api/test_nanodet.c +++ b/tests/sources/c_api/test_nanodet.c @@ -17,22 +17,22 @@ #include #include #include -#include "object_detection_2d_nanodet_jit.h" +#include "nanodet_c.h" #include "opendr_utils.h" START_TEST(model_creation_test) { // Create a nanodet libtorch model nanodet_model_t model; // Load a pretrained model - load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + load_nanodet_model("data/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); - ck_assert_msg(model.network != 0, "net is NULL"); + ck_assert_msg(model.net != 0, "net is NULL"); // Release the resources free_nanodet_model(&model); // Check if memory steel exist - ck_assert_msg(model.network, "net is NULL"); + ck_assert_msg(model.net, "net is NULL"); } END_TEST @@ -41,18 +41,18 @@ START_TEST(inference_creation_test) { nanodet_model_t model; // Load a pretrained model - load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + load_nanodet_model("data/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); // Load an image and performance inference opendr_image_t image; - load_image("data/object_detection_2d/nanodet/database/000000000036.jpg", &image); - opendr_detection_vector_target_t res = infer_nanodet(&model, &image); + load_image("data/nanodet/database/000000000036.jpg", &image); + opendr_detection_target_list_t res = infer_nanodet(&image, &model); free_image(&image); ck_assert(res.size != 0); // Free the model resources - free_detections_vector(&res); + free_detections(&res); free_nanodet_model(&model); } END_TEST diff --git a/tests/sources/c_api/test_pst_gcn.c b/tests/sources/c_api/test_pst_gcn.c deleted file mode 100644 index cec6c24841..0000000000 --- 
a/tests/sources/c_api/test_pst_gcn.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright 2020-2022 OpenDR European Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include -#include "opendr_utils.h" -#include "skeleton_based_action_recognition_pst.h" - -START_TEST(model_creation_test) { - // Create a skeleton based action recognition pst model - pst_model_t model; - - // Load a pretrained model - load_pst_model("data/skeleton_based_action_recognition/progressive_spatiotemporal_gcn/optimized_model/onnx_model.onnx", - &model); - - ck_assert(model.onnx_session); - ck_assert(model.env); - ck_assert(model.session_options); -} -END_TEST - -START_TEST(forward_pass_creation_test) { - // Create a skeleton based action recognition pst model - pst_model_t model; - // Load a pretrained model (see instructions for downloading the data) - load_pst_model("data/skeleton_based_action_recognition/progressive_spatiotemporal_gcn/optimized_model/onnx_model.onnx", - &model); - - // Load a random tensor and perform forward pass - opendr_tensor_t input_tensor; - init_random_opendr_tensor_pst(&input_tensor, &model); - - // Initialize opendr tensor vector for output - opendr_tensor_vector_t output_tensor_vector; - forward_pst(&model, &input_tensor, &output_tensor_vector); - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Load another tensor - init_random_opendr_tensor_pst(&input_tensor, &model); - 
forward_pst(&model, &input_tensor, &output_tensor_vector); - - ck_assert(output_tensor_vector.n_tensors == 1); - - free_tensor(&input_tensor); - free_tensor_vector(&output_tensor_vector); - - // Free the model resources - free_pst_model(&model); -} -END_TEST - -Suite *pst_gcn_suite(void) { - Suite *s; - TCase *tc_core; - - s = suite_create("Pst Gcn"); - tc_core = tcase_create("Core"); - - tcase_add_test(tc_core, model_creation_test); - tcase_add_test(tc_core, forward_pass_creation_test); - suite_add_tcase(s, tc_core); - - return s; -} - -int main() { - int no_failed = 0; - Suite *s; - SRunner *runner; - - s = pst_gcn_suite(); - runner = srunner_create(s); - - srunner_run_all(runner, CK_NORMAL); - no_failed = srunner_ntests_failed(runner); - srunner_free(runner); - return (no_failed == 0) ? EXIT_SUCCESS : EXIT_FAILURE; -} From 70d75431f0c6894c7d53ff7a7ecab40b955dd084 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 16:36:20 +0200 Subject: [PATCH 55/87] Revert "bug fixes in onnx exporting." This reverts commit fcd4f9c3a09d70deb51eec83ad1d469c7290ae27. 
--- .../fair_mot/object_tracking_2d_fair_mot_learner.py | 4 ++-- .../lightweight_open_pose/lightweight_open_pose_learner.py | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py b/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py index 7de05811d3..02cdcdcbaf 100644 --- a/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py +++ b/src/opendr/perception/object_tracking_2d/fair_mot/object_tracking_2d_fair_mot_learner.py @@ -463,12 +463,12 @@ def optimize(self, do_constant_folding=False, img_size=(1088, 608), optimizable_ except FileNotFoundError: # Create temp directory os.makedirs(self.temp_path, exist_ok=True) - self.__convert_to_onnx( + self.__convert_rpn_to_onnx( input_shape, os.path.join(self.temp_path, "onnx_model_temp.onnx"), do_constant_folding ) - self.__load_from_onnx(os.path.join(self.temp_path, "onnx_model_rpn_temp.onnx")) + self.__load_rpn_from_onnx(os.path.join(self.temp_path, "onnx_model_rpn_temp.onnx")) @staticmethod def download(model_name, path, server_url=None): diff --git a/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py b/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py index b9f6ae8656..2305154c4b 100644 --- a/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py +++ b/src/opendr/perception/pose_estimation/lightweight_open_pose/lightweight_open_pose_learner.py @@ -869,8 +869,7 @@ def __convert_to_onnx(self, output_name, do_constant_folding=False, verbose=Fals input_names = ['data'] if self.num_refinement_stages == 2: output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs', - 'stage_1_output_1_heatmaps', 'stage_1_output_0_pafs', - 'stage_2_output_1_heatmaps', 'stage_2_output_0_pafs'] + 'stage_1_output_1_heatmaps', 
'stage_1_output_0_pafs'] else: output_names = ['stage_0_output_1_heatmaps', 'stage_0_output_0_pafs'] From 67c78dfca1c8b9e791b4c371fa54578a652b054f Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Tue, 13 Dec 2022 17:27:06 +0200 Subject: [PATCH 56/87] revert last commits, change files for cpp styles as wiki says and better directory construction for easier navigation and new tools implementation. --- docs/reference/c-opendr-utils-h.md | 23 +- docs/reference/c-target-h.md | 71 ++++- ..._c.h => object_detection_2d_nanodet_jit.h} | 22 +- include/opendr_utils.h | 17 +- include/target.h | 4 +- projects/c_api/Makefile | 22 +- .../nanodet/README.md | 2 +- .../nanodet/nanodet_jit_demo.c | 13 +- src/c_api/Makefile | 5 +- src/c_api/face_recognition.cpp | 12 +- ...pp => object_detection_2d_nanodet_jit.cpp} | 247 +++++++++--------- src/c_api/opendr_utils.cpp | 49 ++-- tests/Makefile | 16 +- tests/sources/c_api/test_nanodet.c | 18 +- 14 files changed, 316 insertions(+), 205 deletions(-) rename include/{nanodet_c.h => object_detection_2d_nanodet_jit.h} (70%) rename projects/c_api/samples/{ => object_detection_2d}/nanodet/README.md (77%) rename projects/c_api/samples/{ => object_detection_2d}/nanodet/nanodet_jit_demo.c (80%) rename src/c_api/{nanodet_libtorch.cpp => object_detection_2d_nanodet_jit.cpp} (54%) diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index 4e76a24258..2efdb04634 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -11,8 +11,6 @@ The *load_image()* function allows for reading an images from the local file sys A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. This function allocates memory during each function call, so be sure to use the *free_image()* function to release the allocated resources, when the corresponding image is no longer needed. 
- - ### Function *free_image()* ```C void free_image(opendr_image_t *image); @@ -20,3 +18,24 @@ void free_image(opendr_image_t *image); The *free_image()* function releases the memory allocated for an OpenDR image structure (*image*). A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. +### Function *init_detections_vector()* +```C +void init_detections_vector(opendr_detection_vector_target_t *detection_vector); +``` +The *init_detections_vector()* function initialize the data of an OpenDR detection vector structure (*detection_vector*) with zero values. +A pointer (*detection_vector*) to an OpenDR *detection_vector_target_t* should be provided. + +### Function *load_detections_vector()* +```C +void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, + int vector_size); +``` +The *load_detections_vector()* function allows for storing OpenDR detection target structures in to the memory allocated for multiple OpenDR detections structures (*detection*). +A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. + +### Function *free_detections_vector()* +```C +void free_detections_vector(opendr_detection_vector_target_t *detection_vector); +``` +The *free_detections_vector()* function releases the memory allocated for an OpenDR detection vector structure (*detection_vector*). +A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. diff --git a/docs/reference/c-target-h.md b/docs/reference/c-target-h.md index 6f748759e8..7eae5bd704 100644 --- a/docs/reference/c-target-h.md +++ b/docs/reference/c-target-h.md @@ -13,7 +13,7 @@ typedef struct opendr_category_target opendr_category_target_t; ``` -The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. 
+The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. Every function in the C API that outputs a classification decision is expected to use this structure. The *opendr_category_target_t* structure has the following field: @@ -25,3 +25,72 @@ A numerical id of the category to which the input objects belongs to. #### `float confidence` field The decision confidence (a value between 0 and 1). + + +### struct *opendr_detection_target_t* +```C +struct opendr_detection_target { + int name; + float left; + float top; + float width; + float height; + float score; +}; +typedef struct opendr_detection_target opendr_detection_target_t; +``` + + +The *opendr_detection_target_t* structure provides a data structure for storing inference outputs of detection models. +Every function in the C API that outputs a detection decision is expected to use this structure or a vector of this structure. + +The *opendr_detection_target_t* structure has the following field: + +#### `int name` field + +A numerical id of the category to which the input objects belongs to. + +#### `float left` field + +A numerical value that corresponds to the X value of the top,left point of a detection. + +#### `float top` field + +A numerical value that corresponds to the Y value of the top,left point of a detection. + +#### `float width` field + +A numerical value that corresponds to the width of a detection. + +#### `float height` field + +A numerical value that corresponds to the height of a detection. + +#### `float score` field + +The decision score (a value between 0 and 1). 
+ + + +### struct *opendr_detection_vector_target_t* +```C +struct opendr_detection_vector_target { + opendr_detection_target_t *starting_pointer; + int size; +}; +typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; +``` + + +The *opendr_detection_vector_target_t* structure provides a data structure for storing multiple inference outputs of detection models. +Every function in the C API that outputs a detection decision is expected to use this or a *detection_target_t* structure. + +The *opendr_detection_vector_target_t* structure has the following field: + +#### `opendr_detection_target_t starting_pointer` field + +A pointer to a memory of multiple OpenDR detection targets. + +#### `int size` field + +A numerical value that represents the number of OpenDR detection structures that are stored. diff --git a/include/nanodet_c.h b/include/object_detection_2d_nanodet_jit.h similarity index 70% rename from include/nanodet_c.h rename to include/object_detection_2d_nanodet_jit.h index 41ba71bedc..e0e59954ce 100644 --- a/include/nanodet_c.h +++ b/include/object_detection_2d_nanodet_jit.h @@ -26,16 +26,16 @@ extern "C" { struct nanodet_model { // Jit cpp class holder - void *net; + void *network; // Device to be used char *device; // Recognition threshold - float scoreThreshold; + float score_threshold; // Model input size - int inputSize[2]; + int input_size[2]; // Keep ratio flag int keep_ratio; @@ -44,22 +44,22 @@ typedef struct nanodet_model nanodet_model_t; /** * Loads a nanodet object detection model saved in libtorch format - * @param model_path path to the libtorch nanodet model (as exported using OpenDR library) + * @param modelPath path to the libtorch nanodet model (as exported using OpenDR library) * @param device the device that will be used for the inference * @param height the height of model input * @param width the width of model input - * @param scoreThreshold a threshold for score to be infered + * @param scoreThreshold a 
threshold for score to be inferred * @param model the model to be loaded */ -void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); +void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); /** * This function performs inference using a nanodet object detection model and an input image * @param model nanodet model to be used for inference * @param image OpenDR image - * @return vecter of OpenDR bounding box target containing the bounding boxes of the detected objects + * @return OpenDR detection vector target containing the detections of the recognized objects */ -opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model); +opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); /** * Releases the memory allocated for a nanodet object detection model @@ -69,11 +69,11 @@ void free_nanodet_model(nanodet_model_t *model); /** * draw the bounding boxes from detections in the given image - * @param opendr_image image that has been used for inference + * @param image image that has been used for inference * @param model nanodet model that has been used for inference - * @param detections output of the inference + * @param detectionsVector output of the inference */ -void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_detection_target_list_t *detections); +void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); #ifdef __cplusplus } diff --git a/include/opendr_utils.h b/include/opendr_utils.h index d15f13312b..3226fcaa4e 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -39,23 +39,24 @@ void free_image(opendr_image_t *image); /** * Initialize an empty detection list to be used in C API - * @param detections OpenDR detection_target_list structure to be initialized + 
* @param detection_vector OpenDR detection_target_list structure to be initialized */ -void initialize_detections(opendr_detection_target_list_t *detections); +void init_detections_vector(opendr_detection_vector_target_t *detection_vector); /** * Loads an OpenDR detection target list to be used in C API - * @param detections OpenDR detection_target_list structure to be loaded - * @param vectorDataPtr the pointer of the first OpenDR detection target in a vector - * @param vectorSize the size of the vector + * @param detection_vector OpenDR detection_target_list structure to be loaded + * @param detection the pointer of the first OpenDR detection target in a vector + * @param vector_size the size of the vector */ -void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize); +void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, + int vector_size); /** * Releases the memory allocated for a detection list structure - * @param detections OpenDR detection_target_list structure to release + * @param detection_vector OpenDR detection vector target structure to release */ -void free_detections(opendr_detection_target_list_t *detections); +void free_detections_vector(opendr_detection_vector_target_t *detection_vector); #ifdef __cplusplus } diff --git a/include/target.h b/include/target.h index 1baf6b55f5..894bc03d2f 100644 --- a/include/target.h +++ b/include/target.h @@ -45,11 +45,11 @@ typedef struct opendr_detection_target opendr_detection_target_t; /*** * OpenDR data type for representing a structure of detection targets */ -struct opendr_detection_target_list { +struct opendr_detection_vector_target { opendr_detection_target_t *starting_pointer; int size; }; -typedef struct opendr_detection_target_list opendr_detection_target_list_t; +typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; #ifdef __cplusplus } diff --git 
a/projects/c_api/Makefile b/projects/c_api/Makefile index b8fb363fd3..9392d28de6 100644 --- a/projects/c_api/Makefile +++ b/projects/c_api/Makefile @@ -33,9 +33,10 @@ OPENDR_LD = -L$(OPENDR_HOME)/lib -lopendr all: download demos -demos: $(BUILD_DIR)/face_recognition_demo $(BUILD_DIR)/nanodet_demo -face: $(BUILD_DIR)/face_recognition_demo -nano: $(BUILD_DIR)/nanodet_demo +demos: face_recognition nanodet + +face_recognition: $(BUILD_DIR)/face_recognition_demo +nanodet: $(BUILD_DIR)/object_detection_2d/nanodet_jit_demo download: @+if [ -a $(DATA_DIR) ] ; \ @@ -49,11 +50,11 @@ download: $(MV) opendrdata.csd.auth.gr/perception/face_recognition/test_data/images $(DATA_DIR)/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/optimized_model/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/optimized_model $(DATA_DIR)/optimized_model; \ - $(MKDIR_P) $(DATA_DIR)/nanodet; \ + $(MKDIR_P) $(DATA_DIR)/object_detection_2d/nanodet; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/nanodet/database; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/object_detection_2d/nanodet/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/nanodet/optimized_model; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/object_detection_2d/nanodet/optimized_model; \ $(RM) -r opendrdata.csd.auth.gr; \ fi; @@ -62,14 +63,13 @@ $(BUILD_DIR)/face_recognition_demo: @+echo "Building face recognition demo..." 
$(CC) $(CFLAGS) -o $(BUILD_DIR)/face_recognition_demo samples/face_recognition/face_recognition_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) -$(BUILD_DIR)/nanodet_demo: - $(MKDIR_P) $(BUILD_DIR) +$(BUILD_DIR)/object_detection_2d/nanodet_jit_demo: + $(MKDIR_P) $(BUILD_DIR)/object_detection_2d @+echo "Building nanodet object detection demo..." - $(CC) $(CFLAGS) -o $(BUILD_DIR)/nanodet_libtorch_demo samples/nanodet/nanodet_jit_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) - + $(CC) $(CFLAGS) -o $(BUILD_DIR)/object_detection_2d/nanodet_jit_demo samples/object_detection_2d/nanodet/nanodet_jit_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) clean: @+echo "Cleaning C API demo binaries and temporary files..." - @+$(RM) $(BUILD_DIR)/* + @+$(RM) -rf $(BUILD_DIR)/* @+$(RM) -rf $(DATA_DIR) @+echo "Done!" diff --git a/projects/c_api/samples/nanodet/README.md b/projects/c_api/samples/object_detection_2d/nanodet/README.md similarity index 77% rename from projects/c_api/samples/nanodet/README.md rename to projects/c_api/samples/object_detection_2d/nanodet/README.md index 8114150843..31d3e90496 100644 --- a/projects/c_api/samples/nanodet/README.md +++ b/projects/c_api/samples/object_detection_2d/nanodet/README.md @@ -11,5 +11,5 @@ After installation, the demo can be run from projects/c_api directory with: Or with the downloaded model and image with: ```sh -./built/nanodet_libtorch_demo ./data/nanodet/optimized_model/nanodet_m.pth cuda ./data/nanodet/database/000000000036.jpg 320 320 +./built/nanodet_libtorch_demo ./data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth cuda ./data/object_detection_2d/nanodet/database/000000000036.jpg 320 320 ``` diff --git a/projects/c_api/samples/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c similarity index 80% rename from projects/c_api/samples/nanodet/nanodet_jit_demo.c rename to projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c index aed0ac5d48..4327696e8c 
100644 --- a/projects/c_api/samples/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c @@ -16,15 +16,15 @@ #include #include -#include "nanodet_c.h" +#include "object_detection_2d_nanodet_jit.h" #include "opendr_utils.h" int main(int argc, char **argv) { if (argc != 6) { fprintf(stderr, "usage: %s [model_path] [device] [images_path] [input_sizes].\n" - "model_path = path/to/your/libtorch/model.pth \n device = cuda or cpu \n" - "images_path = \"xxx/xxx/*.jpg\" \n input_size = width height.\n", + "model_path = path/to/your/libtorch/model.pth \ndevice = cuda or cpu \n" + "images_path = \"xxx/xxx/*.jpg\" \ninput_size = width height.\n", argv[0]); return -1; } @@ -48,16 +48,17 @@ int main(int argc, char **argv) { } // Initialize opendr detection target list; - opendr_detection_target_list_t results; + opendr_detection_vector_target_t results; + init_detections_vector(&results); // Infer nanodet model - results = infer_nanodet(&image, &model); + results = infer_nanodet(&model, &image); // Draw the results drawBboxes(&image, &model, &results); // Free the memory - free_detections(&results); + free_detections_vector(&results); free_image(&image); free_nanodet_model(&model); diff --git a/src/c_api/Makefile b/src/c_api/Makefile index ed9bbeb80c..eff518d1ae 100644 --- a/src/c_api/Makefile +++ b/src/c_api/Makefile @@ -43,9 +43,10 @@ $(OPENDR_HOME)/lib/libopendr.so: @+$(MKDIR_P) $(BUILD_DIR) $(CPP) $(CFLAGS) -c opendr_utils.cpp -o $(BUILD_DIR)/opendr_utils.o $(INC) $(OPENDR_INC) $(CPP) $(CFLAGS) -c face_recognition.cpp -o $(BUILD_DIR)/opendr_face_recognition.o $(INC) $(OPENDR_INC) - $(CPP) $(CFLAGS) -c nanodet_libtorch.cpp -o $(BUILD_DIR)/opendr_nanodet_libtorch.o $(INC) $(OPENDR_INC) $(TORCHSCRIPT_INC) + $(CPP) $(CFLAGS) -c object_detection_2d_nanodet_jit.cpp -o $(BUILD_DIR)/opendr_nanodet_jit.o $(INC) $(OPENDR_INC) $(TORCHSCRIPT_INC) + @$(MKDIR_P) $(LIB_DIR) - $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so 
$(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(BUILD_DIR)/opendr_nanodet_libtorch.o $(LD) $(TORCHSCRIPT_LD) --shared + $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(BUILD_DIR)/opendr_nanodet_jit.o $(LD) $(TORCHSCRIPT_LD) --shared clean: @+echo "Cleaning C API binaries and temporary files..." @+$(RM) $(BUILD_DIR)/* diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index 9ed33b2ba1..b746c68f6b 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -79,12 +79,12 @@ void preprocess_face_recognition(cv::Mat *image, std::vector &data, int r * In the future this can be done at library level using a JSON-parser */ std::string json_get_key_string(std::string json, const std::string &key) { - std::size_t start_idx = json.find(key); - std::string value = json.substr(start_idx); + std::size_t startIdx = json.find(key); + std::string value = json.substr(startIdx); value = value.substr(value.find(":") + 1); - value.resize(value.find(",")); + value = value.substr(0, value.find(",")); value = value.substr(value.find("\"") + 1); - value.resize(value.find("\"")); + value = value.substr(0, value.find("\"")); return value; } @@ -124,7 +124,7 @@ void load_face_recognition_model(const char *model_path, face_recognition_model_ // Parse inference params std::string threshold = json_get_key_string(str, "threshold"); - ; + if (!threshold.empty()) { model->threshold = std::stof(threshold); } @@ -213,7 +213,7 @@ void ff_face_recognition(face_recognition_model_t *model, opendr_image_t *image, std::vector input_node_names = {"data"}; std::vector output_node_names = {"features"}; - // Setup the input tensor + // Set up the input tensor auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); Ort::Value input_tensor = Ort::Value::CreateTensor(memory_info, input_tensor_values.data(), input_tensor_size, 
input_node_dims.data(), 4); diff --git a/src/c_api/nanodet_libtorch.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp similarity index 54% rename from src/c_api/nanodet_libtorch.cpp rename to src/c_api/object_detection_2d_nanodet_jit.cpp index 51f2cbefad..04ac572958 100644 --- a/src/c_api/nanodet_libtorch.cpp +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -12,44 +12,45 @@ // See the License for the specific language governing permissions and // limitations under the License. +#include "object_detection_2d_nanodet_jit.h" + #include #include #include #include #include -#include "nanodet_c.h" /** * Helper class holder of c++ values and jit model. */ class NanoDet { private: - torch::DeviceType device; - torch::jit::script::Module network; - torch::Tensor meanTensor; - torch::Tensor stdTensor; - std::vector labels; + torch::DeviceType mDevice; + torch::jit::script::Module mNetwork; + torch::Tensor mMeanTensor; + torch::Tensor mStdTensor; + std::vector mLabels; public: - NanoDet(torch::jit::script::Module net, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, + NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, std::vector labels); ~NanoDet(); - torch::Tensor mPreProcess(cv::Mat *image); - torch::jit::script::Module net() const; - torch::Tensor meanValues() const; - torch::Tensor stdValues() const; - std::vector classes() const; + torch::Tensor preProcess(cv::Mat *image); + torch::jit::script::Module network() const; + torch::Tensor meanTensor() const; + torch::Tensor stdTensor() const; + std::vector labels() const; std::vector outputs; }; -NanoDet::NanoDet(torch::jit::script::Module net, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, - const std::vector labels) { - this->device = device; - this->network = net; - this->meanTensor = meanValues.clone().to(device); - this->stdTensor = stdValues.clone().to(device); - this->labels = labels; 
+NanoDet::NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, + torch::DeviceType device, const std::vector labels) { + this->mDevice = device; + this->mNetwork = network; + this->mMeanTensor = meanValues.clone().to(device); + this->mStdTensor = stdValues.clone().to(device); + this->mLabels = labels; } NanoDet::~NanoDet() { @@ -61,13 +62,13 @@ NanoDet::~NanoDet() { * Mean and Standard deviation are already part of NanoDet class when is initialized. * @param image, image to be preprocesses */ -torch::Tensor NanoDet::mPreProcess(cv::Mat *image) { +torch::Tensor NanoDet::preProcess(cv::Mat *image) { torch::Tensor tensorImage = torch::from_blob(image->data, {image->rows, image->cols, 3}, torch::kByte); tensorImage = tensorImage.toType(torch::kFloat); - tensorImage = tensorImage.to(this->device); + tensorImage = tensorImage.to(this->mDevice); tensorImage = tensorImage.permute({2, 0, 1}); - tensorImage = tensorImage.add(this->meanTensor); - tensorImage = tensorImage.mul(this->stdTensor); + tensorImage = tensorImage.add(this->mMeanTensor); + tensorImage = tensorImage.mul(this->mStdTensor); return tensorImage; } @@ -75,39 +76,39 @@ torch::Tensor NanoDet::mPreProcess(cv::Mat *image) { /** * Getter for jit model */ -torch::jit::script::Module NanoDet::net() const { - return this->network; +torch::jit::script::Module NanoDet::network() const { + return this->mNetwork; } /** * Getter for tensor with the mean values */ -torch::Tensor NanoDet::meanValues() const { - return this->meanTensor; +torch::Tensor NanoDet::meanTensor() const { + return this->mMeanTensor; } /** * Getter for tensor with the standard deviation values */ -torch::Tensor NanoDet::stdValues() const { - return this->stdTensor; +torch::Tensor NanoDet::stdTensor() const { + return this->mStdTensor; } /** - * Getter of labels for printing + * Getter of labels */ -std::vector NanoDet::classes() const { - return labels; +std::vector NanoDet::labels() const { + return 
this->mLabels; } /** * Helper function to calculate the final shape of the model input relative to size ratio of input image. */ -void get_minimum_dst_shape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { +void get_minimum_dstShape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { float ratio; - float src_ratio = ((float)srcSize->width / (float)srcSize->height); - float dst_ratio = ((float)dstSize->width / (float)dstSize->height); - if (src_ratio < dst_ratio) + float srcRatio = ((float)srcSize->width / (float)srcSize->height); + float dstRatio = ((float)dstSize->width / (float)dstSize->height); + if (srcRatio < dstRatio) ratio = ((float)dstSize->height / (float)srcSize->height); else ratio = ((float)dstSize->width / (float)srcSize->width); @@ -124,32 +125,32 @@ void get_minimum_dst_shape(cv::Size *srcSize, cv::Size *dstSize, float divisible /** * Helper function to calculate the warp matrix for resizing. */ -void get_resize_matrix(cv::Size *src_shape, cv::Size *dst_shape, cv::Mat *Rs, int keep_ratio) { - if (keep_ratio == 1) { +void get_resize_matrix(cv::Size *srcShape, cv::Size *dstShape, cv::Mat *Rs, int keepRatio) { + if (keepRatio == 1) { float ratio; cv::Mat C = cv::Mat::eye(3, 3, CV_32FC1); - C.at(0, 2) = -src_shape->width / 2.0; - C.at(1, 2) = -src_shape->height / 2.0; - float src_ratio = ((float)src_shape->width / (float)src_shape->height); - float dst_ratio = ((float)dst_shape->width / (float)dst_shape->height); - if (src_ratio < dst_ratio) { - ratio = ((float)dst_shape->height / (float)src_shape->height); + C.at(0, 2) = -srcShape->width / 2.0; + C.at(1, 2) = -srcShape->height / 2.0; + float srcRatio = ((float)srcShape->width / (float)srcShape->height); + float dstRatio = ((float)dstShape->width / (float)dstShape->height); + if (srcRatio < dstRatio) { + ratio = ((float)dstShape->height / (float)srcShape->height); } else { - ratio = ((float)dst_shape->width / (float)src_shape->width); + ratio = ((float)dstShape->width / 
(float)srcShape->width); } Rs->at(0, 0) *= ratio; Rs->at(1, 1) *= ratio; cv::Mat T = cv::Mat::eye(3, 3, CV_32FC1); - T.at(0, 2) = 0.5 * dst_shape->width; - T.at(1, 2) = 0.5 * dst_shape->height; + T.at(0, 2) = 0.5 * dstShape->width; + T.at(1, 2) = 0.5 * dstShape->height; *Rs = T * (*Rs) * C; } else { - Rs->at(0, 0) *= (float)dst_shape->width / (float)src_shape->width; - Rs->at(1, 1) *= (float)dst_shape->height / (float)src_shape->height; + Rs->at(0, 0) *= (float)dstShape->width / (float)srcShape->width; + Rs->at(1, 1) *= (float)dstShape->height / (float)srcShape->height; } } @@ -161,28 +162,28 @@ void get_resize_matrix(cv::Size *src_shape, cv::Size *dst_shape, cv::Mat *Rs, in * @param src, image to be preprocesses * @param dst, output image to be used as model input * @param dstSize, final size of the dst - * @param Rs, matrix to be used for warp perspective - * @param keep_ratio, flag for targeting the resized image size relative to input image ratio + * @param warpMatrix, matrix to be used for warp perspective + * @param keepRatio, flag for targeting the resized image size relative to input image ratio */ -void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warp_matrix, int keep_ratio) { +void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warpMatrix, int keepRatio) { cv::Size srcSize = cv::Size(src->cols, src->rows); const float divisible = 0.0; // Get new destination size if keep ratio is wanted - if (keep_ratio == 1) { - get_minimum_dst_shape(&srcSize, dstSize, divisible); + if (keepRatio == 1) { + get_minimum_dstShape(&srcSize, dstSize, divisible); } - get_resize_matrix(&srcSize, dstSize, warp_matrix, keep_ratio); - cv::warpPerspective(*src, *dst, *warp_matrix, *dstSize); + get_resize_matrix(&srcSize, dstSize, warpMatrix, keepRatio); + cv::warpPerspective(*src, *dst, *warpMatrix, *dstSize); } /** * Helper function to determine the device of jit model and tensors. 
*/ -torch::DeviceType torchDevice(char *device_name, int verbose = 0) { +torch::DeviceType torchDevice(char *deviceName, int verbose = 0) { torch::DeviceType device; - if (std::string(device_name) == "cuda") { + if (std::string(deviceName) == "cuda") { if (verbose == 1) printf("to cuda\n"); device = torch::kCUDA; @@ -194,12 +195,12 @@ torch::DeviceType torchDevice(char *device_name, int verbose = 0) { return device; } -void load_nanodet_model(char *model_path, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model) { +void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model) { // Initialize model - model->inputSize[0] = width; - model->inputSize[1] = height; + model->input_size[0] = width; + model->input_size[1] = height; - model->scoreThreshold = scoreThreshold; + model->score_threshold = scoreThreshold; model->keep_ratio = 1; const std::vector labels{ @@ -221,71 +222,74 @@ void load_nanodet_model(char *model_path, char *device, int height, int width, f torch::Tensor stdValues = torch::tensor({{{0.017429f}}, {{0.017507f}}, {{0.017125f}}}); // initialization of jit model and class as holder of c++ values. 
- torch::DeviceType torch_device = torchDevice(device, 1); - torch::jit::script::Module net = torch::jit::load(model_path, torch_device); - net.eval(); + torch::DeviceType initDevice = torchDevice(device, 1); + torch::jit::script::Module network = torch::jit::load(modelPath, initDevice); + network.eval(); - NanoDet *detector = new NanoDet(net, meanTensor, stdValues, torch_device, labels); + NanoDet *detector = new NanoDet(network, meanTensor, stdValues, initDevice, labels); - model->net = static_cast(detector); + model->network = static_cast(detector); } -opendr_detection_target_list_t infer_nanodet(opendr_image_t *image, nanodet_model_t *model) { - NanoDet *networkPTR = static_cast(model->net); - opendr_detection_target_list_t detections; +void ff_nanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, + torch::Tensor *outputs) { + // Make all the inputs as tensors to use in jit model + torch::Tensor srcHeight = torch::tensor(originalSize->width); + torch::Tensor srcWidth = torch::tensor(originalSize->height); + torch::Tensor warpMat = torch::from_blob(warpMatrix->data, {3, 3}); - std::vector dets; - cv::Mat *opencv_image = static_cast(image->data); - if (!opencv_image) { - std::cerr << "Cannot load image for inference." << std::endl; + // Model inference + *outputs = (model->network()).forward({*inputTensor, srcHeight, srcWidth, warpMat}).toTensor(); + *outputs = outputs->to(torch::Device(torch::kCPU, 0)); +} - // Initialize an empty detection to return. - initialize_detections(&detections); - return detections; +opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image) { + NanoDet *networkPTR = static_cast(model->network); + opendr_detection_vector_target_t detectionsVector; + init_detections_vector(&detectionsVector); + + cv::Mat *opencvImage = static_cast(image->data); + if (!opencvImage) { + std::cerr << "Cannot load image for inference." 
<< std::endl; + return detectionsVector; } // Preprocess image and keep values as input in jit model cv::Mat resizedImg; - cv::Size dstSize = cv::Size(model->inputSize[0], model->inputSize[1]); - cv::Mat warp_matrix = cv::Mat::eye(3, 3, CV_32FC1); - preprocess(opencv_image, &resizedImg, &dstSize, &warp_matrix, model->keep_ratio); - torch::Tensor input = networkPTR->mPreProcess(&resizedImg); - - // Make all the inputs as tensors to use in jit model - torch::Tensor srcHeight = torch::tensor(opencv_image->rows); - torch::Tensor srcWidth = torch::tensor(opencv_image->cols); - torch::Tensor warpMatrix = torch::from_blob(warp_matrix.data, {3, 3}); + cv::Size dstSize = cv::Size(model->input_size[0], model->input_size[1]); + cv::Mat warpMatrix = cv::Mat::eye(3, 3, CV_32FC1); + preprocess(opencvImage, &resizedImg, &dstSize, &warpMatrix, model->keep_ratio); + torch::Tensor input = networkPTR->preProcess(&resizedImg); + cv::Size originalSize(opencvImage->cols, opencvImage->rows); - // Model inference - torch::Tensor outputs = (networkPTR->net()).forward({input, srcHeight, srcWidth, warpMatrix}).toTensor(); - outputs = outputs.to(torch::Device(torch::kCPU, 0)); + torch::Tensor outputs; + ff_nanodet(networkPTR, &input, &warpMatrix, &originalSize, &outputs); + std::vector detections; // Postprocessing, find which outputs have better score than threshold and keep them. 
for (int label = 0; label < outputs.size(0); label++) { for (int box = 0; box < outputs.size(1); box++) { - if (outputs[label][box][4].item() > model->scoreThreshold) { - opendr_detection_target_t det; - det.name = label; - det.left = outputs[label][box][0].item(); - det.top = outputs[label][box][1].item(); - det.width = outputs[label][box][2].item() - outputs[label][box][0].item(); - det.height = outputs[label][box][3].item() - outputs[label][box][1].item(); - det.score = outputs[label][box][4].item(); - dets.push_back(det); + if (outputs[label][box][4].item() > model->score_threshold) { + opendr_detection_target_t detection; + detection.name = label; + detection.left = outputs[label][box][0].item(); + detection.top = outputs[label][box][1].item(); + detection.width = outputs[label][box][2].item() - outputs[label][box][0].item(); + detection.height = outputs[label][box][3].item() - outputs[label][box][1].item(); + detection.score = outputs[label][box][4].item(); + detections.push_back(detection); } } } // Put vector detection as C pointer and size - if ((int)dets.size() > 0) - load_detections(&detections, dets.data(), (int)dets.size()); - else - initialize_detections(&detections); + if (static_cast(detections.size()) > 0) + load_detections_vector(&detectionsVector, detections.data(), static_cast(detections.size())); - return detections; + return detectionsVector; } -void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_detection_target_list_t *detections) { +void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector) { const int colorList[80][3] = { //{255 ,255 ,255}, //bg {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, @@ -302,22 +306,23 @@ void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_det {0, 113, 188}, {80, 182, 188}, {127, 127, 0}, }; - std::vector classNames = 
(static_cast(model->net))->classes(); + std::vector classNames = (static_cast(model->network))->labels(); - cv::Mat *opencv_image = static_cast(opendr_image->data); - if (!opencv_image) { + cv::Mat *opencvImage = static_cast(image->data); + if (!opencvImage) { std::cerr << "Cannot load image for inference." << std::endl; return; } - cv::Mat image = (*opencv_image).clone(); - for (size_t i = 0; i < detections->size; i++) { - const opendr_detection_target bbox = (detections->starting_pointer)[i]; + cv::Mat imageWithDetections = (*opencvImage).clone(); + for (size_t i = 0; i < detectionsVector->size; i++) { + const opendr_detection_target bbox = (detectionsVector->starting_pointer)[i]; float score = bbox.score > 1 ? 1 : bbox.score; - if (score > model->scoreThreshold) { + if (score > model->score_threshold) { cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); - cv::rectangle( - image, cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), color); + cv::rectangle(imageWithDetections, + cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), + color); char text[256]; @@ -330,21 +335,23 @@ void drawBboxes(opendr_image_t *opendr_image, nanodet_model_t *model, opendr_det int y = (int)bbox.top; if (y < 0) y = 0; - if (x + labelSize.width > image.cols) - x = image.cols - labelSize.width; + if (x + labelSize.width > imageWithDetections.cols) + x = imageWithDetections.cols - labelSize.width; - cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), color, -1); - cv::putText(image, text, cv::Point(x, y + labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, cv::Scalar(255, 255, 255)); + cv::rectangle(imageWithDetections, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), + color, -1); + cv::putText(imageWithDetections, text, cv::Point(x, y + 
labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, + cv::Scalar(255, 255, 255)); } } - cv::imshow("image", image); + cv::imshow("image", imageWithDetections); cv::waitKey(0); } void free_nanodet_model(nanodet_model_t *model) { - if (model->net) { - NanoDet *networkPTR = static_cast(model->net); + if (model->network) { + NanoDet *networkPTR = static_cast(model->network); delete networkPTR; } } diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index 8eddbed500..88c4214789 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -36,28 +36,35 @@ void free_image(opendr_image_t *image) { } } -void initialize_detections(opendr_detection_target_list_t *detections) { - std::vector dets; - opendr_detection_target_t det; - det.name = -1; - det.left = 0.0; - det.top = 0.0; - det.width = 0.0; - det.height = 0.0; - det.score = 0.0; - dets.push_back(det); - - load_detections(detections, dets.data(), (int)dets.size()); +void init_detections_vector(opendr_detection_vector_target_t *detection_vector) { + detection_vector->starting_pointer = NULL; + + std::vector detections; + opendr_detection_target_t detection; + + detection.name = -1; + detection.left = 0.0; + detection.top = 0.0; + detection.width = 0.0; + detection.height = 0.0; + detection.score = 0.0; + + detections.push_back(detection); + + load_detections_vector(detection_vector, detections.data(), static_cast(detections.size())); } -void load_detections(opendr_detection_target_list_t *detections, opendr_detection_target_t *vectorDataPtr, int vectorSize) { - detections->size = vectorSize; - int sizeOfOutput = (vectorSize) * sizeof(opendr_detection_target_t); - detections->starting_pointer = static_cast(malloc(sizeOfOutput)); - std::memcpy(detections->starting_pointer, vectorDataPtr, sizeOfOutput); +void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, + int vector_size) { + free_detections_vector(detection_vector); + + 
detection_vector->size = vector_size; + int size_of_output = (vector_size) * sizeof(opendr_detection_target_t); + detection_vector->starting_pointer = static_cast(malloc(size_of_output)); + std::memcpy(detection_vector->starting_pointer, detection, size_of_output); } -void free_detections(opendr_detection_target_list_t *detections) { - if (detections->starting_pointer != NULL) - free(detections->starting_pointer); -} \ No newline at end of file +void free_detections_vector(opendr_detection_vector_target_t *detection_vector) { + if (detection_vector->starting_pointer != NULL) + free(detection_vector->starting_pointer); +} diff --git a/tests/Makefile b/tests/Makefile index 9abc7c5671..0fcb5ae69b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -46,11 +46,11 @@ download: $(MV) opendrdata.csd.auth.gr/perception/face_recognition/test_data/images $(DATA_DIR)/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/optimized_model/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/optimized_model $(DATA_DIR)/optimized_model; \ - $(MKDIR_P) $(DATA_DIR)/nanodet; \ + $(MKDIR_P) $(DATA_DIR)/object_detection_2d/nanodet; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/nanodet/database; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/object_detection_2d/nanodet/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model/*; \ - $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/nanodet/optimized_model; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/object_detection_2d/nanodet/optimized_model; \ $(RM) -r opendrdata.csd.auth.gr; \ fi; @@ -71,12 +71,18 @@ $(BUILD_DIR)/test_fmp_gmapping: @+echo "Building Full-Map-Posterior GMapping test..." 
$(CPP) $(CFLAGS) -o $(BUILD_DIR)/test_fmp_gmapping sources/c_api/test_fmp_gmapping.cpp -lboost_unit_test_framework $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) $(FMP_INC) -tests: $(BUILD_DIR)/test_opendr_utils $(BUILD_DIR)/test_face_recognition $(BUILD_DIR)/test_fmp_gmapping + +tests: utils face_recognition nanodet fmp_gmapping + +utils: $(BUILD_DIR)/test_opendr_utils +face_recognition: $(BUILD_DIR)/test_face_recognition +nanodet: $(BUILD_DIR)/test_nanodet +fmp_gmapping: $(BUILD_DIR)/test_fmp_gmapping runtests: download tests @+$(LD_RUN) $(BUILD_DIR)/test_opendr_utils 2>/dev/null - @+$(LD_RUN) $(BUILD_DIR)/test_nanodet 2>/dev/null @+$(LD_RUN) $(BUILD_DIR)/test_face_recognition 2>/dev/null + @+$(LD_RUN) $(BUILD_DIR)/test_nanodet 2>/dev/null @+$(LD_RUN) $(BUILD_DIR)/test_fmp_gmapping clean: diff --git a/tests/sources/c_api/test_nanodet.c b/tests/sources/c_api/test_nanodet.c index b9526a25eb..a15850e39b 100644 --- a/tests/sources/c_api/test_nanodet.c +++ b/tests/sources/c_api/test_nanodet.c @@ -17,22 +17,21 @@ #include #include #include -#include "nanodet_c.h" +#include "object_detection_2d_nanodet_jit.h" #include "opendr_utils.h" START_TEST(model_creation_test) { // Create a nanodet libtorch model nanodet_model_t model; // Load a pretrained model - load_nanodet_model("data/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); - - ck_assert_msg(model.net != 0, "net is NULL"); + load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + ck_assert_msg(model.network != 0, "net is NULL"); // Release the resources free_nanodet_model(&model); // Check if memory steel exist - ck_assert_msg(model.net, "net is NULL"); + ck_assert_msg(model.network, "net is NULL"); } END_TEST @@ -41,18 +40,19 @@ START_TEST(inference_creation_test) { nanodet_model_t model; // Load a pretrained model - load_nanodet_model("data/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + printf("6\n"); + 
load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); // Load an image and performance inference opendr_image_t image; - load_image("data/nanodet/database/000000000036.jpg", &image); - opendr_detection_target_list_t res = infer_nanodet(&image, &model); + load_image("data/object_detection_2d/nanodet/database/000000000036.jpg", &image); + opendr_detection_vector_target_t res = infer_nanodet(&model, &image); free_image(&image); ck_assert(res.size != 0); // Free the model resources - free_detections(&res); + free_detections_vector(&res); free_nanodet_model(&model); } END_TEST From 9a6baf078a2b83d504999c8f8ab037e66e06e41e Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Wed, 14 Dec 2022 12:57:29 +0200 Subject: [PATCH 57/87] change function name to be uniform with the others --- include/object_detection_2d_nanodet_jit.h | 2 +- .../samples/object_detection_2d/nanodet/nanodet_jit_demo.c | 2 +- src/c_api/object_detection_2d_nanodet_jit.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/include/object_detection_2d_nanodet_jit.h b/include/object_detection_2d_nanodet_jit.h index e0e59954ce..993f54a85f 100644 --- a/include/object_detection_2d_nanodet_jit.h +++ b/include/object_detection_2d_nanodet_jit.h @@ -73,7 +73,7 @@ void free_nanodet_model(nanodet_model_t *model); * @param model nanodet model that has been used for inference * @param detectionsVector output of the inference */ -void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); +void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); #ifdef __cplusplus } diff --git a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c index 4327696e8c..789f56767a 100644 --- 
a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c @@ -55,7 +55,7 @@ int main(int argc, char **argv) { results = infer_nanodet(&model, &image); // Draw the results - drawBboxes(&image, &model, &results); + draw_bboxes(&image, &model, &results); // Free the memory free_detections_vector(&results); diff --git a/src/c_api/object_detection_2d_nanodet_jit.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp index 04ac572958..4908996e37 100644 --- a/src/c_api/object_detection_2d_nanodet_jit.cpp +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -289,7 +289,7 @@ opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_im return detectionsVector; } -void drawBboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector) { +void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector) { const int colorList[80][3] = { //{255 ,255 ,255}, //bg {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, From 13d23f92c73c9fa68497bec5d1267fb05dd697be Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Wed, 14 Dec 2022 12:57:48 +0200 Subject: [PATCH 58/87] doc for c nanodet --- .../c-object-detection-2d-nanodet-jit-h.md | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 docs/reference/c-object-detection-2d-nanodet-jit-h.md diff --git a/docs/reference/c-object-detection-2d-nanodet-jit-h.md b/docs/reference/c-object-detection-2d-nanodet-jit-h.md new file mode 100644 index 0000000000..e5ad2f967c --- /dev/null +++ b/docs/reference/c-object-detection-2d-nanodet-jit-h.md @@ -0,0 +1,47 @@ +## C_API: object_detection_2d_nanodet_jit.h + + +The *object_detection_2d_nanodet_jit.h* header provides function definitions that can be used for accessing the OpenDR object detection 2d nanodet tool. 
+ +### Struct *nanodet_model_t* +```C + +struct nanodet_model { + ... +}; +typedef struct nanodet_model nanodet_model_t; +``` +The *nanodet_model_t* structure keeps all the necessary information that are required by the OpenDR object detection 2d nanodet tool (e.g., model weights, normalization information, etc.). + + +### Function *load_nanodet_model()* +```C +void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); +``` + Loads a nanodet object detection model saved in the local filesystem (*modelPath*) in OpenDR format. + This function also initializes a (*device*) Jit network for performing inference using this model. + The pre-trained models should follow the OpenDR conventions. + The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. + +### Function *free_nanodet_model()* +```C +void free_nanodet_model(nanodet_model_t *model); +``` +Releases the memory allocated for an object detection 2d nanodet model (*model*). + + +### Function *infer_nanodet()* +```C +opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); +``` +This function perform inference using an objecte detection 2d nanodet model (*model*) and an input image (*image*). +The function returns an OpenDR detection vector structure with the inference results. + + +### Function *draw_bboxes()* +```C +void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); +``` +This function draws the given detections (*detectionsVector*) into the input image (*image*) and then show it in screen. +The (*model*) keeps all the necessary information. 
+ From a0dbac962cb73fc2ecf13af9495be5ecc33cb9c8 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 15 Dec 2022 16:06:27 +0200 Subject: [PATCH 59/87] Added small Json parser and installation script for easier navigation and universal use for future c api --- Makefile | 1 + dependencies/install_rapidjson.sh | 17 +++++++++++++++++ docs/reference/c-opendr-utils-h.md | 9 +++++++++ include/opendr_utils.h | 7 +++++++ src/c_api/Makefile | 2 +- src/c_api/face_recognition.cpp | 28 +++++----------------------- src/c_api/opendr_utils.cpp | 17 +++++++++++++++++ 7 files changed, 57 insertions(+), 24 deletions(-) create mode 100755 dependencies/install_rapidjson.sh diff --git a/Makefile b/Makefile index 9559547f6b..dffd1227b5 100644 --- a/Makefile +++ b/Makefile @@ -37,6 +37,7 @@ install_compilation_dependencies: @+echo "#"; echo "# * Install Compilation Dependencies *"; echo "#" @+cd dependencies; ./install.sh compilation @+cd dependencies; ./install_onnx.sh + @+cd dependencies; ./install_rapidjson.sh @+cd dependencies; ./install_torch_c_api.sh @+make --silent -C src/opendr/control/mobile_manipulation $(TARGET) OPENDR_HOME="$(OPENDR_HOME)"; @+make --silent -C src/opendr/control/single_demo_grasp $(TARGET) OPENDR_HOME="$(OPENDR_HOME)"; diff --git a/dependencies/install_rapidjson.sh b/dependencies/install_rapidjson.sh new file mode 100755 index 0000000000..a60d44132b --- /dev/null +++ b/dependencies/install_rapidjson.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ ! -d /usr/local/include/rapidjson ]; then + + VERSION="1.1.0" + + wget https://github.com/Tencent/rapidjson/archive/refs/tags/v${VERSION}.tar.gz --quiet + tar zxf v${VERSION}.tar.gz + cd rapidjson-${VERSION} + sudo mkdir -p /usr/local/include/rapidjson + sudo mv include/rapidjson/* /usr/local/include/rapidjson + cd .. 
+ rm -rf rapidjson-${VERSION} + rm -rf v${VERSION}.tar.gz + + +fi diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index 2efdb04634..1040e1d847 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -3,6 +3,15 @@ The *opendr_utils.h* header provides function definitions of OpenDR helpers (e.g., for creating OpenDR images). +### Function *json_get_key_string()* +```C +const char* json_get_key_string(const char *json, const char *key); +``` +The *json_get_key_string()* function allows for reading a json file and return the value of a key. +A pointer (*json*) that have the json string and a pointer (*key*) with the wanted value is needed. + +## + ### Function *load_image()* ```C void load_image(const char *path, opendr_image_t *image); diff --git a/include/opendr_utils.h b/include/opendr_utils.h index 3226fcaa4e..806fdc0d6d 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -24,6 +24,13 @@ extern "C" { #endif +/** + * Json parser for OpenDR model files. + * @param json a string of json file. + * @param key the value to extract from json file. 
+ */ +const char *json_get_key_string(const char *json, const char *key); + /** * Reads an image from path and saves it into OpenDR an image structure * @param path path from which the image will be read diff --git a/src/c_api/Makefile b/src/c_api/Makefile index eff518d1ae..17f4aa5273 100644 --- a/src/c_api/Makefile +++ b/src/c_api/Makefile @@ -25,7 +25,7 @@ BUILD_DIR = $(OPENDR_HOME)/build LIB_DIR = $(OPENDR_HOME)/lib CFLAGS = -fPIC -INC = -I/usr/local/include/onnxruntime/ `pkg-config --cflags opencv4` +INC = -I/usr/local/include/onnxruntime/ -I/usr/local/include/rapidjson/ `pkg-config --cflags opencv4` LD = -L/usr/lib/ -L/usr/local/lib/ -lstdc++ -lm -lonnxruntime `pkg-config --libs opencv4` -lboost_filesystem OPENDR_INC = -I$(OPENDR_HOME)/include OPENDR_LD = -L$(OPENDR_HOME)//lib -lopendr diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index b746c68f6b..a0c8905bf6 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -74,20 +74,6 @@ void preprocess_face_recognition(cv::Mat *image, std::vector &data, int r } } -/** - * Very simple helper function to parse OpenDR model files for face recognition - * In the future this can be done at library level using a JSON-parser - */ -std::string json_get_key_string(std::string json, const std::string &key) { - std::size_t startIdx = json.find(key); - std::string value = json.substr(startIdx); - value = value.substr(value.find(":") + 1); - value = value.substr(0, value.find(",")); - value = value.substr(value.find("\"") + 1); - value = value.substr(0, value.find("\"")); - return value; -} - void load_face_recognition_model(const char *model_path, face_recognition_model_t *model) { // Initialize model model->onnx_session = model->env = model->session_options = NULL; @@ -106,12 +92,8 @@ void load_face_recognition_model(const char *model_path, face_recognition_model_ std::cerr << "Cannot open JSON model file" << std::endl; return; } - - std::string str; - 
in_stream.seekg(0, std::ios::end); - str.reserve(in_stream.tellg()); - in_stream.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(in_stream)), std::istreambuf_iterator()); + std::string str((std::istreambuf_iterator(in_stream)), std::istreambuf_iterator()); + const char *json = str.c_str(); std::string basepath = model_json_path.substr(0, split_pos); split_pos = basepath.find_last_of("/"); @@ -119,11 +101,11 @@ void load_face_recognition_model(const char *model_path, face_recognition_model_ basepath.resize(split_pos); // Parse JSON - std::string onnx_model_path = basepath + json_get_key_string(str, "model_paths"); - std::string model_format = json_get_key_string(str, "format"); + std::string onnx_model_path = basepath + json_get_key_string(json, "model_paths"); + std::string model_format = json_get_key_string(json, "format"); // Parse inference params - std::string threshold = json_get_key_string(str, "threshold"); + std::string threshold = json_get_key_string(json, "threshold"); if (!threshold.empty()) { model->threshold = std::stof(threshold); diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index 88c4214789..fc88f152be 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -20,6 +20,23 @@ #include #include +#include +#include +#include + +const char *json_get_key_string(const char *json, const char *key) { + rapidjson::Document doc; + doc.Parse(json); + if ((!doc.IsObject()) || (!doc.HasMember(key))) { + return ""; + } + const rapidjson::Value &value = doc[key]; + if (!value.IsString()) { + return ""; + } + return value.GetString(); +} + void load_image(const char *path, opendr_image_t *image) { cv::Mat opencv_image = cv::imread(path, cv::IMREAD_COLOR); if (opencv_image.empty()) { From 62c02660d87bfd401f8e44a0e162520dfc72a988 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 16 Dec 2022 16:19:20 +0200 Subject: [PATCH 60/87] Update 
projects/c_api/README.md Co-authored-by: Nikolaos Passalis --- projects/c_api/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/c_api/README.md b/projects/c_api/README.md index ebaccc45ea..bd47689ec5 100644 --- a/projects/c_api/README.md +++ b/projects/c_api/README.md @@ -10,5 +10,5 @@ Make sure that you have downloaded the necessary resources before running the de ## Supported tools Currently, the following tools are exposing a C API: 1. Face recognition -2. Nanodet Jit module +2. Object detection (Nanodet) From 8f010932c3509d4ddb4abd4f67eac41b75f94b82 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 16 Dec 2022 16:19:40 +0200 Subject: [PATCH 61/87] Update dependencies/install_torch_c_api.sh Co-authored-by: Nikolaos Passalis --- dependencies/install_torch_c_api.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh index ddfe8f220c..8ac4922731 100755 --- a/dependencies/install_torch_c_api.sh +++ b/dependencies/install_torch_c_api.sh @@ -44,7 +44,8 @@ if [ ! -f /usr/local/lib/libtorchvision.so ]; then # TORCH VISION INSTALLATION tar zxf vision.tar.gz - cd vision-${VISION_VERSION} + mv vision-* vision + cd vision sudo mkdir -p build cd build sudo cmake .. 
-DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_PREFIX_PATH=${TORCH_DIRECTORY} -DWITH_CUDA=${GPU} From d7e6c867c0f88d173b8c12e8afc5109324d543ae Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 16 Dec 2022 16:19:49 +0200 Subject: [PATCH 62/87] Update dependencies/install_torch_c_api.sh Co-authored-by: Nikolaos Passalis --- dependencies/install_torch_c_api.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh index 8ac4922731..3ccb836990 100755 --- a/dependencies/install_torch_c_api.sh +++ b/dependencies/install_torch_c_api.sh @@ -57,7 +57,7 @@ if [ ! -f /usr/local/lib/libtorchvision.so ]; then sudo rm -rf libtorch sudo rm -rf libtorch.zip - sudo rm -rf vision-${VISION_VERSION} + sudo rm -rf vision sudo rm -rf vision.tar.gz sudo ldconfig From 9ff5c63b333301c5e4b39585a646108b05135c34 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 16 Dec 2022 16:23:57 +0200 Subject: [PATCH 63/87] Update install_torch_c_api.sh not need for specified vision version in sh --- dependencies/install_torch_c_api.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh index 3ccb836990..d6c4c48ff7 100755 --- a/dependencies/install_torch_c_api.sh +++ b/dependencies/install_torch_c_api.sh @@ -10,7 +10,6 @@ fi if [ ! -f /usr/local/lib/libtorchvision.so ]; then TORCH_DIRECTORY="/usr/local/libtorch" - VISION_VERSION="0.14.0" if [[ "$OPENDR_DEVICE" == "gpu" ]] then echo "Downloading and installing libtorch and torchvision (gpu support) ..." 
From c398d753463d10f2209642d4e0d119dc38c04542 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Fri, 16 Dec 2022 16:25:04 +0200 Subject: [PATCH 64/87] Update download_torch.py not need for environment variables to be passed --- dependencies/download_torch.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/dependencies/download_torch.py b/dependencies/download_torch.py index cc908f73ae..aa75d17d46 100644 --- a/dependencies/download_torch.py +++ b/dependencies/download_torch.py @@ -119,8 +119,4 @@ def get_cuda_path(): except: warnings.warn("\033[93m Not torchvision found with your specific torch version.\n" "Please see the torchvision GitHub repository for more information.") - - # Send environment variables to be used with sudo privileges from bash script - os.environ["TORCH_VERSION"] = TORCH_VERSION - os.environ["VISION_VERSION"] = VISION_VERSION - os.environ["DEVICE"] = DEVICE + From d320fba20bf09d55872b856d9eba3a6ad19b48e6 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Thu, 22 Dec 2022 00:23:58 +0200 Subject: [PATCH 65/87] Update utilities.py delete additional print from debugging --- .../nanodet/algorithm/nanodet/inferencer/utilities.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py index 586f396eee..59ee4a06c2 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py @@ -74,7 +74,6 @@ def preprocessing(self, img): meta["img"] = torch.from_numpy(meta["img"].transpose(2, 0, 1)).to(self.device) _input = meta["img"] - print(f"[{_input[0][50][50]}, {_input[1][50][50]}, {_input[2][50][50]}]") _height = 
torch.tensor(height) _width = torch.tensor(width) _warp_matrix = torch.from_numpy(meta["warp_matrix"]) From 39d4c201d76ee6d7faf4fc86085478ce17f4f4a6 Mon Sep 17 00:00:00 2001 From: ad-daniel Date: Thu, 22 Dec 2022 09:46:30 +0100 Subject: [PATCH 66/87] Fixes --- dependencies/download_torch.py | 14 ++--- dependencies/install_torch_c_api.sh | 6 +- .../c-object-detection-2d-nanodet-jit-h.md | 14 ++--- docs/reference/c-opendr-utils-h.md | 8 +-- docs/reference/c-target-h.md | 9 ++- docs/reference/nanodet.md | 57 ++++++++++--------- .../object_detection_2d/nanodet/README.md | 16 +++--- 7 files changed, 62 insertions(+), 62 deletions(-) diff --git a/dependencies/download_torch.py b/dependencies/download_torch.py index aa75d17d46..349ab7102d 100644 --- a/dependencies/download_torch.py +++ b/dependencies/download_torch.py @@ -47,8 +47,8 @@ def get_cuda_path(): if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--cuda_path", help="Path to installed cuda", type=str, default=None) - parser.add_argument("--opendr_device", help="OpenDR variable to install dependencies during installation", - type=str, default="gpu") + parser.add_argument("--opendr_device", help="Target device for installation", + type=str, choices=["gpu", "cpu"], default="gpu") parser.add_argument("--torch_version", help="Version of Libtorch to be installed", type=str, default="1.9.0") args = parser.parse_args() @@ -88,7 +88,7 @@ def get_cuda_path(): CUDA_VERSION = CUDA_VERSION.replace(".", "") CUDA_VERSION = CUDA_VERSION[:3] else: - warnings.warn("\033[93m Not cuda version file found. Please sent an Issue in our github") + warnings.warn("\033[93m Not cuda version file found. 
Please open an Issue in our github.") DEVICE = f"cu{CUDA_VERSION}" except: warnings.warn("\033[93m No cuda found.\n" @@ -106,8 +106,8 @@ def get_cuda_path(): urlretrieve(file_url_libtorch, DOWNLOAD_DIRECTORY) except: - warnings.warn("\033[93m Not Libtorch found with your specific device and torch version.\n" - "Please choose another version of torch or install different CUDA.\n" + warnings.warn("\033[93m No Libtorch found for your specific device and torch version.\n" + "Please choose another version of torch or install a different version of CUDA.\n" "Please reference https://download.pytorch.org/whl/torch_stable.html") exit() # Download Vision @@ -117,6 +117,6 @@ def get_cuda_path(): DOWNLOAD_DIRECTORY = "vision.tar.gz" urlretrieve(file_url_vision, DOWNLOAD_DIRECTORY) except: - warnings.warn("\033[93m Not torchvision found with your specific torch version.\n" + warnings.warn("\033[93m No torchvision found for your specific torch version.\n" "Please see the torchvision GitHub repository for more information.") - + diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh index d6c4c48ff7..6665c09d7c 100755 --- a/dependencies/install_torch_c_api.sh +++ b/dependencies/install_torch_c_api.sh @@ -2,8 +2,8 @@ if [[ -z "$TORCH_VERSION" ]]; then - echo "Specific Torch Version is not defined. Torch version 1.9.0 will be installed" - echo "For specific Torch Version plz defined variable TORCH_VERSION with export TORCH_VERSION=x.x.x." + echo "Torch version not defined, version 1.9.0 will be installed." + echo "For a specific Torch version please define TORCH_VERSION with 'export TORCH_VERSION=x.x.x'" TORCH_VERSION="1.9.0" fi @@ -31,7 +31,7 @@ if [ ! -f /usr/local/lib/libtorchvision.so ]; then else python3 ./download_torch.py --opendr_device "$OPENDR_DEVICE" --torch_version "$TORCH_VERSION" --cuda_path "$CUDA_PATH" fi - echo "Downloading Libtorch and torchvision ... FINIS" + echo "Downloading Libtorch and torchvision ... done." 
# TORCH INSTALLATION unzip -qq libtorch.zip diff --git a/docs/reference/c-object-detection-2d-nanodet-jit-h.md b/docs/reference/c-object-detection-2d-nanodet-jit-h.md index e5ad2f967c..5bfb4959a5 100644 --- a/docs/reference/c-object-detection-2d-nanodet-jit-h.md +++ b/docs/reference/c-object-detection-2d-nanodet-jit-h.md @@ -18,11 +18,11 @@ The *nanodet_model_t* structure keeps all the necessary information that are req ```C void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); ``` - Loads a nanodet object detection model saved in the local filesystem (*modelPath*) in OpenDR format. - This function also initializes a (*device*) Jit network for performing inference using this model. - The pre-trained models should follow the OpenDR conventions. - The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. - +Loads a nanodet object detection model saved in the local filesystem (*modelPath*) in OpenDR format. +This function also initializes a (*device*) Jit network for performing inference using this model. +The pre-trained models should follow the OpenDR conventions. +The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. + ### Function *free_nanodet_model()* ```C void free_nanodet_model(nanodet_model_t *model); @@ -34,7 +34,7 @@ Releases the memory allocated for an object detection 2d nanodet model (*model*) ```C opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); ``` -This function perform inference using an objecte detection 2d nanodet model (*model*) and an input image (*image*). +This function perform inference using an object detection 2d nanodet model (*model*) and an input image (*image*). The function returns an OpenDR detection vector structure with the inference results. 
@@ -42,6 +42,6 @@ The function returns an OpenDR detection vector structure with the inference res ```C void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); ``` -This function draws the given detections (*detectionsVector*) into the input image (*image*) and then show it in screen. +This function draws the given detections (*detectionsVector*) into the input image (*image*) and then shows on screen. The (*model*) keeps all the necessary information. diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index 1040e1d847..31dbc368f0 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -7,7 +7,7 @@ The *opendr_utils.h* header provides function definitions of OpenDR helpers (e.g ```C const char* json_get_key_string(const char *json, const char *key); ``` -The *json_get_key_string()* function allows for reading a json file and return the value of a key. +The *json_get_key_string()* function reads a json file and returns the value of a key. A pointer (*json*) that have the json string and a pointer (*key*) with the wanted value is needed. ## @@ -16,7 +16,7 @@ A pointer (*json*) that have the json string and a pointer (*key*) with the want ```C void load_image(const char *path, opendr_image_t *image); ``` -The *load_image()* function allows for reading an images from the local file system (*path*) into an OpenDR image data type. +The *load_image()* function loads an images from the local file system (*path*) into an OpenDR image data type. A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. This function allocates memory during each function call, so be sure to use the *free_image()* function to release the allocated resources, when the corresponding image is no longer needed. @@ -31,7 +31,7 @@ A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. 
```C void init_detections_vector(opendr_detection_vector_target_t *detection_vector); ``` -The *init_detections_vector()* function initialize the data of an OpenDR detection vector structure (*detection_vector*) with zero values. +The *init_detections_vector()* function initializes the data of an OpenDR detection vector structure (*detection_vector*) with zero values. A pointer (*detection_vector*) to an OpenDR *detection_vector_target_t* should be provided. ### Function *load_detections_vector()* @@ -39,7 +39,7 @@ A pointer (*detection_vector*) to an OpenDR *detection_vector_target_t* should b void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, int vector_size); ``` -The *load_detections_vector()* function allows for storing OpenDR detection target structures in to the memory allocated for multiple OpenDR detections structures (*detection*). +The *load_detections_vector()* function stores OpenDR detection target structures in the memory allocated for multiple OpenDR detections structures (*detection*). A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. ### Function *free_detections_vector()* diff --git a/docs/reference/c-target-h.md b/docs/reference/c-target-h.md index 7eae5bd704..4bbae7f78f 100644 --- a/docs/reference/c-target-h.md +++ b/docs/reference/c-target-h.md @@ -44,7 +44,7 @@ typedef struct opendr_detection_target opendr_detection_target_t; The *opendr_detection_target_t* structure provides a data structure for storing inference outputs of detection models. Every function in the C API that outputs a detection decision is expected to use this structure or a vector of this structure. -The *opendr_detection_target_t* structure has the following field: +The *opendr_detection_target_t* structure has the following fields: #### `int name` field @@ -52,11 +52,11 @@ A numerical id of the category to which the input objects belongs to. 
#### `float left` field -A numerical value that corresponds to the X value of the top,left point of a detection. +A numerical value that corresponds to the X value of the top-left point of a detection. #### `float top` field -A numerical value that corresponds to the Y value of the top,left point of a detection. +A numerical value that corresponds to the Y value of the top-left point of a detection. #### `float width` field @@ -71,7 +71,6 @@ A numerical value that corresponds to the height of a detection. The decision score (a value between 0 and 1). - ### struct *opendr_detection_vector_target_t* ```C struct opendr_detection_vector_target { @@ -85,7 +84,7 @@ typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; The *opendr_detection_vector_target_t* structure provides a data structure for storing multiple inference outputs of detection models. Every function in the C API that outputs a detection decision is expected to use this or a *detection_target_t* structure. -The *opendr_detection_vector_target_t* structure has the following field: +The *opendr_detection_vector_target_t* structure has the following fields: #### `opendr_detection_target_t starting_pointer` field diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md index dcc36c178e..0b427c3336 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/nanodet.md @@ -76,7 +76,7 @@ Parameters: #### `NanodetLearner.eval` ```python -NanodetLearner.eval(self, dataset, verbose, +NanodetLearner.eval(self, dataset, verbose, ) ``` @@ -103,7 +103,7 @@ its width and height, or returns an empty list if no detections were made of the Parameters: - **input** : *Image*\ - Image type object to perform inference on it. + Image type object to perform inference on it. - **threshold**: *float, default=0.35*\ Specifies the threshold for object detection inference. An object is detected if the confidence of the output is higher than the specified threshold. 
@@ -121,8 +121,8 @@ If a model is already present, it will load it instead. Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* for Jit models or *"nanodet_{model_name}.onnx"* for ONNX and a metadata file *"nanodet_{model_name}.json"*. Note: In Onnx optimization, the output model executes the original model's feed forward. -The user must create his or her own pre- and post-processes in order to use the Onnx model in the C API. -On the other side, in Jit optimization the output model does the feed forward and post-processing. +The user must create his or her own pre and post-processes in order to use the Onnx model in the C API. +In Jit optimization the output model does the feed forward and post-processing. For C API it is recommended the Jit optimization and the example that is provided in our [c_api](../../projects/c_api/samples/nanodet/nanodet_jit_demo.c) Parameters: @@ -130,11 +130,12 @@ Parameters: - **export_path**: *str*\ Path to save or load the optimized model. - **initial_img**: *Image*\ - If optimize is called for the first time is needed a dummy input of opendr Image. + If optimize is called for the first time a dummy OpenDR image is needed as input. - **verbose**: *bool, default=True*\ Enables the maximum verbosity and logger. - **optimization**: *str, default="Jit"*\ - It can be Jit or Onnx. It determines what kind of optimization is used. + It can be Jit or Onnx. + It determines what kind of optimization is used. #### `NanodetLearner.save` ```python @@ -201,25 +202,25 @@ Furthermore, demos on performing [training](../../projects/perception/object_det #### Examples -* **Training example using an `ExternalDataset`.** +* **Training example using an `ExternalDataset`** To train properly, the architecture weights must be downloaded in a predefined directory before fit is called, in this case the directory name is "predefined_examples". Default architecture is *'m'*. 
The training and evaluation dataset root should be present in the path provided, along with the annotation files. The default COCO 2017 training data can be found [here](https://cocodataset.org/#download) (train, val, annotations). - All training parameters (optimizer, lr schedule, losses, model parameters etc.) can be changed in the model config file - in [config directori](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config). + All training parameters (optimizer, lr schedule, losses, model parameters etc.) can be changed in the model config file + in [config directori](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config). You can find more informations in [config file detail](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config/config_file_detail.md). For easier use, with NanodetLearner parameters user can overwrite the following parameters: (iters, lr, batch_size, checkpoint_after_iter, checkpoint_load_iter, temp_path, device, weight_decay, warmup_steps, warmup_ratio, lr_schedule_T_max, lr_schedule_eta_min, grad_clip) - + **Note** - + The Nanodet tool can be used with any PASCAL VOC or COCO like dataset. The only thing is needed is to provide the correct root and dataset type. - + If *'voc'* is choosed for *dataset* the directory must look like this: - + - root folder - train - Annotations @@ -241,7 +242,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det - ... On the other hand if *'coco'* is choosed for *dataset* the directory must look like this: - + - root folder - train2017 - image1.jpg @@ -252,9 +253,9 @@ Furthermore, demos on performing [training](../../projects/perception/object_det - image2.jpg - ... 
- annotations - - instances_train2017.json + - instances_train2017.json - instances_val2017.json - + You can change the default annotation and image directories in [dataset](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py) ```python @@ -262,8 +263,8 @@ Furthermore, demos on performing [training](../../projects/perception/object_det from opendr.engine.datasets import ExternalDataset from opendr.perception.object_detection_2d import NanodetLearner - - + + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--dataset", help="Dataset to train on", type=str, default="coco", choices=["voc", "coco"]) @@ -293,12 +294,12 @@ Furthermore, demos on performing [training](../../projects/perception/object_det nanodet.save() ``` - -* **Inference and result drawing example on a test image.** + +* **Inference and result drawing example on a test image** This example shows how to perform inference on an image and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. - In this example first is downloaded a pre-trained model as in training example and then an image to be inference. - With the *path* parameter you can define the image file to be used in inference. + In this example, a pre-trained model is downloaded and inference performed on an image that can be specified with the *path* parameter. 
+ ```python import argparse from opendr.perception.object_detection_2d import NanodetLearner @@ -309,7 +310,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det parser = argparse.ArgumentParser() parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') - parser.add_argument("--path", help="Path to the image that will be used for inference", type=str, + parser.add_argument("--path", help="Path to the image that will be used for inference", type=str, default="./predefined_examples/000000000036.jpg") args = parser.parse_args() @@ -322,8 +323,8 @@ Furthermore, demos on performing [training](../../projects/perception/object_det draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) ``` - -* **Optimization framework with Inference and result drawing example on a test image.** + +* **Optimization framework with Inference and result drawing example on a test image** This example shows how to perform optimization on a pretrained model, inference and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. In this example first a pretrained model is loaded and then an image is used to perform the optimization, in this example we use onnx optimization but Jit can also be used by passing `--optimization=jit`. 
@@ -333,14 +334,14 @@ Furthermore, demos on performing [training](../../projects/perception/object_det import argparse from opendr.engine.data import Image from opendr.perception.object_detection_2d import NanodetLearner, draw_bounding_boxes - - + + if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') parser.add_argument("--optimization", help="Optimization framework to be used", type=str, default='onnx', choices=['jit', 'onnx']) - parser.add_argument("--path", help="Path to the dummy image that will be used for optimization and inference", type=str, + parser.add_argument("--path", help="Path to the dummy image that will be used for optimization and inference", type=str, default="./predefined_examples/000000000036.jpg") args = parser.parse_args() diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index 859e343542..eec7ee01b4 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -1,17 +1,17 @@ # NanoDet Demos -This folder contains minimal code usage examples that showcase the basic functionality of the NanodetLearner +This folder contains minimal code usage examples that showcase the basic functionality of the NanodetLearner provided by OpenDR. Specifically the following examples are provided: 1. inference_demo.py: Perform inference on a single image in a directory. Setting `--device cpu` performs inference on CPU. -2. eval_demo.py: Perform evaluation on the `COCO dataset`, implemented in OpenDR format. The user must first download - the dataset and provide the path to the dataset root via `--data-root /path/to/coco_dataset`. 
- Setting `--device cpu` performs evaluation on CPU. - +2. eval_demo.py: Perform evaluation on the `COCO dataset`, implemented in OpenDR format. The user must first download + the dataset and provide the path to the dataset root via `--data-root /path/to/coco_dataset`. + Setting `--device cpu` performs evaluation on CPU. + 3. train_demo.py: Fit learner to dataset. PASCAL VOC and COCO datasets are supported via `ExternalDataset` class. Provided is an example of training on `COCO dataset`. The user must set the dataset type using the `--dataset` argument and provide the dataset root path with the `--data-root` argument. Setting the config file for the specific model is done with `--model "wanted model name"`. Setting `--device cpu` performs training on CPU. Additional command - line arguments can be set to overwrite various training hyperparameters from the provided config file, and running + line arguments can be set to overwrite various training hyperparameters from the provided config file, and running `python3 train_demo.py -h` prints information about them on stdout. Example usage: @@ -22,7 +22,7 @@ provided by OpenDR. Specifically the following examples are provided: Note: Onnx model only runs the inference of the actual model. If you want the preprocessing of the output you must use jit optimization. It is recommended to additionally install the onnxsim dependencies with `pip install onnxsim` for smaller and better optimized models. 5. export_torchscript: Export the pretrained model into the Jit optimization format. Setting the config file for the specific - model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. - Note: if you're planning on using C api, Jit optimization is preferred, so it can be used for the same postprocessing of the output + model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. 
+ Note: if you're planning on using C api, Jit optimization is preferred, so it can be used for the same postprocessing of the output and have same exact detection as the python api. 6. inference_tutorial: A simple tutorial in jupyter for easier use of Nanodet in inference. \ No newline at end of file From 3b24ab3e5d8a0ce2d9f60639ee938f3fe03ec2e3 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Fri, 23 Dec 2022 18:34:45 +0200 Subject: [PATCH 67/87] Fixes of suggestions Combine optimizations and inference in one script --- .../object_detection_2d/nanodet/README.md | 30 +++++++-------- .../nanodet/export_onnx.py | 37 ------------------- .../nanodet/export_torchscript.py | 37 ------------------- .../nanodet/inference_demo.py | 10 ++++- 4 files changed, 23 insertions(+), 91 deletions(-) delete mode 100644 projects/python/perception/object_detection_2d/nanodet/export_onnx.py delete mode 100644 projects/python/perception/object_detection_2d/nanodet/export_torchscript.py diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index eec7ee01b4..7acfb2a43c 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -3,26 +3,26 @@ This folder contains minimal code usage examples that showcase the basic functionality of the NanodetLearner provided by OpenDR. Specifically the following examples are provided: 1. inference_demo.py: Perform inference on a single image in a directory. Setting `--device cpu` performs inference on CPU. + Setting the config file for the specific model is done with `--model "model name"`. + Inference is used optimization [ONNX or JIT] if specified in `--optimize onnx` or `--optimize jit`. + If optimization is used, first an optimized model will be exported and then inference will be performed. 
+ + In ONNX it is recommended to install `onnxsim` dependencies with `pip install onnxsim` on OpenDR's virtual environment, for smaller and better optimized models. + + If user is planning on using the C API, JIT optimization is preferred, so it can be used for the same postprocessing of the output + and have exactly the same detection as the python API. + 2. eval_demo.py: Perform evaluation on the `COCO dataset`, implemented in OpenDR format. The user must first download the dataset and provide the path to the dataset root via `--data-root /path/to/coco_dataset`. Setting `--device cpu` performs evaluation on CPU. -3. train_demo.py: Fit learner to dataset. PASCAL VOC and COCO datasets are supported via `ExternalDataset` class. - Provided is an example of training on `COCO dataset`. The user must set the dataset type using the `--dataset` +3. train_demo.py: Fit learner to dataset. PASCAL VOC and COCO datasets are supported via the `ExternalDataset` class. + An example of training on the COCO dataset is provided. The user must set the dataset type using the `--dataset` argument and provide the dataset root path with the `--data-root` argument. Setting the config file for the specific - model is done with `--model "wanted model name"`. Setting `--device cpu` performs training on CPU. Additional command - line arguments can be set to overwrite various training hyperparameters from the provided config file, and running - `python3 train_demo.py -h` prints information about them on stdout. + model is done with `--model "model name"`. Setting `--device cpu` performs training on CPU. Additional command + line arguments can be set to overwrite various training hyperparameters from the provided config file, run `python3 train_demo.py -h` prints information about them on stdout. Example usage: - `python3 train_demo.py --model plus-m_416 --dataset coco --data-root /path/to/coco_dataset` + `python3 train_demo.py --model m --dataset coco --data-root /path/to/coco_dataset` -4. 
export_onnx: Export the pretrained model into the onnx optimization format. Setting the config file for the specific - model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. - Note: Onnx model only runs the inference of the actual model. If you want the preprocessing of the output you must use jit optimization. - It is recommended to additionally install the onnxsim dependencies with `pip install onnxsim` for smaller and better optimized models. -5. export_torchscript: Export the pretrained model into the Jit optimization format. Setting the config file for the specific - model is done with `--model "wanted model name"`. Setting `--device cpu` performs the optimization inference on CPU. - Note: if you're planning on using C api, Jit optimization is preferred, so it can be used for the same postprocessing of the output - and have same exact detection as the python api. -6. inference_tutorial: A simple tutorial in jupyter for easier use of Nanodet in inference. \ No newline at end of file +4. inference_tutorial.ipynb: A simple tutorial in jupyter for using the Nanodet tool for inference. \ No newline at end of file diff --git a/projects/python/perception/object_detection_2d/nanodet/export_onnx.py b/projects/python/perception/object_detection_2d/nanodet/export_onnx.py deleted file mode 100644 index ac1a2239fd..0000000000 --- a/projects/python/perception/object_detection_2d/nanodet/export_onnx.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2020-2022 OpenDR European Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -from opendr.engine.data import Image -from opendr.perception.object_detection_2d import NanodetLearner, draw_bounding_boxes - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') - args = parser.parse_args() - - nanodet = NanodetLearner(model_to_use=args.model, device=args.device) - nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) - - nanodet.download("./predefined_examples", mode="images") - # First read an openDR image from your dataset and run the optimizer: - img = Image.open("./predefined_examples/000000000036.jpg") - nanodet.optimize("./onnx/nanodet_{}".format(args.model), img, optimization="onnx") - - boxes = nanodet.infer(input=img) - - draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) diff --git a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py b/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py deleted file mode 100644 index aac86c7a24..0000000000 --- a/projects/python/perception/object_detection_2d/nanodet/export_torchscript.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright 2020-2022 OpenDR European Project -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in 
compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -from opendr.engine.data import Image -from opendr.perception.object_detection_2d import NanodetLearner, draw_bounding_boxes - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') - args = parser.parse_args() - - nanodet = NanodetLearner(model_to_use=args.model, device=args.device) - nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) - - nanodet.download("./predefined_examples", mode="images") - # First read an openDR image from your dataset and run the optimizer: - img = Image.open("./predefined_examples/000000000036.jpg") - nanodet.optimize("./jit/nanodet_{}".format(args.model), img, optimization="jit") - - boxes = nanodet.infer(input=img) - - draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) diff --git a/projects/python/perception/object_detection_2d/nanodet/inference_demo.py b/projects/python/perception/object_detection_2d/nanodet/inference_demo.py index 048a5b8d0f..d0f754cd2d 100644 --- a/projects/python/perception/object_detection_2d/nanodet/inference_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/inference_demo.py @@ -21,16 +21,22 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("--device", 
help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') - parser.add_argument("--path", help="Path to the image that want to infer", type=str, + parser.add_argument("--model", help="Model for which a config file will be used", type=str, default="m") + parser.add_argument("--path", help="Path to the image that is used for inference", type=str, default="./predefined_examples/000000000036.jpg") + parser.add_argument("--optimize", help="", type=str, default="", choices=["", "onnx", "jit"]) args = parser.parse_args() nanodet = NanodetLearner(model_to_use=args.model, device=args.device) nanodet.download("./predefined_examples", mode="pretrained") nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) nanodet.download("./predefined_examples", mode="images") + img = Image.open(args.path) + + if args.optimize != "": + nanodet.optimize("./{}/nanodet_{}".format(args.optimize, args.model), img, optimization=args.optimize) + boxes = nanodet.infer(input=img) draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) From fb0c4febc23e0ed14cc65f31802717f89f19e24f Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Fri, 23 Dec 2022 18:36:27 +0200 Subject: [PATCH 68/87] Fixes of suggestions Add more JSON parser capabilities Fix face recognition threshold not readed from JSON --- .../c-object-detection-2d-nanodet-jit-h.md | 12 +- docs/reference/c-opendr-utils-h.md | 33 ++- docs/reference/c-target-h.md | 2 +- ...odet.md => object-detection-2d-nanodet.md} | 225 +++++++++++------- include/face_recognition.h | 8 +- include/object_detection_2d_nanodet_jit.h | 14 +- include/opendr_utils.h | 34 ++- projects/c_api/README.md | 2 +- .../object_detection_2d/nanodet/README.md | 2 +- .../nanodet/nanodet_jit_demo.c | 4 - src/c_api/README.md | 2 +- src/c_api/face_recognition.cpp | 14 +- tests/sources/c_api/test_nanodet.c | 1 
- 13 files changed, 223 insertions(+), 130 deletions(-) rename docs/reference/{nanodet.md => object-detection-2d-nanodet.md} (55%) diff --git a/docs/reference/c-object-detection-2d-nanodet-jit-h.md b/docs/reference/c-object-detection-2d-nanodet-jit-h.md index 5bfb4959a5..f4b14ef0b0 100644 --- a/docs/reference/c-object-detection-2d-nanodet-jit-h.md +++ b/docs/reference/c-object-detection-2d-nanodet-jit-h.md @@ -1,7 +1,7 @@ ## C_API: object_detection_2d_nanodet_jit.h -The *object_detection_2d_nanodet_jit.h* header provides function definitions that can be used for accessing the OpenDR object detection 2d nanodet tool. +The *object_detection_2d_nanodet_jit.h* header provides function definitions that can be used for accessing the OpenDR object detection 2D nanodet tool. ### Struct *nanodet_model_t* ```C @@ -11,7 +11,7 @@ struct nanodet_model { }; typedef struct nanodet_model nanodet_model_t; ``` -The *nanodet_model_t* structure keeps all the necessary information that are required by the OpenDR object detection 2d nanodet tool (e.g., model weights, normalization information, etc.). +The *nanodet_model_t* structure keeps all the necessary information that are required by the OpenDR object detection 2D nanodet tool (e.g., model weights, normalization information, etc.). ### Function *load_nanodet_model()* @@ -19,7 +19,7 @@ The *nanodet_model_t* structure keeps all the necessary information that are req void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); ``` Loads a nanodet object detection model saved in the local filesystem (*modelPath*) in OpenDR format. -This function also initializes a (*device*) Jit network for performing inference using this model. +This function also initializes a (*device*) JIT network for performing inference using this model. The pre-trained models should follow the OpenDR conventions. 
The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. @@ -27,14 +27,14 @@ The Python API can be used to train and export an optimized OpenDR model that ca ```C void free_nanodet_model(nanodet_model_t *model); ``` -Releases the memory allocated for an object detection 2d nanodet model (*model*). +Releases the memory allocated for an object detection 2D nanodet model (*model*). ### Function *infer_nanodet()* ```C opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); ``` -This function perform inference using an object detection 2d nanodet model (*model*) and an input image (*image*). +This function performs inference using an object detection 2D nanodet model (*model*) and an input image (*image*). The function returns an OpenDR detection vector structure with the inference results. @@ -42,6 +42,6 @@ The function returns an OpenDR detection vector structure with the inference res ```C void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); ``` -This function draws the given detections (*detectionsVector*) into the input image (*image*) and then shows on screen. +This function draws the given detections (*detectionsVector*) onto the input image (*image*) and then shows the image on screen. The (*model*) keeps all the necessary information. diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index 31dbc368f0..ec63103586 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -7,16 +7,39 @@ The *opendr_utils.h* header provides function definitions of OpenDR helpers (e.g ```C const char* json_get_key_string(const char *json, const char *key); ``` -The *json_get_key_string()* function reads a json file and returns the value of a key. -A pointer (*json*) that have the json string and a pointer (*key*) with the wanted value is needed.
+The *json_get_key_string()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*). -## +### Function *json_get_key_string()* +```C +const char* json_get_key_string(const char *json, const char *key, const int index); +``` +The *json_get_key_string()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) as a string. +If the value is an array it will return only the (*index*) value of the array. +If it fails, it returns (*""*). + +### Function *json_get_key_float()* +```C +float json_get_key_float(const char *json, const char *key, const int index); +``` +The *json_get_key_float()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) as a float. +If the value is an array it will return only the (*index*) value of the array. +If it fails, it returns (*0.0f*). + +### Function *json_get_key_from_inference_params()* +```C +float json_get_key_from_inference_params(const char *json, const char *key, const int index); +``` +The *json_get_key_from_inference_params()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) in the inference_params section as a float. +If the value is an array it will return only the (*index*) value of the array. +If it fails, it returns (*0.0f*). + +--- ### Function *load_image()* ```C void load_image(const char *path, opendr_image_t *image); ``` -The *load_image()* function loads an images from the local file system (*path*) into an OpenDR image data type. +The *load_image()* function loads an image from the local file system (*path*) into an OpenDR image data type. A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. This function allocates memory during each function call, so be sure to use the *free_image()* function to release the allocated resources, when the corresponding image is no longer needed.
@@ -27,6 +50,8 @@ void free_image(opendr_image_t *image); The *free_image()* function releases the memory allocated for an OpenDR image structure (*image*). A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. +--- + ### Function *init_detections_vector()* ```C void init_detections_vector(opendr_detection_vector_target_t *detection_vector); diff --git a/docs/reference/c-target-h.md b/docs/reference/c-target-h.md index 4bbae7f78f..786a45c456 100644 --- a/docs/reference/c-target-h.md +++ b/docs/reference/c-target-h.md @@ -16,7 +16,7 @@ typedef struct opendr_category_target opendr_category_target_t; The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. Every function in the C API that outputs a classification decision is expected to use this structure. -The *opendr_category_target_t* structure has the following field: +The *opendr_category_target_t* structure has the following fields: #### `int data` field diff --git a/docs/reference/nanodet.md b/docs/reference/object-detection-2d-nanodet.md similarity index 55% rename from docs/reference/nanodet.md rename to docs/reference/object-detection-2d-nanodet.md index 0b427c3336..e5814c0885 100644 --- a/docs/reference/nanodet.md +++ b/docs/reference/object-detection-2d-nanodet.md @@ -52,7 +52,7 @@ Constructor parameters: #### `NanodetLearner.fit` ```python -NanodetLearner.fit(self, dataset, val_dataset, logging_path, verbose, seed, local_rank) +NanodetLearner.fit(self, dataset, val_dataset, logging_path, verbose, logging, seed, local_rank) ``` This method is used for training the algorithm on a train dataset and validating on a val dataset. @@ -68,6 +68,8 @@ Parameters: - **logging_path** : *str, default=''*\ Subdirectory in temp_path to save log files and TensorBoard. - **verbose** : *bool, default=True*\ + Enables verbosity. +- **logging** : *bool, default=True*\ Enables the maximum verbosity and the logger. 
- **seed** : *int, default=123*\ Seed for repeatability. @@ -76,8 +78,7 @@ Parameters: #### `NanodetLearner.eval` ```python -NanodetLearner.eval(self, dataset, verbose, -) +NanodetLearner.eval(self, dataset, verbose, logging, local_rank) ``` This method is used to evaluate a trained model on an evaluation dataset. @@ -88,13 +89,15 @@ Parameters: - **dataset** : *ExternalDataset*\ Object that holds the evaluation dataset. - **verbose**: *bool, default=True*\ + Enables verbosity. +- **logging**: *bool, default=False*\ Enables the maximum verbosity and logger. - **local_rank** : *int, default=1*\ Needed if evaluating on multiple machines. #### `NanodetLearner.infer` ```python -NanodetLearner.infer(self, input, thershold, verbose) +NanodetLearner.infer(self, input, threshold) ``` This method is used to perform object detection on an image. @@ -104,38 +107,35 @@ its width and height, or returns an empty list if no detections were made of the Parameters: - **input** : *Image*\ Image type object to perform inference on it. - - **threshold**: *float, default=0.35*\ +- **threshold**: *float, default=0.35*\ Specifies the threshold for object detection inference. An object is detected if the confidence of the output is higher than the specified threshold. -- **verbose**: *bool, default=True*\ - Enables the maximum verbosity and logger. #### `NanodetLearner.optimize` ```python -NanodetLearner.optimize(self, export_path, initial_img=None, verbose=True, optimization="jit") +NanodetLearner.optimize(self, export_path, initial_img, verbose, optimization) ``` -This method is used to perform Jit or Onnx optimizations and save a trained model with its metadata. -If a model is not present in the location specified by "export_path", the optimizer will save it there. +This method is used to perform JIT or ONNX optimizations and save a trained model with its metadata. +If a model is not present in the location specified by *export_path*, the optimizer will save it there.
If a model is already present, it will load it instead. -Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* for Jit models or *"nanodet_{model_name}.onnx"* for ONNX and a metadata file *"nanodet_{model_name}.json"*. +Inside this folder, the model is saved as *nanodet_{model_name}.pth* for JIT models or *nanodet_{model_name}.onnx* for ONNX and a metadata file *nanodet_{model_name}.json*. -Note: In Onnx optimization, the output model executes the original model's feed forward. -The user must create his or her own pre and post-processes in order to use the Onnx model in the C API. -In Jit optimization the output model does the feed forward and post-processing. -For C API it is recommended the Jit optimization and the example that is provided in our [c_api](../../projects/c_api/samples/nanodet/nanodet_jit_demo.c) +Note: In ONNX optimization, the output model executes the original model's feed forward method. +The user must create their own pre- and post-processes in order to use the ONNX model in the C API. +In JIT optimization the output model performs the feed forward pass and post-processing. +To use the C API, it is recommended to use JIT optimization as shown in the [example of OpenDR's C API](../../projects/c_api/samples/nanodet/nanodet_jit_demo.c) Parameters: - **export_path**: *str*\ Path to save or load the optimized model. -- **initial_img**: *Image*\ +- **initial_img**: *Image*, default=None\ If optimize is called for the first time a dummy OpenDR image is needed as input. - **verbose**: *bool, default=True*\ - Enables the maximum verbosity and logger. -- **optimization**: *str, default="Jit"*\ - It can be Jit or Onnx. - It determines what kind of optimization is used. + Enables the maximum verbosity. +- **optimization**: *str, default="jit"*\ + It determines what kind of optimization is used, possible values are *jit* or *onnx*. 
#### `NanodetLearner.save` ```python @@ -143,9 +143,9 @@ NanodetLearner.save(self, path, verbose) ``` This method is used to save a trained model with its metadata. -Provided with the path, it creates the "path" directory, if it does not already exist. -Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* and a metadata file *"nanodet_{model_name}.json"*. -If the directory already exists, the *"nanodet_{model_name}.pth"* and *"nanodet_{model_name}.json"* files are overwritten. +Provided with the path, it creates the *path* directory, if it does not already exist. +Inside this folder, the model is saved as *nanodet_{model_name}.pth* and a metadata file *nanodet_{model_name}.json*. +If the directory already exists, the *nanodet_{model_name}.pth* and *nanodet_{model_name}.json* files are overwritten. If optimization is performed, the optimized model is saved instead. Parameters: @@ -169,7 +169,7 @@ Parameters: - **path**: *str, default=None*\ Path of the model to be loaded. - **verbose**: *bool, default=True*\ - Enables the maximum verbosity and logger. + Enables the maximum verbosity. #### `NanodetLearner.download` ```python @@ -186,7 +186,7 @@ Parameters: If *'pretrained'*, downloads a pretrained detector model from the *model_to_use* architecture which was chosen at learner initialization. If *'images'*, downloads an image to perform inference on. If *'test_data'* downloads a dummy dataset for testing purposes. - **verbose**: *bool, default=False*\ - Enables the maximum verbosity and logger. + Enables the maximum verbosity. - **url**: *str, default=OpenDR FTP URL*\ URL of the FTP server. @@ -209,17 +209,17 @@ Furthermore, demos on performing [training](../../projects/perception/object_det The training and evaluation dataset root should be present in the path provided, along with the annotation files. The default COCO 2017 training data can be found [here](https://cocodataset.org/#download) (train, val, annotations). 
All training parameters (optimizer, lr schedule, losses, model parameters etc.) can be changed in the model config file - in [config directori](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config). - You can find more informations in [config file detail](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config/config_file_detail.md). - For easier use, with NanodetLearner parameters user can overwrite the following parameters: + in [config directory](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config). + You can find more information in [corresponding documentation](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config/config_file_detail.md). + For easier usage of the NanodetLearner, the user can overwrite the following parameters: (iters, lr, batch_size, checkpoint_after_iter, checkpoint_load_iter, temp_path, device, weight_decay, warmup_steps, warmup_ratio, lr_schedule_T_max, lr_schedule_eta_min, grad_clip) **Note** - The Nanodet tool can be used with any PASCAL VOC or COCO like dataset. The only thing is needed is to provide the correct root and dataset type. + The Nanodet tool can be used with any PASCAL VOC- or COCO-like dataset, by providing the correct root and dataset type. - If *'voc'* is choosed for *dataset* the directory must look like this: + If *'voc'* is chosen for *dataset*, the directory must look like this: - root folder - train @@ -241,7 +241,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det - image2.jpg - ... 
- On the other hand if *'coco'* is choosed for *dataset* the directory must look like this: + On the other hand, if *'coco'* is chosen for *dataset*, the directory must look like this: - root folder - train2017 @@ -256,40 +256,23 @@ Furthermore, demos on performing [training](../../projects/perception/object_det - instances_train2017.json - instances_val2017.json - You can change the default annotation and image directories in [dataset](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py) - + You can change the default annotation and image directories in [the *build_dataset* function](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py). + This example assumes the data has been downloaded and placed in the directory referenced by `data_root`. ```python - import argparse - from opendr.engine.datasets import ExternalDataset from opendr.perception.object_detection_2d import NanodetLearner if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--dataset", help="Dataset to train on", type=str, default="coco", choices=["voc", "coco"]) - parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str, default="m") - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--batch-size", help="Batch size to use for training", type=int, default=6) - parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=5e-4) - parser.add_argument("--checkpoint-freq", help="Frequency in-between checkpoint saving and evaluations", - type=int, default=50) - parser.add_argument("--n-epochs", help="Number of total epochs", type=int, default=300) - parser.add_argument("--resume-from", help="Epoch to load checkpoint file and resume training from", - type=int, 
default=0) - - args = parser.parse_args() - - dataset = ExternalDataset(args.data_root, args.dataset) - val_dataset = ExternalDataset(args.data_root, args.dataset) - - nanodet = NanodetLearner(model_to_use=args.model, iters=args.n_epochs, lr=args.lr, batch_size=args.batch_size, - checkpoint_after_iter=args.checkpoint_freq, checkpoint_load_iter=args.resume_from, - device=args.device) + dataset = ExternalDataset(data_root, 'voc') + val_dataset = ExternalDataset(data_root, 'voc') + + nanodet = NanodetLearner(model_to_use='m', iters=300, lr=5e-4, batch_size=8, + checkpoint_after_iter=50, checkpoint_load_iter=0, + device="cpu") nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) + nanodet.load("./predefined_examples/nanodet_m", verbose=True) nanodet.fit(dataset, val_dataset) nanodet.save() @@ -298,27 +281,19 @@ Furthermore, demos on performing [training](../../projects/perception/object_det * **Inference and result drawing example on a test image** This example shows how to perform inference on an image and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. - In this example, a pre-trained model is downloaded and inference performed on an image that can be specified with the *path* parameter. + In this example, a pre-trained model is downloaded and inference is performed on an image that can be specified with the *path* parameter. 
```python - import argparse from opendr.perception.object_detection_2d import NanodetLearner from opendr.engine.data import Image from opendr.perception.object_detection_2d import draw_bounding_boxes if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') - parser.add_argument("--path", help="Path to the image that will be used for inference", type=str, - default="./predefined_examples/000000000036.jpg") - args = parser.parse_args() - - nanodet = NanodetLearner(model_to_use=args.model, device=args.device) + nanodet = NanodetLearner(model_to_use='m', device="cpu") nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) + nanodet.load("./predefined_examples/nanodet_m", verbose=True) nanodet.download("./predefined_examples", mode="images") - img = Image.open(args.path) + img = Image.open("./predefined_examples/000000000036.jpg") boxes = nanodet.infer(input=img) draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) @@ -326,33 +301,113 @@ Furthermore, demos on performing [training](../../projects/perception/object_det * **Optimization framework with Inference and result drawing example on a test image** - This example shows how to perform optimization on a pretrained model, inference and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. - In this example first a pretrained model is loaded and then an image is used to perform the optimization, in this example we use onnx optimization but Jit can also be used by passing `--optimization=jit`. 
+ This example shows how to perform optimization on a pretrained model, then run inference on an image and finally draw the resulting bounding boxes, using a nanodet model that is pretrained on the COCO dataset. + In this example we use ONNX optimization, but JIT can also be used by changing *optimization* to *jit*. With the *path* parameter you can define the image file to be used as dummy input for the optimization and inference. The optimized model will be saved in the `./optimization_models` folder ```python - import argparse from opendr.engine.data import Image from opendr.perception.object_detection_2d import NanodetLearner, draw_bounding_boxes if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') - parser.add_argument("--optimization", help="Optimization framework to be used", type=str, default='onnx', choices=['jit', 'onnx']) - parser.add_argument("--path", help="Path to the dummy image that will be used for optimization and inference", type=str, - default="./predefined_examples/000000000036.jpg") - args = parser.parse_args() - - nanodet = NanodetLearner(model_to_use=args.model, device=args.device) - nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) + nanodet = NanodetLearner(model_to_use='m', device="cpu") + nanodet.load("./predefined_examples/nanodet_m", verbose=True) - # First read an openDR image from your dataset and run the optimizer: - img = Image.open(args.path) - nanodet.optimize("./{}/nanodet_{}/".format(args.optimization, args.model), img, optimization=args.optimization) + # First read an OpenDR image from your dataset and run the optimizer: + img = Image.open("./predefined_examples/000000000036.jpg") + nanodet.optimize("./onnx/nanodet_m/", img, optimization="onnx") boxes = 
nanodet.infer(input=img) draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) ``` + + +#### Performance Evaluation + +In terms of speed, the performance of Nanodet is summarized in the table below (in FPS). +The speed is measured from the start of the forward pass until the end of post-processing. + +For PyTorch inference. + +| Method {input} | RTX 2070 | TX2 | NX | +|------------------------------|----------|-------|-------| +| Efficient Lite0 {320} | 48.63 | 9.38 | 14.48 | +| Efficient Lite1 {416} | 43.88 | 7.93 | 11.07 | +| Efficient Lite2 {512} | 40.51 | 6.44 | 8.84 | +| RepVGG A0 {416} | 33.4 | 9.21 | 12.3 | +| Nanodet-g {416} | 51.32 | 9.57 | 15.75 | +| Nanodet-m {320} | 48.36 | 8.56 | 14.08 | +| Nanodet-m 0.5x {320} | 46.94 | 7.97 | 12.84 | +| Nanodet-m 1.5x {320} | 47.41 | 8.8 | 13.98 | +| Nanodet-m {416} | 47.3 | 8.34 | 13.15 | +| Nanodet-m 1.5x {416} | 45.62 | 8.43 | 13.2 | +| Nanodet-plus m {320} | 41.9 | 7.45 | 12.01 | +| Nanodet-plus m 1.5x {320} | 39.63 | 7.66 | 12.21 | +| Nanodet-plus m {416} | 40.16 | 7.24 | 11.58 | +| Nanodet-plus m 1.5x {416} | 38.94 | 7.37 | 11.52 | + +For JIT optimization inference. + +| Method {input} | RTX 2070 | TX2 | NX | +|------------------------------|----------|-------|-------| +| Efficient Lite0 {320} | 69.06 | 12.94 | 17.78 | +| Efficient Lite1 {416} | 62.94 | 9.27 | 12.94 | +| Efficient Lite2 {512} | 65.46 | 7.46 | 10.32 | +| RepVGG A0 {416} | 41.44 | 11.16 | 14.89 | +| Nanodet-g {416} | 76.3 | 12.94 | 20.52 | +| Nanodet-m {320} | 75.66 | 12.22 | 20.67 | +| Nanodet-m 0.5x {320} | 65.71 | 11.31 | 17.68 | +| Nanodet-m 1.5x {320} | 66.23 | 12.46 | 19.99 | +| Nanodet-m {416} | 79.91 | 12.08 | 19.28 | +| Nanodet-m 1.5x {416} | 69.44 | 12.3 | 18.6 | +| Nanodet-plus m {320} | 67.82 | 11.19 | 18.85 | +| Nanodet-plus m 1.5x {320} | 64.12 | 11.57 | 18.26 | +| Nanodet-plus m {416} | 64.74 | 11.22 | 17.57 | +| Nanodet-plus m 1.5x {416} | 56.77 | 10.39 | 14.81 | + +For ONNX optimization inference.
+ +In this case, the forward pass is performed in ONNX. +The pre-processing steps were implemented in PyTorch. +Results show that the performance on ONNX varies significantly among different architectures, with some achieving good performance while others perform poorly. +Additionally, it was observed that the performance of ONNX on a TX2 device was generally good, although it was observed to have occasional spikes of long run times that made it difficult to accurately measure. +Overall, the TX2 device demonstrated good performance with ONNX. + +| Method {input} | RTX 2070 | TX2 | NX | +|------------------------------|-----------|-----|--------| +| Efficient Lite0 {320} | 33.12 | | 34.03 | +| Efficient Lite1 {416} | 16.78 | | 17.35 | +| Efficient Lite2 {512} | 10.35 | | 12.14 | +| RepVGG A0 {416} | 27.89 | | 51.74 | +| Nanodet-g {416} | 103.22 | | 87.40 | +| Nanodet-m {320} | 98.73 | | 122.26 | +| Nanodet-m 0.5x {320} | 144.46 | | 208.19 | +| Nanodet-m 1.5x {320} | 75.82 | | 75.40 | +| Nanodet-m {416} | 73.09 | | 72.78 | +| Nanodet-m 1.5x {416} | 51.30 | | 51.78 | +| Nanodet-plus m {320} | 51.39 | | 50.67 | +| Nanodet-plus m 1.5x {320} | 39.65 | | 40.62 | +| Nanodet-plus m {416} | 39.17 | | 36.98 | +| Nanodet-plus m 1.5x {416} | 28.55 | | 27.20 | + +Finally, we measure the performance on the COCO dataset, using the corresponding metrics.
+ +| Method {input} | coco2017 mAP | +|------------------------------|--------------| +| Efficient Lite0 {320} | 24.4 | +| Efficient Lite1 {416} | 29.2 | +| Efficient Lite2 {512} | 32.4 | +| RepVGG A0 {416} | 25.5 | +| Nanodet-g {416} | 22.7 | +| Nanodet-m {320} | 20.2 | +| Nanodet-m 0.5x {320} | 13.1 | +| Nanodet-m 1.5x {320} | 23.1 | +| Nanodet-m {416} | 23.5 | +| Nanodet-m 1.5x {416} | 26.6 | +| Nanodet-plus m {320} | 27.0 | +| Nanodet-plus m 1.5x {320} | 29.9 | +| Nanodet-plus m {416} | 30.3 | +| Nanodet-plus m 1.5x {416} | 34.1 | + \ No newline at end of file diff --git a/include/face_recognition.h b/include/face_recognition.h index ff2774aab2..2647a04802 100644 --- a/include/face_recognition.h +++ b/include/face_recognition.h @@ -57,14 +57,14 @@ struct face_recognition_model { typedef struct face_recognition_model face_recognition_model_t; /** - * Loads a face recognition model saved in OpenDR format + * Loads a face recognition model saved in OpenDR format. * @param model_path path to the OpenDR face recongition model (as exported using OpenDR library) * @param model the loaded model */ void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); /** - * This function perform inference using a face recognition model and an input image + * This function performs inference using a face recognition model and an input image. * @param model face recognition model to be used for inference * @param image OpenDR image * @return OpenDR classification target containing the id of the recognized person @@ -90,7 +90,7 @@ void build_database_face_recognition(const char *database_folder, const char *ou void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); /** - * Returns the name of a recognition person by decoding the category id into a string + * Returns the name of a recognized person by decoding the category id into a string.
* @param model the face recognition model to be used for inference * @param category the predicted category * @param person_name buffer to store the person name @@ -98,7 +98,7 @@ void load_database_face_recognition(const char *database_path, face_recognition_ void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); /** - * Releases the memory allocated for a face recognition model + * Releases the memory allocated for a face recognition model. * @param model model to be de-allocated */ void free_face_recognition_model(face_recognition_model_t *model); diff --git a/include/object_detection_2d_nanodet_jit.h b/include/object_detection_2d_nanodet_jit.h index 993f54a85f..3eae7a7059 100644 --- a/include/object_detection_2d_nanodet_jit.h +++ b/include/object_detection_2d_nanodet_jit.h @@ -43,18 +43,18 @@ struct nanodet_model { typedef struct nanodet_model nanodet_model_t; /** - * Loads a nanodet object detection model saved in libtorch format - * @param modelPath path to the libtorch nanodet model (as exported using OpenDR library) - * @param device the device that will be used for the inference + * Loads a nanodet object detection model saved in libtorch format. + * @param modelPath path to the libtorch nanodet model (as exported using OpenDR) + * @param device the device that will be used for inference * @param height the height of model input * @param width the width of model input - * @param scoreThreshold a threshold for score to be inferred + * @param scoreThreshold confidence threshold * @param model the model to be loaded */ void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); /** - * This function performs inference using a nanodet object detection model and an input image + * This function performs inference using a nanodet object detection model and an input image. 
* @param model nanodet model to be used for inference * @param image OpenDR image * @return OpenDR detection vector target containing the detections of the recognized objects @@ -62,13 +62,13 @@ void load_nanodet_model(char *modelPath, char *device, int height, int width, fl opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); /** - * Releases the memory allocated for a nanodet object detection model + * Releases the memory allocated for a nanodet object detection model. * @param model model to be de-allocated */ void free_nanodet_model(nanodet_model_t *model); /** - * draw the bounding boxes from detections in the given image + * Draw the bounding boxes from detections in the given image. * @param image image that has been used for inference * @param model nanodet model that has been used for inference * @param detectionsVector output of the inference diff --git a/include/opendr_utils.h b/include/opendr_utils.h index 806fdc0d6d..9d0129ade7 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -25,14 +25,34 @@ extern "C" { #endif /** - * Json parser for OpenDR model files. - * @param json a string of json file. - * @param key the value to extract from json file. + * JSON parser for OpenDR model files. + * @param json a string of json file + * @param key the value to extract from json file + * @param index the index to choose the value if it is an array, otherwise it is not used + * @return string with the value of key */ -const char *json_get_key_string(const char *json, const char *key); +const char *json_get_key_string(const char *json, const char *key, const int index); /** - * Reads an image from path and saves it into OpenDR an image structure + * JSON parser for OpenDR model files. 
+ * @param json a string of json file + * @param key the value to extract from json file + * @param index the index to choose the value if it is an array, otherwise it is not used + * @return float with the value of key + */ +float json_get_key_float(const char *json, const char *key, const int index); + +/** + * JSON parser for OpenDR model files from inference_params key. + * @param json a string of json file + * @param key the value to extract from inference_params + * @param index the index to choose the value if it is an array, otherwise it is not used + * @return float with the value of key + */ +float json_get_key_from_inference_params(const char *json, const char *key, const int index); + +/** + * Reads an image from path and saves it into OpenDR image structure. * @param path path from which the image will be read * @param image OpenDR image data structure to store the image */ @@ -45,13 +65,13 @@ void load_image(const char *path, opendr_image_t *image); void free_image(opendr_image_t *image); /** - * Initialize an empty detection list to be used in C API + * Initialize an empty detection list. * @param detection_vector OpenDR detection_target_list structure to be initialized */ void init_detections_vector(opendr_detection_vector_target_t *detection_vector); /** - * Loads an OpenDR detection target list to be used in C API + * Loads an OpenDR detection target list. * @param detection_vector OpenDR detection_target_list structure to be loaded * @param detection the pointer of the first OpenDR detection target in a vector * @param vector_size the size of the vector diff --git a/projects/c_api/README.md b/projects/c_api/README.md index bd47689ec5..3f289ccf75 100644 --- a/projects/c_api/README.md +++ b/projects/c_api/README.md @@ -10,5 +10,5 @@ Make sure that you have downloaded the necessary resources before running the de ## Supported tools Currently, the following tools are exposing a C API: 1. Face recognition -2. Object detection (Nanodet) +2. 
Object detection 2D Nanodet diff --git a/projects/c_api/samples/object_detection_2d/nanodet/README.md b/projects/c_api/samples/object_detection_2d/nanodet/README.md index 31d3e90496..1ca7c102c1 100644 --- a/projects/c_api/samples/object_detection_2d/nanodet/README.md +++ b/projects/c_api/samples/object_detection_2d/nanodet/README.md @@ -1,7 +1,7 @@ # OpenDR C API Nanodet Demo C API implementation of nanodet models for inference. -To use the demo, the downloaded model from installation can be used or it can be exported with the optimization Jit from our python implementation, see [Nanodet opimization](../../../../docs/reference/nanodet.md#nanodetlearneroptimize). +To run the demo, the downloaded model can be used or it can be exported with JIT optimization from the python implementation, see [Nanodet optimization](../../../../../docs/reference/object-detection-2d-nanodet.md#nanodetlearneroptimize). After installation, the demo can be run from projects/c_api directory with: ```sh diff --git a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c index 789f56767a..eae45f9eb0 100644 --- a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c @@ -37,10 +37,8 @@ int main(int argc, char **argv) { load_nanodet_model(argv[1], argv[2], height, width, 0.35, &model); printf("success\n"); - // Initialize opendr image opendr_image_t image; - // Load opendr image load_image(argv[3], &image); if (!image.data) { printf("Image not found!"); @@ -51,10 +49,8 @@ int main(int argc, char **argv) { opendr_detection_vector_target_t results; init_detections_vector(&results); - // Infer nanodet model results = infer_nanodet(&model, &image); - // Draw the results draw_bboxes(&image, &model, &results); // Free the memory diff --git a/src/c_api/README.md b/src/c_api/README.md index ceaab456e7..6ff33bacea 100644 --- 
a/src/c_api/README.md +++ b/src/c_api/README.md @@ -3,7 +3,7 @@ ## Description This module contains a C API that can be used for performing inference on models trained using the Python API of OpenDR. -Therefore, to use the C API you should first use the Python API to train a model and then export it to ONNX or Jit format using the `optimize()` method. +Therefore, to use the C API you should first use the Python API to export a pretrained or a newly trained model and export it to ONNX or JIT format using the `optimize()` method. ## Setup diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index a0c8905bf6..636176f21e 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -98,18 +98,16 @@ void load_face_recognition_model(const char *model_path, face_recognition_model_ std::string basepath = model_json_path.substr(0, split_pos); split_pos = basepath.find_last_of("/"); split_pos = split_pos > 0 ? split_pos + 1 : 0; - basepath.resize(split_pos); + if (split_pos < basepath.size()) + basepath.resize(split_pos); // Parse JSON - std::string onnx_model_path = basepath + json_get_key_string(json, "model_paths"); - std::string model_format = json_get_key_string(json, "format"); + std::string onnx_model_path = basepath + json_get_key_string(json, "model_paths", 0); + std::string model_format = json_get_key_string(json, "format", 0); // Parse inference params - std::string threshold = json_get_key_string(json, "threshold"); - - if (!threshold.empty()) { - model->threshold = std::stof(threshold); - } + float threshold = json_get_key_from_inference_params(json, "threshold", 0); + model->threshold = threshold; // Proceed only if the model is in onnx format if (model_format != "onnx") { diff --git a/tests/sources/c_api/test_nanodet.c b/tests/sources/c_api/test_nanodet.c index a15850e39b..cacd4f1101 100644 --- a/tests/sources/c_api/test_nanodet.c +++ b/tests/sources/c_api/test_nanodet.c @@ -40,7 +40,6 @@ 
START_TEST(inference_creation_test) { nanodet_model_t model; // Load a pretrained model - printf("6\n"); load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); // Load an image and performance inference From a19138066667e5fddd4a9f777c88c614d45a0bad Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Fri, 23 Dec 2022 18:37:10 +0200 Subject: [PATCH 69/87] Fixes of suggestions Delete not used loggers. --- .../nanodet/algorithm/nanodet/trainer/task.py | 11 +- .../algorithm/nanodet/util/check_point.py | 20 +-- .../nanodet/nanodet_learner.py | 164 +++++++++--------- .../nanodet/test_nanodet.py | 8 +- 4 files changed, 103 insertions(+), 100 deletions(-) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py index 46e638433e..5cd2d7e125 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py @@ -73,11 +73,11 @@ def predict(self, batch, batch_idx=None, dataloader_idx=None): return results @rank_filter - def _save_current_model(self, path, logger): - save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, logger=logger) + def _save_current_model(self, path, verbose): + save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, verbose=verbose) - def save_current_model(self, path, logger): - save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, logger=logger) + def save_current_model(self, path, verbose): + save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, verbose=verbose) @torch.jit.unused def training_step(self, batch, batch_idx): @@ -179,8 +179,9 @@ def validation_epoch_end(self, validation_step_outputs): self.trainer.save_checkpoint( 
os.path.join(best_save_path, "model_best.ckpt") ) + verbose = True if self.logger is not None else False self._save_current_model(self.local_rank, os.path.join(best_save_path, "nanodet_model_state_best.pth"), - logger=self.logger) + verbose=verbose) txt_path = os.path.join(best_save_path, "eval_results.txt") with open(txt_path, "a") as f: f.write("Epoch:{}\n".format(self.current_epoch + 1)) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py index ea31d43d3b..fd3487f0f1 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py @@ -18,7 +18,7 @@ import torch -def load_model_weight(model, checkpoint, logger=None): +def load_model_weight(model, checkpoint, verbose=None): state_dict = checkpoint["state_dict"].copy() for k in checkpoint["state_dict"]: # convert average model weights @@ -39,8 +39,8 @@ def load_model_weight(model, checkpoint, logger=None): for k in state_dict: if k in model_state_dict: if state_dict[k].shape != model_state_dict[k].shape: - if logger: - logger.log( + if verbose: + print( "Skip loading parameter {}, required shape{}, " "loaded shape{}.".format( k, model_state_dict[k].shape, state_dict[k].shape @@ -48,12 +48,12 @@ def load_model_weight(model, checkpoint, logger=None): ) state_dict[k] = model_state_dict[k] else: - if logger: - logger.log("Drop parameter {}.".format(k)) + if verbose: + print("Drop parameter {}.".format(k)) for k in model_state_dict: if not (k in state_dict): - if logger: - logger.log("No param {}.".format(k)) + if verbose: + print("No param {}.".format(k)) state_dict[k] = model_state_dict[k] model.load_state_dict(state_dict, strict=False) return model @@ -71,9 +71,9 @@ def save_model(model, path, epoch, iter, optimizer=None): torch.save(data, path) -def 
save_model_state(path, model, weight_averager=None, logger=None): - if logger: - logger.info("Saving model to {}".format(path)) +def save_model_state(path, model, weight_averager=None, verbose=None): + if verbose: + print("Saving model to {}".format(path)) state_dict = ( weight_averager.state_dict() if weight_averager diff --git a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py index 9588e63d47..4396387c6f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py +++ b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py @@ -15,6 +15,7 @@ import os import datetime import json +import warnings from pathlib import Path import pytorch_lightning as pl @@ -30,7 +31,6 @@ from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.inferencer.utilities import Predictor from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util import ( NanoDetLightningLogger, - Logger, cfg, load_config, load_model_weight, @@ -181,23 +181,21 @@ def save(self, path=None, verbose=True): Method for saving the current model and metadata in the path provided. 
:param path: path to folder where model will be saved :type path: str, optional - :param verbose: whether to print a success message or not, defaults to False + :param verbose: whether to print a success message or not :type verbose: bool, optional """ path = path if path is not None else self.cfg.save_dir model = self.cfg.check_point_name - if verbose and not self.logger: - self.logger = Logger(-1, path, False) os.makedirs(path, exist_ok=True) if self.ort_session: self._save_onnx(path, verbose=verbose) - return True + return if self.jit_model: self._save_jit(path, verbose=verbose) - return True + return metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", "has_data": False, "inference_params": {}, "optimized": False, @@ -206,31 +204,28 @@ def save(self, path=None, verbose=True): metadata["model_paths"].append("nanodet_{}.pth".format(model)) if self.task is None: - print("You do not have call a task yet, only the state of the loaded or initialized model will be saved") - save_model_state(os.path.join(path, metadata["model_paths"][0]), self.model, None, self.logger) + print("You haven't called a task yet, only the state of the loaded or initialized model will be saved.") + save_model_state(os.path.join(path, metadata["model_paths"][0]), self.model, None, verbose) else: - self.task.save_current_model(os.path.join(path, metadata["model_paths"][0]), self.logger) + self.task.save_current_model(os.path.join(path, metadata["model_paths"][0]), verbose) with open(os.path.join(path, "nanodet_{}.json".format(model)), 'w', encoding='utf-8') as f: json.dump(metadata, f, ensure_ascii=False, indent=4) if verbose: print("Model metadata saved.") - return True + return def load(self, path=None, verbose=True): """ Loads the model from the path provided. 
:param path: path of the directory where the model was saved :type path: str, optional - :param verbose: whether to print a success message or not, defaults to False + :param verbose: whether to print a success message or not, defaults to Trye :type verbose: bool, optional """ path = path if path is not None else self.cfg.save_dir - if verbose and not self.logger: - self.logger = Logger(-1, path, False) - model = self.cfg.check_point_name if verbose: print("Model name:", model, "-->", os.path.join(path, "nanodet_" + model + ".json")) @@ -243,12 +238,12 @@ def load(self, path=None, verbose=True): print("Loaded ONNX model.") else: self._load_jit(os.path.join(path, metadata["model_paths"][0]), verbose=verbose) - print("Loaded Jit model.") + print("Loaded JIT model.") else: ckpt = torch.load(os.path.join(path, metadata["model_paths"][0]), map_location=torch.device(self.device)) - self.model = load_model_weight(self.model, ckpt, self.logger) + self.model = load_model_weight(self.model, ckpt, verbose) if verbose: - self.logger.log("Loaded model weight from {}".format(path)) + print("Loaded model weights from {}".format(path)) pass def download(self, path=None, mode="pretrained", verbose=True, @@ -258,15 +253,15 @@ def download(self, path=None, mode="pretrained", verbose=True, Downloads all files necessary for inference, evaluation and training. Valid mode options are: ["pretrained", "images", "test_data"]. 
:param path: folder to which files will be downloaded, if None self.temp_path will be used - :type path: str, optional + :type path: str :param mode: one of: ["pretrained", "images", "test_data"], where "pretrained" downloads a pretrained - network depending on the network choosed in config file, "images" downloads example inference data, - and "test_data" downloads additional image,annotation file and pretrained network for training and testing - :type mode: str, optional - :param verbose: if True, additional information is printed on stdout - :type verbose: bool, optional + network depending on the network chosen in the config file, "images" downloads example inference data, + and "test_data" downloads additional images and corresponding annotations files + :type mode: str + :param verbose: if True, additional information is printed on STDOUT + :type verbose: bool :param url: URL to file location on FTP server - :type url: str, optional + :type url: str """ valid_modes = ["pretrained", "images", "test_data"] @@ -362,7 +357,7 @@ def reset(self): def _save_onnx(self, onnx_path, img=None, do_constant_folding=False, verbose=True): if self.jit_model: - print("Warning: A jit model has already initialized, inference will run in onnx mode by default!!!") + warnings.warn("Warning: A JIT model was already initialized, inference will run in ONNX mode by default.") if not self.predictor: self.predictor = Predictor(self.cfg, self.model, device=self.device) @@ -371,7 +366,7 @@ def _save_onnx(self, onnx_path, img=None, do_constant_folding=False, verbose=Tru if self.dummy_input is None: assert img is not None,\ - "When use optimize or _save_jit is called for the first time, it must have and opendr Image input" + "When optimize or _save_onnx is called for the first time, it must have and OpenDR image input." 
if not isinstance(img, Image): img = Image(img) img = img.opencv() @@ -399,37 +394,36 @@ def _save_onnx(self, onnx_path, img=None, do_constant_folding=False, verbose=Tru json.dump(metadata, f, ensure_ascii=False, indent=4) if verbose: - print("finished exporting onxx") - self.logger.log("finished exporting onxx") + print("Finished exporting ONNX model.") try: import onnxsim - import onnx - if verbose: - print("start simplifying onnx") - self.logger.log("start simplifying onnx") - input_data = {"data": self.dummy_input.detach().cpu().numpy()} - model_sim, flag = onnxsim.simplify(export_path, input_data=input_data) - if flag: - onnx.save(model_sim, export_path) - if verbose: - self.logger.log("simplify onnx successfully") - else: - if verbose: - self.logger.log("simplify onnx failed") except: - print("For compression in optimized models, install the onnxsim dependencies and rerun optimize") + print("For compression in optimized models, install onnxsim and rerun optimize.") + return + + import onnx + if verbose: + print("Simplifying ONNX model...") + input_data = {"data": self.dummy_input[0].detach().cpu().numpy()} + model_sim, flag = onnxsim.simplify(export_path, input_data=input_data) + if flag: + onnx.save(model_sim, export_path) + if verbose: + print("ONNX simplified successfully.") + else: + if verbose: + print("ONNX simplified failed.") def _load_onnx(self, onnx_path, verbose=True): if verbose: print("Loading ONNX runtime inference session from {}".format(onnx_path)) - self.logger.log("Loading ONNX runtime inference session from {}".format(onnx_path)) self.ort_session = ort.InferenceSession(onnx_path) def _save_jit(self, jit_path, img=None, verbose=True): if self.ort_session: - print("Warning: An onnx model has already initialized, inference will run in onnx mode by default!!!") + warnings.warn("Warning: An ONNX model was already initialized, inference will run in ONNX mode by default.") if not self.predictor: self.predictor = Predictor(self.cfg, self.model, 
device=self.device) @@ -437,7 +431,7 @@ def _save_jit(self, jit_path, img=None, verbose=True): if not self.dummy_input: assert img, \ - "When use optimize or _save_jit is called for the first time, it must have and opendr Image input" + "When optimize or _save_jit is called for the first time, it must have and OpenDR image input." if not isinstance(img, Image): img = Image(img) img = img.opencv() @@ -459,31 +453,29 @@ def _save_jit(self, jit_path, img=None, verbose=True): json.dump(metadata, f, ensure_ascii=False, indent=4) if verbose: - print("Finished export to TorchScript") - self.logger.log("Finished export to TorchScript") + print("Finished export to TorchScript.") def _load_jit(self, jit_path, verbose=True): if verbose: - print("Loading Jit model from {}".format(jit_path)) - self.logger.log("Loading Jit model from {}".format(jit_path)) + print("Loading JIT model from {}.".format(jit_path)) self.jit_model = torch.jit.load(jit_path, map_location=self.device) def optimize(self, export_path, initial_img=None, verbose=True, optimization="jit"): """ - Method for optimizing the model with onnx or jit. - :param export_path: the path to the folder that the model is or will be after optimization + Method for optimizing the model with ONNX or JIT. + :param export_path: The file path to the folder where the optimized model will be saved. If a model already + exists at this path, it will be overwritten. 
:type export_path: str - :param initial_img: if optimize is called for the first time is needed a dummy input of opendr Image - :type initial_img: Image - :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, - defaults to True + :param initial_img: if optimize is called for the first time it needs a dummy OpenDR Image input + :type initial_img: opendr.engine.data.Image + :param verbose: if set to True, additional information is printed to STDOUT :type verbose: bool, optional :param optimization: the kind of optimization you want to perform [jit, onnx] :type optimization: str """ - if verbose and not self.logger: - self.logger = Logger(-1, self.cfg.save_dir, False) + + optimization = optimization.lower() if not os.path.exists(export_path): if optimization == "jit": self._save_jit(export_path, initial_img, verbose=verbose) @@ -500,7 +492,7 @@ def optimize(self, export_path, initial_img=None, verbose=True, optimization="ji else: assert NotImplementedError - def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123, local_rank=1): + def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, logging=False, seed=123, local_rank=1): """ This method is used to train the detector on the COCO dataset. Validation is performed in a val_dataset if provided, else validation is performed in training dataset. 
@@ -511,10 +503,11 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 :param val_dataset: validation dataset object :type val_dataset: ExternalDataset, DetectionDataset not implemented yet :param logging_path: subdirectory in temp_path to save logger outputs - :type logging_path: str, optional - :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, - defaults to True + :type logging_path: str + :param verbose: if set to True, additional information is printed to STDOUT :type verbose: bool + :param logging: if set to True, text and STDOUT logging will be used + :type logging: bool :param seed: seed for reproducibility :type seed: int :param local_rank: for distribution learning @@ -523,17 +516,19 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 mkdir(local_rank, self.cfg.save_dir) - if verbose: + if logging: self.logger = NanoDetLightningLogger(self.temp_path + "/" + logging_path) self.logger.dump_cfg(self.cfg) if seed != '' or seed is not None: - if verbose: + if logging: self.logger.info("Set random seed to {}".format(seed)) pl.seed_everything(seed) - if verbose: + if logging: self.logger.info("Setting up data...") + elif verbose: + print("Setting up data...") train_dataset = build_dataset(self.cfg.data.val, dataset, self.cfg.class_names, "train") val_dataset = train_dataset if val_dataset is None else \ @@ -566,8 +561,10 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 if self.checkpoint_load_iter > 0 else None ) - if verbose: + if logging: self.logger.info("Creating task...") + elif verbose: + print("Creating task...") self.task = TrainingTask(self.cfg, self.model, evaluator) gpu_ids = None @@ -593,14 +590,15 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 trainer.fit(self.task, train_dataloader, val_dataloader) - def eval(self, dataset, verbose=True, local_rank=1): + def 
eval(self, dataset, verbose=True, logging=False, local_rank=1): """ This method performs evaluation on a given dataset and returns a dictionary with the evaluation results. :param dataset: dataset object, to perform evaluation on :type dataset: ExternalDataset, DetectionDataset not implemented yet - :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, - defaults to True + :param verbose: if set to True, additional information is printed to STDOUT :type verbose: bool + :param logging: if set to True, text and STDOUT logging will be used + :type logging: bool :param local_rank: for distribution learning :type local_rank: int """ @@ -609,13 +607,15 @@ def eval(self, dataset, verbose=True, local_rank=1): save_dir = os.path.join(self.cfg.save_dir, timestr) mkdir(local_rank, save_dir) - if verbose: + if logging: self.logger = NanoDetLightningLogger(save_dir) self.cfg.update({"test_mode": "val"}) - if self.logger: + if logging: self.logger.info("Setting up data...") + elif verbose: + print("Setting up data...") val_dataset = build_dataset(self.cfg.data.val, dataset, self.cfg.class_names, "val") @@ -630,8 +630,11 @@ def eval(self, dataset, verbose=True, local_rank=1): ) evaluator = build_evaluator(self.cfg.evaluator, val_dataset) - if self.logger: + if logging: self.logger.info("Creating task...") + elif verbose: + print("Creating task...") + self.task = TrainingTask(self.cfg, self.model, evaluator) gpu_ids = None @@ -650,23 +653,22 @@ def eval(self, dataset, verbose=True, local_rank=1): ) if self.logger: self.logger.info("Starting testing...") - return trainer.test(self.task, val_dataloader, verbose=verbose) + elif verbose: + print("Starting testing...") + + test_results = (verbose or logging) + return trainer.test(self.task, val_dataloader, verbose=test_results) - def infer(self, input, threshold=0.35, verbose=True): + def infer(self, input, threshold=0.35): """ Performs inference - :param input: input can be an Image type 
image to perform inference - :type input: Image + :param input: input image to perform inference on + :type input: opendr.data.Image :param threshold: confidence threshold :type threshold: float, optional - :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, - defaults to True - :type verbose: bool :return: list of bounding boxes of last image of input or last frame of the video - :rtype: BoundingBoxList + :rtype: opendr.engine.target.BoundingBoxList """ - if verbose and not self.logger: - self.logger = Logger(-1, "./last_infer", False) if not self.predictor: self.predictor = Predictor(self.cfg, self.model, device=self.device) diff --git a/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py b/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py index 48690ceda8..053f0bea36 100644 --- a/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py +++ b/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py @@ -53,9 +53,9 @@ def setUpClass(cls): cls.detector = NanodetLearner(model_to_use=_DEFAULT_MODEL, device=device, temp_path=cls.temp_dir, batch_size=1, iters=1, checkpoint_after_iter=2, lr=1e-4) # Download all required files for testing - cls.detector.download(path=cls.temp_dir, mode="pretrained") - cls.detector.download(path=cls.temp_dir, mode="images") - cls.detector.download(path=cls.temp_dir, mode="test_data") + cls.detector.download(path=cls.temp_dir, mode="pretrained", verbose=False) + cls.detector.download(path=cls.temp_dir, mode="images", verbose=False) + cls.detector.download(path=cls.temp_dir, mode="test_data", verbose=False) @classmethod def tearDownClass(cls): @@ -105,7 +105,7 @@ def test_infer(self): print('Starting inference test for Nanodet...') self.detector.load(os.path.join(self.temp_dir, "nanodet_{}".format(_DEFAULT_MODEL)), verbose=False) img = cv2.imread(os.path.join(self.temp_dir, "000000000036.jpg")) - 
self.assertIsNotNone(self.detector.infer(input=img, verbose=False), + self.assertIsNotNone(self.detector.infer(input=img), msg="Returned empty BoundingBoxList.") gc.collect() print('Finished inference test for Nanodet...') From ad741a1e04036e88cc46e5a699feb78ba0a11070 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Fri, 23 Dec 2022 18:37:47 +0200 Subject: [PATCH 70/87] Fixes of suggestions --- dependencies/download_torch.py | 14 +- dependencies/install_torch_c_api.sh | 8 +- .../object_detection_2d/nanodet/eval_demo.py | 2 +- .../nanodet/inference_tutorial.ipynb | 616 +----------------- .../object_detection_2d/nanodet/train_demo.py | 2 +- .../object_detection_2d/nanodet/README.md | 2 +- 6 files changed, 28 insertions(+), 616 deletions(-) diff --git a/dependencies/download_torch.py b/dependencies/download_torch.py index 349ab7102d..11ac49c97a 100644 --- a/dependencies/download_torch.py +++ b/dependencies/download_torch.py @@ -49,7 +49,7 @@ def get_cuda_path(): parser.add_argument("--cuda_path", help="Path to installed cuda", type=str, default=None) parser.add_argument("--opendr_device", help="Target device for installation", type=str, choices=["gpu", "cpu"], default="gpu") - parser.add_argument("--torch_version", help="Version of Libtorch to be installed", type=str, default="1.9.0") + parser.add_argument("--torch_version", help="Specifies torch version to be used for LibTorch installation", type=str, default="1.9.0") args = parser.parse_args() COMPATIBILITY_VERSIONS = { @@ -88,11 +88,11 @@ def get_cuda_path(): CUDA_VERSION = CUDA_VERSION.replace(".", "") CUDA_VERSION = CUDA_VERSION[:3] else: - warnings.warn("\033[93m Not cuda version file found. 
Please open an Issue in our github.") + warnings.warn("\033[93m Not CUDA version file found.") DEVICE = f"cu{CUDA_VERSION}" except: - warnings.warn("\033[93m No cuda found.\n" - "Please install cuda or specify cuda path with export CUDA_PATH=/path/to/your/cuda.") + warnings.warn("\033[93m No CUDA installation found.\n" + "Please install CUDA or specify CUDA path with export CUDA_PATH=/path/to/your/cuda.") else: DEVICE = "cpu" @@ -106,9 +106,9 @@ def get_cuda_path(): urlretrieve(file_url_libtorch, DOWNLOAD_DIRECTORY) except: - warnings.warn("\033[93m No Libtorch found for your specific device and torch version.\n" + warnings.warn("\033[93m No LibTorch found for your specific device and torch version.\n" "Please choose another version of torch or install a different version of CUDA.\n" - "Please reference https://download.pytorch.org/whl/torch_stable.html") + "Please refer to https://download.pytorch.org/whl/torch_stable.html") exit() # Download Vision try: @@ -118,5 +118,5 @@ def get_cuda_path(): urlretrieve(file_url_vision, DOWNLOAD_DIRECTORY) except: warnings.warn("\033[93m No torchvision found for your specific torch version.\n" - "Please see the torchvision GitHub repository for more information.") + "Please refer to https://github.com/pytorch/vision for more information.") diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh index 6665c09d7c..3d56b5fb2b 100755 --- a/dependencies/install_torch_c_api.sh +++ b/dependencies/install_torch_c_api.sh @@ -12,18 +12,18 @@ if [ ! -f /usr/local/lib/libtorchvision.so ]; then if [[ "$OPENDR_DEVICE" == "gpu" ]] then - echo "Downloading and installing libtorch and torchvision (gpu support) ..." + echo "Downloading and installing LibTorch and torchvision (gpu support) ..." GPU="on" DEVICE="cu"${CUDA_VERSION} CUDA_COMPILER="/usr/local/cuda/bin/nvcc" else - echo "Downloading and installing libtorch and torchvsion (cpu-only) ..." 
+ echo "Downloading and installing LibTorch and torchvision (cpu-only) ..." GPU="off" DEVICE="cpu" fi # Find CUDA version and download torch and vision - echo "Downloading Libtorch and torchvision ..." + echo "Downloading LibTorch and torchvision ..." # Make sure that we can download files if [[ -z "$CUDA_PATH" ]]; then @@ -31,7 +31,7 @@ if [ ! -f /usr/local/lib/libtorchvision.so ]; then else python3 ./download_torch.py --opendr_device "$OPENDR_DEVICE" --torch_version "$TORCH_VERSION" --cuda_path "$CUDA_PATH" fi - echo "Downloading Libtorch and torchvision ... done." + echo "Downloading Libtorch and torchvision done." # TORCH INSTALLATION unzip -qq libtorch.zip diff --git a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py index c8975ccb02..5b19ef1a68 100644 --- a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py @@ -22,7 +22,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--dataset", help="Dataset to evaluate on", type=str, default="coco", choices=["voc", "coco"]) parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str, default="m") + parser.add_argument("--model", help="Model for which a config file will be used", type=str, default="m") parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) args = parser.parse_args() diff --git a/projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb b/projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb index 96af81257c..23c6eb80b0 100644 --- a/projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb +++ b/projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb @@ -25,36 +25,16 @@ 
}, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "b6f3d99a-b702-472b-b8d0-95a551e7b9ba", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/gluoncv/__init__.py:40: UserWarning: Both `mxnet==1.8.0` and `torch==1.9.0+cu111` are installed. You might encounter increased GPU memory footprint if both framework are used at the same time.\n", - " warnings.warn(f'Both `mxnet=={mx.__version__}` and `torch=={torch.__version__}` are installed. '\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "model size is 1.5x\n", - "init weights...\n", - "Finish initialize NanoDet-Plus Head.\n" - ] - } - ], + "outputs": [], "source": [ "from opendr.perception.object_detection_2d import NanodetLearner\n", "\n", - "model=\"plus_m_1.5x_416\"\n", + "model=\"m\"\n", "\n", - "nanodet = NanodetLearner(model_to_use=model, device=\"cuda\")" + "nanodet = NanodetLearner(model_to_use=model, device=\"cpu\")" ] }, { @@ -77,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "8a680c28-8f42-4b4a-8c6e-2580b7be2da5", "metadata": {}, "outputs": [], @@ -98,510 +78,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "e12f582b-c001-4b9d-b396-4260e23139f6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model name: plus_m_1.5x_416 --> ./predefined_examples/nanodet_plus_m_1.5x_416/plus_m_1.5x_416.json\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:root:No param aux_fpn.reduce_layers.0.conv.weight.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.bn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.bn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.bias.\u001b[0m\n", 
- "INFO:root:No param aux_fpn.reduce_layers.1.bn.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.bn.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.bn.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.0.weight.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo 
param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.2.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.2.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.running_mean.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.weight.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.2.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.2.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.depthwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.depthwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pointwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pointwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo 
param aux_fpn.downsamples.0.dwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.depthwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.depthwise.weight.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.downsamples.1.pointwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pointwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.2.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.2.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.weight.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.running_var.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.running_var.\u001b[0m\n", - 
"INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.2.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.2.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.depthwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.depthwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pointwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pointwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.depthwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.depthwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pointwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pointwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.0.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.0.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.0.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.0.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.0.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.0.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.1.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.1.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.1.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.1.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.1.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.1.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.2.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.2.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.2.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.2.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.2.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.2.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.3.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.3.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.3.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.3.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.3.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.3.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.0.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.0.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.0.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.0.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.0.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.0.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.1.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.1.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.1.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.1.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.1.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.1.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.2.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.2.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.2.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.2.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.2.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.2.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.3.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.3.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.3.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.3.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.3.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.3.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.gfl_cls.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.gfl_cls.weight.\u001b[0m\n", - "INFO:root:No param aux_head.gfl_cls.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.gfl_cls.bias.\u001b[0m\n", - "INFO:root:No param aux_head.gfl_reg.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.gfl_reg.weight.\u001b[0m\n", - "INFO:root:No param aux_head.gfl_reg.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.gfl_reg.bias.\u001b[0m\n", - "INFO:root:No param aux_head.scales.0.scale.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.scales.0.scale.\u001b[0m\n", - 
"INFO:root:No param aux_head.scales.1.scale.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.scales.1.scale.\u001b[0m\n", - "INFO:root:No param aux_head.scales.2.scale.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.scales.2.scale.\u001b[0m\n", - "INFO:root:No param aux_head.scales.3.scale.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.scales.3.scale.\u001b[0m\n", - "INFO:root:Loaded model weight from ./predefined_examples/nanodet_plus_m_1.5x_416\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mLoaded model weight from ./predefined_examples/nanodet_plus_m_1.5x_416\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "nanodet.load(path=load_model_weights, verbose=True)" ] @@ -616,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "9efba6eb-5235-4e31-a002-1bcb6e311704", "metadata": {}, "outputs": [], @@ -630,33 +110,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "9f083566-3d57-4db6-baa5-0fefdf8fa8ea", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%matplotlib inline\n", "import cv2\n", @@ -678,32 +135,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "6cab7dae-8892-4a16-ad03-651fa3bb20ee", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "forward time: 0.030s | decode time: 0.004s | " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)\n", - " return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)\n", - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/torch/nn/functional.py:3609: UserWarning: Default upsampling behavior when mode=bilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. 
See the documentation of nn.Upsample for details.\n", - " warnings.warn(\n", - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n", - " return _methods._mean(a, axis=axis, dtype=dtype,\n", - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/numpy/core/_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - } - ], + "outputs": [], "source": [ "boxes = nanodet.infer(input=img)" ] @@ -722,33 +157,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "d7129fe6-a198-4196-b35f-93ba41e50031", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from opendr.perception.object_detection_2d import draw_bounding_boxes\n", "\n", diff --git a/projects/python/perception/object_detection_2d/nanodet/train_demo.py b/projects/python/perception/object_detection_2d/nanodet/train_demo.py index bae2941259..3b027694a0 100644 --- a/projects/python/perception/object_detection_2d/nanodet/train_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/train_demo.py @@ -22,7 +22,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--dataset", help="Dataset to train on", type=str, default="coco", choices=["voc", "coco"]) parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str, default="m") + parser.add_argument("--model", help="Model for which a config file will be used", type=str, default="m") parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--batch-size", help="Batch size to use for training", type=int, default=6) parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=5e-4) diff --git a/src/opendr/perception/object_detection_2d/nanodet/README.md b/src/opendr/perception/object_detection_2d/nanodet/README.md index 1efb8bae5e..409e07a847 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/README.md +++ b/src/opendr/perception/object_detection_2d/nanodet/README.md @@ -10,4 +10,4 @@ Large parts of the implementation are taken from [Nanodet Github](https://github Usage ------ - For VOC and COCO like datasets, an ```ExternalDataset``` with the root path and dataset name (```voc```, ```coco```) must be passed to the fit function. -- The ```workspace``` folder is used to save checkpoints during training. 
\ No newline at end of file +- The ```temp_path``` folder is used to save checkpoints during training. \ No newline at end of file From f47fc17e2fb2986af2bbdb0e1afad6086e677b2e Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 29 Dec 2022 13:14:00 +0200 Subject: [PATCH 71/87] added json parser in utilities --- src/c_api/opendr_utils.cpp | 59 +++++++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index fc88f152be..e4a6c352c4 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -24,19 +24,76 @@ #include #include -const char *json_get_key_string(const char *json, const char *key) { +float json_get_key_from_inference_params(const char *json, const char *key, const int index) { + rapidjson::Document doc; + doc.Parse(json); + if ((!doc.IsObject()) || (!doc.HasMember("inference_params"))) { + return 0.0f; + } + const rapidjson::Value &inference_params = doc["inference_params"]; + if ((!inference_params.IsObject()) || (!inference_params.HasMember(key))) { + return 0.0f; + } + const rapidjson::Value &value = inference_params[key]; + if (value.IsArray()) { + if (value.Size() <= index) { + return 0.0f; + } + if (!value[index].IsFloat()) { + return 0.0f; + } + return value[index].GetFloat(); + } + if (!value.IsFloat()) { + return 0.0f; + } + return value.GetFloat(); +} + +const char *json_get_key_string(const char *json, const char *key, const int index) { rapidjson::Document doc; doc.Parse(json); if ((!doc.IsObject()) || (!doc.HasMember(key))) { return ""; } const rapidjson::Value &value = doc[key]; + if (value.IsArray()) { + if (value.Size() <= index) { + return ""; + } + if (!value[index].IsString()) { + return ""; + } + return value[index].GetString(); + } if (!value.IsString()) { return ""; } return value.GetString(); } +float json_get_key_float(const char *json, const char *key, const int index) { + rapidjson::Document doc; + doc.Parse(json); + 
if ((!doc.IsObject()) || (!doc.HasMember(key))) { + return 0.0f; + } + const rapidjson::Value &value = doc[key]; + if (value.IsArray()) { + if (value.Size() <= index) { + return 0.0f; + } + if (!value[index].IsFloat()) { + return 0.0f; + } + return value[index].IsFloat(); + } + if (!value.IsFloat()) { + return 0.0f; + } + return value.GetFloat(); +} + void load_image(const char *path, opendr_image_t *image) { cv::Mat opencv_image = cv::imread(path, cv::IMREAD_COLOR); if (opencv_image.empty()) { From 76ecd07a2aaae6a6aafc553e476e00774152d93e Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 29 Dec 2022 13:15:00 +0200 Subject: [PATCH 72/87] unnecessary use of filesystem, easier implementation in embeded --- src/c_api/face_recognition.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index 636176f21e..3a8d305c09 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include From 5f8d84e482fea8a4dfa5163f13f4b0a3383c6173 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 5 Jan 2023 22:28:09 +0200 Subject: [PATCH 73/87] default directory for saves change to `temp` --- .../nanodet_EfficientNet_Lite0_320.yml | 2 +- .../nanodet_EfficientNet_Lite1_416.yml | 2 +- .../nanodet_EfficientNet_Lite2_512.yml | 2 +- .../RepVGG/nanodet_RepVGG_A0_416.yml | 2 +- .../Transformer/nanodet_t.yml | 2 +- .../config/legacy_v0.x_configs/nanodet_g.yml | 2 +- .../config/legacy_v0.x_configs/nanodet_m.yml | 2 +- .../legacy_v0.x_configs/nanodet_m_0.5x.yml | 2 +- .../legacy_v0.x_configs/nanodet_m_1.5x.yml | 2 +- .../nanodet_m_1.5x_416.yml | 2 +- .../legacy_v0.x_configs/nanodet_m_416.yml | 2 +- .../algorithm/config/nanodet_custom.yml | 125 ------------------ .../config/nanodet_plus_m_1.5x_320.yml | 2 +- .../config/nanodet_plus_m_1.5x_416.yml | 2 +- .../algorithm/config/nanodet_plus_m_320.yml | 2 +- 
.../algorithm/config/nanodet_plus_m_416.yml | 2 +- 16 files changed, 15 insertions(+), 140 deletions(-) delete mode 100644 src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_custom.yml diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite0_320.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite0_320.yml index cdddc320cb..d47708a05f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite0_320.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite0_320.yml @@ -5,7 +5,7 @@ # AP_small = 0.079 # AP_m = 0.243 # AP_l = 0.406 -save_dir: ./workspace/efficient0_320 +save_dir: ./temp/efficient0_320 check_point_name: EfficientNet_Lite0_320 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite1_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite1_416.yml index a189662a77..859dbe00e1 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite1_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite1_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.122 # AP_m = 0.321 # AP_l = 0.432 -save_dir: ./workspace/efficient1_416_SGD +save_dir: ./temp/efficient1_416_SGD check_point_name: EfficientNet_Lite1_416 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite2_512.yml 
b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite2_512.yml index 20664fe7ca..a4248e7eda 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite2_512.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite2_512.yml @@ -5,7 +5,7 @@ # AP_small = 0.152 # AP_m = 0.342 # AP_l = 0.481 -save_dir: ./workspace/efficientlite2_512 +save_dir: ./temp/efficientlite2_512 check_point_name: EfficientNet_Lite2_512 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/RepVGG/nanodet_RepVGG_A0_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/RepVGG/nanodet_RepVGG_A0_416.yml index 8a0d8debeb..fa93e55896 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/RepVGG/nanodet_RepVGG_A0_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/RepVGG/nanodet_RepVGG_A0_416.yml @@ -1,4 +1,4 @@ -save_dir: ./workspace/RepVGG_A0_416 +save_dir: ./temp/RepVGG_A0_416 check_point_name: RepVGG_A0_416 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml index 77d064c72a..4eb8282924 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml @@ -6,7 +6,7 @@ # AP_m = 0.214 # AP_l = 0.364 -save_dir: ./workspace/nanodet_t +save_dir: ./temp/nanodet_t check_point_name: t model: arch: 
diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_g.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_g.yml index 0d09c335ab..8d2ae3cd91 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_g.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_g.yml @@ -3,7 +3,7 @@ # Flops = 4.2B # Params = 3.8M # COCO pre-trained weight link: https://drive.google.com/file/d/10uW7oqZKw231l_tr4C1bJWkbCXgBf7av/view?usp=sharing -save_dir: ./workspace/nanodet_g +save_dir: ./temp/nanodet_g check_point_name: g model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m.yml index 876168e7ad..7bd0d075ab 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m.yml @@ -1,5 +1,5 @@ #Config File example -save_dir: ./workspace/nanodet_m +save_dir: ./temp/nanodet_m check_point_name: m model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_0.5x.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_0.5x.yml index 2a38388336..c067a1535f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_0.5x.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_0.5x.yml @@ -5,7 +5,7 @@ # AP_small = 0.036 # AP_m = 0.119 # AP_l = 0.232 -save_dir: ./workspace/nanodet_m_0.5x +save_dir: ./temp/nanodet_m_0.5x check_point_name: m_0.5x model: arch: diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x.yml index a54268f70a..90c2c34d3b 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x.yml @@ -5,7 +5,7 @@ # AP_small = 0.069 # AP_m = 0.235 # AP_l = 0.389 -save_dir: ./workspace/nanodet_m_1.5x +save_dir: ./temp/nanodet_m_1.5x check_point_name: m_1.5x model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x_416.yml index b8274403b1..b6332a5aa1 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.098 # AP_m = 0.277 # AP_l = 0.420 -save_dir: ./workspace/nanodet_m_1.5x_416 +save_dir: ./temp/nanodet_m_1.5x_416 check_point_name: m_1.5x_416 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_416.yml index eb30de1e0d..bd8b4e2907 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.082 # AP_m = 0.240 # AP_l = 0.375 -save_dir: ./workspace/nanodet_m_416 +save_dir: ./temp/nanodet_m_416 check_point_name: m_416 model: arch: diff 
--git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_custom.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_custom.yml deleted file mode 100644 index bf58986a48..0000000000 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_custom.yml +++ /dev/null @@ -1,125 +0,0 @@ -# nanodet-plus-m-1.5x_416 -# COCO mAP(0.5:0.95) = 0.341 -# AP_50 = 0.506 -# AP_75 = 0.357 -# AP_small = 0.143 -# AP_m = 0.363 -# AP_l = 0.539 -save_dir: ./workspace/nanodet_plus_m_1.5x_416/test_training -check_point_name: plus_m_1.5x_416_default -model: - weight_averager: - name: ExpMovingAverager - decay: 0.9998 - arch: - name: NanoDetPlus - detach_epoch: 10 - backbone: - name: ShuffleNetV2 - model_size: 1.5x - out_stages: [2,3,4] - activation: LeakyReLU - fpn: - name: GhostPAN - in_channels: [176, 352, 704] - out_channels: 128 - kernel_size: 5 - num_extra_level: 1 - use_depthwise: True - activation: LeakyReLU - head: - name: NanoDetPlusHead - num_classes: 80 - input_channel: 128 - feat_channels: 128 - stacked_convs: 2 - kernel_size: 5 - strides: [8, 16, 32, 64] - activation: LeakyReLU - reg_max: 7 - norm_cfg: - type: BN - loss: - loss_qfl: - name: QualityFocalLoss - use_sigmoid: True - beta: 2.0 - loss_weight: 1.0 - loss_dfl: - name: DistributionFocalLoss - loss_weight: 0.25 - loss_bbox: - name: GIoULoss - loss_weight: 2.0 - # Auxiliary head, only use in training time. 
- aux_head: - name: SimpleConvHead - num_classes: 80 - input_channel: 256 - feat_channels: 256 - stacked_convs: 4 - strides: [8, 16, 32, 64] - activation: LeakyReLU - reg_max: 7 -data: - train: - input_size: [416,416] #[w,h] - keep_ratio: False - pipeline: - perspective: 0.0 - scale: [0.6, 1.4] - stretch: [[0.8, 1.2], [0.8, 1.2]] - rotation: 0 - shear: 0 - translate: 0.2 - flip: 0.5 - brightness: 0.2 - contrast: [0.6, 1.4] - saturation: [0.5, 1.2] - normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] - val: - input_size: [416,416] #[w,h] - keep_ratio: False - pipeline: - normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] -device: - gpu_ids: [0] - workers_per_gpu: 10 - batchsize_per_gpu: 12 #96 -schedule: - resume: 0 - optimizer: - name: AdamW - lr: 0.000125 - weight_decay: 0.05 - warmup: - name: linear - steps: 500 - ratio: 0.0001 - total_epochs: 300 - lr_schedule: - name: CosineAnnealingLR - T_max: 300 - eta_min: 0.00005 - val_intervals: 10 -grad_clip: 35 -evaluator: - name: CocoDetectionEvaluator - save_key: mAP -log: - interval: 50 - -class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', - 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', - 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', - 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', - 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_320.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_320.yml index 3dcd1a2973..c2a4a8bdc1 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_320.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_320.yml @@ -5,7 +5,7 @@ # AP_small = 0.102 # AP_m = 0.309 # AP_l = 0.493 -save_dir: ./workspace/nanodet_plus_m_1.5x_320 +save_dir: ./temp/nanodet_plus_m_1.5x_320 check_point_name: plus_m_1.5x_320 model: weight_averager: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_416.yml index 5a76789b50..f999d0c985 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.143 # AP_m = 0.363 # AP_l = 0.539 -save_dir: ./workspace/nanodet_plus_m_1.5x_416 +save_dir: ./temp/nanodet_plus_m_1.5x_416 check_point_name: plus_m_1.5x_416 model: weight_averager: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_320.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_320.yml index e4b5f58f9c..ee4b5235bc 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_320.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_320.yml @@ -5,7 +5,7 @@ # AP_small = 0.083 # AP_m = 0.278 # AP_l = 0.451 -save_dir: ./workspace/nanodet_plus_m_320 +save_dir: ./temp/nanodet_plus_m_320 check_point_name: plus_m_320 model: weight_averager: diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_416.yml index 61a536ad7d..cd8ea9186b 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.106 # AP_m = 0.322 # AP_l = 0.477 -save_dir: ./workspace/nanodet_plus_m_416 +save_dir: ./temp/nanodet_plus_m_416 check_point_name: plus_m_416 model: weight_averager: From 64c466f6681a1f1d74406cb2139a3de94936f39b Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 5 Jan 2023 22:28:43 +0200 Subject: [PATCH 74/87] change to warnings errors and exceptions --- dependencies/download_torch.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/dependencies/download_torch.py b/dependencies/download_torch.py index 11ac49c97a..bb21137588 100644 --- a/dependencies/download_torch.py +++ b/dependencies/download_torch.py @@ -49,7 +49,7 @@ def get_cuda_path(): parser.add_argument("--cuda_path", help="Path to installed cuda", type=str, default=None) parser.add_argument("--opendr_device", help="Target device for installation", type=str, choices=["gpu", "cpu"], default="gpu") - parser.add_argument("--torch_version", help="Specifies torch version to be used for LibTorch installation", type=str, default="1.9.0") + parser.add_argument("--torch_version", help="Specifies LibTorch version to be installed", type=str, default="1.9.0") args = parser.parse_args() COMPATIBILITY_VERSIONS = { @@ -63,6 +63,8 @@ def get_cuda_path(): "1.9.0": "0.10.0", } + warnings.simplefilter("error") + TORCH_VERSION = args.torch_version VISION_VERSION = COMPATIBILITY_VERSIONS[TORCH_VERSION] @@ -93,6 +95,7 @@ def get_cuda_path(): except: warnings.warn("\033[93m No CUDA installation found.\n" "Please install CUDA or specify CUDA path with export 
CUDA_PATH=/path/to/your/cuda.") + exit() else: DEVICE = "cpu" @@ -119,4 +122,5 @@ def get_cuda_path(): except: warnings.warn("\033[93m No torchvision found for your specific torch version.\n" "Please refer to https://github.com/pytorch/vision for more information.") + exit() From 1df22cc239c8d12876fc50efa33bcccc8d43378f Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 5 Jan 2023 22:32:29 +0200 Subject: [PATCH 75/87] Transfer warnings for jit and ort simultaneously loaded from _load to infer. --- .../perception/object_detection_2d/nanodet/README.md | 2 +- .../object_detection_2d/nanodet/nanodet_learner.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/opendr/perception/object_detection_2d/nanodet/README.md b/src/opendr/perception/object_detection_2d/nanodet/README.md index 409e07a847..777a4eb0e6 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/README.md +++ b/src/opendr/perception/object_detection_2d/nanodet/README.md @@ -10,4 +10,4 @@ Large parts of the implementation are taken from [Nanodet Github](https://github Usage ------ - For VOC and COCO like datasets, an ```ExternalDataset``` with the root path and dataset name (```voc```, ```coco```) must be passed to the fit function. -- The ```temp_path``` folder is used to save checkpoints during training. \ No newline at end of file +- The ```temp``` folder is used to save checkpoints during training. 
\ No newline at end of file diff --git a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py index 4396387c6f..84587cf348 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py +++ b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py @@ -356,8 +356,6 @@ def reset(self): return NotImplementedError def _save_onnx(self, onnx_path, img=None, do_constant_folding=False, verbose=True): - if self.jit_model: - warnings.warn("Warning: A JIT model was already initialized, inference will run in ONNX mode by default.") if not self.predictor: self.predictor = Predictor(self.cfg, self.model, device=self.device) @@ -422,8 +420,6 @@ def _load_onnx(self, onnx_path, verbose=True): self.ort_session = ort.InferenceSession(onnx_path) def _save_jit(self, jit_path, img=None, verbose=True): - if self.ort_session: - warnings.warn("Warning: An ONNX model was already initialized, inference will run in ONNX mode by default.") if not self.predictor: self.predictor = Predictor(self.cfg, self.model, device=self.device) @@ -678,6 +674,10 @@ def infer(self, input, threshold=0.35): (_input, _height, _width, _warp_matrix) = self.predictor.preprocessing(_input) if self.ort_session: + if self.jit_model: + warnings.warn( + "Warning: Both JIT and ONNX models are initialized, inference will run in ONNX mode by default.\n" + "To run in JIT please delete the self.ort_session like: detector.ort_session = None.") res = self.ort_session.run(['output'], {'data': _input.cpu().detach().numpy()}) res = self.predictor.postprocessing(torch.from_numpy(res[0]), _input, _height, _width, _warp_matrix) elif self.jit_model: From 2e37c406490e957774f0822b8e3fa860dacb2cbb Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 5 Jan 2023 22:36:20 +0200 Subject: [PATCH 76/87] Change naming style. 
All .cpp files have only CPP style naming and .c files have CPP style naming in CPP functions and C style in everything else. --- docs/reference/c-data-h.md | 12 +- docs/reference/c-face-recognition-h.md | 42 +-- .../c-object-detection-2d-nanodet-jit-h.md | 32 +- docs/reference/c-opendr-utils-h.md | 68 ++-- docs/reference/c-target-h.md | 34 +- docs/reference/object-detection-2d-nanodet.md | 2 +- include/data.h | 4 +- include/face_recognition.h | 50 +-- include/object_detection_2d_nanodet_jit.h | 18 +- include/opendr_utils.h | 26 +- include/target.h | 14 +- .../face_recognition/face_recognition_demo.c | 20 +- .../object_detection_2d/nanodet/README.md | 4 +- .../nanodet/nanodet_jit_demo.c | 22 +- src/c_api/face_recognition.cpp | 302 +++++++++--------- src/c_api/object_detection_2d_nanodet_jit.cpp | 50 +-- src/c_api/opendr_utils.cpp | 56 ++-- tests/sources/c_api/test_face_recognition.c | 66 ++-- tests/sources/c_api/test_nanodet.c | 22 +- tests/sources/c_api/test_opendr_utils.c | 8 +- 20 files changed, 423 insertions(+), 429 deletions(-) diff --git a/docs/reference/c-data-h.md b/docs/reference/c-data-h.md index 20b5e27b8e..35e8f0539a 100644 --- a/docs/reference/c-data-h.md +++ b/docs/reference/c-data-h.md @@ -3,24 +3,24 @@ The *data.h* header provides definitions of OpenDR data types that can be used in the C API of OpenDR. -### struct *opendr_image_t* +### struct *OpendrImageT* ```C -struct opendr_image { +struct OpendrImage { void *data; }; -typedef struct opendr_image opendr_image_t; +typedef struct OpendrImage OpendrImageT; ``` -The *opendr_image_t* structure provides a data structure for storing OpenDR images. +The *OpendrImageT* structure provides a data structure for storing OpenDR images. Every function in the C API receiving images is expected to use this structure. Helper functions that directly convert images into this format are provided in *opendr_utils.h*. 
-The *opendr_image_t* structure has the following field: +The *OpendrImageT* structure has the following field: #### `void *data` field A pointer where image data are stored. -*opendr_image_t* is using internally OpenCV images (*cv::Mat*) for storing images. +*OpendrImageT* is using internally OpenCV images (*cv::Mat*) for storing images. Therefore, only a pointer to the memory location of the corresponding *cv::Mat* is stored. Please note that the user is not expected to directly manipulate these data without first converting them into OpenCV data type or using the corresponding functions provided in *opendr_utils.h*. diff --git a/docs/reference/c-face-recognition-h.md b/docs/reference/c-face-recognition-h.md index 1ea2e5822d..bf1be372aa 100644 --- a/docs/reference/c-face-recognition-h.md +++ b/docs/reference/c-face-recognition-h.md @@ -3,62 +3,62 @@ The *face_recognition.h* header provides function definitions that can be used for accessing the OpenDR face recognition tool. -### Struct *face_recognition_model_t* +### Struct *FaceRecognitionModelT* ```C -struct face_recognition_model { +struct FaceRecognitionModel { ... }; -typedef struct face_recognition_model face_recognition_model_t; +typedef struct FaceRecognitionModel FaceRecognitionModelT; ``` -The *face_recognition_model_t* structure keeps all the neccesary information that are required by the OpenDR face recognition tool (e.g., model weights, normalization information, database for person recognition, ONNX session information, etc.). +The *FaceRecognitionModelT* structure keeps all the necessary information that are required by the OpenDR face recognition tool (e.g., model weights, normalization information, database for person recognition, ONNX session information, etc.). 
-### Function *load_face_recognition_model()* +### Function *loadFaceRecognitionModel()* ```C -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); +void loadFaceRecognitionModel(const char *modelPath, FaceRecognitionModelT *model); ``` - Loads a face recognition model saved in the local filesystem (*model path*) in OpenDR format. + Loads a face recognition model saved in the local filesystem (*modelPath*) in OpenDR format. This function also initializes a CPU-based ONNX session for performing inference using this model. The pre-trained models should follow the OpenDR conventions. The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. -### Function *free_face_recognition_model()* +### Function *freeFaceRecognitionModel()* ```C -void free_face_recognition_model(face_recognition_model_t *model); +void freeFaceRecognitionModel(FaceRecognitionModelT *model); ``` Releases the memory allocated for a face recognition model (*model*). -### Function *infer_face_recognition()* +### Function *inferFaceRecognition()* ```C -opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, opendr_image_t *image); +OpendrCategoryTargetT inferFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image); ``` This function perform inference using a face recognition model (*model*) and an input image (*image*). The function returns an OpenDR category structure with the inference results. 
-### Function *decode_category_face_recognition()* +### Function *decodeCategoryFaceRecognition()* ```C -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); +void decodeCategoryFaceRecognition(FaceRecognitionModelT *model, OpendrCategoryTargetT category, char *personName); ``` Returns the name of a recognized person by decoding the category id into a string (this function uses the information from the built person database). -### Function *build_database_face_recognition()* +### Function *buildDatabaseFaceRecognition()* ```C -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model); +void buildDatabaseFaceRecognition(const char *databaseFolder, const char *outputPath, FaceRecognitionModelT *model); ``` Build a face recognition database (containing images for persons to be recognized). -This function expects the *database_folder* to have the same format as the main Python toolkit. -The function calculates the features of the person that are contained in the database and it stores it into a binary file that can be then loaded to perform inference (*output_path*). -A loaded face recongition model should be provided (*model*), since this model will be used for the feature extraction process. +This function expects the (*databaseFolder*) to have the same format as the main Python toolkit. +The function calculates the features of the person that are contained in the database and it stores it into a binary file that can be then loaded to perform inference (*outputPath*). +A loaded face recognition model should be provided (*model*), since this model will be used for the feature extraction process.
-### Function *load_database_face_recognition()* +### Function *loadDatabaseFaceRecognition()* ```C -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); +void loadDatabaseFaceRecognition(const char *databasePath, FaceRecognitionModelT *model); ``` -Loads an already built database (*database_path) into a face recognition model (*model*). +Loads an already built database (*databasePath*) into a face recognition model (*model*). After this step, the model can be used for performing inference. diff --git a/docs/reference/c-object-detection-2d-nanodet-jit-h.md b/docs/reference/c-object-detection-2d-nanodet-jit-h.md index f4b14ef0b0..60c72b9db9 100644 --- a/docs/reference/c-object-detection-2d-nanodet-jit-h.md +++ b/docs/reference/c-object-detection-2d-nanodet-jit-h.md @@ -1,46 +1,46 @@ ## C_API: object_detection_2d_nanodet_jit.h -The *object_detection_2d_nanodet_jit.h* header provides function definitions that can be used for accessing the OpenDR object detection 2D nanodet tool. +The *object_detection_2d_nanodet_jit.h* header provides function definitions that can be used for accessing the OpenDR object detection 2D Nanodet tool. -### Struct *nanodet_model_t* +### Struct *NanodetModelT* ```C -struct nanodet_model { +struct NanodetModel { ... }; -typedef struct nanodet_model nanodet_model_t; +typedef struct NanodetModel NanodetModelT; ``` -The *nanodet_model_t* structure keeps all the necessary information that are required by the OpenDR object detection 2D nanodet tool (e.g., model weights, normalization information, etc.). +The *NanodetModelT* structure keeps all the necessary information that are required by the OpenDR object detection 2D Nanodet tool (e.g., model weights, normalization information, etc.). 
-### Function *load_nanodet_model()* +### Function *loadNanodetModel()* ```C -void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); +void loadNanodetModel(char *modelPath, char *device, int height, int width, float scoreThreshold, NanodetModelT *model); ``` -Loads a nanodet object detection model saved in the local filesystem (*modelPath*) in OpenDR format. +Loads a Nanodet object detection model saved in the local filesystem (*modelPath*) in OpenDR format. This function also initializes a (*device*) JIT network for performing inference using this model. The pre-trained models should follow the OpenDR conventions. The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. -### Function *free_nanodet_model()* +### Function *freeNanodetModel()* ```C -void free_nanodet_model(nanodet_model_t *model); +void freeNanodetModel(NanodetModelT *model); ``` -Releases the memory allocated for an object detection 2D nanodet model (*model*). +Releases the memory allocated for an object detection 2D Nanodet model (*model*). -### Function *infer_nanodet()* +### Function *inferNanodet()* ```C -opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); +OpendrDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpendrImageT *image); ``` -This function perform inference using an object detection 2D nanodet model (*model*) and an input image (*image*). +This function perform inference using an object detection 2D Nanodet model (*model*) and an input image (*image*). The function returns an OpenDR detection vector structure with the inference results. 
-### Function *draw_bboxes()* +### Function *drawBboxes()* ```C -void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); +void drawBboxes(OpendrImageT *image, NanodetModelT *model, OpendrDetectionVectorTargetT *detectionsVector); ``` This function draws the given detections (*detectionsVector*) onto the input image (*image*) and then shows the image on screen. The (*model*) keeps all the necessary information. diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index ec63103586..20d67458c6 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -3,73 +3,67 @@ The *opendr_utils.h* header provides function definitions of OpenDR helpers (e.g., for creating OpenDR images). -### Function *json_get_key_string()* +### Function *jsonGetKeyString()* ```C -const char* json_get_key_string(const char *json, const char *key); +const char* jsonGetKeyString(const char *json, const char *key, const int index); ``` -The *json_get_key_string()* function reads a JSON string from the pointer (*json*) and returns tha value of a key with pointer (*key*). - -### Function *json_get_key_string()* -```C -const char* json_get_key_string(const char *json, const char *key, const int index); -``` -The *json_get_key_string()* function reads a JSON string from the pointer (*json*) and returns tha value of a key with pointer (*key*) as string. +The *jsonGetKeyString()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) as string. If the value is an array it will return only the (*index*) value of the array. If fails returns (*""*).
-### Function *json_get_key_float()* +### Function *jsonGetKeyFloat()* ```C -float json_get_key_float(const char *json, const char *key, const int index); +float jsonGetKeyFloat(const char *json, const char *key, const int index); ``` -The *json_get_key_float()* function reads a JSON string from the pointer (*json*) and returns tha value of a key with pointer (*key*) as float. +The *jsonGetKeyFloat()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) as float. If the value is an array it will return only the (*index*) value of the array. If fails returns (*0.0f*). -### Function *json_get_key_from_inference_params()* +### Function *jsonGetKeyFromInferenceParams()* ```C -float json_get_key_from_inference_params(const char *json, const char *key, const int index); +float jsonGetKeyFromInferenceParams(const char *json, const char *key, const int index); ``` -The *json_get_key_from_inference_params()* function reads a JSON string from the pointer (*json*) and returns tha value of a key with pointer (*key*) in inference_params section as float. +The *jsonGetKeyFromInferenceParams()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) in inference_params section as float. If the value is an array it will return only the (*index*) value of the array. If fails returns (*0.0f*). --- -### Function *load_image()* +### Function *loadImage()* ```C -void load_image(const char *path, opendr_image_t *image); +void loadImage(const char *path, OpendrImageT *image); ``` -The *load_image()* function loads an image from the local file system (*path*) into an OpenDR image data type. -A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. -This function allocates memory during each function call, so be sure to use the *free_image()* function to release the allocated resources, when the corresponding image is no longer needed.
+The *loadImage()* function loads an image from the local file system (*path*) into an OpenDR image data type. +A pointer (*image*) to an OpenDR *OpendrImageT* should be provided. +This function allocates memory during each function call, so be sure to use the *freeImage()* function to release the allocated resources, when the corresponding image is no longer needed. -### Function *free_image()* +### Function *freeImage()* ```C -void free_image(opendr_image_t *image); +void freeImage(OpendrImageT *image); ``` -The *free_image()* function releases the memory allocated for an OpenDR image structure (*image*). -A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. +The *freeImage()* function releases the memory allocated for an OpenDR image structure (*image*). +A pointer (*image*) to an OpenDR *OpendrImageT* should be provided. --- -### Function *init_detections_vector()* +### Function *initDetectionsVector()* ```C -void init_detections_vector(opendr_detection_vector_target_t *detection_vector); +void initDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); ``` -The *init_detections_vector()* function initializes the data of an OpenDR detection vector structure (*detection_vector*) with zero values. -A pointer (*detection_vector*) to an OpenDR *detection_vector_target_t* should be provided. +The *initDetectionsVector()* function initializes the data of an OpenDR detection vector structure (*detectionVector*) with zero values. +A pointer (*detectionVector*) to an OpenDR *OpendrDetectionVectorTargetT* should be provided.
-### Function *load_detections_vector()* +### Function *loadDetectionsVector()* ```C -void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, - int vector_size); +void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, + int vectorSize); ``` -The *load_detections_vector()* function stores OpenDR detection target structures in the memory allocated for multiple OpenDR detections structures (*detection*). -A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. +The *loadDetectionsVector()* function stores OpenDR detection target structures in the memory allocated for multiple OpenDR detections structures (*detection*). +A pointer (*detectionVector*) to an OpenDR *OpendrDetectionVectorTargetT* should be provided. -### Function *free_detections_vector()* +### Function *freeDetectionsVector()* ```C -void free_detections_vector(opendr_detection_vector_target_t *detection_vector); +void freeDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); ``` -The *free_detections_vector()* function releases the memory allocated for an OpenDR detection vector structure (*detection_vector*). -A pointer (*detection_vector*) to an OpenDR *opendr_detection_vector_target_t* should be provided. +The *freeDetectionsVector()* function releases the memory allocated for an OpenDR detection vector structure (*detectionVector*). +A pointer (*detectionVector*) to an OpenDR *OpendrDetectionVectorTargetT* should be provided. diff --git a/docs/reference/c-target-h.md b/docs/reference/c-target-h.md index 786a45c456..d73f249df7 100644 --- a/docs/reference/c-target-h.md +++ b/docs/reference/c-target-h.md @@ -3,20 +3,20 @@ The *target.h* header provides definitions of OpenDR targets (inference outputs) that can be used in the C API of OpenDR. 
-### struct *opendr_category_target_t* +### struct *OpendrCategoryTargetT* ```C -struct opendr_category_target{ +struct OpendrCategoryTarget{ int data; float confidence; }; -typedef struct opendr_category_target opendr_category_target_t; +typedef struct OpendrCategoryTarget OpendrCategoryTargetT; ``` -The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. +The *OpendrCategoryTargetT* structure provides a data structure for storing inference outputs of classification models. Every function in the C API that outputs a classification decision is expected to use this structure. -The *opendr_category_target_t* structure has the following fields: +The *OpendrCategoryTargetT* structure has the following fields: #### `int data` field @@ -27,7 +27,7 @@ A numerical id of the category to which the input objects belongs to. The decision confidence (a value between 0 and 1). -### struct *opendr_detection_target_t* +### struct *OpendrDetectionTargetT* ```C struct opendr_detection_target { int name; @@ -37,14 +37,14 @@ struct opendr_detection_target { float height; float score; }; -typedef struct opendr_detection_target opendr_detection_target_t; +typedef struct OpendrDetectionTarget OpendrDetectionTargetT; ``` -The *opendr_detection_target_t* structure provides a data structure for storing inference outputs of detection models. +The *OpendrDetectionTargetT* structure provides a data structure for storing inference outputs of detection models. Every function in the C API that outputs a detection decision is expected to use this structure or a vector of this structure. -The *opendr_detection_target_t* structure has the following fields: +The *OpendrDetectionTargetT* structure has the following fields: #### `int name` field @@ -71,22 +71,22 @@ A numerical value that corresponds to the height of a detection. The decision score (a value between 0 and 1). 
-### struct *opendr_detection_vector_target_t* +### struct *OpendrDetectionVectorTargetT* ```C -struct opendr_detection_vector_target { - opendr_detection_target_t *starting_pointer; +struct OpendrDetectionVectorTarget { + OpendrDetectionTargetT *startingPointer; int size; }; -typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; +typedef struct OpendrDetectionVectorTarget OpendrDetectionVectorTargetT; ``` -The *opendr_detection_vector_target_t* structure provides a data structure for storing multiple inference outputs of detection models. -Every function in the C API that outputs a detection decision is expected to use this or a *detection_target_t* structure. +The *OpendrDetectionVectorTargetT* structure provides a data structure for storing multiple inference outputs of detection models. +Every function in the C API that outputs a detection decision is expected to use this or a *OpendrDetectionTargetT* structure. -The *opendr_detection_vector_target_t* structure has the following fields: +The *OpendrDetectionVectorTargetT* structure has the following fields: -#### `opendr_detection_target_t starting_pointer` field +#### `OpendrDetectionTargetT startingPointer` field A pointer to a memory of multiple OpenDR detection targets. diff --git a/docs/reference/object-detection-2d-nanodet.md b/docs/reference/object-detection-2d-nanodet.md index e5814c0885..f62546bcf6 100644 --- a/docs/reference/object-detection-2d-nanodet.md +++ b/docs/reference/object-detection-2d-nanodet.md @@ -124,7 +124,7 @@ Inside this folder, the model is saved as *nanodet_{model_name}.pth* for JIT mod Note: In ONNX optimization, the output model executes the original model's feed forward method. The user must create their own pre- and post-processes in order to use the ONNX model in the C API. In JIT optimization the output model performs the feed forward pass and post-processing. 
-To use the C API, it is recommended to use JIT optimization as shown in the [example of OpenDR's C API](../../projects/c_api/samples/nanodet/nanodet_jit_demo.c) +To use the C API, it is recommended to use JIT optimization as shown in the [example of OpenDR's C API](../../projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c). Parameters: diff --git a/include/data.h b/include/data.h index 0b0018b941..b7728fd7f2 100644 --- a/include/data.h +++ b/include/data.h @@ -24,10 +24,10 @@ extern "C" { /*** * OpenDR data type for representing images */ -struct opendr_image { +struct OpendrImage { void *data; }; -typedef struct opendr_image opendr_image_t; +typedef struct OpendrImage OpendrImageT; #ifdef __cplusplus } diff --git a/include/face_recognition.h b/include/face_recognition.h index 2647a04802..99c4ec2f90 100644 --- a/include/face_recognition.h +++ b/include/face_recognition.h @@ -24,44 +24,44 @@ extern "C" { #endif -struct face_recognition_model { +struct FaceRecognitionModel { // ONNX session objects - void *onnx_session; + void *onnxSession; void *env; - void *session_options; + void *sessionOptions; // Sizes for resizing and cropping an input image - int model_size; - int resize_size; + int modelSize; + int resizeSize; // Statistics for normalization - float mean_value; - float std_value; + float meanValue; + float stdValue; // Recognition threshold float threshold; // Feature dimension - int output_size; + int outputSize; // Database data void *database; - int *database_ids; - char **person_names; + int *databaseIds; + char **personNames; // Number of persons in the database - int n_persons; + int nPersons; // Number of features vectors in the database - int n_features; + int nFeatures; }; -typedef struct face_recognition_model face_recognition_model_t; +typedef struct FaceRecognitionModel FaceRecognitionModelT; /** * Loads a face recognition model saved in OpenDR format.
- * @param model_path path to the OpenDR face recongition model (as exported using OpenDR library) + * @param modelPath path to the OpenDR face recognition model (as exported using OpenDR library) * @param model the loaded model */ -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); +void loadFaceRecognitionModel(const char *modelPath, FaceRecognitionModelT *model); /** * This function perform inference using a face recognition model and an input image. @@ -69,39 +69,39 @@ void load_face_recognition_model(const char *model_path, face_recognition_model_ * @param image OpenDR image * @return OpenDR classification target containing the id of the recognized person */ -opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, opendr_image_t *image); +OpendrCategoryTargetT inferFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image); /** * Builds a face recognition database (containing images for persons to be recognized). This function expects the - * database_folder to have the same format as the main Python toolkit. - * @param database_folder folder containing the database - * @param output_path output path to store the binary database. This file should be loaded along with the face + * databaseFolder to have the same format as the main Python toolkit. + * @param databaseFolder folder containing the database + * @param outputPath output path to store the binary database. This file should be loaded along with the face * recognition model before performing inference. * @param model the face recognition model to be used for extracting the database features */ -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model); +void buildDatabaseFaceRecognition(const char *databaseFolder, const char *outputPath, FaceRecognitionModelT *model); /** * Loads an already built database into the face recognition model. 
After this step, the model can be used for * performing inference. - * @param database_path path to the database file + * @param databasePath path to the database file * @param model the face recognition model to be used for inference */ -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); +void loadDatabaseFaceRecognition(const char *databasePath, FaceRecognitionModelT *model); /** * Returns the name of a recognition person by decoding the category id into a string. * @param model the face recognition model to be used for inference * @param category the predicted category - * @param person_name buffer to store the person name + * @param personName buffer to store the person name */ -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); +void decodeCategoryFaceRecognition(FaceRecognitionModelT *model, OpendrCategoryTargetT category, char *personName); /** * Releases the memory allocated for a face recognition model. 
* @param model model to be de-allocated */ -void free_face_recognition_model(face_recognition_model_t *model); +void freeFaceRecognitionModel(FaceRecognitionModelT *model); #ifdef __cplusplus } diff --git a/include/object_detection_2d_nanodet_jit.h b/include/object_detection_2d_nanodet_jit.h index 3eae7a7059..24fad3b6a1 100644 --- a/include/object_detection_2d_nanodet_jit.h +++ b/include/object_detection_2d_nanodet_jit.h @@ -24,7 +24,7 @@ extern "C" { #endif -struct nanodet_model { +struct NanodetModel { // Jit cpp class holder void *network; @@ -32,15 +32,15 @@ struct nanodet_model { char *device; // Recognition threshold - float score_threshold; + float scoreThreshold; // Model input size - int input_size[2]; + int inputSizes[2]; // Keep ratio flag - int keep_ratio; + int keepRatio; }; -typedef struct nanodet_model nanodet_model_t; +typedef struct NanodetModel NanodetModelT; /** * Loads a nanodet object detection model saved in libtorch format. @@ -51,7 +51,7 @@ typedef struct nanodet_model nanodet_model_t; * @param scoreThreshold confidence threshold * @param model the model to be loaded */ -void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model); +void loadNanodetModel(char *modelPath, char *device, int height, int width, float scoreThreshold, NanodetModelT *model); /** * This function performs inference using a nanodet object detection model and an input image. @@ -59,13 +59,13 @@ void load_nanodet_model(char *modelPath, char *device, int height, int width, fl * @param image OpenDR image * @return OpenDR detection vector target containing the detections of the recognized objects */ -opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image); +OpendrDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpendrImageT *image); /** * Releases the memory allocated for a nanodet object detection model. 
* @param model model to be de-allocated */ -void free_nanodet_model(nanodet_model_t *model); +void freeNanodetModel(NanodetModelT *model); /** * Draw the bounding boxes from detections in the given image. @@ -73,7 +73,7 @@ void free_nanodet_model(nanodet_model_t *model); * @param model nanodet model that has been used for inference * @param detectionsVector output of the inference */ -void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector); +void drawBboxes(OpendrImageT *image, NanodetModelT *model, OpendrDetectionVectorTargetT *detectionsVector); #ifdef __cplusplus } diff --git a/include/opendr_utils.h b/include/opendr_utils.h index 9d0129ade7..82e7645c58 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -31,7 +31,7 @@ extern "C" { * @param index the index to choose the value if it is an array, otherwise it is not used * @return string with the value of key */ -const char *json_get_key_string(const char *json, const char *key, const int index); +const char *jsonGetKeyString(const char *json, const char *key, const int index); /** * JSON parser for OpenDR model files. @@ -40,7 +40,7 @@ const char *json_get_key_string(const char *json, const char *key, const int ind * @param index the index to choose the value if it is an array, otherwise it is not used * @return float with the value of key */ -float json_get_key_float(const char *json, const char *key, const int index); +float jsonGetKeyFloat(const char *json, const char *key, const int index); /** * JSON parser for OpenDR model files from inference_params key. 
@@ -49,41 +49,41 @@ float json_get_key_float(const char *json, const char *key, const int index); * @param index the index to choose the value if it is an array, otherwise it is not used * @return float with the value of key */ -float json_get_key_from_inference_params(const char *json, const char *key, const int index); +float jsonGetKeyFromInferenceParams(const char *json, const char *key, const int index); /** * Reads an image from path and saves it into OpenDR image structure. * @param path path from which the image will be read * @param image OpenDR image data structure to store the image */ -void load_image(const char *path, opendr_image_t *image); +void loadImage(const char *path, OpendrImageT *image); /** * Releases the memory allocated for an OpenDR image structure * @param image OpenDR image structure to release */ -void free_image(opendr_image_t *image); +void freeImage(OpendrImageT *image); /** * Initialize an empty detection list. - * @param detection_vector OpenDR detection_target_list structure to be initialized + * @param detectionVector OpenDR OpendrDetectionVectorTarget structure to be initialized */ -void init_detections_vector(opendr_detection_vector_target_t *detection_vector); +void initDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); /** * Loads an OpenDR detection target list. 
- * @param detection_vector OpenDR detection_target_list structure to be loaded + * @param detectionVector OpenDR OpendrDetectionVectorTarget structure to be loaded * @param detection the pointer of the first OpenDR detection target in a vector - * @param vector_size the size of the vector + * @param vectorSize the size of the vector */ -void load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, - int vector_size); +void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, + int vectorSize); /** * Releases the memory allocated for a detection list structure - * @param detection_vector OpenDR detection vector target structure to release + * @param detectionVector OpenDR detection vector target structure to release */ -void free_detections_vector(opendr_detection_vector_target_t *detection_vector); +void freeDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); #ifdef __cplusplus } diff --git a/include/target.h b/include/target.h index 894bc03d2f..c3f2d22592 100644 --- a/include/target.h +++ b/include/target.h @@ -23,16 +23,16 @@ extern "C" { /*** * OpenDR data type for representing classification targets */ -struct opendr_category_target { +struct OpendrCategoryTarget { int data; float confidence; }; -typedef struct opendr_category_target opendr_category_target_t; +typedef struct OpendrCategoryTarget OpendrCategoryTargetT; /*** * OpenDR data type for representing detection targets */ -struct opendr_detection_target { +struct OpendrDetectionTarget { int name; float left; float top; @@ -40,16 +40,16 @@ struct opendr_detection_target { float height; float score; }; -typedef struct opendr_detection_target opendr_detection_target_t; +typedef struct OpendrDetectionTarget OpendrDetectionTargetT; /*** * OpenDR data type for representing a structure of detection targets */ -struct opendr_detection_vector_target { - opendr_detection_target_t 
*starting_pointer; +struct OpendrDetectionVectorTarget { + OpendrDetectionTargetT *startingPointer; int size; }; -typedef struct opendr_detection_vector_target opendr_detection_vector_target_t; +typedef struct OpendrDetectionVectorTarget OpendrDetectionVectorTargetT; #ifdef __cplusplus } diff --git a/projects/c_api/samples/face_recognition/face_recognition_demo.c b/projects/c_api/samples/face_recognition/face_recognition_demo.c index 03465f77fe..20430709ea 100644 --- a/projects/c_api/samples/face_recognition/face_recognition_demo.c +++ b/projects/c_api/samples/face_recognition/face_recognition_demo.c @@ -22,32 +22,32 @@ int main(int argc, char *argv[]) { // Create a face recognition model - face_recognition_model_t model; + FaceRecognitionModelT model; // Load a pretrained model (see instructions for downloading the data) - load_face_recognition_model("data/optimized_model", &model); + loadFaceRecognitionModel("data/optimized_model", &model); // Build and load the database - build_database_face_recognition("data/database", "data/database.dat", &model); - load_database_face_recognition("data/database.dat", &model); + buildDatabaseFaceRecognition("data/database", "data/database.dat", &model); + loadDatabaseFaceRecognition("data/database.dat", &model); // Load an image and performance inference - opendr_image_t image; - load_image("data/database/1/1.jpg", &image); + OpendrImageT image; + loadImage("data/database/1/1.jpg", &image); if (!image.data) { printf("Image not found!"); return 1; } - opendr_category_target_t res = infer_face_recognition(&model, &image); + OpendrCategoryTargetT res = inferFaceRecognition(&model, &image); // Free the image resources - free_image(&image); + freeImage(&image); // Get the prediction and decode it char buff[512]; - decode_category_face_recognition(&model, res, buff); + decodeCategoryFaceRecognition(&model, res, buff); printf("Predicted category %d (folder name: %s) with confidence %f\n", res.data, buff, res.confidence); // Free the 
model resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); return 0; } diff --git a/projects/c_api/samples/object_detection_2d/nanodet/README.md b/projects/c_api/samples/object_detection_2d/nanodet/README.md index 1ca7c102c1..cf27c62956 100644 --- a/projects/c_api/samples/object_detection_2d/nanodet/README.md +++ b/projects/c_api/samples/object_detection_2d/nanodet/README.md @@ -5,11 +5,11 @@ To run the demo, the downloaded model can be used or it can be exported with JIT After installation, the demo can be run from projects/c_api directory with: ```sh -./built/nanodet_libtorch_demo ./path/to/your/model.pth device_name{cpu, cuda} ./path/to/your/image.jpg height width +./build/nanodet_libtorch_demo ./path/to/your/model.pth device_name{cpu, cuda} ./path/to/your/image.jpg height width ``` Or with the downloaded model and image with: ```sh -./built/nanodet_libtorch_demo ./data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth cuda ./data/object_detection_2d/nanodet/database/000000000036.jpg 320 320 +./build/nanodet_libtorch_demo ./data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth cuda ./data/object_detection_2d/nanodet/database/000000000036.jpg 320 320 ``` diff --git a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c index eae45f9eb0..756720cb51 100644 --- a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c @@ -29,34 +29,34 @@ int main(int argc, char **argv) { return -1; } - nanodet_model_t model; + NanodetModelT model; int height = atoi(argv[4]); int width = atoi(argv[5]); printf("start init model\n"); - load_nanodet_model(argv[1], argv[2], height, width, 0.35, &model); + loadNanodetModel(argv[1], argv[2], height, width, 0.35, &model); printf("success\n"); - opendr_image_t image; + OpendrImageT image; - load_image(argv[3], 
&image); + loadImage(argv[3], &image); if (!image.data) { printf("Image not found!"); return 1; } // Initialize opendr detection target list; - opendr_detection_vector_target_t results; - init_detections_vector(&results); + OpendrDetectionVectorTargetT results; + initDetectionsVector(&results); - results = infer_nanodet(&model, &image); + results = inferNanodet(&model, &image); - draw_bboxes(&image, &model, &results); + drawBboxes(&image, &model, &results); // Free the memory - free_detections_vector(&results); - free_image(&image); - free_nanodet_model(&model); + freeDetectionsVector(&results); + freeImage(&image); + freeNanodetModel(&model); return 0; } diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index 3a8d305c09..508e5df829 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -35,113 +35,113 @@ /** * Helper function for preprocessing images before feeding them into the face recognition model. * This function follows the OpenDR's face recognition pre-processing pipeline, which includes the following: - * a) resizing the image into resize_target x resize_target pixels and then taking a center crop of size model_input_size, - * and b) normalizing the resulting values using mean_value and std_value + * a) resizing the image into resizeTarget x resizeTarget pixels and then taking a center crop of size modelInputSize, + * and b) normalizing the resulting values using meanValue and stdValue * @param image image to be preprocesses * @param data pre-processed data in a flattened vector - * @param resize_target target size for resizing - * @param model_input_size size of the center crop (equals the size that the DL model expects) - * @param mean_value value used for centering the input image - * @param std_value value used for scaling the input image + * @param resizeTarget target size for resizing + * @param modelInputSize size of the center crop (equals the size that the DL model expects) + * @param 
meanValue value used for centering the input image + * @param stdValue value used for scaling the input image */ -void preprocess_face_recognition(cv::Mat *image, std::vector &data, int resize_target = 128, int model_input_size = 112, - float mean_value = 0.5, float std_value = 0.5) { +void preprocessFaceRecognition(cv::Mat *image, std::vector &data, int resizeTarget = 128, int modelInputSize = 112, + float meanValue = 0.5, float stdValue = 0.5) { // Convert to RGB cv::Mat img; cv::cvtColor(*image, img, cv::COLOR_BGR2RGB); // Resize and then get a center crop - cv::resize(img, img, cv::Size(resize_target, resize_target)); - int stride = (resize_target - model_input_size) / 2; - cv::Rect myROI(stride, stride, resize_target - stride, resize_target - stride); + cv::resize(img, img, cv::Size(resizeTarget, resizeTarget)); + int stride = (resizeTarget - modelInputSize) / 2; + cv::Rect myROI(stride, stride, resizeTarget - stride, resizeTarget - stride); img = img(myROI); // Scale to 0...1 - cv::Mat out_img; - img.convertTo(out_img, CV_32FC3, 1 / 255.0); + cv::Mat outImg; + img.convertTo(outImg, CV_32FC3, 1 / 255.0); // Unfold the image into the appropriate format // This is certainly not the most efficient way to do this... // ... and is probably constantly leading to cache misses // ... but it works for now. 
- for (unsigned int j = 0; j < model_input_size; ++j) { - for (unsigned int k = 0; k < model_input_size; ++k) { - cv::Vec3f cur_pixel = out_img.at(j, k); - data[0 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[0] - mean_value) / std_value; - data[1 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[1] - mean_value) / std_value; - data[2 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[2] - mean_value) / std_value; + for (unsigned int j = 0; j < modelInputSize; ++j) { + for (unsigned int k = 0; k < modelInputSize; ++k) { + cv::Vec3f curPixel = outImg.at(j, k); + data[0 * modelInputSize * modelInputSize + j * modelInputSize + k] = (curPixel[0] - meanValue) / stdValue; + data[1 * modelInputSize * modelInputSize + j * modelInputSize + k] = (curPixel[1] - meanValue) / stdValue; + data[2 * modelInputSize * modelInputSize + j * modelInputSize + k] = (curPixel[2] - meanValue) / stdValue; } } } -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model) { +void loadFaceRecognitionModel(const char *modelPath, FaceRecognitionModelT *model) { // Initialize model - model->onnx_session = model->env = model->session_options = NULL; - model->database = model->database_ids = NULL; - model->person_names = NULL; + model->onnxSession = model->env = model->sessionOptions = NULL; + model->database = model->databaseIds = NULL; + model->personNames = NULL; model->threshold = 1; // Parse the model JSON file - std::string model_json_path(model_path); - std::size_t split_pos = model_json_path.find_last_of("/"); - split_pos = split_pos > 0 ? split_pos + 1 : 0; - model_json_path = model_json_path + "/" + model_json_path.substr(split_pos) + ".json"; + std::string modelJsonPath(modelPath); + std::size_t splitPos = modelJsonPath.find_last_of("/"); + splitPos = splitPos > 0 ?
splitPos + 1 : 0; + modelJsonPath = modelJsonPath + "/" + modelJsonPath.substr(splitPos) + ".json"; - std::ifstream in_stream(model_json_path); - if (!in_stream.is_open()) { + std::ifstream inStream(modelJsonPath); + if (!inStream.is_open()) { std::cerr << "Cannot open JSON model file" << std::endl; return; } - std::string str((std::istreambuf_iterator(in_stream)), std::istreambuf_iterator()); + std::string str((std::istreambuf_iterator(inStream)), std::istreambuf_iterator()); const char *json = str.c_str(); - std::string basepath = model_json_path.substr(0, split_pos); - split_pos = basepath.find_last_of("/"); - split_pos = split_pos > 0 ? split_pos + 1 : 0; - if (split_pos < basepath.size()) - basepath.resize(split_pos); + std::string basepath = modelJsonPath.substr(0, splitPos); + splitPos = basepath.find_last_of("/"); + splitPos = splitPos > 0 ? splitPos + 1 : 0; + if (splitPos < basepath.size()) + basepath.resize(splitPos); // Parse JSON - std::string onnx_model_path = basepath + json_get_key_string(json, "model_paths", 0); - std::string model_format = json_get_key_string(json, "format", 0); + std::string onnxModelPath = basepath + jsonGetKeyString(json, "model_paths", 0); + std::string modelFormat = jsonGetKeyString(json, "format", 0); // Parse inference params - float threshold = json_get_key_from_inference_params(json, "threshold", 0); + float threshold = jsonGetKeyFromInferenceParams(json, "threshold", 0); model->threshold = threshold; // Proceed only if the model is in onnx format - if (model_format != "onnx") { + if (modelFormat != "onnx") { std::cerr << "Model not in ONNX format." 
<< std::endl; return; } Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions *session_options = new Ort::SessionOptions; - session_options->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; + sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, onnx_model_path.c_str(), *session_options); + Ort::Session *session = new Ort::Session(*env, onnxModelPath.c_str(), *sessionOptions); model->env = env; - model->onnx_session = session; - model->session_options = session_options; + model->onnxSession = session; + model->sessionOptions = sessionOptions; // Should we pass these parameters through the model json file? - model->model_size = 112; - model->resize_size = 128; - model->mean_value = 0.5; - model->std_value = 0.5; - model->output_size = 128; + model->modelSize = 112; + model->resizeSize = 128; + model->meanValue = 0.5; + model->stdValue = 0.5; + model->outputSize = 128; } -void free_face_recognition_model(face_recognition_model_t *model) { - if (model->onnx_session) { - Ort::Session *session = static_cast(model->onnx_session); +void freeFaceRecognitionModel(FaceRecognitionModelT *model) { + if (model->onnxSession) { + Ort::Session *session = static_cast(model->onnxSession); delete session; } - if (model->session_options) { - Ort::SessionOptions *session_options = static_cast(model->session_options); - delete session_options; + if (model->sessionOptions) { + Ort::SessionOptions *sessionOptions = static_cast(model->sessionOptions); + delete sessionOptions; } if (model->env) { @@ -149,8 +149,8 @@ void free_face_recognition_model(face_recognition_model_t *model) { delete env; } - if (model->database_ids) { - delete[] model->database_ids; + if (model->databaseIds) { + delete[] model->databaseIds; } if (model->database) { @@ -158,139 +158,139 @@ void 
free_face_recognition_model(face_recognition_model_t *model) { delete database; } - if (model->person_names) { - for (int i = 0; i < model->n_persons; i++) - delete[] model->person_names[i]; - delete[] model->person_names; + if (model->personNames) { + for (int i = 0; i < model->nPersons; i++) + delete[] model->personNames[i]; + delete[] model->personNames; } } -void ff_face_recognition(face_recognition_model_t *model, opendr_image_t *image, cv::Mat *features) { - Ort::Session *session = static_cast(model->onnx_session); +void ffFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image, cv::Mat *features) { + Ort::Session *session = static_cast(model->onnxSession); if (!session) { std::cerr << "ONNX session not initialized." << std::endl; return; } // Prepare the input dimensions - std::vector input_node_dims = {1, 3, model->model_size, model->model_size}; - size_t input_tensor_size = model->model_size * model->model_size * 3; + std::vector inputNodeDims = {1, 3, model->modelSize, model->modelSize}; + size_t inputTensorSize = model->modelSize * model->modelSize * 3; // Get the input image and pre-process it - std::vector input_tensor_values(input_tensor_size); - cv::Mat *opencv_image = static_cast(image->data); - if (!opencv_image) { + std::vector inputTensorValues(inputTensorSize); + cv::Mat *opencvImage = static_cast(image->data); + if (!opencvImage) { std::cerr << "Cannot load image for inference." 
<< std::endl; return; } - preprocess_face_recognition(opencv_image, input_tensor_values, model->resize_size, model->model_size, model->mean_value, - model->std_value); + preprocessFaceRecognition(opencvImage, inputTensorValues, model->resizeSize, model->modelSize, model->meanValue, + model->stdValue); // Setup input/output names Ort::AllocatorWithDefaultOptions allocator; - std::vector input_node_names = {"data"}; - std::vector output_node_names = {"features"}; + std::vector inputNodeNames = {"data"}; + std::vector outputNodeNames = {"features"}; // Set up the input tensor - auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value input_tensor = - Ort::Value::CreateTensor(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4); - assert(input_tensor.IsTensor()); + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value inputTensor = + Ort::Value::CreateTensor(memoryInfo, inputTensorValues.data(), inputTensorSize, inputNodeDims.data(), 4); + assert(inputTensor.IsTensor()); // Feed-forward the model - auto output_tensors = - session->Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1); - assert(output_tensors.size() == 1 && output_tensors.front().IsTensor()); + auto outputTensors = + session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); + assert(outputTensors.size() == 1 && outputTensors.front().IsTensor()); // Get the results back - float *floatarr = output_tensors.front().GetTensorMutableData(); - cv::Mat cur_features(cv::Size(model->output_size, 1), CV_32F, floatarr); + float *floatarr = outputTensors.front().GetTensorMutableData(); + cv::Mat curFeatures(cv::Size(model->outputSize, 1), CV_32F, floatarr); // Perform l2 normalizaton - cv::Mat features_square = cur_features.mul(cur_features); - float norm = sqrt(cv::sum(features_square)[0]); - 
cur_features = cur_features / norm; - memcpy(features->data, cur_features.data, sizeof(float) * model->output_size); + cv::Mat featuresSquare = curFeatures.mul(curFeatures); + float norm = sqrt(cv::sum(featuresSquare)[0]); + curFeatures = curFeatures / norm; + memcpy(features->data, curFeatures.data, sizeof(float) * model->outputSize); } -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model) { +void buildDatabaseFaceRecognition(const char *databaseFolder, const char *outputPath, FaceRecognitionModelT *model) { using namespace boost::filesystem; - std::vector person_names; - std::vector database_ids; - cv::Mat database(cv::Size(model->output_size, 0), CV_32F); + std::vector personNames; + std::vector databaseIds; + cv::Mat database(cv::Size(model->outputSize, 0), CV_32F); - path root_path(database_folder); - if (!exists(root_path)) { + path rootPath(databaseFolder); + if (!exists(rootPath)) { std::cerr << "Database path does not exist." 
<< std::endl; return; } - int current_id = 0; - for (auto person_path = directory_iterator(root_path); person_path != directory_iterator(); person_path++) { + int currentId = 0; + for (auto personPath = directory_iterator(rootPath); personPath != directory_iterator(); personPath++) { // For each person in the database - if (is_directory(person_path->path())) { - path cur_person_path(person_path->path()); - person_names.push_back(person_path->path().filename().string()); + if (is_directory(personPath->path())) { + path curPersonPath(personPath->path()); + personNames.push_back(personPath->path().filename().string()); - for (auto cur_img_path = directory_iterator(cur_person_path); cur_img_path != directory_iterator(); cur_img_path++) { - opendr_image_t image; - load_image(cur_img_path->path().string().c_str(), &image); + for (auto curImgPath = directory_iterator(curPersonPath); curImgPath != directory_iterator(); curImgPath++) { + OpendrImageT image; + loadImage(curImgPath->path().string().c_str(), &image); - cv::Mat features(cv::Size(model->output_size, 1), CV_32F); - ff_face_recognition(model, &image, &features); + cv::Mat features(cv::Size(model->outputSize, 1), CV_32F); + ffFaceRecognition(model, &image, &features); - free_image(&image); + freeImage(&image); database.push_back(features.clone()); - database_ids.push_back(current_id); + databaseIds.push_back(currentId); } - current_id++; + currentId++; } else { continue; } } - if (current_id == 0) { + if (currentId == 0) { std::cerr << "Cannot open database files." << std::endl; return; } // Make the array continuous - cv::Mat database_out = database.clone(); + cv::Mat databaseOut = database.clone(); - std::ofstream fout(output_path, std::ios::out | std::ios::binary); + std::ofstream fout(outputPath, std::ios::out | std::ios::binary); if (!fout.is_open()) { std::cerr << "Cannot open database file for writting." 
<< std::endl; return; } // Write number of persons - int n = person_names.size(); + int n = personNames.size(); fout.write(reinterpret_cast(&n), sizeof(int)); for (int i = 0; i < n; i++) { // Write the name of the person (along with its size) - int name_length = person_names[i].size() + 1; - fout.write(reinterpret_cast(&name_length), sizeof(int)); - fout.write(person_names[i].c_str(), name_length); + int nameLength = personNames[i].size() + 1; + fout.write(reinterpret_cast(&nameLength), sizeof(int)); + fout.write(personNames[i].c_str(), nameLength); } - cv::Size s = database_out.size(); + cv::Size s = databaseOut.size(); fout.write(reinterpret_cast(&s.height), sizeof(int)); fout.write(reinterpret_cast(&s.width), sizeof(int)); - fout.write(reinterpret_cast(database_out.data), sizeof(float) * s.height * s.width); - fout.write(reinterpret_cast(&database_ids[0]), sizeof(int) * s.height); + fout.write(reinterpret_cast(databaseOut.data), sizeof(float) * s.height * s.width); + fout.write(reinterpret_cast(&databaseIds[0]), sizeof(int) * s.height); fout.flush(); fout.close(); } -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model) { +void loadDatabaseFaceRecognition(const char *databasePath, FaceRecognitionModelT *model) { model->database = NULL; - model->database_ids = NULL; + model->databaseIds = NULL; - std::ifstream fin(database_path, std::ios::out | std::ios::binary); + std::ifstream fin(databasePath, std::ios::out | std::ios::binary); if (!fin.is_open()) { std::cerr << "Cannot load database file (check that file exists and you have created the database)." 
<< std::endl; @@ -298,50 +298,50 @@ void load_database_face_recognition(const char *database_path, face_recognition_ } int n; fin.read(reinterpret_cast(&n), sizeof(int)); - char **person_names = new char *[n]; + char **personNames = new char *[n]; for (int i = 0; i < n; i++) { - person_names[i] = new char[512]; + personNames[i] = new char[512]; // Read person name - int name_length; - fin.read(reinterpret_cast(&name_length), sizeof(int)); - if (name_length > 512) { + int nameLength; + fin.read(reinterpret_cast(&nameLength), sizeof(int)); + if (nameLength > 512) { std::cerr << "Person name exceeds max number of characters (512)" << std::endl; return; } - fin.read(person_names[i], name_length); + fin.read(personNames[i], nameLength); } int height, width; fin.read(reinterpret_cast(&height), sizeof(int)); fin.read(reinterpret_cast(&width), sizeof(int)); - float *database_buff = new float[height * width]; - int *features_ids = new int[height]; - fin.read(reinterpret_cast(database_buff), sizeof(float) * height * width); - fin.read(reinterpret_cast(features_ids), sizeof(int) * height); + float *databaseBuff = new float[height * width]; + int *featuresIds = new int[height]; + fin.read(reinterpret_cast(databaseBuff), sizeof(float) * height * width); + fin.read(reinterpret_cast(featuresIds), sizeof(int) * height); fin.close(); cv::Mat *database = new cv::Mat(cv::Size(width, height), CV_32F); - memcpy(database->data, database_buff, sizeof(float) * width * height); - delete[] database_buff; + memcpy(database->data, databaseBuff, sizeof(float) * width * height); + delete[] databaseBuff; model->database = database; - model->database_ids = features_ids; - model->person_names = person_names; - model->n_persons = n; - model->n_features = height; + model->databaseIds = featuresIds; + model->personNames = personNames; + model->nPersons = n; + model->nFeatures = height; } -opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, opendr_image_t *image) { - 
cv::Mat features(cv::Size(model->output_size, 1), CV_32F); - opendr_category_target_t target; +OpendrCategoryTargetT inferFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image) { + cv::Mat features(cv::Size(model->outputSize, 1), CV_32F); + OpendrCategoryTargetT target; target.data = -1; target.confidence = 0; // Get the feature vector for the current image - ff_face_recognition(model, image, &features); + ffFaceRecognition(model, image, &features); if (!model->database) { std::cerr << "Database is not loaded!" << std::endl; @@ -349,27 +349,27 @@ opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, } cv::Mat *database = static_cast(model->database); // Calculate the distance between the extracted feature vector and database features - cv::Mat features_repeated; - cv::repeat(features, model->n_features, 1, features_repeated); - cv::Mat diff = features_repeated - *database; + cv::Mat featuresRepeated; + cv::repeat(features, model->nFeatures, 1, featuresRepeated); + cv::Mat diff = featuresRepeated - *database; diff = diff.mul(diff); - cv::Mat sq_dists; - cv::reduce(diff, sq_dists, 1, CV_REDUCE_SUM, CV_32F); + cv::Mat sqDists; + cv::reduce(diff, sqDists, 1, CV_REDUCE_SUM, CV_32F); cv::Mat dists; - cv::sqrt(sq_dists, dists); + cv::sqrt(sqDists, dists); - double min_dist, max_dist; - cv::Point min_loc, max_loc; - cv::minMaxLoc(dists, &min_dist, &max_dist, &min_loc, &max_loc); + double minDist, maxDist; + cv::Point minLoc, maxLoc; + cv::minMaxLoc(dists, &minDist, &maxDist, &minLoc, &maxLoc); - target.data = model->database_ids[min_loc.y]; - target.confidence = 1 - (min_dist / model->threshold); + target.data = model->databaseIds[minLoc.y]; + target.confidence = 1 - (minDist / model->threshold); return target; } -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name) { - if (category.data >= model->n_persons) +void 
decodeCategoryFaceRecognition(FaceRecognitionModelT *model, OpendrCategoryTargetT category, char *personName) { + if (category.data >= model->nPersons) return; - strcpy(person_name, model->person_names[category.data]); + strcpy(personName, model->personNames[category.data]); } diff --git a/src/c_api/object_detection_2d_nanodet_jit.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp index 4908996e37..202b26877f 100644 --- a/src/c_api/object_detection_2d_nanodet_jit.cpp +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -41,7 +41,7 @@ class NanoDet { torch::Tensor meanTensor() const; torch::Tensor stdTensor() const; std::vector labels() const; - std::vector outputs; + std::vector outputs; }; NanoDet::NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, @@ -104,7 +104,7 @@ std::vector NanoDet::labels() const { /** * Helper function to calculate the final shape of the model input relative to size ratio of input image. */ -void get_minimum_dstShape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { +void getMinimumDstShape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { float ratio; float srcRatio = ((float)srcSize->width / (float)srcSize->height); float dstRatio = ((float)dstSize->width / (float)dstSize->height); @@ -125,7 +125,7 @@ void get_minimum_dstShape(cv::Size *srcSize, cv::Size *dstSize, float divisible) /** * Helper function to calculate the warp matrix for resizing. 
*/ -void get_resize_matrix(cv::Size *srcShape, cv::Size *dstShape, cv::Mat *Rs, int keepRatio) { +void getResizeMatrix(cv::Size *srcShape, cv::Size *dstShape, cv::Mat *Rs, int keepRatio) { if (keepRatio == 1) { float ratio; cv::Mat C = cv::Mat::eye(3, 3, CV_32FC1); @@ -171,10 +171,10 @@ void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warpMatr // Get new destination size if keep ratio is wanted if (keepRatio == 1) { - get_minimum_dstShape(&srcSize, dstSize, divisible); + getMinimumDstShape(&srcSize, dstSize, divisible); } - get_resize_matrix(&srcSize, dstSize, warpMatrix, keepRatio); + getResizeMatrix(&srcSize, dstSize, warpMatrix, keepRatio); cv::warpPerspective(*src, *dst, *warpMatrix, *dstSize); } @@ -195,13 +195,13 @@ torch::DeviceType torchDevice(char *deviceName, int verbose = 0) { return device; } -void load_nanodet_model(char *modelPath, char *device, int height, int width, float scoreThreshold, nanodet_model_t *model) { +void loadNanodetModel(char *modelPath, char *device, int height, int width, float scoreThreshold, NanodetModelT *model) { // Initialize model - model->input_size[0] = width; - model->input_size[1] = height; + model->inputSizes[0] = width; + model->inputSizes[1] = height; - model->score_threshold = scoreThreshold; - model->keep_ratio = 1; + model->scoreThreshold = scoreThreshold; + model->keepRatio = 1; const std::vector labels{ "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", @@ -231,7 +231,7 @@ void load_nanodet_model(char *modelPath, char *device, int height, int width, fl model->network = static_cast(detector); } -void ff_nanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, +void ffNanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, torch::Tensor *outputs) { // Make all the inputs as tensors to use in jit model torch::Tensor srcHeight = torch::tensor(originalSize->width); @@ -243,10 +243,10 @@ void 
ff_nanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, *outputs = outputs->to(torch::Device(torch::kCPU, 0)); } -opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_image_t *image) { +OpendrDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpendrImageT *image) { NanoDet *networkPTR = static_cast(model->network); - opendr_detection_vector_target_t detectionsVector; - init_detections_vector(&detectionsVector); + OpendrDetectionVectorTargetT detectionsVector; + initDetectionsVector(&detectionsVector); cv::Mat *opencvImage = static_cast(image->data); if (!opencvImage) { @@ -256,21 +256,21 @@ opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_im // Preprocess image and keep values as input in jit model cv::Mat resizedImg; - cv::Size dstSize = cv::Size(model->input_size[0], model->input_size[1]); + cv::Size dstSize = cv::Size(model->inputSizes[0], model->inputSizes[1]); cv::Mat warpMatrix = cv::Mat::eye(3, 3, CV_32FC1); - preprocess(opencvImage, &resizedImg, &dstSize, &warpMatrix, model->keep_ratio); + preprocess(opencvImage, &resizedImg, &dstSize, &warpMatrix, model->keepRatio); torch::Tensor input = networkPTR->preProcess(&resizedImg); cv::Size originalSize(opencvImage->cols, opencvImage->rows); torch::Tensor outputs; - ff_nanodet(networkPTR, &input, &warpMatrix, &originalSize, &outputs); + ffNanodet(networkPTR, &input, &warpMatrix, &originalSize, &outputs); - std::vector detections; + std::vector detections; // Postprocessing, find which outputs have better score than threshold and keep them. 
for (int label = 0; label < outputs.size(0); label++) { for (int box = 0; box < outputs.size(1); box++) { - if (outputs[label][box][4].item() > model->score_threshold) { - opendr_detection_target_t detection; + if (outputs[label][box][4].item() > model->scoreThreshold) { + OpendrDetectionTargetT detection; detection.name = label; detection.left = outputs[label][box][0].item(); detection.top = outputs[label][box][1].item(); @@ -284,12 +284,12 @@ opendr_detection_vector_target_t infer_nanodet(nanodet_model_t *model, opendr_im // Put vector detection as C pointer and size if (static_cast(detections.size()) > 0) - load_detections_vector(&detectionsVector, detections.data(), static_cast(detections.size())); + loadDetectionsVector(&detectionsVector, detections.data(), static_cast(detections.size())); return detectionsVector; } -void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection_vector_target_t *detectionsVector) { +void drawBboxes(OpendrImageT *image, NanodetModelT *model, OpendrDetectionVectorTargetT *detectionsVector) { const int colorList[80][3] = { //{255 ,255 ,255}, //bg {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, @@ -316,9 +316,9 @@ void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection cv::Mat imageWithDetections = (*opencvImage).clone(); for (size_t i = 0; i < detectionsVector->size; i++) { - const opendr_detection_target bbox = (detectionsVector->starting_pointer)[i]; + const OpendrDetectionTarget bbox = (detectionsVector->startingPointer)[i]; float score = bbox.score > 1 ? 
1 : bbox.score; - if (score > model->score_threshold) { + if (score > model->scoreThreshold) { cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); cv::rectangle(imageWithDetections, cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), @@ -349,7 +349,7 @@ void draw_bboxes(opendr_image_t *image, nanodet_model_t *model, opendr_detection cv::waitKey(0); } -void free_nanodet_model(nanodet_model_t *model) { +void freeNanodetModel(NanodetModelT *model) { if (model->network) { NanoDet *networkPTR = static_cast(model->network); delete networkPTR; diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index e4a6c352c4..00a5a53268 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -24,17 +24,17 @@ #include #include -float json_get_key_from_inference_params(const char *json, const char *key, const int index) { +float jsonGetKeyFromInferenceParams(const char *json, const char *key, const int index) { rapidjson::Document doc; doc.Parse(json); if ((!doc.IsObject()) || (!doc.HasMember("inference_params"))) { return 0.0f; } - const rapidjson::Value &inference_params = doc["inference_params"]; - if ((!inference_params.IsObject()) || (!inference_params.HasMember(key))) { + const rapidjson::Value &inferenceParams = doc["inference_params"]; + if ((!inferenceParams.IsObject()) || (!inferenceParams.HasMember(key))) { return 0.0f; } - const rapidjson::Value &value = inference_params[key]; + const rapidjson::Value &value = inferenceParams[key]; if (value.IsArray()) { if (value.Size() <= index) { return 0.0f; @@ -50,7 +50,7 @@ float json_get_key_from_inference_params(const char *json, const char *key, cons return value.GetFloat(); } -const char *json_get_key_string(const char *json, const char *key, const int index) { +const char *jsonGetKeyString(const char *json, const char *key, const int index) { rapidjson::Document doc; doc.Parse(json); if 
((!doc.IsObject()) || (!doc.HasMember(key))) { @@ -72,7 +72,7 @@ const char *json_get_key_string(const char *json, const char *key, const int ind return value.GetString(); } -float json_get_key_float(const char *json, const char *key, const int index) { +float jsonGetKeyFloat(const char *json, const char *key, const int index) { rapidjson::Document doc; doc.Parse(json); if ((!doc.IsObject()) || (!doc.HasMember(key))) { @@ -94,27 +94,27 @@ float json_get_key_float(const char *json, const char *key, const int index) { return value.GetFloat(); } -void load_image(const char *path, opendr_image_t *image) { - cv::Mat opencv_image = cv::imread(path, cv::IMREAD_COLOR); - if (opencv_image.empty()) { +void loadImage(const char *path, OpendrImageT *image) { + cv::Mat opencvImage = cv::imread(path, cv::IMREAD_COLOR); + if (opencvImage.empty()) { image->data = NULL; } else { - image->data = new cv::Mat(opencv_image); + image->data = new cv::Mat(opencvImage); } } -void free_image(opendr_image_t *image) { +void freeImage(OpendrImageT *image) { if (image->data) { - cv::Mat *opencv_image = static_cast(image->data); - delete opencv_image; + cv::Mat *opencvImage = static_cast(image->data); + delete opencvImage; } } -void init_detections_vector(opendr_detection_vector_target_t *detection_vector) { - detection_vector->starting_pointer = NULL; +void initDetectionsVector(OpendrDetectionVectorTargetT *detectionVector) { + detectionVector->startingPointer = NULL; - std::vector detections; - opendr_detection_target_t detection; + std::vector detections; + OpendrDetectionTargetT detection; detection.name = -1; detection.left = 0.0; @@ -125,20 +125,20 @@ void init_detections_vector(opendr_detection_vector_target_t *detection_vector) detections.push_back(detection); - load_detections_vector(detection_vector, detections.data(), static_cast(detections.size())); + loadDetectionsVector(detectionVector, detections.data(), static_cast(detections.size())); } -void 
load_detections_vector(opendr_detection_vector_target_t *detection_vector, opendr_detection_target_t *detection, - int vector_size) { - free_detections_vector(detection_vector); +void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, + int vectorSize) { + freeDetectionsVector(detectionVector); - detection_vector->size = vector_size; - int size_of_output = (vector_size) * sizeof(opendr_detection_target_t); - detection_vector->starting_pointer = static_cast(malloc(size_of_output)); - std::memcpy(detection_vector->starting_pointer, detection, size_of_output); + detectionVector->size = vectorSize; + int sizeOfOutput = (vectorSize) * sizeof(OpendrDetectionTargetT); + detectionVector->startingPointer = static_cast(malloc(sizeOfOutput)); + std::memcpy(detectionVector->startingPointer, detection, sizeOfOutput); } -void free_detections_vector(opendr_detection_vector_target_t *detection_vector) { - if (detection_vector->starting_pointer != NULL) - free(detection_vector->starting_pointer); +void freeDetectionsVector(OpendrDetectionVectorTargetT *detectionVector) { + if (detectionVector->startingPointer != NULL) + free(detectionVector->startingPointer); } diff --git a/tests/sources/c_api/test_face_recognition.c b/tests/sources/c_api/test_face_recognition.c index 65160fbbcc..fe29a8cf74 100644 --- a/tests/sources/c_api/test_face_recognition.c +++ b/tests/sources/c_api/test_face_recognition.c @@ -22,79 +22,79 @@ START_TEST(model_creation_test) { // Create a face recognition model - face_recognition_model_t model; + FaceRecognitionModelT model; // Load a pretrained model - load_face_recognition_model("data/optimized_model", &model); + loadFaceRecognitionModel("data/optimized_model", &model); - ck_assert(model.onnx_session); + ck_assert(model.onnxSession); ck_assert(model.env); - ck_assert(model.session_options); + ck_assert(model.sessionOptions); // Release the resources - free_face_recognition_model(&model); + 
freeFaceRecognitionModel(&model); // Load a model that does not exist - load_face_recognition_model("data/optimized_model_not_existant", &model); - ck_assert(!model.onnx_session); + loadFaceRecognitionModel("data/optimized_model_not_existant", &model); + ck_assert(!model.onnxSession); ck_assert(!model.env); - ck_assert(!model.session_options); + ck_assert(!model.sessionOptions); // Release the resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); } END_TEST START_TEST(database_creation_test) { - face_recognition_model_t model; - load_face_recognition_model("data/optimized_model", &model); + FaceRecognitionModelT model; + loadFaceRecognitionModel("data/optimized_model", &model); // Check that we can create and load a database that exists - build_database_face_recognition("data/database", "data/database.dat", &model); - load_database_face_recognition("data/database.dat", &model); + buildDatabaseFaceRecognition("data/database", "data/database.dat", &model); + loadDatabaseFaceRecognition("data/database.dat", &model); ck_assert(model.database); - ck_assert(model.database_ids); - ck_assert(model.database_ids); + ck_assert(model.databaseIds); + ck_assert(model.databaseIds); // Check that we can handle errors in the process - build_database_face_recognition("data/database_not_existant", "data/database.dat", &model); - load_database_face_recognition("data/database_not_existant.dat", &model); + buildDatabaseFaceRecognition("data/database_not_existant", "data/database.dat", &model); + loadDatabaseFaceRecognition("data/database_not_existant.dat", &model); ck_assert(!model.database); - ck_assert(!model.database_ids); + ck_assert(!model.databaseIds); // Release the resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); } END_TEST START_TEST(inference_creation_test) { // Create a face recognition model - face_recognition_model_t model; + FaceRecognitionModelT model; // Load a pretrained model (see instructions for 
downloading the data) - load_face_recognition_model("data/optimized_model", &model); + loadFaceRecognitionModel("data/optimized_model", &model); // Build and load the database - build_database_face_recognition("data/database", "data/database.dat", &model); - load_database_face_recognition("data/database.dat", &model); + buildDatabaseFaceRecognition("data/database", "data/database.dat", &model); + loadDatabaseFaceRecognition("data/database.dat", &model); // Load an image and performance inference - opendr_image_t image; - load_image("data/database/1/1.jpg", &image); - opendr_category_target_t res = infer_face_recognition(&model, &image); - free_image(&image); + OpendrImageT image; + loadImage("data/database/1/1.jpg", &image); + OpendrCategoryTargetT res = inferFaceRecognition(&model, &image); + freeImage(&image); char buff[512]; - decode_category_face_recognition(&model, res, buff); + decodeCategoryFaceRecognition(&model, res, buff); ck_assert(!strcmp(buff, "1")); // Load another image - load_image("data/database/5/1.jpg", &image); - res = infer_face_recognition(&model, &image); - free_image(&image); - decode_category_face_recognition(&model, res, buff); + loadImage("data/database/5/1.jpg", &image); + res = inferFaceRecognition(&model, &image); + freeImage(&image); + decodeCategoryFaceRecognition(&model, res, buff); ck_assert(!strcmp(buff, "5")); // Free the model resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); } END_TEST diff --git a/tests/sources/c_api/test_nanodet.c b/tests/sources/c_api/test_nanodet.c index cacd4f1101..22a8e8e695 100644 --- a/tests/sources/c_api/test_nanodet.c +++ b/tests/sources/c_api/test_nanodet.c @@ -22,13 +22,13 @@ START_TEST(model_creation_test) { // Create a nanodet libtorch model - nanodet_model_t model; + NanodetModelT model; // Load a pretrained model - load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + 
loadNanodetModel("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); ck_assert_msg(model.network != 0, "net is NULL"); // Release the resources - free_nanodet_model(&model); + freeNanodetModel(&model); // Check if memory steel exist ck_assert_msg(model.network, "net is NULL"); @@ -37,22 +37,22 @@ END_TEST START_TEST(inference_creation_test) { // Create a nanodet model - nanodet_model_t model; + NanodetModelT model; // Load a pretrained model - load_nanodet_model("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + loadNanodetModel("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); // Load an image and performance inference - opendr_image_t image; - load_image("data/object_detection_2d/nanodet/database/000000000036.jpg", &image); - opendr_detection_vector_target_t res = infer_nanodet(&model, &image); - free_image(&image); + OpendrImageT image; + loadImage("data/object_detection_2d/nanodet/database/000000000036.jpg", &image); + OpendrDetectionVectorTargetT res = inferNanodet(&model, &image); + freeImage(&image); ck_assert(res.size != 0); // Free the model resources - free_detections_vector(&res); - free_nanodet_model(&model); + freeDetectionsVector(&res); + freeNanodetModel(&model); } END_TEST diff --git a/tests/sources/c_api/test_opendr_utils.c b/tests/sources/c_api/test_opendr_utils.c index 04e2bcf878..82fe8c94bd 100644 --- a/tests/sources/c_api/test_opendr_utils.c +++ b/tests/sources/c_api/test_opendr_utils.c @@ -21,16 +21,16 @@ START_TEST(image_load_test) { // Load an image and performance inference - opendr_image_t image; + OpendrImageT image; // An example of an image that exist - load_image("data/database/1/1.jpg", &image); + loadImage("data/database/1/1.jpg", &image); ck_assert(image.data); // An example of an image that does not exist - load_image("images/not_existant/1.jpg", &image); + 
loadImage("images/not_existant/1.jpg", &image); ck_assert(image.data == 0); // Free the resources - free_image(&image); + freeImage(&image); } END_TEST From cbb23c5fa04ad9d82a5da2cba3b7eb68c53721f6 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Mon, 9 Jan 2023 14:13:41 +0200 Subject: [PATCH 77/87] license test update --- include/object_detection_2d_nanodet_jit.h | 2 +- include/opendr_utils.h | 3 +-- .../samples/object_detection_2d/nanodet/nanodet_jit_demo.c | 2 +- src/c_api/face_recognition.cpp | 4 ++-- src/c_api/object_detection_2d_nanodet_jit.cpp | 4 ++-- src/c_api/opendr_utils.cpp | 3 +-- 6 files changed, 8 insertions(+), 10 deletions(-) diff --git a/include/object_detection_2d_nanodet_jit.h b/include/object_detection_2d_nanodet_jit.h index 24fad3b6a1..e8e6a0466d 100644 --- a/include/object_detection_2d_nanodet_jit.h +++ b/include/object_detection_2d_nanodet_jit.h @@ -1,5 +1,5 @@ /* - * Copyright 2020-2022 OpenDR European Project + * Copyright 2020-2023 OpenDR European Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/include/opendr_utils.h b/include/opendr_utils.h index 364c0e7127..3b07d48868 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -76,8 +76,7 @@ void initDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); * @param detection the pointer of the first OpenDR detection target in a vector * @param vectorSize the size of the vector */ -void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, - int vectorSize); +void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, int vectorSize); /** * Releases the memory allocated for a detection list structure diff --git a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c index 756720cb51..e384a6224f 100644 --- a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c +++ b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c @@ -1,5 +1,5 @@ /* - * Copyright 2020-2022 OpenDR European Project + * Copyright 2020-2023 OpenDR European Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index d1810ee302..3461ea5dca 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -45,7 +45,7 @@ * @param stdValue value used for scaling the input image */ void preprocessFaceRecognition(cv::Mat *image, std::vector &data, int resizeTarget = 128, int modelInputSize = 112, - float meanValue = 0.5, float stdValue = 0.5) { + float meanValue = 0.5, float stdValue = 0.5) { // Convert to RGB cv::Mat img; cv::cvtColor(*image, img, cv::COLOR_BGR2RGB); @@ -185,7 +185,7 @@ void ffFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image, cv::Ma } preprocessFaceRecognition(opencvImage, inputTensorValues, model->resizeSize, model->modelSize, model->meanValue, - model->stdValue); + model->stdValue); // Setup input/output names Ort::AllocatorWithDefaultOptions allocator; diff --git a/src/c_api/object_detection_2d_nanodet_jit.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp index 202b26877f..5d1fe2337b 100644 --- a/src/c_api/object_detection_2d_nanodet_jit.cpp +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -1,4 +1,4 @@ -// Copyright 2020-2022 OpenDR European Project +// Copyright 2020-2023 OpenDR European Project // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
@@ -232,7 +232,7 @@ void loadNanodetModel(char *modelPath, char *device, int height, int width, floa } void ffNanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, - torch::Tensor *outputs) { + torch::Tensor *outputs) { // Make all the inputs as tensors to use in jit model torch::Tensor srcHeight = torch::tensor(originalSize->width); torch::Tensor srcWidth = torch::tensor(originalSize->height); diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index 383e8fe598..42563e58b9 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -128,8 +128,7 @@ void initDetectionsVector(OpendrDetectionVectorTargetT *detectionVector) { loadDetectionsVector(detectionVector, detections.data(), static_cast(detections.size())); } -void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, - int vectorSize) { +void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, int vectorSize) { freeDetectionsVector(detectionVector); detectionVector->size = vectorSize; From 53a58c30b1dd7e09c361850dea0188b8c4a4c121 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Sun, 15 Jan 2023 22:34:43 +0200 Subject: [PATCH 78/87] Apply suggestions from code review Co-authored-by: Kostas Tsampazis <27914645+tsampazk@users.noreply.github.com> --- dependencies/download_torch.py | 2 +- .../c-object-detection-2d-nanodet-jit-h.md | 2 +- docs/reference/c-opendr-utils-h.md | 6 +-- docs/reference/c-target-h.md | 2 +- docs/reference/object-detection-2d-nanodet.md | 40 +++++++++---------- .../object_detection_2d/nanodet/README.md | 2 +- src/c_api/object_detection_2d_nanodet_jit.cpp | 14 +++---- .../Transformer/nanodet_t.yml | 4 +- .../nanodet/nanodet_learner.py | 2 +- 9 files changed, 37 insertions(+), 37 deletions(-) diff --git a/dependencies/download_torch.py b/dependencies/download_torch.py 
index bb21137588..ec4e5eda1a 100644 --- a/dependencies/download_torch.py +++ b/dependencies/download_torch.py @@ -90,7 +90,7 @@ def get_cuda_path(): CUDA_VERSION = CUDA_VERSION.replace(".", "") CUDA_VERSION = CUDA_VERSION[:3] else: - warnings.warn("\033[93m Not CUDA version file found.") + warnings.warn("\033[93m No CUDA version file found.") DEVICE = f"cu{CUDA_VERSION}" except: warnings.warn("\033[93m No CUDA installation found.\n" diff --git a/docs/reference/c-object-detection-2d-nanodet-jit-h.md b/docs/reference/c-object-detection-2d-nanodet-jit-h.md index 60c72b9db9..d875834c3d 100644 --- a/docs/reference/c-object-detection-2d-nanodet-jit-h.md +++ b/docs/reference/c-object-detection-2d-nanodet-jit-h.md @@ -34,7 +34,7 @@ Releases the memory allocated for an object detection 2D Nanodet model (*model*) ```C OpendrDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpendrImageT *image); ``` -This function perform inference using an object detection 2D Nanodet model (*model*) and an input image (*image*). +This function performs inference using an object detection 2D Nanodet model (*model*) and an input image (*image*). The function returns an OpenDR detection vector structure with the inference results. diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index 20d67458c6..baf3f4e19e 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -9,7 +9,7 @@ const char* jsonGetKeyString(const char *json, const char *key, const int index) ``` The *jsonGetKeyString()* function reads a JSON string from the pointer (*json*) and returns tha value of a key with pointer (*key*) as string. If the value is an array it will return only the (*index*) value of the array. -If fails returns (*""*). +If it fails it returns (""). 
### Function *jsonGetKeyFloat()* ```C @@ -17,7 +17,7 @@ float jsonGetKeyFloat(const char *json, const char *key, const int index); ``` The *jsonGetKeyFloat()* function reads a JSON string from the pointer (*json*) and returns tha value of a key with pointer (*key*) as float. If the value is an array it will return only the (*index*) value of the array. -If fails returns (*0.0f*). +If it fails it returns (*0.0f*). ### Function *jsonGetKeyFromInferenceParams()* ```C @@ -25,7 +25,7 @@ float jsonGetKeyFromInferenceParams(const char *json, const char *key, const int ``` The *jsonGetKeyFromInferenceParams()* function reads a JSON string from the pointer (*json*) and returns tha value of a key with pointer (*key*) in inference_params section as float. If the value is an array it will return only the (*index*) value of the array. -If fails returns (*0.0f*). +If it fails it returns (*0.0f*). --- diff --git a/docs/reference/c-target-h.md b/docs/reference/c-target-h.md index d73f249df7..d8303cb58c 100644 --- a/docs/reference/c-target-h.md +++ b/docs/reference/c-target-h.md @@ -88,7 +88,7 @@ The *OpendrDetectionVectorTargetT* structure has the following fields: #### `OpendrDetectionTargetT startingPointer` field -A pointer to a memory of multiple OpenDR detection targets. +A pointer to multiple OpenDR detection targets. #### `int size` field diff --git a/docs/reference/object-detection-2d-nanodet.md b/docs/reference/object-detection-2d-nanodet.md index f62546bcf6..8b6af899f2 100644 --- a/docs/reference/object-detection-2d-nanodet.md +++ b/docs/reference/object-detection-2d-nanodet.md @@ -7,7 +7,7 @@ Bases: `engine.learners.Learner` The *NanodetLearner* class is a wrapper of the Nanodet object detection algorithms based on the original [Nanodet implementation](https://github.com/RangiLyu/nanodet). -It can be used to perform object detection on images (inference) and train All predefined Nanodet object detection models and new modular models from the user. 
+It can be used to perform object detection on images (inference) and train all predefined Nanodet object detection models and new modular models from the user. The [NanodetLearner](../../src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py) class has the following public methods: @@ -101,12 +101,12 @@ NanodetLearner.infer(self, input, thershold) ``` This method is used to perform object detection on an image. -Returns an `engine.target.BoundingBoxList` object, which contains bounding boxes that are described by the left-top corner and -its width and height, or returns an empty list if no detections were made of the image in input. +Returns an `engine.target.BoundingBoxList` object, which contains bounding boxes that are described by the top-left corner and +their width and height, or returns an empty list if no detections were made on the input image. Parameters: - **input** : *Image*\ - Image type object to perform inference on it. + Image type object to perform inference on. - **threshold**: *float, default=0.35*\ Specifies the threshold for object detection inference. An object is detected if the confidence of the output is higher than the specified threshold. @@ -151,7 +151,7 @@ If optimization is performed, the optimized model is saved instead. Parameters: - **path**: *str, default=None*\ - Path to save the model, if None it will be the `"temp_folder"` or the `"cfg.save_dir"` from learner. + Path to save the model, if None it will be `"temp_folder"` or `"cfg.save_dir"` from the learner. - **verbose**: *bool, default=True*\ Enables the maximum verbosity and logger. @@ -193,10 +193,10 @@ Parameters: #### Tutorials and Demos -A tutorial on performing inference is available. 
-Furthermore, demos on performing [training](../../projects/perception/object_detection_2d/nanodet/train_demo.py), -[evaluation](../../projects/perception/object_detection_2d/nanodet/eval_demo.py) and -[inference](../../projects/perception/object_detection_2d/nanodet/inference_demo.py) are also available. +A Jupyter notebook tutorial on performing inference is [available](../../projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb). +Furthermore, demos on performing [training](../../projects/python/perception/object_detection_2d/nanodet/train_demo.py), +[evaluation](../../projects/python/perception/object_detection_2d/nanodet/eval_demo.py) and +[inference](../../projects/python/perception/object_detection_2d/nanodet/inference_demo.py) are also available. @@ -211,7 +211,7 @@ Furthermore, demos on performing [training](../../projects/perception/object_det All training parameters (optimizer, lr schedule, losses, model parameters etc.) can be changed in the model config file in [config directory](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config). You can find more information in [corresponding documentation](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config/config_file_detail.md). - For easier usage of the NanodetLearner, the user can overwrite the following parameters: + For easier usage of the NanodetLearner, you can overwrite the following parameters: (iters, lr, batch_size, checkpoint_after_iter, checkpoint_load_iter, temp_path, device, weight_decay, warmup_steps, warmup_ratio, lr_schedule_T_max, lr_schedule_eta_min, grad_clip) @@ -326,12 +326,12 @@ Furthermore, demos on performing [training](../../projects/perception/object_det #### Performance Evaluation -In terms of speed, the performance of Nanodet is summarized in the table below (in FPS). +In terms of speed, the performance of Nanodet is summarized in the tables below (in FPS). 
The speed is measured from the start of the forward pass until the end of post-processing. -For PyTorch inference. +For PyTorch inference: -| Method {intput} | RTX 2070 | TX2 | NX | +| Method {input} | RTX 2070 | TX2 | NX | |------------------------------|----------|-------|-------| | Efficient Lite0 {320} | 48.63 | 9.38 | 14.48 | | Efficient Lite1 {416} | 43.88 | 7.93 | 11.07 | @@ -348,9 +348,9 @@ For PyTorch inference. | Nanodet-plue m {416} | 40.16 | 7.24 | 11.58 | | Nanodet-plue m 1.5x {416} | 38.94 | 7.37 | 11.52 | -For JIT optimization inference. +For JIT optimization inference: -| Method {intput} | RTX 2070 | TX2 | NX | +| Method {input} | RTX 2070 | TX2 | NX | |------------------------------|----------|-------|-------| | Efficient Lite0 {320} | 69.06 | 12.94 | 17.78 | | Efficient Lite1 {416} | 62.94 | 9.27 | 12.94 | @@ -367,15 +367,15 @@ For JIT optimization inference. | Nanodet-plue m {416} | 64.74 | 11.22 | 17.57 | | Nanodet-plue m 1.5x {416} | 56.77 | 10.39 | 14.81 | -For ONNX optimization inference. +For ONNX optimization inference: In this case, the forward pass is performed in ONNX. The pre-processing steps were implemented in PyTorch. Results show that the performance on ONNX varies significantly among different architectures, with some achieving good performance while others performing poorly. Additionally, it was observed that the performance of ONNX on a TX2 device was generally good, although it was observed to have occasional spikes of long run times that made it difficult to accurately measure. -Overall, the TX2 device demonstrated good performance with ONNX. +Overall, the TX2 device demonstrated good performance with ONNX: -| Method {intput} | RTX 2070 | TX2 | NX | +| Method {input} | RTX 2070 | TX2 | NX | |------------------------------|-----------|-----|--------| | Efficient Lite0 {320} | 33.12 | | 34.03 | | Efficient Lite1 {416} | 16.78 | | 17.35 | @@ -392,9 +392,9 @@ Overall, the TX2 device demonstrated good performance with ONNX. 
| Nanodet-plue m {416} | 39.17 | | 36.98 | | Nanodet-plue m 1.5x {416} | 28.55 | | 27.20 | -Finally, we measure the performance on the COCO dataset, using the corresponding metrics. +Finally, we measure the performance on the COCO dataset, using the corresponding metrics: -| Method {intput} | coco2017 mAP | +| Method {input} | coco2017 mAP | |------------------------------|--------------| | Efficient Lite0 {320} | 24.4 | | Efficient Lite1 {416} | 29.2 | diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index 7acfb2a43c..4d67869184 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -20,7 +20,7 @@ provided by OpenDR. Specifically the following examples are provided: An example of training on the COCO dataset is provided. The user must set the dataset type using the `--dataset` argument and provide the dataset root path with the `--data-root` argument. Setting the config file for the specific model is done with `--model "model name"`. Setting `--device cpu` performs training on CPU. Additional command - line arguments can be set to overwrite various training hyperparameters from the provided config file, run `python3 train_demo.py -h` prints information about them on stdout. + line arguments can be set to overwrite various training hyperparameters from the provided config file, run `python3 train_demo.py -h` to print information about them on stdout. 
Example usage: `python3 train_demo.py --model m --dataset coco --data-root /path/to/coco_dataset` diff --git a/src/c_api/object_detection_2d_nanodet_jit.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp index 5d1fe2337b..235bdd31c7 100644 --- a/src/c_api/object_detection_2d_nanodet_jit.cpp +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -59,8 +59,8 @@ NanoDet::~NanoDet() { /** * Helper function for preprocessing images for normalization. * This function follows the OpenDR's Nanodet pre-processing pipeline for color normalization. - * Mean and Standard deviation are already part of NanoDet class when is initialized. - * @param image, image to be preprocesses + * Mean and Standard deviation are already part of NanoDet class when it is initialized. + * @param image, image to be preprocessed */ torch::Tensor NanoDet::preProcess(cv::Mat *image) { torch::Tensor tensorImage = torch::from_blob(image->data, {image->rows, image->cols, 3}, torch::kByte); @@ -156,10 +156,10 @@ void getResizeMatrix(cv::Size *srcShape, cv::Size *dstShape, cv::Mat *Rs, int ke /** * Helper function for preprocessing images for resizing. - * This function follows the OpenDR's Nanodet pre-processing pipeline for shape transformation, which include - * find the actual final size of model input if keep ratio is enabled, calculate the warp matrix and finally - * resize and warp perspective of the input image. - * @param src, image to be preprocesses + * This function follows OpenDR's Nanodet pre-processing pipeline for shape transformation, which includes + * finding the actual final size of the model input if keep ratio is enabled, calculating the warp matrix and finally + * resizing and warping the perspective of the input image. 
+ * @param src, image to be preprocessed * @param dst, output image to be used as model input * @param dstSize, final size of the dst * @param warpMatrix, matrix to be used for warp perspective @@ -169,7 +169,7 @@ void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warpMatr cv::Size srcSize = cv::Size(src->cols, src->rows); const float divisible = 0.0; - // Get new destination size if keep ratio is wanted + // Get new destination size if keep ratio is enabled if (keepRatio == 1) { getMinimumDstShape(&srcSize, dstSize, divisible); } diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml index 4eb8282924..aa1986f0c3 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml @@ -69,8 +69,8 @@ data: normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: input_size: [320,320] #[w,h] - keep_ratio: False #feature_hw: [20,20]. Size for position embedding are hard coded and can not have varius values, - #Please use images with standard ratio and change the value accordingly if you want to keep_ratio. + keep_ratio: False # feature_hw: [20,20]. Size for position embedding is hard coded and can not have various values, + # Please use images with standard ratio and change the value accordingly if you want to keep_ratio. 
pipeline: normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] device: diff --git a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py index bcd31f35c7..ec3505b1d9 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py +++ b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py @@ -220,7 +220,7 @@ def load(self, path=None, verbose=True): Loads the model from the path provided. :param path: path of the directory where the model was saved :type path: str, optional - :param verbose: whether to print a success message or not, defaults to Trye + :param verbose: whether to print a success message or not, defaults to True :type verbose: bool, optional """ From 7c84d5982ed5dfe30f375aea72cdecbdf7d0219f Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Sun, 15 Jan 2023 23:16:52 +0200 Subject: [PATCH 79/87] Apply suggestions from code review --- src/c_api/object_detection_2d_nanodet_jit.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/c_api/object_detection_2d_nanodet_jit.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp index 5d1fe2337b..1d16bfa7b1 100644 --- a/src/c_api/object_detection_2d_nanodet_jit.cpp +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -234,12 +234,12 @@ void loadNanodetModel(char *modelPath, char *device, int height, int width, floa void ffNanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, torch::Tensor *outputs) { // Make all the inputs as tensors to use in jit model - torch::Tensor srcHeight = torch::tensor(originalSize->width); - torch::Tensor srcWidth = torch::tensor(originalSize->height); + torch::Tensor srcHeight = torch::tensor(originalSize->height); + torch::Tensor srcWidth = torch::tensor(originalSize->width); torch::Tensor warpMat = torch::from_blob(warpMatrix->data, {3, 3}); // Model inference - *outputs = 
(model->network()).forward({*inputTensor, srcHeight, srcWidth, warpMat}).toTensor(); + *outputs = (model->network()).forward({*inputTensor, srcWidth, srcHeight, warpMat}).toTensor(); *outputs = outputs->to(torch::Device(torch::kCPU, 0)); } From bca8c378ee2dc962a8b12e254c8d22f53fc018e6 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 19 Jan 2023 18:27:08 +0200 Subject: [PATCH 80/87] fixe bug to find cuda --- dependencies/download_torch.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/dependencies/download_torch.py b/dependencies/download_torch.py index ec4e5eda1a..3cf7496eef 100644 --- a/dependencies/download_torch.py +++ b/dependencies/download_torch.py @@ -31,10 +31,8 @@ def search_on_path(filenames): def get_cuda_path(): nvcc_path = search_on_path(('nvcc', 'nvcc.exe')) - cuda_path_default = None if nvcc_path is not None: - cuda_path_default = os.path.normpath(os.path.join(os.path.dirname(nvcc_path), '..', '..')) - if cuda_path_default is not None: + cuda_path_default = os.path.normpath(os.path.join(os.path.dirname(nvcc_path), '..')) _cuda_path = cuda_path_default elif os.path.exists('/usr/local/cuda'): _cuda_path = '/usr/local/cuda' @@ -53,6 +51,7 @@ def get_cuda_path(): args = parser.parse_args() COMPATIBILITY_VERSIONS = { + "1.13.1": "0.14.1", "1.13.0": "0.14.0", "1.12.0": "0.13.0", "1.11.0": "0.12.0", From 4cae0ae39b9165744c225a1bd366c0fa27cfde14 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 19 Jan 2023 18:27:45 +0200 Subject: [PATCH 81/87] add automatic random colors --- include/object_detection_2d_nanodet_jit.h | 2 ++ src/c_api/object_detection_2d_nanodet_jit.cpp | 35 +++++++++++-------- 2 files changed, 22 insertions(+), 15 deletions(-) diff --git a/include/object_detection_2d_nanodet_jit.h b/include/object_detection_2d_nanodet_jit.h index e8e6a0466d..72a0288659 100644 --- a/include/object_detection_2d_nanodet_jit.h +++ b/include/object_detection_2d_nanodet_jit.h @@ -30,6 +30,8 @@ struct NanodetModel { // Device to be 
used char *device; + int **colorList; + int numberOfClasses; // Recognition threshold float scoreThreshold; diff --git a/src/c_api/object_detection_2d_nanodet_jit.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp index a1ceed3a05..4fa8e37940 100644 --- a/src/c_api/object_detection_2d_nanodet_jit.cpp +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -217,6 +217,18 @@ void loadNanodetModel(char *modelPath, char *device, int height, int width, floa "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"}; + int **colorList = new int *[labels.size()]; + for (int i = 0; i < labels.size(); i++) { + colorList[i] = new int[3]; + } + // seed the random number generator + std::srand(1); + for (int i = 0; i < labels.size(); i++) { + for (int j = 0; j < 3; j++) { + colorList[i][j] = std::rand() % 256; + } + } + // mean and standard deviation tensors for normalization of input torch::Tensor meanTensor = torch::tensor({{{-103.53f}}, {{-116.28f}}, {{-123.675f}}}); torch::Tensor stdValues = torch::tensor({{{0.017429f}}, {{0.017507f}}, {{0.017125f}}}); @@ -229,6 +241,8 @@ void loadNanodetModel(char *modelPath, char *device, int height, int width, floa NanoDet *detector = new NanoDet(network, meanTensor, stdValues, initDevice, labels); model->network = static_cast(detector); + model->colorList = colorList; + model->numberOfClasses = labels.size(); } void ffNanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, @@ -290,21 +304,7 @@ OpendrDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpendrImageT *im } void drawBboxes(OpendrImageT *image, NanodetModelT *model, OpendrDetectionVectorTargetT *detectionsVector) { - const int colorList[80][3] = { - //{255 ,255 ,255}, //bg - {216, 82, 24}, {236, 176, 31}, {125, 46, 141}, {118, 171, 47}, {76, 189, 237}, {238, 19, 46}, {76, 76, 76}, - {153, 153, 153}, {255, 0, 0}, {255, 127, 0}, {190, 190, 0}, {0, 255, 0}, {0, 0, 255}, {170, 
0, 255}, - {84, 84, 0}, {84, 170, 0}, {84, 255, 0}, {170, 84, 0}, {170, 170, 0}, {170, 255, 0}, {255, 84, 0}, - {255, 170, 0}, {255, 255, 0}, {0, 84, 127}, {0, 170, 127}, {0, 255, 127}, {84, 0, 127}, {84, 84, 127}, - {84, 170, 127}, {84, 255, 127}, {170, 0, 127}, {170, 84, 127}, {170, 170, 127}, {170, 255, 127}, {255, 0, 127}, - {255, 84, 127}, {255, 170, 127}, {255, 255, 127}, {0, 84, 255}, {0, 170, 255}, {0, 255, 255}, {84, 0, 255}, - {84, 84, 255}, {84, 170, 255}, {84, 255, 255}, {170, 0, 255}, {170, 84, 255}, {170, 170, 255}, {170, 255, 255}, - {255, 0, 255}, {255, 84, 255}, {255, 170, 255}, {42, 0, 0}, {84, 0, 0}, {127, 0, 0}, {170, 0, 0}, - {212, 0, 0}, {255, 0, 0}, {0, 42, 0}, {0, 84, 0}, {0, 127, 0}, {0, 170, 0}, {0, 212, 0}, - {0, 255, 0}, {0, 0, 42}, {0, 0, 84}, {0, 0, 127}, {0, 0, 170}, {0, 0, 212}, {0, 0, 255}, - {0, 0, 0}, {36, 36, 36}, {72, 72, 72}, {109, 109, 109}, {145, 145, 145}, {182, 182, 182}, {218, 218, 218}, - {0, 113, 188}, {80, 182, 188}, {127, 127, 0}, - }; + int **colorList = model->colorList; std::vector classNames = (static_cast(model->network))->labels(); @@ -354,4 +354,9 @@ void freeNanodetModel(NanodetModelT *model) { NanoDet *networkPTR = static_cast(model->network); delete networkPTR; } + + for (int i = 0; i < model->numberOfClasses; i++) { + delete[] model->colorList[i]; + } + delete[] model->colorList; } From d2777a428c95b8f392b93ffdc81608a4964fb534 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 19 Jan 2023 18:29:43 +0200 Subject: [PATCH 82/87] better implementation of optimize --- .../nanodet/inference_demo.py | 5 +- .../algorithm/nanodet/inferencer/utilities.py | 10 +- .../nanodet/model/arch/one_stage_detector.py | 10 +- .../algorithm/nanodet/model/head/gfl_head.py | 27 +++-- .../nanodet/model/head/nanodet_plus_head.py | 22 ++-- .../nanodet/nanodet_learner.py | 109 +++++++++--------- .../nanodet/test_nanodet.py | 6 +- 7 files changed, 101 insertions(+), 88 deletions(-) diff --git 
a/projects/python/perception/object_detection_2d/nanodet/inference_demo.py b/projects/python/perception/object_detection_2d/nanodet/inference_demo.py index 61b6eed3d3..351f00fd15 100644 --- a/projects/python/perception/object_detection_2d/nanodet/inference_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/inference_demo.py @@ -24,7 +24,8 @@ parser.add_argument("--model", help="Model for which a config file will be used", type=str, default="m") parser.add_argument("--path", help="Path to the image that is used for inference", type=str, default="./predefined_examples/000000000036.jpg") - parser.add_argument("--optimize", help="", type=str, default="", choices=["", "onnx", "jit"]) + parser.add_argument("--optimize", help="If specified will determine the optimization to be used (onnx, jit)", + type=str, default="", choices=["", "onnx", "jit"]) args = parser.parse_args() nanodet = NanodetLearner(model_to_use=args.model, device=args.device) @@ -35,7 +36,7 @@ img = Image.open(args.path) if args.optimize != "": - nanodet.optimize("./{}/nanodet_{}".format(args.optimize, args.model), img, optimization=args.optimize) + nanodet.optimize("./{}/nanodet_{}".format(args.optimize, args.model), optimization=args.optimize) boxes = nanodet.infer(input=img) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py index 59ee4a06c2..c4683d8543 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py @@ -23,11 +23,11 @@ class Predictor(nn.Module): - def __init__(self, cfg, model, device="cuda"): + def __init__(self, cfg, model, device="cuda", nms_max_num=100): super(Predictor, self).__init__() self.cfg = cfg self.device = device - + self.nms_max_num = nms_max_num if 
self.cfg.model.arch.backbone.name == "RepVGG": deploy_config = self.cfg.model deploy_config.arch.backbone.update({"deploy": True}) @@ -53,12 +53,12 @@ def script_model(self, img, height, width, warp_matrix): scripted_model = self.postprocessing(preds, img, height, width, warp_matrix) return scripted_model - def forward(self, img, height, width, warp_matrix): + def forward(self, img, height=torch.tensor(0), width=torch.tensor(0), warp_matrix=torch.tensor(0)): if torch.jit.is_scripting(): return self.script_model(img, height, width, warp_matrix) # In tracing (Jit and Onnx optimizations) we must first run the pipeline before the graf, # cv2 is needed, and it is installed with abi cxx11 but torch is in cxx<11 - meta = {"height": height, "width": width, "img": img, "warp_matrix": warp_matrix} + meta = {"img": img} meta["img"] = divisible_padding(meta["img"], divisible=torch.tensor(32)) with torch.no_grad(): results = self.model.inference(meta) @@ -83,5 +83,5 @@ def preprocessing(self, img): def postprocessing(self, preds, input, height, width, warp_matrix): meta = {"height": height, "width": width, 'img': input, 'warp_matrix': warp_matrix} meta["img"] = divisible_padding(meta["img"], divisible=torch.tensor(32)) - res = self.model.head.post_process(preds, meta) + res = self.model.head.post_process(preds, meta, nms_max_num=self.nms_max_num) return res diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py index 909dbb78bf..425a0a6154 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py @@ -47,15 +47,7 @@ def forward(self, x): def inference(self, meta: Dict[str, torch.Tensor]): with torch.no_grad(): preds = self(meta["img"]) - if 
torch.jit.is_tracing(): - return preds - if torch.onnx.is_in_onnx_export(): - # torch.linalg.inv is not supported from onnx opset 11. - # problem with constant folding although is set to false. - # export scriptable model have problem with barchnorm2d - return preds - results = self.head.post_process(preds, meta) - return results + return preds def forward_train(self, gt_meta): preds = self(gt_meta["img"]) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py index 471d2951fd..79e215ce0e 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py @@ -534,13 +534,14 @@ def sample(self, assign_result, gt_bboxes): pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds - def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): + def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer", nms_max_num: int = 100): """Prediction results postprocessing. Decode bboxes and rescale to original image size. Args: preds (Tensor): Prediction output. meta (dict): Meta info. mode (str): Determines if it uses batches and numpy or tensors for scripting. + nms_max_num (int): Determines the maximum number of bounding boxes that will be retained following the nms. 
""" if mode == "eval" and not torch.jit.is_scripting(): # Inference do not use batches and tries to have @@ -550,7 +551,7 @@ def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): cls_scores, bbox_preds = preds.split( [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 ) - results = self.get_bboxes(cls_scores, bbox_preds, meta["img"]) + results = self.get_bboxes(cls_scores, bbox_preds, meta["img"], nms_max_num=nms_max_num) (det_bboxes, det_labels) = results det_bboxes[:, :4] = scriptable_warp_boxes( @@ -559,7 +560,13 @@ def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): ) # constant output of model every time for tracing - det_result = torch.zeros((self.num_classes, 100, 5)) + if torch.jit.is_scripting(): + max_count = nms_max_num + else: + _, frequencies = torch.unique(det_labels, return_counts=True) + max_count = frequencies[torch.argmax(frequencies)].item() + + det_result = torch.zeros((self.num_classes, max_count, 5)) for i in range(self.num_classes): inds = det_labels == i det = torch.cat(( @@ -569,11 +576,16 @@ def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): dim=1 ) - pad = det.new_zeros((100 - det.size(0), 5)) + pad = det.new_zeros((max_count - det.size(0), 5)) det = torch.cat([det, pad], dim=0) det_result[i] = det return det_result + def most_common_tensor(self, tensor): + _, frequencies = torch.unique(tensor, return_counts=True) + max_count = frequencies[torch.argmax(frequencies)].item() + return max_count + def _eval_post_process(self, preds, meta): cls_scores, bbox_preds = preds.split( [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 @@ -623,13 +635,14 @@ def _eval_post_process(self, preds, meta): det_results[img_id] = det_result return det_results - def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer"): + def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer", nms_max_num: int = 100): """Decode the outputs to bboxes. 
Args: cls_preds (Tensor): Shape (num_imgs, num_points, num_classes). reg_preds (Tensor): Shape (num_imgs, num_points, 4 * (regmax + 1)). input_img (Tensor): Input image to net. mode (str): Determines if it uses batches and numpy or tensors for scripting. + nms_max_num (int): Determines the maximum number of bounding boxes that will be retained following the nms. Returns: results_list (list[tuple]): List of detection bboxes and labels. """ @@ -661,7 +674,7 @@ def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer"): padding = score.new_zeros(score.shape[0], 1) score = torch.cat([score, padding], dim=1) - return multiclass_nms(bbox, score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), max_num=100) + return multiclass_nms(bbox, score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), max_num=nms_max_num) result_list = [] for i in range(b): @@ -675,7 +688,7 @@ def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer"): score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), - max_num=100, + max_num=nms_max_num, ) result_list.append(results) return result_list diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py index d2eb0a19f8..5e82255e39 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py @@ -359,13 +359,14 @@ def sample(self, assign_result, gt_bboxes): pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds - def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): + def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer", nms_max_num: int = 100): """Prediction results postprocessing. 
Decode bboxes and rescale to original image size. Args: preds (Tensor): Prediction output. meta (dict): Meta info. mode (str): Determines if it uses batches and numpy or tensors for scripting. + nms_max_num (int): Determines the maximum number of bounding boxes that will be retained following the nms. """ if mode == "eval" and not torch.jit.is_scripting(): # Inference do not use batches and tries to have @@ -375,7 +376,7 @@ def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): cls_scores, bbox_preds = preds.split( [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 ) - results = self.get_bboxes(cls_scores, bbox_preds, meta["img"]) + results = self.get_bboxes(cls_scores, bbox_preds, meta["img"], nms_max_num=nms_max_num) (det_bboxes, det_labels) = results det_bboxes[:, :4] = scriptable_warp_boxes( @@ -384,7 +385,13 @@ def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): ) # constant output of model every time for tracing - det_result = torch.zeros((self.num_classes, 100, 5)) + if torch.jit.is_scripting(): + max_count = nms_max_num + else: + _, frequencies = torch.unique(det_labels, return_counts=True) + max_count = frequencies[torch.argmax(frequencies)].item() + + det_result = torch.zeros((self.num_classes, max_count, 5)) for i in range(self.num_classes): inds = det_labels == i det = torch.cat(( @@ -394,7 +401,7 @@ def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer"): dim=1 ) - pad = det.new_zeros((100 - det.size(0), 5)) + pad = det.new_zeros((max_count - det.size(0), 5)) det = torch.cat([det, pad], dim=0) det_result[i] = det return det_result @@ -448,13 +455,14 @@ def _eval_post_process(self, preds, meta): det_results[img_id] = det_result return det_results - def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer"): + def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer", nms_max_num: int = 100): """Decode the outputs to bboxes. 
Args: cls_preds (Tensor): Shape (num_imgs, num_points, num_classes). reg_preds (Tensor): Shape (num_imgs, num_points, 4 * (regmax + 1)). input_img (Tensor): Input image to net. mode (str): Determines if it uses batches and numpy or tensors for scripting. + nms_max_num (int): Determines the maximum number of bounding boxes that will be retained following the nms. Returns: results_list (list[tuple]): List of detection bboxes and labels. """ @@ -486,7 +494,7 @@ def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer"): padding = score.new_zeros(score.shape[0], 1) score = torch.cat([score, padding], dim=1) - return multiclass_nms(bbox, score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), max_num=100) + return multiclass_nms(bbox, score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), max_num=nms_max_num) result_list = [] for i in range(b): @@ -500,7 +508,7 @@ def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer"): score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), - max_num=100, + max_num=nms_max_num, ) result_list.append(results) return result_list diff --git a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py index ec3505b1d9..f087264104 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py +++ b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py @@ -52,7 +52,7 @@ class NanodetLearner(Learner): - def __init__(self, model_to_use="plus_m_1.5x_416", iters=None, lr=None, batch_size=None, checkpoint_after_iter=None, + def __init__(self, model_to_use="m", iters=None, lr=None, batch_size=None, checkpoint_after_iter=None, checkpoint_load_iter=None, temp_path='', device='cuda', weight_decay=None, warmup_steps=None, warmup_ratio=None, lr_schedule_T_max=None, lr_schedule_eta_min=None, grad_clip=None): @@ -89,7 +89,6 @@ def __init__(self, model_to_use="plus_m_1.5x_416", iters=None, lr=None, batch_si 
self.predictor = None self.pipeline = None - self.dummy_input = None self.model = build_model(self.cfg.model) self.logger = None self.task = None @@ -197,9 +196,9 @@ def save(self, path=None, verbose=True): self._save_jit(path, verbose=verbose) return - metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", - "has_data": False, "inference_params": {}, "optimized": False, - "optimizer_info": {}, "classes": self.classes} + metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", "has_data": False, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}, + "optimized": False, "optimizer_info": {}} metadata["model_paths"].append("nanodet_{}.pth".format(model)) @@ -300,9 +299,9 @@ def download(self, path=None, mode="pretrained", verbose=True, if verbose: print("Making metadata...") - metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", - "has_data": False, "inference_params": {}, "optimized": False, - "optimizer_info": {}, "classes": self.classes} + metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", "has_data": False, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}, + "optimized": False, "optimizer_info": {}} param_filepath = "nanodet_{}.pth".format(model) metadata["model_paths"].append(param_filepath) @@ -315,9 +314,9 @@ def download(self, path=None, mode="pretrained", verbose=True, if verbose: print("Making metadata...") - metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", - "has_data": False, "inference_params": {}, "optimized": False, - "optimizer_info": {}, "classes": self.classes} + metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", "has_data": False, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}, + "optimized": False, "optimizer_info": {}} param_filepath = "nanodet_{}.ckpt".format(model) 
metadata["model_paths"].append(param_filepath) @@ -355,25 +354,28 @@ def reset(self): """This method is not used in this implementation.""" return NotImplementedError - def _save_onnx(self, onnx_path, img=None, do_constant_folding=False, verbose=True): + def __dummy_input(self): + width, height = self.cfg.data.val.input_size + dummy_input = ( + torch.randn((3, width, height), device=self.device, dtype=torch.float32), + torch.tensor(width, device="cpu", dtype=torch.int64), + torch.tensor(height, device="cpu", dtype=torch.int64), + torch.eye(3, device="cpu", dtype=torch.float32), + ) + return dummy_input + + def _save_onnx(self, onnx_path, do_constant_folding=False, verbose=True, nms_max_num=100): if not self.predictor: - self.predictor = Predictor(self.cfg, self.model, device=self.device) + self.predictor = Predictor(self.cfg, self.model, device=self.device, nms_max_num=nms_max_num) os.makedirs(onnx_path, exist_ok=True) export_path = os.path.join(onnx_path, "nanodet_{}.onnx".format(self.cfg.check_point_name)) - if self.dummy_input is None: - assert img is not None,\ - "When optimize or _save_onnx is called for the first time, it must have and OpenDR image input." 
- if not isinstance(img, Image): - img = Image(img) - img = img.opencv() - if not self.dummy_input: - self.dummy_input = self.predictor.preprocessing(img) + dummy_input = self.__dummy_input() torch.onnx.export( self.predictor, - self.dummy_input, + dummy_input[0], export_path, verbose=verbose, keep_initializers_as_inputs=True, @@ -381,11 +383,13 @@ def _save_onnx(self, onnx_path, img=None, do_constant_folding=False, verbose=Tru opset_version=11, input_names=['data'], output_names=['output'], + dynamic_axes={'data': {1: 'width', + 2: 'height'}} ) metadata = {"model_paths": ["nanodet_{}.onnx".format(self.cfg.check_point_name)], "framework": "pytorch", - "format": "onnx", "has_data": False, "inference_params": {}, "optimized": True, - "optimizer_info": {}, "classes": self.classes} + "format": "onnx", "has_data": False, "optimized": True, "optimizer_info": {}, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}} with open(os.path.join(onnx_path, "nanodet_{}.json".format(self.cfg.check_point_name)), 'w', encoding='utf-8') as f: @@ -403,7 +407,7 @@ def _save_onnx(self, onnx_path, img=None, do_constant_folding=False, verbose=Tru import onnx if verbose: print("Simplifying ONNX model...") - input_data = {"data": self.dummy_input[0].detach().cpu().numpy()} + input_data = {"data": dummy_input[0].detach().cpu().numpy()} model_sim, flag = onnxsim.simplify(export_path, input_data=input_data) if flag: onnx.save(model_sim, export_path) @@ -419,29 +423,22 @@ def _load_onnx(self, onnx_path, verbose=True): self.ort_session = ort.InferenceSession(onnx_path) - def _save_jit(self, jit_path, img=None, verbose=True): + def _save_jit(self, jit_path, verbose=True, nms_max_num=100): if not self.predictor: - self.predictor = Predictor(self.cfg, self.model, device=self.device) + self.predictor = Predictor(self.cfg, self.model, device=self.device, nms_max_num=nms_max_num) os.makedirs(jit_path, exist_ok=True) - if not self.dummy_input: - assert img, \ - 
"When optimize or _save_jit is called for the first time, it must have and OpenDR image input." - if not isinstance(img, Image): - img = Image(img) - img = img.opencv() - if not self.dummy_input: - self.dummy_input = self.predictor.preprocessing(img) + dummy_input = self.__dummy_input() with torch.no_grad(): export_path = os.path.join(jit_path, "nanodet_{}.pth".format(self.cfg.check_point_name)) - self.predictor.trace_model(self.dummy_input) + self.predictor.trace_model(dummy_input) model_traced = torch.jit.script(self.predictor) metadata = {"model_paths": ["nanodet_{}.pth".format(self.cfg.check_point_name)], "framework": "pytorch", - "format": "pth", "has_data": False, "inference_params": {}, "optimized": True, - "optimizer_info": {}, "classes": self.classes} + "format": "pth", "has_data": False, "optimized": True, "optimizer_info": {}, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}} model_traced.save(export_path) with open(os.path.join(jit_path, "nanodet_{}.json".format(self.cfg.check_point_name)), @@ -457,26 +454,26 @@ def _load_jit(self, jit_path, verbose=True): self.jit_model = torch.jit.load(jit_path, map_location=self.device) - def optimize(self, export_path, initial_img=None, verbose=True, optimization="jit"): + def optimize(self, export_path, verbose=True, optimization="jit", nms_max_num=100): """ Method for optimizing the model with ONNX or JIT. :param export_path: The file path to the folder where the optimized model will be saved. If a model already exists at this path, it will be overwritten. 
:type export_path: str - :param initial_img: if optimize is called for the first time it needs a dummy OpenDR Image input - :type initial_img: opendr.engine.data.Image :param verbose: if set to True, additional information is printed to STDOUT :type verbose: bool, optional :param optimization: the kind of optimization you want to perform [jit, onnx] :type optimization: str + :param nms_max_num: determines the maximum number of bounding boxes that will be retained following the nms. + :type nms_max_num: int """ optimization = optimization.lower() if not os.path.exists(export_path): if optimization == "jit": - self._save_jit(export_path, initial_img, verbose=verbose) + self._save_jit(export_path, verbose=verbose, nms_max_num=nms_max_num) elif optimization == "onnx": - self._save_onnx(export_path, initial_img, verbose=verbose) + self._save_onnx(export_path, verbose=verbose, nms_max_num=nms_max_num) else: assert NotImplementedError with open(os.path.join(export_path, "nanodet_{}.json".format(self.cfg.check_point_name))) as f: @@ -537,7 +534,7 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, logging= batch_size=self.batch_size, shuffle=True, num_workers=self.cfg.device.workers_per_gpu, - pin_memory=True, + pin_memory=False, collate_fn=naive_collate, drop_last=True, ) @@ -546,7 +543,7 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, logging= batch_size=self.batch_size, shuffle=False, num_workers=self.cfg.device.workers_per_gpu, - pin_memory=True, + pin_memory=False, collate_fn=naive_collate, drop_last=False, ) @@ -590,7 +587,7 @@ def eval(self, dataset, verbose=True, logging=False, local_rank=1): """ This method performs evaluation on a given dataset and returns a dictionary with the evaluation results. 
:param dataset: dataset object, to perform evaluation on - :type dataset: ExternalDataset, DetectionDataset not implemented yet + :type dataset: ExternalDataset, XMLBasedDataset :param verbose: if set to True, additional information is printed to STDOUT :type verbose: bool :param logging: if set to True, text and STDOUT logging will be used @@ -620,7 +617,7 @@ def eval(self, dataset, verbose=True, logging=False, local_rank=1): batch_size=self.batch_size, shuffle=False, num_workers=self.cfg.device.workers_per_gpu, - pin_memory=True, + pin_memory=False, collate_fn=naive_collate, drop_last=False, ) @@ -655,35 +652,39 @@ def eval(self, dataset, verbose=True, logging=False, local_rank=1): test_results = (verbose or logging) return trainer.test(self.task, val_dataloader, verbose=test_results) - def infer(self, input, threshold=0.35): + def infer(self, input, threshold=0.35, nms_max_num=100): """ Performs inference :param input: input image to perform inference on :type input: opendr.data.Image :param threshold: confidence threshold :type threshold: float, optional + :param nms_max_num: determines the maximum number of bounding boxes that will be retained following the nms. 
+ :type nms_max_num: int :return: list of bounding boxes of last image of input or last frame of the video :rtype: opendr.engine.target.BoundingBoxList """ if not self.predictor: - self.predictor = Predictor(self.cfg, self.model, device=self.device) + self.predictor = Predictor(self.cfg, self.model, device=self.device, nms_max_num=nms_max_num) if not isinstance(input, Image): input = Image(input) _input = input.opencv() - (_input, _height, _width, _warp_matrix) = self.predictor.preprocessing(_input) + _input, *metadata = self.predictor.preprocessing(_input) + if self.ort_session: if self.jit_model: warnings.warn( "Warning: Both JIT and ONNX models are initialized, inference will run in ONNX mode by default.\n" "To run in JIT please delete the self.ort_session like: detector.ort_session = None.") - res = self.ort_session.run(['output'], {'data': _input.cpu().detach().numpy()}) - res = self.predictor.postprocessing(torch.from_numpy(res[0]), _input, _height, _width, _warp_matrix) + preds = self.ort_session.run(['output'], {'data': _input.cpu().detach().numpy()}) + res = self.predictor.postprocessing(torch.from_numpy(preds[0]), _input, *metadata) elif self.jit_model: - res = self.jit_model(_input, _height, _width, _warp_matrix).cpu() + res = self.jit_model(_input, *metadata).cpu() else: - res = self.predictor(_input, _height, _width, _warp_matrix) + preds = self.predictor(_input, *metadata) + res = self.predictor.postprocessing(preds, _input, *metadata) bounding_boxes = [] for label in range(len(res)): diff --git a/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py b/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py index e213e484e4..e4a212fe5d 100644 --- a/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py +++ b/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py @@ -140,12 +140,10 @@ def test_optimize(self): self.detector.ort_session = None self.detector.jit_model = None - 
img = cv2.imread(os.path.join(self.temp_dir, "000000000036.jpg")) - - self.detector.optimize(os.path.join(self.temp_dir, "onnx"), initial_img=img, verbose=False, optimization="onnx") + self.detector.optimize(os.path.join(self.temp_dir, "onnx"), verbose=False, optimization="onnx") self.assertIsNotNone(self.detector.ort_session) - self.detector.optimize(os.path.join(self.temp_dir, "jit"), initial_img=img, verbose=False, optimization="jit") + self.detector.optimize(os.path.join(self.temp_dir, "jit"), verbose=False, optimization="jit") self.assertIsNotNone(self.detector.jit_model) # Cleanup From bda2e221f3a21b62e9fcf96895cbae4df1dd6cdb Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 19 Jan 2023 18:30:27 +0200 Subject: [PATCH 83/87] add XMLBaseDataset option --- .../nanodet/algorithm/nanodet/data/dataset/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py index b68b60e389..6c40da7117 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py @@ -17,7 +17,7 @@ import copy from opendr.engine.datasets import ExternalDataset - +from opendr.perception.object_detection_2d.datasets import XMLBasedDataset from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.dataset.coco import CocoDataset from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.dataset.xml_dataset import XMLDataset @@ -54,5 +54,9 @@ def build_dataset(cfg, dataset, class_names, mode, verbose=True): if verbose: print("ExternalDataset loaded.") return dataset + elif isinstance(dataset, XMLBasedDataset): + dataset = XMLDataset(img_path=dataset.abs_images_dir, ann_path=dataset.abs_annot_dir, mode=mode, + 
class_names=dataset.classes, **dataset_cfg) + return dataset else: raise ValueError("Dataset type {} not supported".format(type(dataset))) From d091dbb25731a28a618eb954d6858277a8570541 Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 19 Jan 2023 18:30:41 +0200 Subject: [PATCH 84/87] train bug fix --- .../nanodet/algorithm/nanodet/model/arch/nanodet_plus.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py index a4ae17e39b..a400ecff29 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py @@ -42,10 +42,10 @@ def forward_train(self, gt_meta): fpn_feat = self.fpn(feat) if self.epoch >= self.detach_epoch: aux_fpn_feat = self.aux_fpn([f.detach() for f in feat]) - dual_fpn_feat = ( + dual_fpn_feat = [ torch.cat([f.detach(), aux_f], dim=1) for f, aux_f in zip(fpn_feat, aux_fpn_feat) - ) + ] else: aux_fpn_feat = self.aux_fpn(feat) dual_fpn_feat = [ From 6acf11dc299b6fb554d837b2bf837fa1537e7afd Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 19 Jan 2023 18:30:50 +0200 Subject: [PATCH 85/87] train bug fix --- .../nanodet/algorithm/nanodet/trainer/task.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py index 5cd2d7e125..7ec2a04864 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py @@ -180,8 +180,10 @@ def validation_epoch_end(self, validation_step_outputs): 
os.path.join(best_save_path, "model_best.ckpt") ) verbose = True if self.logger is not None else False - self._save_current_model(self.local_rank, os.path.join(best_save_path, "nanodet_model_state_best.pth"), - verbose=verbose) + # TODO: save only if local_rank is < 0 + # self._save_current_model(self.local_rank, os.path.join(best_save_path, "nanodet_model_state_best.pth"), + # verbose=verbose) + self.save_current_model(os.path.join(best_save_path, "nanodet_model_state_best.pth"), verbose=verbose) txt_path = os.path.join(best_save_path, "eval_results.txt") with open(txt_path, "a") as f: f.write("Epoch:{}\n".format(self.current_epoch + 1)) From 96c86a7eb0986e083fd68459351666b9f2a0adad Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 19 Jan 2023 18:31:02 +0200 Subject: [PATCH 86/87] tipo fix --- .../python/perception/object_detection_2d/nanodet/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index 4d67869184..53bb4fc074 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -4,7 +4,7 @@ This folder contains minimal code usage examples that showcase the basic functio provided by OpenDR. Specifically the following examples are provided: 1. inference_demo.py: Perform inference on a single image in a directory. Setting `--device cpu` performs inference on CPU. Setting the config file for the specific model is done with `--model "model name"`. - Inference is used optimization [ONNX or JIT] if specified in `--optimize onnx` or `--optimize jit`. + Inference will use optimization [ONNX or JIT] if specified in `--optimize onnx` or `--optimize jit`. If optimization is used, first an optimized model will be exported and then inference will be performed. 
In ONNX it is recommended to install `onnxsim` dependencies with `pip install onnxsim` on OpenDR's virtual environment, for smaller and better optimized models. From 42ddb48a78e5ad0015fde82f6ebffa714ff6e95b Mon Sep 17 00:00:00 2001 From: ManosMpampis Date: Thu, 19 Jan 2023 18:31:20 +0200 Subject: [PATCH 87/87] add changes and some better namings --- docs/reference/object-detection-2d-nanodet.md | 159 +++++++++--------- 1 file changed, 78 insertions(+), 81 deletions(-) diff --git a/docs/reference/object-detection-2d-nanodet.md b/docs/reference/object-detection-2d-nanodet.md index 8b6af899f2..009e1358d5 100644 --- a/docs/reference/object-detection-2d-nanodet.md +++ b/docs/reference/object-detection-2d-nanodet.md @@ -59,17 +59,17 @@ This method is used for training the algorithm on a train dataset and validating Parameters: -- **dataset**: *ExternalDataset*\ +- **dataset**: *object*\ Object that holds the training dataset. - Can be of type `ExternalDataset`. -- **val_dataset** : *ExternalDataset, default=None*\ + Can be of type `ExternalDataset` or `XMLBasedDataset`. +- **val_dataset** : *object, default=None*\ Object that holds the validation dataset. - Can be of type `ExternalDataset`. + Can be of type `ExternalDataset` or `XMLBasedDataset`. - **logging_path** : *str, default=''*\ Subdirectory in temp_path to save log files and TensorBoard. - **verbose** : *bool, default=True*\ Enables verbosity. -- **logging** : *bool, default=True*\ +- **logging** : *bool, default=False*\ Enables the maximum verbosity and the logger. - **seed** : *int, default=123*\ Seed for repeatability. @@ -86,8 +86,9 @@ Saves a txt logger file containing stats regarding evaluation. Parameters: -- **dataset** : *ExternalDataset*\ +- **dataset** : *object*\ Object that holds the evaluation dataset. + Can be of type `ExternalDataset` or `XMLBasedDataset`. - **verbose**: *bool, default=True*\ Enables verbosity. 
- **logging**: *bool, default=False*\ @@ -97,7 +98,7 @@ Parameters: #### `NanodetLearner.infer` ```python -NanodetLearner.infer(self, input, thershold) +NanodetLearner.infer(self, input, thershold, nms_max_num) ``` This method is used to perform object detection on an image. @@ -105,15 +106,18 @@ Returns an `engine.target.BoundingBoxList` object, which contains bounding boxes their width and height, or returns an empty list if no detections were made on the input image. Parameters: -- **input** : *Image*\ +- **input** : *object*\ + Object of type engine.data.Image. Image type object to perform inference on. - **threshold**: *float, default=0.35*\ Specifies the threshold for object detection inference. An object is detected if the confidence of the output is higher than the specified threshold. +- **nms_max_num**: *int, default=100*\ + Determines the maximum number of bounding boxes that will be retained following the nms. #### `NanodetLearner.optimize` ```python -NanodetLearner.optimize(self, export_path, initial_img, verbose, optimization) +NanodetLearner.optimize(self, export_path, verbose, optimization, nms_max_num) ``` This method is used to perform JIT or ONNX optimizations and save a trained model with its metadata. @@ -130,12 +134,12 @@ Parameters: - **export_path**: *str*\ Path to save or load the optimized model. -- **initial_img**: *Image*, default=None\ - If optimize is called for the first time a dummy OpenDR image is needed as input. - **verbose**: *bool, default=True*\ Enables the maximum verbosity. - **optimization**: *str, default="jit"*\ It determines what kind of optimization is used, possible values are *jit* or *onnx*. +- **nms_max_num**: *int, default=100*\ + Determines the maximum number of bounding boxes that will be retained following the nms. 
#### `NanodetLearner.save` ```python @@ -185,7 +189,7 @@ Parameters: - **mode**: *{'pretrained', 'images', 'test_data'}, default='pretrained'*\ If *'pretrained'*, downloads a pretrained detector model from the *model_to_use* architecture which was chosen at learner initialization. If *'images'*, downloads an image to perform inference on. If *'test_data'* downloads a dummy dataset for testing purposes. -- **verbose**: *bool, default=False*\ +- **verbose**: *bool, default=True*\ Enables the maximum verbosity. - **url**: *str, default=OpenDR FTP URL*\ URL of the FTP server. @@ -303,7 +307,6 @@ Furthermore, demos on performing [training](../../projects/python/perception/obj This example shows how to perform optimization on a pretrained model, then run inference on an image and finally draw the resulting bounding boxes, using a nanodet model that is pretrained on the COCO dataset. In this example we use ONNX optimization, but JIT can also be used by changing *optimization* to *jit*. - With the *path* parameter you can define the image file to be used as dummy input for the optimization and inference. 
The optimized model will be saved in the `./optimization_models` folder ```python from opendr.engine.data import Image @@ -316,7 +319,7 @@ Furthermore, demos on performing [training](../../projects/python/perception/obj # First read an OpenDR image from your dataset and run the optimizer: img = Image.open("./predefined_examples/000000000036.jpg") - nanodet.optimize("./onnx/nanodet_m/", img, optimization="onnx") + nanodet.optimize("./onnx/nanodet_m/", optimization="onnx") boxes = nanodet.infer(input=img) @@ -332,82 +335,76 @@ The speed is measured from the start of the forward pass until the end of post-p For PyTorch inference: | Method {input} | RTX 2070 | TX2 | NX | -|------------------------------|----------|-------|-------| -| Efficient Lite0 {320} | 48.63 | 9.38 | 14.48 | -| Efficient Lite1 {416} | 43.88 | 7.93 | 11.07 | -| Efficient Lite2 {512} | 40.51 | 6.44 | 8.84 | -| RepVGG A0 {416} | 33.4 | 9.21 | 12.3 | -| Nanodet-g {416} | 51.32 | 9.57 | 15.75 | -| Nanodet-m {320} | 48.36 | 8.56 | 14.08 | -| Nanodet-m 0.5x {320} | 46.94 | 7.97 | 12.84 | -| Nanodet-m 1.5x {320} | 47.41 | 8.8 | 13.98 | -| Nanodet-m {416} | 47.3 | 8.34 | 13.15 | -| Nanodet-m 1.5x {416} | 45.62 | 8.43 | 13.2 | -| Nanodet-plue m {320} | 41.9 | 7.45 | 12.01 | -| Nanodet-plue m 1.5x {320} | 39.63 | 7.66 | 12.21 | -| Nanodet-plue m {416} | 40.16 | 7.24 | 11.58 | -| Nanodet-plue m 1.5x {416} | 38.94 | 7.37 | 11.52 | +|-----------------------------|----------|-------|-------| +| Efficient Lite0 {320} | 48.63 | 9.38 | 14.48 | +| Efficient Lite1 {416} | 43.88 | 7.93 | 11.07 | +| Efficient Lite2 {512} | 40.51 | 6.44 | 8.84 | +| RepVGG A0 {416} | 33.4 | 9.21 | 12.3 | +| Nanodet-g {416} | 51.32 | 9.57 | 15.75 | +| Nanodet-m {320} | 48.36 | 8.56 | 14.08 | +| Nanodet-m 0.5x {320} | 46.94 | 7.97 | 12.84 | +| Nanodet-m 1.5x {320} | 47.41 | 8.8 | 13.98 | +| Nanodet-m {416} | 47.3 | 8.34 | 13.15 | +| Nanodet-m 1.5x {416} | 45.62 | 8.43 | 13.2 | +| Nanodet-plus m {320} | 41.9 | 7.45 | 12.01 | +| Nanodet-plus 
m 1.5x {320} | 39.63 | 7.66 | 12.21 | +| Nanodet-plus m {416} | 40.16 | 7.24 | 11.58 | +| Nanodet-plus m 1.5x {416} | 38.94 | 7.37 | 11.52 | For JIT optimization inference: | Method {input} | RTX 2070 | TX2 | NX | -|------------------------------|----------|-------|-------| -| Efficient Lite0 {320} | 69.06 | 12.94 | 17.78 | -| Efficient Lite1 {416} | 62.94 | 9.27 | 12.94 | -| Efficient Lite2 {512} | 65.46 | 7.46 | 10.32 | -| RepVGG A0 {416} | 41.44 | 11.16 | 14.89 | -| Nanodet-g {416} | 76.3 | 12.94 | 20.52 | -| Nanodet-m {320} | 75.66 | 12.22 | 20.67 | -| Nanodet-m 0.5x {320} | 65.71 | 11.31 | 17.68 | -| Nanodet-m 1.5x {320} | 66.23 | 12.46 | 19.99 | -| Nanodet-m {416} | 79.91 | 12.08 | 19.28 | -| Nanodet-m 1.5x {416} | 69.44 | 12.3 | 18.6 | -| Nanodet-plue m {320} | 67.82 | 11.19 | 18.85 | -| Nanodet-plue m 1.5x {320} | 64.12 | 11.57 | 18.26 | -| Nanodet-plue m {416} | 64.74 | 11.22 | 17.57 | -| Nanodet-plue m 1.5x {416} | 56.77 | 10.39 | 14.81 | +|-----------------------------|----------|-------|-------| +| Efficient Lite0 {320} | 69.06 | 12.94 | 17.78 | +| Efficient Lite1 {416} | 62.94 | 9.27 | 12.94 | +| Efficient Lite2 {512} | 65.46 | 7.46 | 10.32 | +| RepVGG A0 {416} | 41.44 | 11.16 | 14.89 | +| Nanodet-g {416} | 76.3 | 12.94 | 20.52 | +| Nanodet-m {320} | 75.66 | 12.22 | 20.67 | +| Nanodet-m 0.5x {320} | 65.71 | 11.31 | 17.68 | +| Nanodet-m 1.5x {320} | 66.23 | 12.46 | 19.99 | +| Nanodet-m {416} | 79.91 | 12.08 | 19.28 | +| Nanodet-m 1.5x {416} | 69.44 | 12.3 | 18.6 | +| Nanodet-plus m {320} | 67.82 | 11.19 | 18.85 | +| Nanodet-plus m 1.5x {320} | 64.12 | 11.57 | 18.26 | +| Nanodet-plus m {416} | 64.74 | 11.22 | 17.57 | +| Nanodet-plus m 1.5x {416} | 56.77 | 10.39 | 14.81 | For ONNX optimization inference: -In this case, the forward pass is performed in ONNX. -The pre-processing steps were implemented in PyTorch. 
-Results show that the performance on ONNX varies significantly among different architectures, with some achieving good performance while others performing poorly. -Additionally, it was observed that the performance of ONNX on a TX2 device was generally good, although it was observed to have occasional spikes of long run times that made it difficult to accurately measure. -Overall, the TX2 device demonstrated good performance with ONNX: - -| Method {input} | RTX 2070 | TX2 | NX | -|------------------------------|-----------|-----|--------| -| Efficient Lite0 {320} | 33.12 | | 34.03 | -| Efficient Lite1 {416} | 16.78 | | 17.35 | -| Efficient Lite2 {512} | 10.35 | | 12.14 | -| RepVGG A0 {416} | 27.89 | | 51.74 | -| Nanodet-g {416} | 103.22 | | 87.40 | -| Nanodet-m {320} | 98.73 | | 122.26 | -| Nanodet-m 0.5x {320} | 144.46 | | 208.19 | -| Nanodet-m 1.5x {320} | 75.82 | | 75.40 | -| Nanodet-m {416} | 73.09 | | 72.78 | -| Nanodet-m 1.5x {416} | 51.30 | | 51.78 | -| Nanodet-plue m {320} | 51.39 | | 50.67 | -| Nanodet-plue m 1.5x {320} | 39.65 | | 40.62 | -| Nanodet-plue m {416} | 39.17 | | 36.98 | -| Nanodet-plue m 1.5x {416} | 28.55 | | 27.20 | +| Method {input} | RTX 2070 | +|-----------------------------|-----------| +| Efficient Lite0 {320} | 33.12 | +| Efficient Lite1 {416} | 16.78 | +| Efficient Lite2 {512} | 10.35 | +| RepVGG A0 {416} | 27.89 | +| Nanodet-g {416} | 103.22 | +| Nanodet-m {320} | 98.73 | +| Nanodet-m 0.5x {320} | 144.46 | +| Nanodet-m 1.5x {320} | 75.82 | +| Nanodet-m {416} | 73.09 | +| Nanodet-m 1.5x {416} | 51.30 | +| Nanodet-plus m {320} | 51.39 | +| Nanodet-plus m 1.5x {320} | 39.65 | +| Nanodet-plus m {416} | 39.17 | +| Nanodet-plus m 1.5x {416} | 28.55 | Finally, we measure the performance on the COCO dataset, using the corresponding metrics: | Method {input} | coco2017 mAP | -|------------------------------|--------------| -| Efficient Lite0 {320} | 24.4 | -| Efficient Lite1 {416} | 29.2 | -| Efficient Lite2 {512} | 32.4 | -| RepVGG A0 {416} 
| 25.5 | -| Nanodet-g {416} | 22.7 | -| Nanodet-m {320} | 20.2 | -| Nanodet-m 0.5x {320} | 13.1 | -| Nanodet-m 1.5x {320} | 23.1 | -| Nanodet-m {416} | 23.5 | -| Nanodet-m 1.5x {416} | 26.6 | -| Nanodet-plue m {320} | 27.0 | -| Nanodet-plue m 1.5x {320} | 29.9 | -| Nanodet-plue m {416} | 30.3 | -| Nanodet-plue m 1.5x {416} | 34.1 | +|-----------------------------|--------------| +| Efficient Lite0 {320} | 24.4 | +| Efficient Lite1 {416} | 29.2 | +| Efficient Lite2 {512} | 32.4 | +| RepVGG A0 {416} | 25.5 | +| Nanodet-g {416} | 22.7 | +| Nanodet-m {320} | 20.2 | +| Nanodet-m 0.5x {320} | 13.1 | +| Nanodet-m 1.5x {320} | 23.1 | +| Nanodet-m {416} | 23.5 | +| Nanodet-m 1.5x {416} | 26.6 | +| Nanodet-plus m {320} | 27.0 | +| Nanodet-plus m 1.5x {320} | 29.9 | +| Nanodet-plus m {416} | 30.3 | +| Nanodet-plus m 1.5x {416} | 34.1 | \ No newline at end of file