From 2a5e5f101ddae4d2fe953caadc8ba73ff5287649 Mon Sep 17 00:00:00 2001 From: ManosMpampis <93824600+ManosMpampis@users.noreply.github.com> Date: Tue, 24 Jan 2023 12:50:08 +0200 Subject: [PATCH] Nanodet C API. Onnx and Libtorch (JIT) modules implementations and Libtorch Installation. (#352) * Onnx and Jit tracing-scripting implementation with python and c api inference. Docs and tests have changed acordingly * Implementation of libtorch and torch vision installation as part of tool installation. * add licence and file name corection * style, inconclusive cppcheck fixes. * Fixes for C API initilization errors * Deleting no used code * macos cppcheck -> C style fixes * macos cppcheck -c style fixes * c-style pointer delceration fixes * c-style pointer delceration fixes * cpp casting casting fixes * bug fixes * Update projects/python/perception/object_detection_2d/nanodet/export_torchscript.py Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update install_torch_c_api.sh * Update dependencies/install_torch_c_api.sh Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update include/nanodet_c.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update include/nanodet_c.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update include/nanodet_c.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update include/nanodet_c.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update include/opendr_utils.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update include/target.h Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update nanodet.md * Update docs/reference/nanodet.md 
Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update nanodet.md * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update Makefile * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update docs/reference/nanodet.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update nanodet.md * Update projects/c_api/Makefile Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update projects/python/perception/object_detection_2d/nanodet/README.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update eval_demo.py * Update projects/python/perception/object_detection_2d/nanodet/export_onnx.py Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update projects/python/perception/object_detection_2d/nanodet/README.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * Update projects/python/perception/object_detection_2d/nanodet/README.md Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> * installing libtorch and vision with respect to CUDA of the user * better explanations of variables in optimization runtimes * small typing fixes * Fix dependency * Update dependencies.ini deleting torchmetrics * update docs for new data structures and utilities of c api * new source, samples, 
utilities and test for onnx C API. The new scripts are implemente the feed forward of almost all the onnx models that python api is provided. * bug fixes in onnx exporting. * Revert "update docs for new data structures and utilities of c api" This reverts commit 467c1f1e661462de46dd88a1762b23cbfaf7f4c8. * Revert "bug fixes in onnx exporting." This reverts commit fcd4f9c3a09d70deb51eec83ad1d469c7290ae27. * Revert "Revert "bug fixes in onnx exporting."" This reverts commit 953a193aa9a99522ac6069814a4b50f6837c0fac. * Revert "new source, samples, utilities and test for onnx C API. The new scripts are implemente the feed forward of almost all the onnx models that python api is provided." This reverts commit 617572d5d6d3e5d02004ece522f7d224d050bd61. * Revert "bug fixes in onnx exporting." This reverts commit fcd4f9c3a09d70deb51eec83ad1d469c7290ae27. * revert last commits, change files for cpp styles as wiki says and better directory construction for easier navigation and new tools implementation. * change function name to be uniform with the others * doc for c nanodet * Added small Json parser and installation script for easier navigation and universal use for future c api * Update projects/c_api/README.md Co-authored-by: Nikolaos Passalis * Update dependencies/install_torch_c_api.sh Co-authored-by: Nikolaos Passalis * Update dependencies/install_torch_c_api.sh Co-authored-by: Nikolaos Passalis * Update install_torch_c_api.sh not need for specified vision version in sh * Update download_torch.py not need for environment variables to be passed * Update utilities.py delete additional print from debugging * Fixes * Fixes of suggestions Combine optimizations and inference in one script * Fixes of suggestions Add more JSON parser capabilities Fix face recognition threshold not readed from JSON * Fixes of suggestions Delete not used loggers. 
* Fixes of suggestions * added json parser in utilities * unnecessary use of filesystem, easier implementation in embeded * default directory for saves change to `temp` * change to warnings errors and exceptions * Transfer warnings for jit and ort simultaneously loaded from _load to infer. * Change naming style. All .cpp files have only CPP style naming and .c files have CPP style naming in CPP functions and C style in everything else. * license test update * Apply suggestions from code review Co-authored-by: Kostas Tsampazis <27914645+tsampazk@users.noreply.github.com> * Apply suggestions from code review * fixe bug to find cuda * add automatic random colors * better implementation of optimize * add XMLBaseDataset option * train bug fix * train bug fix * tipo fix * add changes and some better namings Co-authored-by: ad-daniel <44834743+ad-daniel@users.noreply.github.com> Co-authored-by: ad-daniel Co-authored-by: Nikolaos Passalis Co-authored-by: Kostas Tsampazis <27914645+tsampazk@users.noreply.github.com> --- Makefile | 2 + dependencies/download_torch.py | 125 ++++ dependencies/install_rapidjson.sh | 17 + dependencies/install_torch_c_api.sh | 64 ++ docs/reference/c-data-h.md | 12 +- docs/reference/c-face-recognition-h.md | 42 +- .../c-object-detection-2d-nanodet-jit-h.md | 47 ++ docs/reference/c-opendr-utils-h.md | 65 +- docs/reference/c-target-h.md | 78 ++- docs/reference/nanodet.md | 289 -------- docs/reference/object-detection-2d-nanodet.md | 410 ++++++++++++ include/data.h | 4 +- include/face_recognition.h | 58 +- include/object_detection_2d_nanodet_jit.h | 84 +++ include/opendr_utils.h | 54 +- include/target.h | 26 +- projects/c_api/Makefile | 17 +- projects/c_api/README.md | 1 + .../face_recognition/face_recognition_demo.c | 20 +- .../object_detection_2d/nanodet/README.md | 15 + .../nanodet/nanodet_jit_demo.c | 62 ++ .../object_detection_2d/nanodet/README.md | 32 +- .../object_detection_2d/nanodet/eval_demo.py | 11 +- .../nanodet/inference_demo.py | 13 +- 
.../nanodet/inference_tutorial.ipynb | 616 +----------------- .../object_detection_2d/nanodet/train_demo.py | 12 +- src/c_api/Makefile | 11 +- src/c_api/README.md | 2 +- src/c_api/face_recognition.cpp | 329 +++++----- src/c_api/object_detection_2d_nanodet_jit.cpp | 362 ++++++++++ src/c_api/opendr_utils.cpp | 120 +++- .../object_detection_2d/nanodet/README.md | 2 +- .../nanodet_EfficientNet_Lite0_320.yml | 2 +- .../nanodet_EfficientNet_Lite1_416.yml | 2 +- .../nanodet_EfficientNet_Lite2_512.yml | 2 +- .../RepVGG/nanodet_RepVGG_A0_416.yml | 2 +- .../Transformer/nanodet_t.yml | 7 +- .../config/legacy_v0.x_configs/nanodet_g.yml | 2 +- .../config/legacy_v0.x_configs/nanodet_m.yml | 2 +- .../legacy_v0.x_configs/nanodet_m_0.5x.yml | 2 +- .../legacy_v0.x_configs/nanodet_m_1.5x.yml | 2 +- .../nanodet_m_1.5x_416.yml | 2 +- .../legacy_v0.x_configs/nanodet_m_416.yml | 2 +- .../algorithm/config/nanodet_custom.yml | 125 ---- .../config/nanodet_plus_m_1.5x_320.yml | 2 +- .../config/nanodet_plus_m_1.5x_416.yml | 2 +- .../algorithm/config/nanodet_plus_m_320.yml | 2 +- .../algorithm/config/nanodet_plus_m_416.yml | 2 +- .../algorithm/nanodet/data/batch_process.py | 31 +- .../nanodet/data/dataset/__init__.py | 6 +- .../nanodet/data/transform/pipeline.py | 20 +- .../algorithm/nanodet/data/transform/warp.py | 85 +-- .../nanodet/evaluator/coco_detection.py | 15 +- .../algorithm/nanodet/inferencer/utilities.py | 66 +- .../nanodet/model/arch/nanodet_plus.py | 8 +- .../nanodet/model/arch/one_stage_detector.py | 6 +- .../nanodet/model/backbone/custom_csp.py | 5 +- .../model/backbone/efficientnet_lite.py | 6 +- .../nanodet/model/backbone/ghostnet.py | 14 +- .../nanodet/model/backbone/mobilenetv2.py | 13 +- .../nanodet/model/backbone/repvgg.py | 3 +- .../nanodet/model/backbone/resnet.py | 6 +- .../nanodet/model/backbone/shufflenetv2.py | 9 +- .../algorithm/nanodet/model/fpn/fpn.py | 20 +- .../algorithm/nanodet/model/fpn/ghost_pan.py | 14 +- .../algorithm/nanodet/model/fpn/pan.py | 17 +- 
.../algorithm/nanodet/model/fpn/tan.py | 7 +- .../algorithm/nanodet/model/head/gfl_head.py | 189 ++++-- .../nanodet/model/head/nanodet_head.py | 44 +- .../nanodet/model/head/nanodet_plus_head.py | 143 ++-- .../nanodet/model/head/simple_conv_head.py | 12 +- .../nanodet/model/loss/gfocal_loss.py | 2 + .../algorithm/nanodet/model/loss/iou_loss.py | 1 + .../algorithm/nanodet/model/module/conv.py | 26 +- .../algorithm/nanodet/model/module/nms.py | 48 +- .../nanodet/model/module/transformer.py | 2 + .../nanodet/algorithm/nanodet/trainer/task.py | 31 +- .../algorithm/nanodet/util/box_transform.py | 5 +- .../algorithm/nanodet/util/check_point.py | 27 +- .../nanodet/algorithm/nanodet/util/logger.py | 13 +- .../nanodet/algorithm/nanodet/util/path.py | 4 +- .../nanodet/dependencies.ini | 6 +- .../nanodet/nanodet_learner.py | 348 +++++++--- tests/Makefile | 19 +- tests/sources/c_api/test_face_recognition.c | 66 +- tests/sources/c_api/test_nanodet.c | 85 +++ tests/sources/c_api/test_opendr_utils.c | 8 +- .../nanodet/test_nanodet.py | 45 +- 88 files changed, 2787 insertions(+), 1850 deletions(-) create mode 100644 dependencies/download_torch.py create mode 100755 dependencies/install_rapidjson.sh create mode 100755 dependencies/install_torch_c_api.sh create mode 100644 docs/reference/c-object-detection-2d-nanodet-jit-h.md delete mode 100644 docs/reference/nanodet.md create mode 100644 docs/reference/object-detection-2d-nanodet.md create mode 100644 include/object_detection_2d_nanodet_jit.h create mode 100644 projects/c_api/samples/object_detection_2d/nanodet/README.md create mode 100644 projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c create mode 100644 src/c_api/object_detection_2d_nanodet_jit.cpp delete mode 100644 src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_custom.yml create mode 100644 tests/sources/c_api/test_nanodet.c diff --git a/Makefile b/Makefile index 7c4ca70dc7..dffd1227b5 100644 --- a/Makefile +++ b/Makefile @@ 
-37,6 +37,8 @@ install_compilation_dependencies: @+echo "#"; echo "# * Install Compilation Dependencies *"; echo "#" @+cd dependencies; ./install.sh compilation @+cd dependencies; ./install_onnx.sh + @+cd dependencies; ./install_rapidjson.sh + @+cd dependencies; ./install_torch_c_api.sh @+make --silent -C src/opendr/control/mobile_manipulation $(TARGET) OPENDR_HOME="$(OPENDR_HOME)"; @+make --silent -C src/opendr/control/single_demo_grasp $(TARGET) OPENDR_HOME="$(OPENDR_HOME)"; diff --git a/dependencies/download_torch.py b/dependencies/download_torch.py new file mode 100644 index 0000000000..3cf7496eef --- /dev/null +++ b/dependencies/download_torch.py @@ -0,0 +1,125 @@ +# Copyright 2020-2022 OpenDR European Project +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import argparse +import glob +from urllib.request import urlretrieve +import os +import warnings + + +def search_on_path(filenames): + for p in os.environ.get('PATH', '').split(os.pathsep): + for filename in filenames: + full = os.path.join(p, filename) + if os.path.exists(full): + return os.path.abspath(full) + return None + + +def get_cuda_path(): + nvcc_path = search_on_path(('nvcc', 'nvcc.exe')) + if nvcc_path is not None: + cuda_path_default = os.path.normpath(os.path.join(os.path.dirname(nvcc_path), '..')) + _cuda_path = cuda_path_default + elif os.path.exists('/usr/local/cuda'): + _cuda_path = '/usr/local/cuda' + else: + _cuda_path = None + + return _cuda_path + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--cuda_path", help="Path to installed cuda", type=str, default=None) + parser.add_argument("--opendr_device", help="Target device for installation", + type=str, choices=["gpu", "cpu"], default="gpu") + parser.add_argument("--torch_version", help="Specifies LibTorch version to be installed", type=str, default="1.9.0") + args = parser.parse_args() + + COMPATIBILITY_VERSIONS = { + "1.13.1": "0.14.1", + "1.13.0": "0.14.0", + "1.12.0": "0.13.0", + "1.11.0": "0.12.0", + "1.10.2": "0.11.3", + "1.10.1": "0.11.2", + "1.10.0": "0.11.1", + "1.9.1": "0.10.1", + "1.9.0": "0.10.0", + } + + warnings.simplefilter("error") + + TORCH_VERSION = args.torch_version + VISION_VERSION = COMPATIBILITY_VERSIONS[TORCH_VERSION] + + CUDA_VERSION = None + DEVICE = None + # Find Device + if args.opendr_device == "gpu": + try: + if args.cuda_path is None: + CUDA_PATH = get_cuda_path() + else: + CUDA_PATH = args.cuda_path + version_file_type = glob.glob(f"{CUDA_PATH}/version*") + if version_file_type[0].endswith('.txt'): + version_file = open(f"{CUDA_PATH}/version.txt", mode='r') + version_line = version_file.readlines() + version_line = version_line[0].replace(".", "") + CUDA_VERSION = version_line[13:16] + elif 
version_file_type[0].endswith('.json'): + version_file = open(f"{CUDA_PATH}/version.json", mode='r') + version_dict = json.load(version_file) + CUDA_VERSION = version_dict["cuda"]["version"] + CUDA_VERSION = CUDA_VERSION.replace(".", "") + CUDA_VERSION = CUDA_VERSION[:3] + else: + warnings.warn("\033[93m No CUDA version file found.") + DEVICE = f"cu{CUDA_VERSION}" + except: + warnings.warn("\033[93m No CUDA installation found.\n" + "Please install CUDA or specify CUDA path with export CUDA_PATH=/path/to/your/cuda.") + exit() + else: + DEVICE = "cpu" + + # Download Libtorch + try: + file_url_libtorch = f"https://download.pytorch.org/libtorch/{DEVICE}/" \ + f"libtorch-cxx11-abi-shared-with-deps-{TORCH_VERSION}%2B{DEVICE}.zip" + + DOWNLOAD_DIRECTORY = "libtorch.zip" + + urlretrieve(file_url_libtorch, DOWNLOAD_DIRECTORY) + + except: + warnings.warn("\033[93m No LibTorch found for your specific device and torch version.\n" + "Please choose another version of torch or install a different version of CUDA.\n" + "Please refer to https://download.pytorch.org/whl/torch_stable.html") + exit() + # Download Vision + try: + file_url_vision = f"https://github.com/pytorch/vision/archive/refs/tags/" \ + f"v{VISION_VERSION}.tar.gz" + DOWNLOAD_DIRECTORY = "vision.tar.gz" + urlretrieve(file_url_vision, DOWNLOAD_DIRECTORY) + except: + warnings.warn("\033[93m No torchvision found for your specific torch version.\n" + "Please refer to https://github.com/pytorch/vision for more information.") + exit() + diff --git a/dependencies/install_rapidjson.sh b/dependencies/install_rapidjson.sh new file mode 100755 index 0000000000..a60d44132b --- /dev/null +++ b/dependencies/install_rapidjson.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +if [ ! 
-d /usr/local/include/rapidjson ]; then + + VERSION="1.1.0" + + wget https://github.com/Tencent/rapidjson/archive/refs/tags/v${VERSION}.tar.gz --quiet + tar zxf v${VERSION}.tar.gz + cd rapidjson-${VERSION} + sudo mkdir -p /usr/local/include/rapidjson + sudo mv include/rapidjson/* /usr/local/include/rapidjson + cd .. + rm -rf rapidjson-${VERSION} + rm -rf v${VERSION}.tar.gz + + +fi diff --git a/dependencies/install_torch_c_api.sh b/dependencies/install_torch_c_api.sh new file mode 100755 index 0000000000..3d56b5fb2b --- /dev/null +++ b/dependencies/install_torch_c_api.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +if [[ -z "$TORCH_VERSION" ]]; +then + echo "Torch version not defined, version 1.9.0 will be installed." + echo "For a specific Torch version please define TORCH_VERSION with 'export TORCH_VERSION=x.x.x'" + TORCH_VERSION="1.9.0" +fi + +if [ ! -f /usr/local/lib/libtorchvision.so ]; then + TORCH_DIRECTORY="/usr/local/libtorch" + + if [[ "$OPENDR_DEVICE" == "gpu" ]] + then + echo "Downloading and installing LibTorch and torchvision (gpu support) ..." + GPU="on" + DEVICE="cu"${CUDA_VERSION} + CUDA_COMPILER="/usr/local/cuda/bin/nvcc" + else + echo "Downloading and installing LibTorch and torchvision (cpu-only) ..." + GPU="off" + DEVICE="cpu" + fi + + # Find CUDA version and download torch and vision + echo "Downloading LibTorch and torchvision ..." + # Make sure that we can download files + if [[ -z "$CUDA_PATH" ]]; + then + python3 ./download_torch.py --opendr_device "$OPENDR_DEVICE" --torch_version "$TORCH_VERSION" + else + python3 ./download_torch.py --opendr_device "$OPENDR_DEVICE" --torch_version "$TORCH_VERSION" --cuda_path "$CUDA_PATH" + fi + echo "Downloading Libtorch and torchvision done." + + # TORCH INSTALLATION + unzip -qq libtorch.zip + cd libtorch + + sudo mkdir -p ${TORCH_DIRECTORY} + sudo cp -r ./* ${TORCH_DIRECTORY} + cd .. 
+ + # TORCH VISION INSTALLATION + tar zxf vision.tar.gz + mv vision-* vision + cd vision + sudo mkdir -p build + cd build + sudo cmake .. -DCMAKE_CUDA_COMPILER=${CUDA_COMPILER} -DCMAKE_PREFIX_PATH=${TORCH_DIRECTORY} -DWITH_CUDA=${GPU} + sudo make + sudo make install + cd ../.. + + # CLEAN + sudo rm -rf libtorch + sudo rm -rf libtorch.zip + + sudo rm -rf vision + sudo rm -rf vision.tar.gz + + sudo ldconfig + +fi diff --git a/docs/reference/c-data-h.md b/docs/reference/c-data-h.md index 20b5e27b8e..35e8f0539a 100644 --- a/docs/reference/c-data-h.md +++ b/docs/reference/c-data-h.md @@ -3,24 +3,24 @@ The *data.h* header provides definitions of OpenDR data types that can be used in the C API of OpenDR. -### struct *opendr_image_t* +### struct *OpendrImageT* ```C -struct opendr_image { +struct OpendrImage { void *data; }; -typedef struct opendr_image opendr_image_t; +typedef struct OpendrImage OpendrImageT; ``` -The *opendr_image_t* structure provides a data structure for storing OpenDR images. +The *OpendrImageT* structure provides a data structure for storing OpenDR images. Every function in the C API receiving images is expected to use this structure. Helper functions that directly convert images into this format are provided in *opendr_utils.h*. -The *opendr_image_t* structure has the following field: +The *OpendrImageT* structure has the following field: #### `void *data` field A pointer where image data are stored. -*opendr_image_t* is using internally OpenCV images (*cv::Mat*) for storing images. +*OpendrImageT* is using internally OpenCV images (*cv::Mat*) for storing images. Therefore, only a pointer to the memory location of the corresponding *cv::Mat* is stored. Please note that the user is not expected to directly manipulate these data without first converting them into OpenCV data type or using the corresponding functions provided in *opendr_utils.h*. 
diff --git a/docs/reference/c-face-recognition-h.md b/docs/reference/c-face-recognition-h.md index 1ea2e5822d..bf1be372aa 100644 --- a/docs/reference/c-face-recognition-h.md +++ b/docs/reference/c-face-recognition-h.md @@ -3,62 +3,62 @@ The *face_recognition.h* header provides function definitions that can be used for accessing the OpenDR face recognition tool. -### Struct *face_recognition_model_t* +### Struct *FaceRecognitionModelT* ```C -struct face_recognition_model { +struct FaceRecognitionModel { ... }; -typedef struct face_recognition_model face_recognition_model_t; +typedef struct FaceRecognitionModel FaceRecognitionModelT; ``` -The *face_recognition_model_t* structure keeps all the neccesary information that are required by the OpenDR face recognition tool (e.g., model weights, normalization information, database for person recognition, ONNX session information, etc.). +The *FaceRecognitionModelT* structure keeps all the necessary information that are required by the OpenDR face recognition tool (e.g., model weights, normalization information, database for person recognition, ONNX session information, etc.). -### Function *load_face_recognition_model()* +### Function *loadFaceRecognitionModel()* ```C -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); +void loadFaceRecognitionModel(const char *modelPath, FaceRecognitionModelT *model); ``` - Loads a face recognition model saved in the local filesystem (*model path*) in OpenDR format. + Loads a face recognition model saved in the local filesystem (*modelPath*) in OpenDR format. This function also initializes a CPU-based ONNX session for performing inference using this model. The pre-trained models should follow the OpenDR conventions. The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. 
-### Function *free_face_recognition_model()* +### Function *freeFaceRecognitionModel()* ```C -void free_face_recognition_model(face_recognition_model_t *model); +void freeFaceRecognitionModel(FaceRecognitionModelT *model); ``` Releases the memory allocated for a face recognition model (*model*). -### Function *infer_face_recognition()* +### Function *inferFaceRecognition()* ```C -opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, opendr_image_t *image); +OpendrCategoryTargetT inferFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image); ``` This function perform inference using a face recognition model (*model*) and an input image (*image*). The function returns an OpenDR category structure with the inference results. -### Function *decode_category_face_recognition()* +### Function *decodeCategoryFaceRecognition()* ```C -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); +void decodeCategoryFaceRecognition(FaceRecognitionModelT *model, OpendrCategoryTargetT category, char *personName); ``` Returns the name of a recognized person by decoding the category id into a string (this function uses the information from the built person database). -### Function *build_database_face_recognition()* +### Function *buildDatabaseFaceRecognition()* ```C -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model); +void buildDatabaseFaceRecognition(const char *databaseFolder, const char *outputPath, FaceRecognitionModelT *model); ``` Build a face recognition database (containing images for persons to be recognized). -This function expects the *database_folder* to have the same format as the main Python toolkit. 
-The function calculates the features of the person that are contained in the database and it stores it into a binary file that can be then loaded to perform inference (*output_path*). 
-A loaded face recongition model should be provided (*model*), since this model will be used for the feature extraction process. +This function expects the (*databaseFolder*) to have the same format as the main Python toolkit. +The function calculates the features of the person that are contained in the database and it stores it into a binary file that can be then loaded to perform inference (*outputPath*). +A loaded face recognition model should be provided (*model*), since this model will be used for the feature extraction process. -### Function *load_database_face_recognition()* +### Function *loadDatabaseFaceRecognition()* ```C -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); +void loadDatabaseFaceRecognition(const char *databasePath, FaceRecognitionModelT *model); ``` -Loads an already built database (*database_path) into a face recognition model (*model*). +Loads an already built database (*databasePath*) into a face recognition model (*model*). After this step, the model can be used for performing inference. diff --git a/docs/reference/c-object-detection-2d-nanodet-jit-h.md b/docs/reference/c-object-detection-2d-nanodet-jit-h.md new file mode 100644 index 0000000000..d875834c3d --- /dev/null +++ b/docs/reference/c-object-detection-2d-nanodet-jit-h.md @@ -0,0 +1,47 @@ +## C_API: object_detection_2d_nanodet_jit.h + + +The *object_detection_2d_nanodet_jit.h* header provides function definitions that can be used for accessing the OpenDR object detection 2D Nanodet tool. + +### Struct *NanodetModelT* +```C + +struct NanodetModel { + ... +}; +typedef struct NanodetModel NanodetModelT; +``` +The *NanodetModelT* structure keeps all the necessary information that are required by the OpenDR object detection 2D Nanodet tool (e.g., model weights, normalization information, etc.). 
+ + +### Function *loadNanodetModel()* +```C +void loadNanodetModel(char *modelPath, char *device, int height, int width, float scoreThreshold, NanodetModelT *model); +``` +Loads a Nanodet object detection model saved in the local filesystem (*modelPath*) in OpenDR format. +This function also initializes a (*device*) JIT network for performing inference using this model. +The pre-trained models should follow the OpenDR conventions. +The Python API can be used to train and export an optimized OpenDR model that can be used for inference using the C API. + +### Function *freeNanodetModel()* +```C +void freeNanodetModel(NanodetModelT *model); +``` +Releases the memory allocated for an object detection 2D Nanodet model (*model*). + + +### Function *inferNanodet()* +```C +OpendrDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpendrImageT *image); +``` +This function performs inference using an object detection 2D Nanodet model (*model*) and an input image (*image*). +The function returns an OpenDR detection vector structure with the inference results. + + +### Function *drawBboxes()* +```C +void drawBboxes(OpendrImageT *image, NanodetModelT *model, OpendrDetectionVectorTargetT *detectionsVector); +``` +This function draws the given detections (*detectionsVector*) onto the input image (*image*) and then shows the image on screen. +The (*model*) keeps all the necessary information. + diff --git a/docs/reference/c-opendr-utils-h.md b/docs/reference/c-opendr-utils-h.md index 4e76a24258..baf3f4e19e 100644 --- a/docs/reference/c-opendr-utils-h.md +++ b/docs/reference/c-opendr-utils-h.md @@ -3,20 +3,67 @@ The *opendr_utils.h* header provides function definitions of OpenDR helpers (e.g., for creating OpenDR images). 
-### Function *load_image()* +### Function *jsonGetKeyString()* ```C -void load_image(const char *path, opendr_image_t *image); +const char* jsonGetKeyString(const char *json, const char *key, const int index); ``` -The *load_image()* function allows for reading an images from the local file system (*path*) into an OpenDR image data type. -A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. -This function allocates memory during each function call, so be sure to use the *free_image()* function to release the allocated resources, when the corresponding image is no longer needed. +The *jsonGetKeyString()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) as string. +If the value is an array it will return only the (*index*) value of the array. +If it fails it returns (""). +### Function *jsonGetKeyFloat()* +```C +float jsonGetKeyFloat(const char *json, const char *key, const int index); +``` +The *jsonGetKeyFloat()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) as float. +If the value is an array it will return only the (*index*) value of the array. +If it fails it returns (*0.0f*). + +### Function *jsonGetKeyFromInferenceParams()* +```C +float jsonGetKeyFromInferenceParams(const char *json, const char *key, const int index); +``` +The *jsonGetKeyFromInferenceParams()* function reads a JSON string from the pointer (*json*) and returns the value of a key with pointer (*key*) in inference_params section as float. +If the value is an array it will return only the (*index*) value of the array. +If it fails it returns (*0.0f*). +--- -### Function *free_image()* +### Function *loadImage()* ```C -void free_image(opendr_image_t *image); +void loadImage(const char *path, OpendrImageT *image); ``` -The *free_image()* function releases the memory allocated for an OpenDR image structure (*image*). 
-A pointer (*image*) to an OpenDR *opendr_image_t* should be provided. +The *loadImage()* function loads an image from the local file system (*path*) into an OpenDR image data type. +A pointer (*image*) to an OpenDR *OpendrImageT* should be provided. +This function allocates memory during each function call, so be sure to use the *freeImage()* function to release the allocated resources, when the corresponding image is no longer needed. + +### Function *freeImage()* +```C +void freeImage(OpendrImageT *image); +``` +The *freeImage()* function releases the memory allocated for an OpenDR image structure (*image*). +A pointer (*image*) to an OpenDR *OpendrImageT* should be provided. + +--- +### Function *initDetectionsVector()* +```C +void initDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); +``` +The *initDetectionsVector()* function initializes the data of an OpenDR detection vector structure (*detectionVector*) with zero values. +A pointer (*detectionVector*) to an OpenDR *OpendrDetectionVectorTargetT* should be provided. + +### Function *loadDetectionsVector()* +```C +void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, + int vectorSize); +``` +The *loadDetectionsVector()* function stores OpenDR detection target structures in the memory allocated for multiple OpenDR detections structures (*detection*). +A pointer (*detectionVector*) to an OpenDR *OpendrDetectionVectorTargetT* should be provided. + +### Function *freeDetectionsVector()* +```C +void freeDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); +``` +The *freeDetectionsVector()* function releases the memory allocated for an OpenDR detection vector structure (*detectionVector*). +A pointer (*detectionVector*) to an OpenDR *OpendrDetectionVectorTargetT* should be provided. 
diff --git a/docs/reference/c-target-h.md b/docs/reference/c-target-h.md index 6f748759e8..d8303cb58c 100644 --- a/docs/reference/c-target-h.md +++ b/docs/reference/c-target-h.md @@ -3,20 +3,20 @@ The *target.h* header provides definitions of OpenDR targets (inference outputs) that can be used in the C API of OpenDR. -### struct *opendr_category_target_t* +### struct *OpendrCategoryTargetT* ```C -struct opendr_category_target{ +struct OpendrCategoryTarget{ int data; float confidence; }; -typedef struct opendr_category_target opendr_category_target_t; +typedef struct OpendrCategoryTarget OpendrCategoryTargetT; ``` -The *opendr_category_target_t* structure provides a data structure for storing inference outputs of classification models. +The *OpendrCategoryTargetT* structure provides a data structure for storing inference outputs of classification models. Every function in the C API that outputs a classification decision is expected to use this structure. -The *opendr_category_target_t* structure has the following field: +The *OpendrCategoryTargetT* structure has the following fields: #### `int data` field @@ -25,3 +25,71 @@ A numerical id of the category to which the input objects belongs to. #### `float confidence` field The decision confidence (a value between 0 and 1). + + +### struct *OpendrDetectionTargetT* +```C +struct OpendrDetectionTarget { + int name; + float left; + float top; + float width; + float height; + float score; +}; +typedef struct OpendrDetectionTarget OpendrDetectionTargetT; +``` + + +The *OpendrDetectionTargetT* structure provides a data structure for storing inference outputs of detection models. +Every function in the C API that outputs a detection decision is expected to use this structure or a vector of this structure. + +The *OpendrDetectionTargetT* structure has the following fields: + +#### `int name` field + +A numerical id of the category to which the detected object belongs. 
+ +#### `float left` field + +A numerical value that corresponds to the X value of the top-left point of a detection. + +#### `float top` field + +A numerical value that corresponds to the Y value of the top-left point of a detection. + +#### `float width` field + +A numerical value that corresponds to the width of a detection. + +#### `float height` field + +A numerical value that corresponds to the height of a detection. + +#### `float score` field + +The decision score (a value between 0 and 1). + + +### struct *OpendrDetectionVectorTargetT* +```C +struct OpendrDetectionVectorTarget { + OpendrDetectionTargetT *startingPointer; + int size; +}; +typedef struct OpendrDetectionVectorTarget OpendrDetectionVectorTargetT; +``` + + +The *OpendrDetectionVectorTargetT* structure provides a data structure for storing multiple inference outputs of detection models. +Every function in the C API that outputs a detection decision is expected to use this or an *OpendrDetectionTargetT* structure. + +The *OpendrDetectionVectorTargetT* structure has the following fields: + +#### `OpendrDetectionTargetT *startingPointer` field + +A pointer to multiple OpenDR detection targets. + +#### `int size` field + +A numerical value that represents the number of OpenDR detection structures that are stored. diff --git a/docs/reference/nanodet.md b/docs/reference/nanodet.md deleted file mode 100644 index 765f210673..0000000000 --- a/docs/reference/nanodet.md +++ /dev/null @@ -1,289 +0,0 @@ -## nanodet module - -The *nanodet* module contains the *NanodetLearner* class, which inherits from the abstract class *Learner*. - -### Class NanodetLearner -Bases: `engine.learners.Learner` - -The *NanodetLearner* class is a wrapper of the Nanodet object detection algorithms based on the original -[Nanodet implementation](https://github.com/RangiLyu/nanodet). 
-It can be used to perform object detection on images (inference) and train All predefined Nanodet object detection models and new modular models from the user. - -The [NanodetLearner](../../src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py) class has the -following public methods: - -#### `NanodetLearner` constructor -```python -NanodetLearner(self, model_to_use, iters, lr, batch_size, checkpoint_after_iter, checkpoint_load_iter, temp_path, device, - weight_decay, warmup_steps, warmup_ratio, lr_schedule_T_max, lr_schedule_eta_min, grad_clip) -``` - -Constructor parameters: - -- **model_to_use**: *{"EfficientNet_Lite0_320", "EfficientNet_Lite1_416", "EfficientNet_Lite2_512", "RepVGG_A0_416", - "t", "g", "m", "m_416", "m_0.5x", "m_1.5x", "m_1.5x_416", "plus_m_320", "plus_m_1.5x_320", "plus_m_416", - "plus_m_1.5x_416", "custom"}, default=plus_m_1.5x_416*\ - Specifies the model to use and the config file that contains all hyperparameters for training, evaluation and inference as the original - [Nanodet implementation](https://github.com/RangiLyu/nanodet). If you want to overwrite some of the parameters you can - put them as parameters in the learner. -- **iters**: *int, default=None*\ - Specifies the number of epochs the training should run for. -- **lr**: *float, default=None*\ - Specifies the initial learning rate to be used during training. -- **batch_size**: *int, default=None*\ - Specifies number of images to be bundled up in a batch during training. - This heavily affects memory usage, adjust according to your system. -- **checkpoint_after_iter**: *int, default=None*\ - Specifies per how many training iterations a checkpoint should be saved. - If it is set to 0 no checkpoints will be saved. -- **checkpoint_load_iter**: *int, default=None*\ - Specifies which checkpoint should be loaded. - If it is set to 0, no checkpoints will be loaded. 
-- **temp_path**: *str, default=''*\ - Specifies a path where the algorithm looks for saving the checkpoints along with the logging files. If *''* the `cfg.save_dir` will be used instead. -- **device**: *{'cpu', 'cuda'}, default='cuda'*\ - Specifies the device to be used. -- **weight_decay**: *float, default=None*\ -- **warmup_steps**: *int, default=None*\ -- **warmup_ratio**: *float, default=None*\ -- **lr_schedule_T_max**: *int, default=None*\ -- **lr_schedule_eta_min**: *float, default=None*\ -- **grad_clip**: *int, default=None*\ - -#### `NanodetLearner.fit` -```python -NanodetLearner.fit(self, dataset, val_dataset, logging_path, verbose, seed) -``` - -This method is used for training the algorithm on a train dataset and validating on a val dataset. - -Parameters: - -- **dataset**: *ExternalDataset*\ - Object that holds the training dataset. - Can be of type `ExternalDataset`. -- **val_dataset** : *ExternalDataset, default=None*\ - Object that holds the validation dataset. - Can be of type `ExternalDataset`. -- **logging_path** : *str, default=''*\ - Subdirectory in temp_path to save log files and TensorBoard. -- **verbose** : *bool, default=True*\ - Enables the maximum verbosity and the logger. -- **seed** : *int, default=123*\ - Seed for repeatability. - -#### `NanodetLearner.eval` -```python -NanodetLearner.eval(self, dataset, verbose) -``` - -This method is used to evaluate a trained model on an evaluation dataset. -Saves a txt logger file containing stats regarding evaluation. - -Parameters: - -- **dataset** : *ExternalDataset*\ - Object that holds the evaluation dataset. -- **verbose**: *bool, default=True*\ - Enables the maximum verbosity and logger. - -#### `NanodetLearner.infer` -```python -NanodetLearner.infer(self, input, thershold, verbose) -``` - -This method is used to perform object detection on an image. 
-Returns an `engine.target.BoundingBoxList` object, which contains bounding boxes that are described by the left-top corner and -its width and height, or returns an empty list if no detections were made of the image in input. - -Parameters: -- **input** : *Image*\ - Image type object to perform inference on it. - - **threshold**: *float, default=0.35*\ - Specifies the threshold for object detection inference. - An object is detected if the confidence of the output is higher than the specified threshold. -- **verbose**: *bool, default=True*\ - Enables the maximum verbosity and logger. - -#### `NanodetLearner.save` -```python -NanodetLearner.save(self, path, verbose) -``` - -This method is used to save a trained model with its metadata. -Provided with the path, it creates the "path" directory, if it does not already exist. -Inside this folder, the model is saved as *"nanodet_{model_name}.pth"* and a metadata file *"nanodet_{model_name}.json"*. -If the directory already exists, the *"nanodet_{model_name}.pth"* and *"nanodet_{model_name}.json"* files are overwritten. - -Parameters: - -- **path**: *str, default=None*\ - Path to save the model, if None it will be the `"temp_folder"` or the `"cfg.save_dir"` from learner. -- **verbose**: *bool, default=True*\ - Enables the maximum verbosity and logger. - -#### `NanodetLearner.load` -```python -NanodetLearner.load(self, path, verbose) -``` - -This method is used to load a previously saved model from its saved folder. -Loads the model from inside the directory of the path provided, using the metadata .json file included. - -Parameters: - -- **path**: *str, default=None*\ - Path of the model to be loaded. -- **verbose**: *bool, default=True*\ - Enables the maximum verbosity and logger. - -#### `NanodetLearner.download` -```python -NanodetLearner.download(self, path, mode, model, verbose, url) -``` - -Downloads data needed for the various functions of the learner, e.g., pretrained models as well as test data. 
- -Parameters: - -- **path**: *str, default=None*\ - Specifies the folder where data will be downloaded. If *None*, the *self.temp_path* directory is used instead. -- **mode**: *{'pretrained', 'images', 'test_data'}, default='pretrained'*\ - If *'pretrained'*, downloads a pretrained detector model from the *model_to_use* architecture which was chosen at learner initialization. - If *'images'*, downloads an image to perform inference on. If *'test_data'* downloads a dummy dataset for testing purposes. -- **verbose**: *bool, default=False*\ - Enables the maximum verbosity and logger. -- **url**: *str, default=OpenDR FTP URL*\ - URL of the FTP server. - - -#### Tutorials and Demos - -A tutorial on performing inference is available. -Furthermore, demos on performing [training](../../projects/perception/object_detection_2d/nanodet/train_demo.py), -[evaluation](../../projects/perception/object_detection_2d/nanodet/eval_demo.py) and -[inference](../../projects/perception/object_detection_2d/nanodet/inference_demo.py) are also available. - - - -#### Examples - -* **Training example using an `ExternalDataset`.** - - To train properly, the architecture weights must be downloaded in a predefined directory before fit is called, in this case the directory name is "predefined_examples". - Default architecture is *'plus-m-1.5x_416'*. - The training and evaluation dataset root should be present in the path provided, along with the annotation files. - The default COCO 2017 training data can be found [here](https://cocodataset.org/#download) (train, val, annotations). - All training parameters (optimizer, lr schedule, losses, model parameters etc.) can be changed in the model config file - in [config directori](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config). - You can find more informations in [config file detail](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config/config_file_detail.md). 
- For easier use, with NanodetLearner parameters user can overwrite the following parameters: - (iters, lr, batch_size, checkpoint_after_iter, checkpoint_load_iter, temp_path, device, weight_decay, warmup_steps, - warmup_ratio, lr_schedule_T_max, lr_schedule_eta_min, grad_clip) - - **Note** - - The Nanodet tool can be used with any PASCAL VOC or COCO like dataset. The only thing is needed is to provide the correct root and dataset type. - - If *'voc'* is choosed for *dataset* the directory must look like this: - - - root folder - - train - - Annotations - - image1.xml - - image2.xml - - ... - - JPEGImages - - image1.jpg - - image2.jpg - - ... - - val - - Annotations - - image1.xml - - image2.xml - - ... - - JPEGImages - - image1.jpg - - image2.jpg - - ... - - On the other hand if *'coco'* is choosed for *dataset* the directory must look like this: - - - root folder - - train2017 - - image1.jpg - - image2.jpg - - ... - - val2017 - - image1.jpg - - image2.jpg - - ... - - annotations - - instances_train2017.json - - instances_val2017.json - - You can change the default annotation and image directories in [dataset](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py) - - ```python - import argparse - - from opendr.engine.datasets import ExternalDataset - from opendr.perception.object_detection_2d import NanodetLearner - - - if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--dataset", help="Dataset to train on", type=str, default="coco", choices=["voc", "coco"]) - parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str) - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--batch-size", help="Batch size to use for training", type=int, default=6) - parser.add_argument("--lr", help="Learning rate to 
use for training", type=float, default=5e-4) - parser.add_argument("--checkpoint-freq", help="Frequency in-between checkpoint saving and evaluations", type=int, default=50) - parser.add_argument("--n-epochs", help="Number of total epochs", type=int, default=300) - parser.add_argument("--resume-from", help="Epoch to load checkpoint file and resume training from", type=int, default=0) - - args = parser.parse_args() - - if args.dataset == 'voc': - dataset = ExternalDataset(args.data_root, 'voc') - val_dataset = ExternalDataset(args.data_root, 'voc') - elif args.dataset == 'coco': - dataset = ExternalDataset(args.data_root, 'coco') - val_dataset = ExternalDataset(args.data_root, 'coco') - - nanodet = NanodetLearner(model_to_use=args.model, iters=args.n_epochs, lr=args.lr, batch_size=args.batch_size, - checkpoint_after_iter=args.checkpoint_freq, checkpoint_load_iter=args.resume_from, - device=args.device) - - nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet-{}/nanodet-{}.ckpt".format(args.model, args.model), verbose=True) - nanodet.fit(dataset, val_dataset) - nanodet.save() - ``` - -* **Inference and result drawing example on a test image.** - - This example shows how to perform inference on an image and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. - Moreover, inference can be used in all images in a folder, frames of a video or a webcam feedback with the provided *mode*. - In this example first is downloaded a pre-trained model as in training example and then an image to be inference. - With the same *path* parameter you can choose a folder or a video file to be used as inference. Last but not least, if 'webcam' is - used in *mode* the *camid* parameter of inference must be used to determine the webcam device in your machine. 
- - ```python - import argparse - from opendr.perception.object_detection_2d import NanodetLearner - - if __name__ == '__main__': - parser = argparse.ArgumentParser() - parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--model", help="Model that config file will be used", type=str) - args = parser.parse_args() - - nanodet = NanodetLearner(model_to_use=args.model, device=args.device) - - nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet-{}/nanodet-{}.ckpt".format(args.model, args.model), verbose=True) - nanodet.download("./predefined_examples", mode="images") - boxes = nanodet.infer(path="./predefined_examples/000000000036.jpg") - ``` \ No newline at end of file diff --git a/docs/reference/object-detection-2d-nanodet.md b/docs/reference/object-detection-2d-nanodet.md new file mode 100644 index 0000000000..009e1358d5 --- /dev/null +++ b/docs/reference/object-detection-2d-nanodet.md @@ -0,0 +1,410 @@ +## nanodet module + +The *nanodet* module contains the *NanodetLearner* class, which inherits from the abstract class *Learner*. + +### Class NanodetLearner +Bases: `engine.learners.Learner` + +The *NanodetLearner* class is a wrapper of the Nanodet object detection algorithms based on the original +[Nanodet implementation](https://github.com/RangiLyu/nanodet). +It can be used to perform object detection on images (inference) and train all predefined Nanodet object detection models and new modular models from the user. 
+ +The [NanodetLearner](../../src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py) class has the +following public methods: + +#### `NanodetLearner` constructor +```python +NanodetLearner(self, model_to_use, iters, lr, batch_size, checkpoint_after_iter, checkpoint_load_iter, temp_path, device, + weight_decay, warmup_steps, warmup_ratio, lr_schedule_T_max, lr_schedule_eta_min, grad_clip) +``` + +Constructor parameters: + +- **model_to_use**: *{"EfficientNet_Lite0_320", "EfficientNet_Lite1_416", "EfficientNet_Lite2_512", "RepVGG_A0_416", + "t", "g", "m", "m_416", "m_0.5x", "m_1.5x", "m_1.5x_416", "plus_m_320", "plus_m_1.5x_320", "plus_m_416", + "plus_m_1.5x_416", "custom"}, default=m*\ + Specifies the model to use and the config file that contains all hyperparameters for training, evaluation and inference as the original + [Nanodet implementation](https://github.com/RangiLyu/nanodet). If you want to overwrite some of the parameters you can + put them as parameters in the learner. +- **iters**: *int, default=None*\ + Specifies the number of epochs the training should run for. +- **lr**: *float, default=None*\ + Specifies the initial learning rate to be used during training. +- **batch_size**: *int, default=None*\ + Specifies number of images to be bundled up in a batch during training. + This heavily affects memory usage, adjust according to your system. +- **checkpoint_after_iter**: *int, default=None*\ + Specifies per how many training iterations a checkpoint should be saved. + If it is set to 0 no checkpoints will be saved. +- **checkpoint_load_iter**: *int, default=None*\ + Specifies which checkpoint should be loaded. + If it is set to 0, no checkpoints will be loaded. +- **temp_path**: *str, default=''*\ + Specifies a path where the algorithm looks for saving the checkpoints along with the logging files. If *''* the `cfg.save_dir` will be used instead. +- **device**: *{'cpu', 'cuda'}, default='cuda'*\ + Specifies the device to be used. 
+- **weight_decay**: *float, default=None*\ +- **warmup_steps**: *int, default=None*\ +- **warmup_ratio**: *float, default=None*\ +- **lr_schedule_T_max**: *int, default=None*\ +- **lr_schedule_eta_min**: *float, default=None*\ +- **grad_clip**: *int, default=None*\ + +#### `NanodetLearner.fit` +```python +NanodetLearner.fit(self, dataset, val_dataset, logging_path, verbose, logging, seed, local_rank) +``` + +This method is used for training the algorithm on a train dataset and validating on a val dataset. + +Parameters: + +- **dataset**: *object*\ + Object that holds the training dataset. + Can be of type `ExternalDataset` or `XMLBasedDataset`. +- **val_dataset** : *object, default=None*\ + Object that holds the validation dataset. + Can be of type `ExternalDataset` or `XMLBasedDataset`. +- **logging_path** : *str, default=''*\ + Subdirectory in temp_path to save log files and TensorBoard. +- **verbose** : *bool, default=True*\ + Enables verbosity. +- **logging** : *bool, default=False*\ + Enables the maximum verbosity and the logger. +- **seed** : *int, default=123*\ + Seed for repeatability. +- **local_rank** : *int, default=1*\ + Needed if training on multiple machines. + +#### `NanodetLearner.eval` +```python +NanodetLearner.eval(self, dataset, verbose, logging, local_rank) +``` + +This method is used to evaluate a trained model on an evaluation dataset. +Saves a txt logger file containing stats regarding evaluation. + +Parameters: + +- **dataset** : *object*\ + Object that holds the evaluation dataset. + Can be of type `ExternalDataset` or `XMLBasedDataset`. +- **verbose**: *bool, default=True*\ + Enables verbosity. +- **logging**: *bool, default=False*\ + Enables the maximum verbosity and logger. +- **local_rank** : *int, default=1*\ + Needed if evaluating on multiple machines. + +#### `NanodetLearner.infer` +```python +NanodetLearner.infer(self, input, threshold, nms_max_num) +``` + +This method is used to perform object detection on an image. 
+Returns an `engine.target.BoundingBoxList` object, which contains bounding boxes that are described by the top-left corner and +their width and height, or returns an empty list if no detections were made on the input image. + +Parameters: +- **input** : *object*\ + Object of type engine.data.Image. + Image type object to perform inference on. +- **threshold**: *float, default=0.35*\ + Specifies the threshold for object detection inference. + An object is detected if the confidence of the output is higher than the specified threshold. +- **nms_max_num**: *int, default=100*\ + Determines the maximum number of bounding boxes that will be retained following the nms. + +#### `NanodetLearner.optimize` +```python +NanodetLearner.optimize(self, export_path, verbose, optimization, nms_max_num) +``` + +This method is used to perform JIT or ONNX optimizations and save a trained model with its metadata. +If a model is not present in the location specified by *export_path*, the optimizer will save it there. +If a model is already present, it will load it instead. +Inside this folder, the model is saved as *nanodet_{model_name}.pth* for JIT models or *nanodet_{model_name}.onnx* for ONNX and a metadata file *nanodet_{model_name}.json*. + +Note: In ONNX optimization, the output model executes the original model's feed forward method. +The user must create their own pre- and post-processes in order to use the ONNX model in the C API. +In JIT optimization the output model performs the feed forward pass and post-processing. +To use the C API, it is recommended to use JIT optimization as shown in the [example of OpenDR's C API](../../projects/c_api/samples/object_detection/nanodet/nanodet_jit_demo.c). + +Parameters: + +- **export_path**: *str*\ + Path to save or load the optimized model. +- **verbose**: *bool, default=True*\ + Enables the maximum verbosity. 
+- **optimization**: *str, default="jit"*\ + It determines what kind of optimization is used, possible values are *jit* or *onnx*. +- **nms_max_num**: *int, default=100*\ + Determines the maximum number of bounding boxes that will be retained following the nms. + +#### `NanodetLearner.save` +```python +NanodetLearner.save(self, path, verbose) +``` + +This method is used to save a trained model with its metadata. +Provided with the path, it creates the *path* directory, if it does not already exist. +Inside this folder, the model is saved as *nanodet_{model_name}.pth* and a metadata file *nanodet_{model_name}.json*. +If the directory already exists, the *nanodet_{model_name}.pth* and *nanodet_{model_name}.json* files are overwritten. +If optimization is performed, the optimized model is saved instead. + +Parameters: + +- **path**: *str, default=None*\ + Path to save the model, if None it will be `"temp_folder"` or `"cfg.save_dir"` from the learner. +- **verbose**: *bool, default=True*\ + Enables the maximum verbosity and logger. + +#### `NanodetLearner.load` +```python +NanodetLearner.load(self, path, verbose) +``` + +This method is used to load a previously saved model from its saved folder. +Loads the model from inside the directory of the path provided, using the metadata .json file included. +If optimization is performed, the optimized model is loaded instead. + +Parameters: + +- **path**: *str, default=None*\ + Path of the model to be loaded. +- **verbose**: *bool, default=True*\ + Enables the maximum verbosity. + +#### `NanodetLearner.download` +```python +NanodetLearner.download(self, path, mode, model, verbose, url) +``` + +Downloads data needed for the various functions of the learner, e.g., pretrained models as well as test data. + +Parameters: + +- **path**: *str, default=None*\ + Specifies the folder where data will be downloaded. If *None*, the *self.temp_path* directory is used instead. 
+- **mode**: *{'pretrained', 'images', 'test_data'}, default='pretrained'*\ + If *'pretrained'*, downloads a pretrained detector model from the *model_to_use* architecture which was chosen at learner initialization. + If *'images'*, downloads an image to perform inference on. If *'test_data'* downloads a dummy dataset for testing purposes. +- **verbose**: *bool, default=True*\ + Enables the maximum verbosity. +- **url**: *str, default=OpenDR FTP URL*\ + URL of the FTP server. + + +#### Tutorials and Demos + +A Jupyter notebook tutorial on performing inference is [available](../../projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb). +Furthermore, demos on performing [training](../../projects/python/perception/object_detection_2d/nanodet/train_demo.py), +[evaluation](../../projects/python/perception/object_detection_2d/nanodet/eval_demo.py) and +[inference](../../projects/python/perception/object_detection_2d/nanodet/inference_demo.py) are also available. + + + +#### Examples + +* **Training example using an `ExternalDataset`** + + To train properly, the architecture weights must be downloaded in a predefined directory before fit is called, in this case the directory name is "predefined_examples". + Default architecture is *'m'*. + The training and evaluation dataset root should be present in the path provided, along with the annotation files. + The default COCO 2017 training data can be found [here](https://cocodataset.org/#download) (train, val, annotations). + All training parameters (optimizer, lr schedule, losses, model parameters etc.) can be changed in the model config file + in [config directory](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config). + You can find more information in [corresponding documentation](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/config/config_file_detail.md). 
+ For easier usage of the NanodetLearner, you can overwrite the following parameters: + (iters, lr, batch_size, checkpoint_after_iter, checkpoint_load_iter, temp_path, device, weight_decay, warmup_steps, + warmup_ratio, lr_schedule_T_max, lr_schedule_eta_min, grad_clip) + + **Note** + + The Nanodet tool can be used with any PASCAL VOC- or COCO-like dataset, by providing the correct root and dataset type. + + If *'voc'* is chosen for *dataset*, the directory must look like this: + + - root folder + - train + - Annotations + - image1.xml + - image2.xml + - ... + - JPEGImages + - image1.jpg + - image2.jpg + - ... + - val + - Annotations + - image1.xml + - image2.xml + - ... + - JPEGImages + - image1.jpg + - image2.jpg + - ... + + On the other hand, if *'coco'* is chosen for *dataset*, the directory must look like this: + + - root folder + - train2017 + - image1.jpg + - image2.jpg + - ... + - val2017 + - image1.jpg + - image2.jpg + - ... + - annotations + - instances_train2017.json + - instances_val2017.json + + You can change the default annotation and image directories in [the *build_dataset* function](../../src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py). + This example assumes the data has been downloaded and placed in the directory referenced by `data_root`. 
+ ```python + from opendr.engine.datasets import ExternalDataset + from opendr.perception.object_detection_2d import NanodetLearner + + + if __name__ == '__main__': + dataset = ExternalDataset(data_root, 'voc') + val_dataset = ExternalDataset(data_root, 'voc') + + nanodet = NanodetLearner(model_to_use='m', iters=300, lr=5e-4, batch_size=8, + checkpoint_after_iter=50, checkpoint_load_iter=0, + device="cpu") + + nanodet.download("./predefined_examples", mode="pretrained") + nanodet.load("./predefined_examples/nanodet_m", verbose=True) + nanodet.fit(dataset, val_dataset) + nanodet.save() + + ``` + +* **Inference and result drawing example on a test image** + + This example shows how to perform inference on an image and draw the resulting bounding boxes using a nanodet model that is pretrained on the COCO dataset. + In this example, a pre-trained model is downloaded and inference is performed on an image that can be specified with the *path* parameter. + + ```python + from opendr.perception.object_detection_2d import NanodetLearner + from opendr.engine.data import Image + from opendr.perception.object_detection_2d import draw_bounding_boxes + + if __name__ == '__main__': + nanodet = NanodetLearner(model_to_use='m', device="cpu") + nanodet.download("./predefined_examples", mode="pretrained") + nanodet.load("./predefined_examples/nanodet_m", verbose=True) + nanodet.download("./predefined_examples", mode="images") + img = Image.open("./predefined_examples/000000000036.jpg") + boxes = nanodet.infer(input=img) + + draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) + ``` + +* **Optimization framework with Inference and result drawing example on a test image** + + This example shows how to perform optimization on a pretrained model, then run inference on an image and finally draw the resulting bounding boxes, using a nanodet model that is pretrained on the COCO dataset. 
+ In this example we use ONNX optimization, but JIT can also be used by changing *optimization* to *jit*. + The optimized model will be saved in the `./onnx/nanodet_m/` folder. + ```python + from opendr.engine.data import Image + from opendr.perception.object_detection_2d import NanodetLearner, draw_bounding_boxes + + + if __name__ == '__main__': + nanodet = NanodetLearner(model_to_use='m', device="cpu") + nanodet.load("./predefined_examples/nanodet_m", verbose=True) + + # First read an OpenDR image from your dataset and run the optimizer: + img = Image.open("./predefined_examples/000000000036.jpg") + nanodet.optimize("./onnx/nanodet_m/", optimization="onnx") + + boxes = nanodet.infer(input=img) + + draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) + ``` + + +#### Performance Evaluation + +In terms of speed, the performance of Nanodet is summarized in the tables below (in FPS). +The speed is measured from the start of the forward pass until the end of post-processing. 
+ +For PyTorch inference: + +| Method {input} | RTX 2070 | TX2 | NX | +|-----------------------------|----------|-------|-------| +| Efficient Lite0 {320} | 48.63 | 9.38 | 14.48 | +| Efficient Lite1 {416} | 43.88 | 7.93 | 11.07 | +| Efficient Lite2 {512} | 40.51 | 6.44 | 8.84 | +| RepVGG A0 {416} | 33.4 | 9.21 | 12.3 | +| Nanodet-g {416} | 51.32 | 9.57 | 15.75 | +| Nanodet-m {320} | 48.36 | 8.56 | 14.08 | +| Nanodet-m 0.5x {320} | 46.94 | 7.97 | 12.84 | +| Nanodet-m 1.5x {320} | 47.41 | 8.8 | 13.98 | +| Nanodet-m {416} | 47.3 | 8.34 | 13.15 | +| Nanodet-m 1.5x {416} | 45.62 | 8.43 | 13.2 | +| Nanodet-plus m {320} | 41.9 | 7.45 | 12.01 | +| Nanodet-plus m 1.5x {320} | 39.63 | 7.66 | 12.21 | +| Nanodet-plus m {416} | 40.16 | 7.24 | 11.58 | +| Nanodet-plus m 1.5x {416} | 38.94 | 7.37 | 11.52 | + +For JIT optimization inference: + +| Method {input} | RTX 2070 | TX2 | NX | +|-----------------------------|----------|-------|-------| +| Efficient Lite0 {320} | 69.06 | 12.94 | 17.78 | +| Efficient Lite1 {416} | 62.94 | 9.27 | 12.94 | +| Efficient Lite2 {512} | 65.46 | 7.46 | 10.32 | +| RepVGG A0 {416} | 41.44 | 11.16 | 14.89 | +| Nanodet-g {416} | 76.3 | 12.94 | 20.52 | +| Nanodet-m {320} | 75.66 | 12.22 | 20.67 | +| Nanodet-m 0.5x {320} | 65.71 | 11.31 | 17.68 | +| Nanodet-m 1.5x {320} | 66.23 | 12.46 | 19.99 | +| Nanodet-m {416} | 79.91 | 12.08 | 19.28 | +| Nanodet-m 1.5x {416} | 69.44 | 12.3 | 18.6 | +| Nanodet-plus m {320} | 67.82 | 11.19 | 18.85 | +| Nanodet-plus m 1.5x {320} | 64.12 | 11.57 | 18.26 | +| Nanodet-plus m {416} | 64.74 | 11.22 | 17.57 | +| Nanodet-plus m 1.5x {416} | 56.77 | 10.39 | 14.81 | + +For ONNX optimization inference: + +| Method {input} | RTX 2070 | +|-----------------------------|-----------| +| Efficient Lite0 {320} | 33.12 | +| Efficient Lite1 {416} | 16.78 | +| Efficient Lite2 {512} | 10.35 | +| RepVGG A0 {416} | 27.89 | +| Nanodet-g {416} | 103.22 | +| Nanodet-m {320} | 98.73 | +| Nanodet-m 0.5x {320} | 144.46 | +| Nanodet-m 1.5x {320} | 
75.82 | +| Nanodet-m {416} | 73.09 | +| Nanodet-m 1.5x {416} | 51.30 | +| Nanodet-plus m {320} | 51.39 | +| Nanodet-plus m 1.5x {320} | 39.65 | +| Nanodet-plus m {416} | 39.17 | +| Nanodet-plus m 1.5x {416} | 28.55 | + +Finally, we measure the performance on the COCO dataset, using the corresponding metrics: + +| Method {input} | coco2017 mAP | +|-----------------------------|--------------| +| Efficient Lite0 {320} | 24.4 | +| Efficient Lite1 {416} | 29.2 | +| Efficient Lite2 {512} | 32.4 | +| RepVGG A0 {416} | 25.5 | +| Nanodet-g {416} | 22.7 | +| Nanodet-m {320} | 20.2 | +| Nanodet-m 0.5x {320} | 13.1 | +| Nanodet-m 1.5x {320} | 23.1 | +| Nanodet-m {416} | 23.5 | +| Nanodet-m 1.5x {416} | 26.6 | +| Nanodet-plus m {320} | 27.0 | +| Nanodet-plus m 1.5x {320} | 29.9 | +| Nanodet-plus m {416} | 30.3 | +| Nanodet-plus m 1.5x {416} | 34.1 | + \ No newline at end of file diff --git a/include/data.h b/include/data.h index 274067ab54..f366b8f20e 100644 --- a/include/data.h +++ b/include/data.h @@ -24,10 +24,10 @@ extern "C" { /*** * OpenDR data type for representing images */ -struct opendr_image { +struct OpendrImage { void *data; }; -typedef struct opendr_image opendr_image_t; +typedef struct OpendrImage OpendrImageT; #ifdef __cplusplus } diff --git a/include/face_recognition.h b/include/face_recognition.h index 3af615219b..43eed4afe4 100644 --- a/include/face_recognition.h +++ b/include/face_recognition.h @@ -24,84 +24,84 @@ extern "C" { #endif -struct face_recognition_model { +struct FaceRecognitionModel { // ONNX session objects - void *onnx_session; + void *onnxSession; void *env; - void *session_options; + void *sessionOptions; // Sizes for resizing and cropping an input image - int model_size; - int resize_size; + int modelSize; + int resizeSize; // Statistics for normalization - float mean_value; - float std_value; + float meanValue; + float stdValue; // Recognition threshold float threshold; // Feature dimension - int output_size; + int outputSize; // Database 
data void *database; - int *database_ids; - char **person_names; + int *databaseIds; + char **personNames; // Number of persons in the database - int n_persons; + int nPersons; // Number of features vectors in the database - int n_features; + int nFeatures; }; -typedef struct face_recognition_model face_recognition_model_t; +typedef struct FaceRecognitionModel FaceRecognitionModelT; /** - * Loads a face recognition model saved in OpenDR format - * @param model_path path to the OpenDR face recongition model (as exported using OpenDR library) + * Loads a face recognition model saved in OpenDR format. + * @param modelPath path to the OpenDR face recognition model (as exported using OpenDR library) * @param model the loaded model */ -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model); +void loadFaceRecognitionModel(const char *modelPath, FaceRecognitionModelT *model); /** - * This function perform inference using a face recognition model and an input image + * This function performs inference using a face recognition model and an input image. * @param model face recognition model to be used for inference * @param image OpenDR image * @return OpenDR classification target containing the id of the recognized person */ -opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, opendr_image_t *image); +OpendrCategoryTargetT inferFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image); /** * Builds a face recognition database (containing images for persons to be recognized). This function expects the - * database_folder to have the same format as the main Python toolkit. - * @param database_folder folder containing the database - * @param output_path output path to store the binary database. This file should be loaded along with the face + * databaseFolder to have the same format as the main Python toolkit.
+ * @param databaseFolder folder containing the database + * @param outputPath output path to store the binary database. This file should be loaded along with the face * recognition model before performing inference. * @param model the face recognition model to be used for extracting the database features */ -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model); +void buildDatabaseFaceRecognition(const char *databaseFolder, const char *outputPath, FaceRecognitionModelT *model); /** * Loads an already built database into the face recognition model. After this step, the model can be used for * performing inference. - * @param database_path path to the database file + * @param databasePath path to the database file * @param model the face recognition model to be used for inference */ -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model); +void loadDatabaseFaceRecognition(const char *databasePath, FaceRecognitionModelT *model); /** - * Returns the name of a recognition person by decoding the category id into a string + * Returns the name of a recognized person by decoding the category id into a string. * @param model the face recognition model to be used for inference * @param category the predicted category - * @param person_name buffer to store the person name + * @param personName buffer to store the person name */ -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name); +void decodeCategoryFaceRecognition(FaceRecognitionModelT *model, OpendrCategoryTargetT category, char *personName); /** - * Releases the memory allocated for a face recognition model + * Releases the memory allocated for a face recognition model.
* @param model model to be de-allocated */ -void free_face_recognition_model(face_recognition_model_t *model); +void freeFaceRecognitionModel(FaceRecognitionModelT *model); #ifdef __cplusplus } diff --git a/include/object_detection_2d_nanodet_jit.h b/include/object_detection_2d_nanodet_jit.h new file mode 100644 index 0000000000..72a0288659 --- /dev/null +++ b/include/object_detection_2d_nanodet_jit.h @@ -0,0 +1,84 @@ +/* + * Copyright 2020-2023 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef C_API_NANODET_H +#define C_API_NANODET_H + +#include "opendr_utils.h" +#include "target.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct NanodetModel { + // Jit cpp class holder + void *network; + + // Device to be used + char *device; + int **colorList; + int numberOfClasses; + + // Recognition threshold + float scoreThreshold; + + // Model input size + int inputSizes[2]; + + // Keep ratio flag + int keepRatio; +}; +typedef struct NanodetModel NanodetModelT; + +/** + * Loads a nanodet object detection model saved in libtorch format. 
+ * @param modelPath path to the libtorch nanodet model (as exported using OpenDR) + * @param device the device that will be used for inference + * @param height the height of model input + * @param width the width of model input + * @param scoreThreshold confidence threshold + * @param model the model to be loaded + */ +void loadNanodetModel(char *modelPath, char *device, int height, int width, float scoreThreshold, NanodetModelT *model); + +/** + * This function performs inference using a nanodet object detection model and an input image. + * @param model nanodet model to be used for inference + * @param image OpenDR image + * @return OpenDR detection vector target containing the detections of the recognized objects + */ +OpendrDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpendrImageT *image); + +/** + * Releases the memory allocated for a nanodet object detection model. + * @param model model to be de-allocated + */ +void freeNanodetModel(NanodetModelT *model); + +/** + * Draw the bounding boxes from detections in the given image. + * @param image image that has been used for inference + * @param model nanodet model that has been used for inference + * @param detectionsVector output of the inference + */ +void drawBboxes(OpendrImageT *image, NanodetModelT *model, OpendrDetectionVectorTargetT *detectionsVector); + +#ifdef __cplusplus +} +#endif + +#endif // C_API_NANODET_H diff --git a/include/opendr_utils.h b/include/opendr_utils.h index 1df1dc8af5..3b07d48868 100644 --- a/include/opendr_utils.h +++ b/include/opendr_utils.h @@ -18,23 +18,71 @@ #define C_API_OPENDR_UTILS_H #include "data.h" +#include "target.h" #ifdef __cplusplus extern "C" { #endif /** - * Reads an image from path and saves it into OpenDR an image structure + * JSON parser for OpenDR model files. 
+ * @param json a string of json file + * @param key the value to extract from json file + * @param index the index to choose the value if it is an array, otherwise it is not used + * @return string with the value of key + */ +const char *jsonGetKeyString(const char *json, const char *key, const int index); + +/** + * JSON parser for OpenDR model files. + * @param json a string of json file + * @param key the value to extract from json file + * @param index the index to choose the value if it is an array, otherwise it is not used + * @return float with the value of key + */ +float jsonGetKeyFloat(const char *json, const char *key, const int index); + +/** + * JSON parser for OpenDR model files from inference_params key. + * @param json a string of json file + * @param key the value to extract from inference_params + * @param index the index to choose the value if it is an array, otherwise it is not used + * @return float with the value of key + */ +float jsonGetKeyFromInferenceParams(const char *json, const char *key, const int index); + +/** + * Reads an image from path and saves it into OpenDR image structure. * @param path path from which the image will be read * @param image OpenDR image data structure to store the image */ -void load_image(const char *path, opendr_image_t *image); +void loadImage(const char *path, OpendrImageT *image); /** * Releases the memory allocated for an OpenDR image structure * @param image OpenDR image structure to release */ -void free_image(opendr_image_t *image); +void freeImage(OpendrImageT *image); + +/** + * Initialize an empty detection list. + * @param detectionVector OpenDR OpendrDetectionVectorTarget structure to be initialized + */ +void initDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); + +/** + * Loads an OpenDR detection target list. 
+ * @param detectionVector OpenDR OpendrDetectionVectorTarget structure to be loaded + * @param detection the pointer of the first OpenDR detection target in a vector + * @param vectorSize the size of the vector + */ +void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, int vectorSize); + +/** + * Releases the memory allocated for a detection list structure + * @param detectionVector OpenDR detection vector target structure to release + */ +void freeDetectionsVector(OpendrDetectionVectorTargetT *detectionVector); #ifdef __cplusplus } diff --git a/include/target.h b/include/target.h index c93888606a..8313e14565 100644 --- a/include/target.h +++ b/include/target.h @@ -23,11 +23,33 @@ extern "C" { /*** * OpenDR data type for representing classification targets */ -struct opendr_category_target { +struct OpendrCategoryTarget { int data; float confidence; }; -typedef struct opendr_category_target opendr_category_target_t; +typedef struct OpendrCategoryTarget OpendrCategoryTargetT; + +/*** + * OpenDR data type for representing detection targets + */ +struct OpendrDetectionTarget { + int name; + float left; + float top; + float width; + float height; + float score; +}; +typedef struct OpendrDetectionTarget OpendrDetectionTargetT; + +/*** + * OpenDR data type for representing a structure of detection targets + */ +struct OpendrDetectionVectorTarget { + OpendrDetectionTargetT *startingPointer; + int size; +}; +typedef struct OpendrDetectionVectorTarget OpendrDetectionVectorTargetT; #ifdef __cplusplus } diff --git a/projects/c_api/Makefile b/projects/c_api/Makefile index 3b1b567086..771c6fe63b 100644 --- a/projects/c_api/Makefile +++ b/projects/c_api/Makefile @@ -33,7 +33,10 @@ OPENDR_LD = -L$(OPENDR_HOME)/lib -lopendr all: download demos -demos: $(BUILD_DIR)/face_recognition_demo +demos: face_recognition nanodet + +face_recognition: $(BUILD_DIR)/face_recognition_demo +nanodet: 
$(BUILD_DIR)/object_detection_2d/nanodet_jit_demo download: @+if [ -a $(DATA_DIR) ] ; \ @@ -47,6 +50,11 @@ download: $(MV) opendrdata.csd.auth.gr/perception/face_recognition/test_data/images $(DATA_DIR)/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/optimized_model/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/optimized_model $(DATA_DIR)/optimized_model; \ + $(MKDIR_P) $(DATA_DIR)/object_detection_2d/nanodet; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/object_detection_2d/nanodet/database; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/object_detection_2d/nanodet/optimized_model; \ $(RM) -r opendrdata.csd.auth.gr; \ fi; @@ -55,8 +63,13 @@ $(BUILD_DIR)/face_recognition_demo: @+echo "Building face recognition demo..." $(CC) $(CFLAGS) -o $(BUILD_DIR)/face_recognition_demo samples/face_recognition/face_recognition_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) +$(BUILD_DIR)/object_detection_2d/nanodet_jit_demo: + $(MKDIR_P) $(BUILD_DIR)/object_detection_2d + @+echo "Building nanodet object detection demo..." + $(CC) $(CFLAGS) -o $(BUILD_DIR)/object_detection_2d/nanodet_jit_demo samples/object_detection_2d/nanodet/nanodet_jit_demo.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + clean: @+echo "Cleaning C API demo binaries and temporary files..." - @+$(RM) $(BUILD_DIR)/* + @+$(RM) -rf $(BUILD_DIR)/* @+$(RM) -rf $(DATA_DIR) @+echo "Done!" 
diff --git a/projects/c_api/README.md b/projects/c_api/README.md index 62dd65bea7..3f289ccf75 100644 --- a/projects/c_api/README.md +++ b/projects/c_api/README.md @@ -10,4 +10,5 @@ Make sure that you have downloaded the necessary resources before running the de ## Supported tools Currently, the following tools are exposing a C API: 1. Face recognition +2. Object detection 2D Nanodet diff --git a/projects/c_api/samples/face_recognition/face_recognition_demo.c b/projects/c_api/samples/face_recognition/face_recognition_demo.c index d6adf1488c..c19f11ff60 100644 --- a/projects/c_api/samples/face_recognition/face_recognition_demo.c +++ b/projects/c_api/samples/face_recognition/face_recognition_demo.c @@ -22,32 +22,32 @@ int main(int argc, char *argv[]) { // Create a face recognition model - face_recognition_model_t model; + FaceRecognitionModelT model; // Load a pretrained model (see instructions for downloading the data) - load_face_recognition_model("data/optimized_model", &model); + loadFaceRecognitionModel("data/optimized_model", &model); // Build and load the database - build_database_face_recognition("data/database", "data/database.dat", &model); - load_database_face_recognition("data/database.dat", &model); + buildDatabaseFaceRecognition("data/database", "data/database.dat", &model); + loadDatabaseFaceRecognition("data/database.dat", &model); // Load an image and performance inference - opendr_image_t image; - load_image("data/database/1/1.jpg", &image); + OpendrImageT image; + loadImage("data/database/1/1.jpg", &image); if (!image.data) { printf("Image not found!"); return 1; } - opendr_category_target_t res = infer_face_recognition(&model, &image); + OpendrCategoryTargetT res = inferFaceRecognition(&model, &image); // Free the image resources - free_image(&image); + freeImage(&image); // Get the prediction and decode it char buff[512]; - decode_category_face_recognition(&model, res, buff); + decodeCategoryFaceRecognition(&model, res, buff); printf("Predicted 
category %d (folder name: %s) with confidence %f\n", res.data, buff, res.confidence); // Free the model resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); return 0; } diff --git a/projects/c_api/samples/object_detection_2d/nanodet/README.md b/projects/c_api/samples/object_detection_2d/nanodet/README.md new file mode 100644 index 0000000000..cf27c62956 --- /dev/null +++ b/projects/c_api/samples/object_detection_2d/nanodet/README.md @@ -0,0 +1,15 @@ +# OpenDR C API Nanodet Demo + +C API implementation of nanodet models for inference. +To run the demo, the downloaded model can be used or it can be exported with JIT optimization from the python implementation, see [Nanodet optimization](../../../../../docs/reference/nanodet.md#nanodetlearneroptimize). + +After installation, the demo can be run from projects/c_api directory with: +```sh +./build/object_detection_2d/nanodet_jit_demo ./path/to/your/model.pth device_name{cpu, cuda} ./path/to/your/image.jpg height width +``` + +Or with the downloaded model and image with: + +```sh +./build/object_detection_2d/nanodet_jit_demo ./data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth cuda ./data/object_detection_2d/nanodet/database/000000000036.jpg 320 320 +``` diff --git a/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c new file mode 100644 index 0000000000..e384a6224f --- /dev/null +++ b/projects/c_api/samples/object_detection_2d/nanodet/nanodet_jit_demo.c @@ -0,0 +1,62 @@ +/* + * Copyright 2020-2023 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <stdio.h> +#include <stdlib.h> +#include "object_detection_2d_nanodet_jit.h" +#include "opendr_utils.h" + +int main(int argc, char **argv) { + if (argc != 6) { + fprintf(stderr, + "usage: %s [model_path] [device] [images_path] [input_sizes].\n" + "model_path = path/to/your/libtorch/model.pth \ndevice = cuda or cpu \n" + "images_path = \"xxx/xxx/*.jpg\" \ninput_size = width height.\n", + argv[0]); + return -1; + } + + NanodetModelT model; + + int height = atoi(argv[4]); + int width = atoi(argv[5]); + printf("start init model\n"); + loadNanodetModel(argv[1], argv[2], height, width, 0.35, &model); + printf("success\n"); + + OpendrImageT image; + + loadImage(argv[3], &image); + if (!image.data) { + printf("Image not found!"); + return 1; + } + + // Initialize opendr detection target list; + OpendrDetectionVectorTargetT results; + initDetectionsVector(&results); + + results = inferNanodet(&model, &image); + + drawBboxes(&image, &model, &results); + + // Free the memory + freeDetectionsVector(&results); + freeImage(&image); + freeNanodetModel(&model); + + return 0; +} diff --git a/projects/python/perception/object_detection_2d/nanodet/README.md b/projects/python/perception/object_detection_2d/nanodet/README.md index 92c456c235..53bb4fc074 100644 --- a/projects/python/perception/object_detection_2d/nanodet/README.md +++ b/projects/python/perception/object_detection_2d/nanodet/README.md @@ -1,18 +1,28 @@ # NanoDet Demos -This folder contains minimal code usage examples that showcase the basic functionality of the NanodetLearner +This folder contains minimal
code usage examples that showcase the basic functionality of the NanodetLearner provided by OpenDR. Specifically the following examples are provided: 1. inference_demo.py: Perform inference on a single image in a directory. Setting `--device cpu` performs inference on CPU. -2. eval_demo.py: Perform evaluation on the `COCO dataset`, implemented in OpenDR format. The user must first download - the dataset and provide the path to the dataset root via `--data-root /path/to/coco_dataset`. - Setting `--device cpu` performs evaluation on CPU. + Setting the config file for the specific model is done with `--model "model name"`. + Inference will use optimization [ONNX or JIT] if specified in `--optimize onnx` or `--optimize jit`. + If optimization is used, first an optimized model will be exported and then inference will be performed. + + In ONNX it is recommended to install `onnxsim` dependencies with `pip install onnxsim` on OpenDR's virtual environment, for smaller and better optimized models. -3. train_demo.py: Fit learner to dataset. PASCAL VOC and COCO datasets are supported via `ExternalDataset` class. - Provided is an example of training on `COCO dataset`. The user must set the dataset type using the `--dataset` + If user is planning on using the C API, JIT optimization is preferred, so it can be used for the same postprocessing of the output + and have exactly the same detection as the python API. + +2. eval_demo.py: Perform evaluation on the `COCO dataset`, implemented in OpenDR format. The user must first download + the dataset and provide the path to the dataset root via `--data-root /path/to/coco_dataset`. + Setting `--device cpu` performs evaluation on CPU. + +3. train_demo.py: Fit learner to dataset. PASCAL VOC and COCO datasets are supported via the `ExternalDataset` class. + An example of training on the COCO dataset is provided. 
The user must set the dataset type using the `--dataset` argument and provide the dataset root path with the `--data-root` argument. Setting the config file for the specific - model is done with `--model "wanted model name"`. Setting `--device cpu` performs training on CPU. Additional command - line arguments can be set to overwrite various training hyperparameters from the provided config file, and running - `python3 train_demo.py -h` prints information about them on stdout. - + model is done with `--model "model name"`. Setting `--device cpu` performs training on CPU. Additional command + line arguments can be set to overwrite various training hyperparameters from the provided config file, run `python3 train_demo.py -h` to print information about them on stdout. + Example usage: - `python3 train_demo.py --model plus-m_416 --dataset coco --data-root /path/to/coco_dataset` \ No newline at end of file + `python3 train_demo.py --model m --dataset coco --data-root /path/to/coco_dataset` + +4. inference_tutorial.ipynb: A simple tutorial in jupyter for using the Nanodet tool for inference. 
\ No newline at end of file diff --git a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py index 21ff430b94..c2e049efe3 100644 --- a/projects/python/perception/object_detection_2d/nanodet/eval_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/eval_demo.py @@ -20,15 +20,16 @@ if __name__ == '__main__': parser = argparse.ArgumentParser() + parser.add_argument("--dataset", help="Dataset to evaluate on", type=str, default="coco", choices=["voc", "coco"]) parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str) + parser.add_argument("--model", help="Model for which a config file will be used", type=str, default="m") parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) args = parser.parse_args() - val_dataset = ExternalDataset(args.data_root, 'coco') + val_dataset = ExternalDataset(args.data_root, args.dataset) nanodet = NanodetLearner(model_to_use=args.model, device=args.device) - nanodet.download("./predefined_examples", mode="pretrained") - nanodet.load("./predefined_examples/nanodet-{}/nanodet-{}.ckpt".format(args.model, args.model), verbose=True) - nanodet.eval(val_dataset) + nanodet.download("./predefined_examples", mode="pretrained", verbose=False) + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=False) + nanodet.eval(val_dataset, verbose=False) diff --git a/projects/python/perception/object_detection_2d/nanodet/inference_demo.py b/projects/python/perception/object_detection_2d/nanodet/inference_demo.py index ae2b7b5748..351f00fd15 100644 --- a/projects/python/perception/object_detection_2d/nanodet/inference_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/inference_demo.py @@ -21,14 +21,23 @@ if __name__ == '__main__': parser = 
argparse.ArgumentParser() parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) - parser.add_argument("--model", help="Model that config file will be used", type=str, default='m') + parser.add_argument("--model", help="Model for which a config file will be used", type=str, default="m") + parser.add_argument("--path", help="Path to the image that is used for inference", type=str, + default="./predefined_examples/000000000036.jpg") + parser.add_argument("--optimize", help="If specified will determine the optimization to be used (onnx, jit)", + type=str, default="", choices=["", "onnx", "jit"]) args = parser.parse_args() nanodet = NanodetLearner(model_to_use=args.model, device=args.device) nanodet.download("./predefined_examples", mode="pretrained") nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) nanodet.download("./predefined_examples", mode="images") - img = Image.open("./predefined_examples/000000000036.jpg") + + img = Image.open(args.path) + + if args.optimize != "": + nanodet.optimize("./{}/nanodet_{}".format(args.optimize, args.model), optimization=args.optimize) + boxes = nanodet.infer(input=img) draw_bounding_boxes(img.opencv(), boxes, class_names=nanodet.classes, show=True) diff --git a/projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb b/projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb index 96af81257c..23c6eb80b0 100644 --- a/projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb +++ b/projects/python/perception/object_detection_2d/nanodet/inference_tutorial.ipynb @@ -25,36 +25,16 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "b6f3d99a-b702-472b-b8d0-95a551e7b9ba", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/tqdm/auto.py:22: 
TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/gluoncv/__init__.py:40: UserWarning: Both `mxnet==1.8.0` and `torch==1.9.0+cu111` are installed. You might encounter increased GPU memory footprint if both framework are used at the same time.\n", - " warnings.warn(f'Both `mxnet=={mx.__version__}` and `torch=={torch.__version__}` are installed. '\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "model size is 1.5x\n", - "init weights...\n", - "Finish initialize NanoDet-Plus Head.\n" - ] - } - ], + "outputs": [], "source": [ "from opendr.perception.object_detection_2d import NanodetLearner\n", "\n", - "model=\"plus_m_1.5x_416\"\n", + "model=\"m\"\n", "\n", - "nanodet = NanodetLearner(model_to_use=model, device=\"cuda\")" + "nanodet = NanodetLearner(model_to_use=model, device=\"cpu\")" ] }, { @@ -77,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "8a680c28-8f42-4b4a-8c6e-2580b7be2da5", "metadata": {}, "outputs": [], @@ -98,510 +78,10 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "e12f582b-c001-4b9d-b396-4260e23139f6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model name: plus_m_1.5x_416 --> ./predefined_examples/nanodet_plus_m_1.5x_416/plus_m_1.5x_416.json\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:root:No param aux_fpn.reduce_layers.0.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.0.bn.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.0.bn.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.bn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.bn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.bn.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.running_mean.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.reduce_layers.1.bn.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.1.bn.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.1.bn.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.reduce_layers.2.bn.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.reduce_layers.2.bn.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.weight.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.2.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.2.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.running_var.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.0.blocks.0.shortcut.3.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.top_down_blocks.1.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.0.weight.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.weight.\u001b[0m\n", - 
"INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.2.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.2.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.running_mean.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.top_down_blocks.1.blocks.0.shortcut.3.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.depthwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.depthwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pointwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pointwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.running_var.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.dwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.dwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.0.pwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.0.pwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.depthwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.depthwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pointwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pointwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.dwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.dwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.running_mean.\u001b[0m\n", - 
"INFO:root:No param aux_fpn.downsamples.1.pwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.downsamples.1.pwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.downsamples.1.pwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.bottom_up_blocks.0.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.0.weight.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.0.weight.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.2.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.2.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.bias.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.0.blocks.0.shortcut.3.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.running_mean.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No 
param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost1.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.primary_conv.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.ghost2.cheap_operation.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.0.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.0.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.1.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.2.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 
18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.2.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.bottom_up_blocks.1.blocks.0.shortcut.3.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.depthwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.depthwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pointwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pointwise.weight.\u001b[0m\n", - 
"INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.dwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.dwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.extra_lvl_in_conv.0.pwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_in_conv.0.pwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_in_conv.0.pwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.depthwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.depthwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pointwise.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pointwise.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.dwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.running_var.\u001b[0m\n", - "INFO:root:No param 
aux_fpn.extra_lvl_out_conv.0.dwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.dwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.weight.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.bias.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.running_mean.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.running_mean.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.running_var.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.running_var.\u001b[0m\n", - "INFO:root:No param aux_fpn.extra_lvl_out_conv.0.pwnorm.num_batches_tracked.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_fpn.extra_lvl_out_conv.0.pwnorm.num_batches_tracked.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.0.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.0.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.0.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.0.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.0.gn.bias.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.0.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.1.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.1.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.1.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.1.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.1.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.1.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.2.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.2.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.2.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.2.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.2.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.2.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.3.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.3.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.3.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.3.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.cls_convs.3.gn.bias.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.cls_convs.3.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.0.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.0.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.0.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.0.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.0.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.0.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.1.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.1.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.1.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.1.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.1.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.1.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.2.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.2.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.2.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.2.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.2.gn.bias.\n", - 
"\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.2.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.3.conv.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.3.conv.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.3.gn.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.3.gn.weight.\u001b[0m\n", - "INFO:root:No param aux_head.reg_convs.3.gn.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.reg_convs.3.gn.bias.\u001b[0m\n", - "INFO:root:No param aux_head.gfl_cls.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.gfl_cls.weight.\u001b[0m\n", - "INFO:root:No param aux_head.gfl_cls.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.gfl_cls.bias.\u001b[0m\n", - "INFO:root:No param aux_head.gfl_reg.weight.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.gfl_reg.weight.\u001b[0m\n", - "INFO:root:No param aux_head.gfl_reg.bias.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.gfl_reg.bias.\u001b[0m\n", - "INFO:root:No param aux_head.scales.0.scale.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.scales.0.scale.\u001b[0m\n", - "INFO:root:No param aux_head.scales.1.scale.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param 
aux_head.scales.1.scale.\u001b[0m\n", - "INFO:root:No param aux_head.scales.2.scale.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.scales.2.scale.\u001b[0m\n", - "INFO:root:No param aux_head.scales.3.scale.\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mNo param aux_head.scales.3.scale.\u001b[0m\n", - "INFO:root:Loaded model weight from ./predefined_examples/nanodet_plus_m_1.5x_416\n", - "\u001b[1m\u001b[35m[root]\u001b[0m\u001b[34m[09-01 18:10:13]\u001b[0m\u001b[32mINFO:\u001b[0m\u001b[37mLoaded model weight from ./predefined_examples/nanodet_plus_m_1.5x_416\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "nanodet.load(path=load_model_weights, verbose=True)" ] @@ -616,7 +96,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "9efba6eb-5235-4e31-a002-1bcb6e311704", "metadata": {}, "outputs": [], @@ -630,33 +110,10 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "9f083566-3d57-4db6-baa5-0fefdf8fa8ea", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "%matplotlib inline\n", "import cv2\n", @@ -678,32 +135,10 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "6cab7dae-8892-4a16-ad03-651fa3bb20ee", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "forward time: 0.030s | decode time: 0.004s | " - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at /pytorch/c10/core/TensorImpl.h:1156.)\n", - " return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)\n", - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/torch/nn/functional.py:3609: UserWarning: Default upsampling behavior when mode=bilinear is changed to align_corners=False since 0.4.0. Please specify align_corners=True if the old behavior is desired. 
See the documentation of nn.Upsample for details.\n", - " warnings.warn(\n", - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/numpy/core/fromnumeric.py:3474: RuntimeWarning: Mean of empty slice.\n", - " return _methods._mean(a, axis=axis, dtype=dtype,\n", - "/home/manos/new_opendr/opendr/venv/lib/python3.8/site-packages/numpy/core/_methods.py:189: RuntimeWarning: invalid value encountered in double_scalars\n", - " ret = ret.dtype.type(ret / rcount)\n" - ] - } - ], + "outputs": [], "source": [ "boxes = nanodet.infer(input=img)" ] @@ -722,33 +157,10 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "d7129fe6-a198-4196-b35f-93ba41e50031", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "\n", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from opendr.perception.object_detection_2d import draw_bounding_boxes\n", "\n", diff --git a/projects/python/perception/object_detection_2d/nanodet/train_demo.py b/projects/python/perception/object_detection_2d/nanodet/train_demo.py index 9c659fd888..4057901326 100644 --- a/projects/python/perception/object_detection_2d/nanodet/train_demo.py +++ b/projects/python/perception/object_detection_2d/nanodet/train_demo.py @@ -22,7 +22,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--dataset", help="Dataset to train on", type=str, default="coco", choices=["voc", "coco"]) parser.add_argument("--data-root", help="Dataset root folder", type=str) - parser.add_argument("--model", help="Model that config file will be used", type=str) + parser.add_argument("--model", help="Model for which a config file will be used", type=str, default="m") parser.add_argument("--device", help="Device to use (cpu, cuda)", type=str, default="cuda", choices=["cuda", "cpu"]) parser.add_argument("--batch-size", help="Batch size to use for training", type=int, default=6) parser.add_argument("--lr", help="Learning rate to use for training", type=float, default=5e-4) @@ -34,18 +34,14 @@ args = parser.parse_args() - if args.dataset == 'voc': - dataset = ExternalDataset(args.data_root, 'voc') - val_dataset = ExternalDataset(args.data_root, 'voc') - elif args.dataset == 'coco': - dataset = ExternalDataset(args.data_root, 'coco') - val_dataset = ExternalDataset(args.data_root, 'coco') + dataset = ExternalDataset(args.data_root, args.dataset) + val_dataset = ExternalDataset(args.data_root, args.dataset) nanodet = NanodetLearner(model_to_use=args.model, iters=args.n_epochs, lr=args.lr, batch_size=args.batch_size, checkpoint_after_iter=args.checkpoint_freq, checkpoint_load_iter=args.resume_from, device=args.device) nanodet.download("./predefined_examples", 
mode="pretrained") - nanodet.load("./predefined_examples/nanodet-{}/nanodet-{}.ckpt".format(args.model, args.model), verbose=True) + nanodet.load("./predefined_examples/nanodet_{}".format(args.model), verbose=True) nanodet.fit(dataset, val_dataset) nanodet.save() diff --git a/src/c_api/Makefile b/src/c_api/Makefile index 1ce21bbaad..ac4aaf6df1 100644 --- a/src/c_api/Makefile +++ b/src/c_api/Makefile @@ -25,11 +25,15 @@ BUILD_DIR = $(OPENDR_HOME)/build LIB_DIR = $(OPENDR_HOME)/lib CFLAGS = -fPIC -INC = -I/usr/local/include/onnxruntime/ `pkg-config --cflags opencv4` +INC = -I/usr/local/include/onnxruntime/ -I/usr/local/include/rapidjson/ `pkg-config --cflags opencv4` LD = -L/usr/lib/ -L/usr/local/lib/ -lstdc++ -lm -lonnxruntime `pkg-config --libs opencv4` -lboost_filesystem OPENDR_INC = -I$(OPENDR_HOME)/include OPENDR_LD = -L$(OPENDR_HOME)//lib -lopendr +LIBTORCH_DIR = /usr/local/libtorch +TORCHSCRIPT_INC = -I$(LIBTORCH_DIR)/include -I$(LIBTORCH_DIR)/include/torch/csrc/api/include +TORCHSCRIPT_LD = -L$(LIBTORCH_DIR)/lib -L$(LIBTORCH_DIR)/share -ltorchvision -ltorch + all: libopendr libopendr: $(OPENDR_HOME)/lib/libopendr.so @@ -39,9 +43,10 @@ $(OPENDR_HOME)/lib/libopendr.so: @+$(MKDIR_P) $(BUILD_DIR) $(CPP) $(CFLAGS) -c opendr_utils.cpp -o $(BUILD_DIR)/opendr_utils.o $(INC) $(OPENDR_INC) $(CPP) $(CFLAGS) -c face_recognition.cpp -o $(BUILD_DIR)/opendr_face_recognition.o $(INC) $(OPENDR_INC) - @$(MKDIR_P) $(LIB_DIR) - $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(LD) --shared + $(CPP) $(CFLAGS) -c object_detection_2d_nanodet_jit.cpp -o $(BUILD_DIR)/opendr_nanodet_jit.o $(INC) $(OPENDR_INC) $(TORCHSCRIPT_INC) + @$(MKDIR_P) $(LIB_DIR) + $(CPP) $(CFLAGS) -o $(OPENDR_HOME)/lib/libopendr.so $(BUILD_DIR)/opendr_utils.o $(BUILD_DIR)/opendr_face_recognition.o $(BUILD_DIR)/opendr_nanodet_jit.o $(LD) $(TORCHSCRIPT_LD) --shared clean: @+echo "Cleaning C API binaries and temporary files..." 
@+$(RM) $(BUILD_DIR)/* diff --git a/src/c_api/README.md b/src/c_api/README.md index 1377f753a7..6ff33bacea 100644 --- a/src/c_api/README.md +++ b/src/c_api/README.md @@ -3,7 +3,7 @@ ## Description This module contains a C API that can be used for performing inference on models trained using the Python API of OpenDR. -Therefore, to use the C API you should first use the Python API to train a model and then export it to ONNX format using the `optimize()` method. +Therefore, to use the C API you should first use the Python API to export a pretrained or a newly trained model and export it to ONNX or JIT format using the `optimize()` method. ## Setup diff --git a/src/c_api/face_recognition.cpp b/src/c_api/face_recognition.cpp index 54d2690c52..3461ea5dca 100644 --- a/src/c_api/face_recognition.cpp +++ b/src/c_api/face_recognition.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -36,133 +35,113 @@ /** * Helper function for preprocessing images before feeding them into the face recognition model. 
* This function follows the OpenDR's face recognition pre-processing pipeline, which includes the following: - * a) resizing the image into resize_target x resize_target pixels and then taking a center crop of size model_input_size, - * and b) normalizing the resulting values using mean_value and std_value + * a) resizing the image into resizeTarget x resizeTarget pixels and then taking a center crop of size modelInputSize, + * and b) normalizing the resulting values using meanValue and stdValue * @param image image to be preprocesses * @param data pre-processed data in a flattened vector - * @param resize_target target size for resizing - * @param model_input_size size of the center crop (equals the size that the DL model expects) - * @param mean_value value used for centering the input image - * @param std_value value used for scaling the input image + * @param resizeTarget target size for resizing + * @param modelInputSize size of the center crop (equals the size that the DL model expects) + * @param meanValue value used for centering the input image + * @param stdValue value used for scaling the input image */ -void preprocess_face_recognition(cv::Mat *image, std::vector &data, int resize_target = 128, int model_input_size = 112, - float mean_value = 0.5, float std_value = 0.5) { +void preprocessFaceRecognition(cv::Mat *image, std::vector &data, int resizeTarget = 128, int modelInputSize = 112, + float meanValue = 0.5, float stdValue = 0.5) { // Convert to RGB cv::Mat img; cv::cvtColor(*image, img, cv::COLOR_BGR2RGB); // Resize and then get a center crop - cv::resize(img, img, cv::Size(resize_target, resize_target)); - int stride = (resize_target - model_input_size) / 2; - cv::Rect myROI(stride, stride, resize_target - stride, resize_target - stride); + cv::resize(img, img, cv::Size(resizeTarget, resizeTarget)); + int stride = (resizeTarget - modelInputSize) / 2; + cv::Rect myROI(stride, stride, resizeTarget - stride, resizeTarget - stride); img = img(myROI); 
// Scale to 0...1 - cv::Mat out_img; - img.convertTo(out_img, CV_32FC3, 1 / 255.0); + cv::Mat outImg; + img.convertTo(outImg, CV_32FC3, 1 / 255.0); // Unfold the image into the appropriate format // This is certainly not the most efficient way to do this... // ... and is probably constantly leading to cache misses // ... but it works for now. - for (unsigned int j = 0; j < model_input_size; ++j) { - for (unsigned int k = 0; k < model_input_size; ++k) { - cv::Vec3f cur_pixel = out_img.at(j, k); - data[0 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[0] - mean_value) / std_value; - data[1 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[1] - mean_value) / std_value; - data[2 * model_input_size * model_input_size + j * model_input_size + k] = (cur_pixel[2] - mean_value) / std_value; + for (unsigned int j = 0; j < modelInputSize; ++j) { + for (unsigned int k = 0; k < modelInputSize; ++k) { + cv::Vec3f curPixel = outImg.at(j, k); + data[0 * modelInputSize * modelInputSize + j * modelInputSize + k] = (curPixel[0] - meanValue) / stdValue; + data[1 * modelInputSize * modelInputSize + j * modelInputSize + k] = (curPixel[0] - meanValue) / stdValue; + data[2 * modelInputSize * modelInputSize + j * modelInputSize + k] = (curPixel[0] - meanValue) / stdValue; } } } -/** - * Very simple helper function to parse OpenDR model files for face recognition - * In the future this can be done at library level using a JSON-parser - */ -std::string json_get_key_string(std::string json, const std::string &key) { - std::size_t start_idx = json.find(key); - std::string value = json.substr(start_idx); - value = value.substr(value.find(":") + 1); - value.resize(value.find(",")); - value = value.substr(value.find("\"") + 1); - value.resize(value.find("\"")); - return value; -} - -void load_face_recognition_model(const char *model_path, face_recognition_model_t *model) { +void loadFaceRecognitionModel(const char *modelPath, 
FaceRecognitionModelT *model) { // Initialize model - model->onnx_session = model->env = model->session_options = NULL; - model->database = model->database_ids = NULL; - model->person_names = NULL; + model->onnxSession = model->env = model->sessionOptions = NULL; + model->database = model->databaseIds = NULL; + model->personNames = NULL; model->threshold = 1; // Parse the model JSON file - std::string model_json_path(model_path); - std::size_t split_pos = model_json_path.find_last_of("/"); - split_pos = split_pos > 0 ? split_pos + 1 : 0; - model_json_path = model_json_path + "/" + model_json_path.substr(split_pos) + ".json"; + std::string modelJsonPath(modelPath); + std::size_t splitPos = modelJsonPath.find_last_of("/"); + splitPos = splitPos > 0 ? splitPos + 1 : 0; + modelJsonPath = modelJsonPath + "/" + modelJsonPath.substr(splitPos) + ".json"; - std::ifstream in_stream(model_json_path); - if (!in_stream.is_open()) { + std::ifstream inStream(modelJsonPath); + if (!inStream.is_open()) { std::cerr << "Cannot open JSON model file" << std::endl; return; } + std::string str((std::istreambuf_iterator(inStream)), std::istreambuf_iterator()); + const char *json = str.c_str(); - std::string str; - in_stream.seekg(0, std::ios::end); - str.reserve(in_stream.tellg()); - in_stream.seekg(0, std::ios::beg); - str.assign((std::istreambuf_iterator(in_stream)), std::istreambuf_iterator()); - - std::string basepath = model_json_path.substr(0, split_pos); - split_pos = basepath.find_last_of("/"); - split_pos = split_pos > 0 ? split_pos + 1 : 0; - basepath.resize(split_pos); + std::string basepath = modelJsonPath.substr(0, splitPos); + splitPos = basepath.find_last_of("/"); + splitPos = splitPos > 0 ? 
splitPos + 1 : 0; + if (splitPos < basepath.size()) + basepath.resize(splitPos); // Parse JSON - std::string onnx_model_path = basepath + json_get_key_string(str, "model_paths"); - std::string model_format = json_get_key_string(str, "format"); + std::string onnxModelPath = basepath + jsonGetKeyString(json, "model_paths", 0); + std::string modelFormat = jsonGetKeyString(json, "format", 0); // Parse inference params - std::string threshold = json_get_key_string(str, "threshold"); - ; - if (!threshold.empty()) { - model->threshold = std::stof(threshold); - } + float threshold = jsonGetKeyFromInferenceParams(json, "threshold", 0); + model->threshold = threshold; // Proceed only if the model is in onnx format - if (model_format != "onnx") { + if (modelFormat != "onnx") { std::cerr << "Model not in ONNX format." << std::endl; return; } Ort::Env *env = new Ort::Env(ORT_LOGGING_LEVEL_WARNING, "opendr_env"); - Ort::SessionOptions *session_options = new Ort::SessionOptions; - session_options->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); + Ort::SessionOptions *sessionOptions = new Ort::SessionOptions; + sessionOptions->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); - Ort::Session *session = new Ort::Session(*env, onnx_model_path.c_str(), *session_options); + Ort::Session *session = new Ort::Session(*env, onnxModelPath.c_str(), *sessionOptions); model->env = env; - model->onnx_session = session; - model->session_options = session_options; + model->onnxSession = session; + model->sessionOptions = sessionOptions; // Should we pass these parameters through the model json file? 
- model->model_size = 112; - model->resize_size = 128; - model->mean_value = 0.5; - model->std_value = 0.5; - model->output_size = 128; + model->modelSize = 112; + model->resizeSize = 128; + model->meanValue = 0.5; + model->stdValue = 0.5; + model->outputSize = 128; } -void free_face_recognition_model(face_recognition_model_t *model) { - if (model->onnx_session) { - Ort::Session *session = static_cast(model->onnx_session); +void freeFaceRecognitionModel(FaceRecognitionModelT *model) { + if (model->onnxSession) { + Ort::Session *session = static_cast(model->onnxSession); delete session; } - if (model->session_options) { - Ort::SessionOptions *session_options = static_cast(model->session_options); - delete session_options; + if (model->sessionOptions) { + Ort::SessionOptions *sessionOptions = static_cast(model->sessionOptions); + delete sessionOptions; } if (model->env) { @@ -170,8 +149,8 @@ void free_face_recognition_model(face_recognition_model_t *model) { delete env; } - if (model->database_ids) { - delete[] model->database_ids; + if (model->databaseIds) { + delete[] model->databaseIds; } if (model->database) { @@ -179,139 +158,139 @@ void free_face_recognition_model(face_recognition_model_t *model) { delete database; } - if (model->person_names) { - for (int i = 0; i < model->n_persons; i++) - delete[] model->person_names[i]; - delete[] model->person_names; + if (model->personNames) { + for (int i = 0; i < model->nPersons; i++) + delete[] model->personNames[i]; + delete[] model->personNames; } } -void ff_face_recognition(face_recognition_model_t *model, opendr_image_t *image, cv::Mat *features) { - Ort::Session *session = static_cast(model->onnx_session); +void ffFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image, cv::Mat *features) { + Ort::Session *session = static_cast(model->onnxSession); if (!session) { std::cerr << "ONNX session not initialized." 
<< std::endl; return; } // Prepare the input dimensions - std::vector input_node_dims = {1, 3, model->model_size, model->model_size}; - size_t input_tensor_size = model->model_size * model->model_size * 3; + std::vector inputNodeDims = {1, 3, model->modelSize, model->modelSize}; + size_t inputTensorSize = model->modelSize * model->modelSize * 3; // Get the input image and pre-process it - std::vector input_tensor_values(input_tensor_size); - cv::Mat *opencv_image = static_cast(image->data); - if (!opencv_image) { + std::vector inputTensorValues(inputTensorSize); + cv::Mat *opencvImage = static_cast(image->data); + if (!opencvImage) { std::cerr << "Cannot load image for inference." << std::endl; return; } - preprocess_face_recognition(opencv_image, input_tensor_values, model->resize_size, model->model_size, model->mean_value, - model->std_value); + preprocessFaceRecognition(opencvImage, inputTensorValues, model->resizeSize, model->modelSize, model->meanValue, + model->stdValue); // Setup input/output names Ort::AllocatorWithDefaultOptions allocator; - std::vector input_node_names = {"data"}; - std::vector output_node_names = {"features"}; + std::vector inputNodeNames = {"data"}; + std::vector outputNodeNames = {"features"}; - // Setup the input tensor - auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); - Ort::Value input_tensor = - Ort::Value::CreateTensor(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), 4); - assert(input_tensor.IsTensor()); + // Set up the input tensor + auto memoryInfo = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault); + Ort::Value inputTensor = + Ort::Value::CreateTensor(memoryInfo, inputTensorValues.data(), inputTensorSize, inputNodeDims.data(), 4); + assert(inputTensor.IsTensor()); // Feed-forward the model - auto output_tensors = - session->Run(Ort::RunOptions{nullptr}, input_node_names.data(), &input_tensor, 1, output_node_names.data(), 1); - 
assert(output_tensors.size() == 1 && output_tensors.front().IsTensor()); + auto outputTensors = + session->Run(Ort::RunOptions{nullptr}, inputNodeNames.data(), &inputTensor, 1, outputNodeNames.data(), 1); + assert(outputTensors.size() == 1 && outputTensors.front().IsTensor()); // Get the results back - float *floatarr = output_tensors.front().GetTensorMutableData(); - cv::Mat cur_features(cv::Size(model->output_size, 1), CV_32F, floatarr); + float *floatarr = outputTensors.front().GetTensorMutableData(); + cv::Mat curFeatures(cv::Size(model->outputSize, 1), CV_32F, floatarr); // Perform l2 normalizaton - cv::Mat features_square = cur_features.mul(cur_features); - float norm = sqrt(cv::sum(features_square)[0]); - cur_features = cur_features / norm; - memcpy(features->data, cur_features.data, sizeof(float) * model->output_size); + cv::Mat featuresSquare = curFeatures.mul(curFeatures); + float norm = sqrt(cv::sum(featuresSquare)[0]); + curFeatures = curFeatures / norm; + memcpy(features->data, curFeatures.data, sizeof(float) * model->outputSize); } -void build_database_face_recognition(const char *database_folder, const char *output_path, face_recognition_model_t *model) { +void buildDatabaseFaceRecognition(const char *databaseFolder, const char *outputPath, FaceRecognitionModelT *model) { using namespace boost::filesystem; - std::vector person_names; - std::vector database_ids; - cv::Mat database(cv::Size(model->output_size, 0), CV_32F); + std::vector personNames; + std::vector databaseIds; + cv::Mat database(cv::Size(model->outputSize, 0), CV_32F); - path root_path(database_folder); - if (!exists(root_path)) { + path rootPath(databaseFolder); + if (!exists(rootPath)) { std::cerr << "Database path does not exist." 
<< std::endl; return; } - int current_id = 0; - for (auto person_path = directory_iterator(root_path); person_path != directory_iterator(); person_path++) { + int currentId = 0; + for (auto personPath = directory_iterator(rootPath); personPath != directory_iterator(); personPath++) { // For each person in the database - if (is_directory(person_path->path())) { - path cur_person_path(person_path->path()); - person_names.push_back(person_path->path().filename().string()); + if (is_directory(personPath->path())) { + path curPersonPath(personPath->path()); + personNames.push_back(personPath->path().filename().string()); - for (auto cur_img_path = directory_iterator(cur_person_path); cur_img_path != directory_iterator(); cur_img_path++) { - opendr_image_t image; - load_image(cur_img_path->path().string().c_str(), &image); + for (auto curImgPath = directory_iterator(curPersonPath); curImgPath != directory_iterator(); curImgPath++) { + OpendrImageT image; + loadImage(curImgPath->path().string().c_str(), &image); - cv::Mat features(cv::Size(model->output_size, 1), CV_32F); - ff_face_recognition(model, &image, &features); + cv::Mat features(cv::Size(model->outputSize, 1), CV_32F); + ffFaceRecognition(model, &image, &features); - free_image(&image); + freeImage(&image); database.push_back(features.clone()); - database_ids.push_back(current_id); + databaseIds.push_back(currentId); } - current_id++; + currentId++; } else { continue; } } - if (current_id == 0) { + if (currentId == 0) { std::cerr << "Cannot open database files." << std::endl; return; } // Make the array continuous - cv::Mat database_out = database.clone(); + cv::Mat databaseOut = database.clone(); - std::ofstream fout(output_path, std::ios::out | std::ios::binary); + std::ofstream fout(outputPath, std::ios::out | std::ios::binary); if (!fout.is_open()) { std::cerr << "Cannot open database file for writting." 
<< std::endl; return; } // Write number of persons - int n = person_names.size(); + int n = personNames.size(); fout.write(reinterpret_cast(&n), sizeof(int)); for (int i = 0; i < n; i++) { // Write the name of the person (along with its size) - int name_length = person_names[i].size() + 1; - fout.write(reinterpret_cast(&name_length), sizeof(int)); - fout.write(person_names[i].c_str(), name_length); + int nameLength = personNames[i].size() + 1; + fout.write(reinterpret_cast(&nameLength), sizeof(int)); + fout.write(personNames[i].c_str(), nameLength); } - cv::Size s = database_out.size(); + cv::Size s = databaseOut.size(); fout.write(reinterpret_cast(&s.height), sizeof(int)); fout.write(reinterpret_cast(&s.width), sizeof(int)); - fout.write(reinterpret_cast(database_out.data), sizeof(float) * s.height * s.width); - fout.write(reinterpret_cast(&database_ids[0]), sizeof(int) * s.height); + fout.write(reinterpret_cast(databaseOut.data), sizeof(float) * s.height * s.width); + fout.write(reinterpret_cast(&databaseIds[0]), sizeof(int) * s.height); fout.flush(); fout.close(); } -void load_database_face_recognition(const char *database_path, face_recognition_model_t *model) { +void loadDatabaseFaceRecognition(const char *databasePath, FaceRecognitionModelT *model) { model->database = NULL; - model->database_ids = NULL; + model->databaseIds = NULL; - std::ifstream fin(database_path, std::ios::out | std::ios::binary); + std::ifstream fin(databasePath, std::ios::out | std::ios::binary); if (!fin.is_open()) { std::cerr << "Cannot load database file (check that file exists and you have created the database)." 
<< std::endl; @@ -319,50 +298,50 @@ void load_database_face_recognition(const char *database_path, face_recognition_ } int n; fin.read(reinterpret_cast(&n), sizeof(int)); - char **person_names = new char *[n]; + char **personNames = new char *[n]; for (int i = 0; i < n; i++) { - person_names[i] = new char[512]; + personNames[i] = new char[512]; // Read person name - int name_length; - fin.read(reinterpret_cast(&name_length), sizeof(int)); - if (name_length > 512) { + int nameLength; + fin.read(reinterpret_cast(&nameLength), sizeof(int)); + if (nameLength > 512) { std::cerr << "Person name exceeds max number of characters (512)" << std::endl; return; } - fin.read(person_names[i], name_length); + fin.read(personNames[i], nameLength); } int height, width; fin.read(reinterpret_cast(&height), sizeof(int)); fin.read(reinterpret_cast(&width), sizeof(int)); - float *database_buff = new float[height * width]; - int *features_ids = new int[height]; - fin.read(reinterpret_cast(database_buff), sizeof(float) * height * width); - fin.read(reinterpret_cast(features_ids), sizeof(int) * height); + float *databaseBuff = new float[height * width]; + int *featuresIds = new int[height]; + fin.read(reinterpret_cast(databaseBuff), sizeof(float) * height * width); + fin.read(reinterpret_cast(featuresIds), sizeof(int) * height); fin.close(); cv::Mat *database = new cv::Mat(cv::Size(width, height), CV_32F); - memcpy(database->data, database_buff, sizeof(float) * width * height); - delete[] database_buff; + memcpy(database->data, databaseBuff, sizeof(float) * width * height); + delete[] databaseBuff; model->database = database; - model->database_ids = features_ids; - model->person_names = person_names; - model->n_persons = n; - model->n_features = height; + model->databaseIds = featuresIds; + model->personNames = personNames; + model->nPersons = n; + model->nFeatures = height; } -opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, opendr_image_t *image) { - 
cv::Mat features(cv::Size(model->output_size, 1), CV_32F); - opendr_category_target_t target; +OpendrCategoryTargetT inferFaceRecognition(FaceRecognitionModelT *model, OpendrImageT *image) { + cv::Mat features(cv::Size(model->outputSize, 1), CV_32F); + OpendrCategoryTargetT target; target.data = -1; target.confidence = 0; // Get the feature vector for the current image - ff_face_recognition(model, image, &features); + ffFaceRecognition(model, image, &features); if (!model->database) { std::cerr << "Database is not loaded!" << std::endl; @@ -370,27 +349,27 @@ opendr_category_target_t infer_face_recognition(face_recognition_model_t *model, } cv::Mat *database = static_cast(model->database); // Calculate the distance between the extracted feature vector and database features - cv::Mat features_repeated; - cv::repeat(features, model->n_features, 1, features_repeated); - cv::Mat diff = features_repeated - *database; + cv::Mat featuresRepeated; + cv::repeat(features, model->nFeatures, 1, featuresRepeated); + cv::Mat diff = featuresRepeated - *database; diff = diff.mul(diff); - cv::Mat sq_dists; - cv::reduce(diff, sq_dists, 1, CV_REDUCE_SUM, CV_32F); + cv::Mat sqDists; + cv::reduce(diff, sqDists, 1, CV_REDUCE_SUM, CV_32F); cv::Mat dists; - cv::sqrt(sq_dists, dists); + cv::sqrt(sqDists, dists); - double min_dist, max_dist; - cv::Point min_loc, max_loc; - cv::minMaxLoc(dists, &min_dist, &max_dist, &min_loc, &max_loc); + double minDist, maxDist; + cv::Point minLoc, maxLoc; + cv::minMaxLoc(dists, &minDist, &maxDist, &minLoc, &maxLoc); - target.data = model->database_ids[min_loc.y]; - target.confidence = 1 - (min_dist / model->threshold); + target.data = model->databaseIds[minLoc.y]; + target.confidence = 1 - (minDist / model->threshold); return target; } -void decode_category_face_recognition(face_recognition_model_t *model, opendr_category_target_t category, char *person_name) { - if (category.data >= model->n_persons) +void 
decodeCategoryFaceRecognition(FaceRecognitionModelT *model, OpendrCategoryTargetT category, char *personName) { + if (category.data >= model->nPersons) return; - strcpy(person_name, model->person_names[category.data]); + strcpy(personName, model->personNames[category.data]); } diff --git a/src/c_api/object_detection_2d_nanodet_jit.cpp b/src/c_api/object_detection_2d_nanodet_jit.cpp new file mode 100644 index 0000000000..4fa8e37940 --- /dev/null +++ b/src/c_api/object_detection_2d_nanodet_jit.cpp @@ -0,0 +1,362 @@ +// Copyright 2020-2023 OpenDR European Project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "object_detection_2d_nanodet_jit.h" + +#include +#include +#include +#include +#include + +/** + * Helper class holder of c++ values and jit model. 
+ */ +class NanoDet { +private: + torch::DeviceType mDevice; + torch::jit::script::Module mNetwork; + torch::Tensor mMeanTensor; + torch::Tensor mStdTensor; + std::vector mLabels; + +public: + NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, torch::DeviceType device, + std::vector labels); + ~NanoDet(); + + torch::Tensor preProcess(cv::Mat *image); + torch::jit::script::Module network() const; + torch::Tensor meanTensor() const; + torch::Tensor stdTensor() const; + std::vector labels() const; + std::vector outputs; +}; + +NanoDet::NanoDet(torch::jit::script::Module network, torch::Tensor meanValues, torch::Tensor stdValues, + torch::DeviceType device, const std::vector labels) { + this->mDevice = device; + this->mNetwork = network; + this->mMeanTensor = meanValues.clone().to(device); + this->mStdTensor = stdValues.clone().to(device); + this->mLabels = labels; +} + +NanoDet::~NanoDet() { +} + +/** + * Helper function for preprocessing images for normalization. + * This function follows the OpenDR's Nanodet pre-processing pipeline for color normalization. + * Mean and Standard deviation are already part of NanoDet class when it is initialized. 
+ * @param image, image to be preprocessed + */ +torch::Tensor NanoDet::preProcess(cv::Mat *image) { + torch::Tensor tensorImage = torch::from_blob(image->data, {image->rows, image->cols, 3}, torch::kByte); + tensorImage = tensorImage.toType(torch::kFloat); + tensorImage = tensorImage.to(this->mDevice); + tensorImage = tensorImage.permute({2, 0, 1}); + tensorImage = tensorImage.add(this->mMeanTensor); + tensorImage = tensorImage.mul(this->mStdTensor); + + return tensorImage; +} + +/** + * Getter for jit model + */ +torch::jit::script::Module NanoDet::network() const { + return this->mNetwork; +} + +/** + * Getter for tensor with the mean values + */ +torch::Tensor NanoDet::meanTensor() const { + return this->mMeanTensor; +} + +/** + * Getter for tensor with the standard deviation values + */ +torch::Tensor NanoDet::stdTensor() const { + return this->mStdTensor; +} + +/** + * Getter of labels + */ +std::vector NanoDet::labels() const { + return this->mLabels; +} + +/** + * Helper function to calculate the final shape of the model input relative to size ratio of input image. + */ +void getMinimumDstShape(cv::Size *srcSize, cv::Size *dstSize, float divisible) { + float ratio; + float srcRatio = ((float)srcSize->width / (float)srcSize->height); + float dstRatio = ((float)dstSize->width / (float)dstSize->height); + if (srcRatio < dstRatio) + ratio = ((float)dstSize->height / (float)srcSize->height); + else + ratio = ((float)dstSize->width / (float)srcSize->width); + + dstSize->width = static_cast(ratio * srcSize->width); + dstSize->height = static_cast(ratio * srcSize->height); + + if (divisible > 0) { + dstSize->width = std::max(divisible, ((int)((dstSize->width + divisible - 1) / divisible) * divisible)); + dstSize->height = std::max(divisible, ((int)((dstSize->height + divisible - 1) / divisible) * divisible)); + } +} + +/** + * Helper function to calculate the warp matrix for resizing. 
+ */ +void getResizeMatrix(cv::Size *srcShape, cv::Size *dstShape, cv::Mat *Rs, int keepRatio) { + if (keepRatio == 1) { + float ratio; + cv::Mat C = cv::Mat::eye(3, 3, CV_32FC1); + + C.at(0, 2) = -srcShape->width / 2.0; + C.at(1, 2) = -srcShape->height / 2.0; + float srcRatio = ((float)srcShape->width / (float)srcShape->height); + float dstRatio = ((float)dstShape->width / (float)dstShape->height); + if (srcRatio < dstRatio) { + ratio = ((float)dstShape->height / (float)srcShape->height); + } else { + ratio = ((float)dstShape->width / (float)srcShape->width); + } + + Rs->at(0, 0) *= ratio; + Rs->at(1, 1) *= ratio; + + cv::Mat T = cv::Mat::eye(3, 3, CV_32FC1); + T.at(0, 2) = 0.5 * dstShape->width; + T.at(1, 2) = 0.5 * dstShape->height; + + *Rs = T * (*Rs) * C; + } else { + Rs->at(0, 0) *= (float)dstShape->width / (float)srcShape->width; + Rs->at(1, 1) *= (float)dstShape->height / (float)srcShape->height; + } +} + +/** + * Helper function for preprocessing images for resizing. + * This function follows OpenDR's Nanodet pre-processing pipeline for shape transformation, which includes + * finding the actual final size of the model input if keep ratio is enabled, calculating the warp matrix and finally + * resizing and warping the perspective of the input image. 
+ * @param src, image to be preprocessed + * @param dst, output image to be used as model input + * @param dstSize, final size of the dst + * @param warpMatrix, matrix to be used for warp perspective + * @param keepRatio, flag for targeting the resized image size relative to input image ratio + */ +void preprocess(cv::Mat *src, cv::Mat *dst, cv::Size *dstSize, cv::Mat *warpMatrix, int keepRatio) { + cv::Size srcSize = cv::Size(src->cols, src->rows); + const float divisible = 0.0; + + // Get new destination size if keep ratio is enabled + if (keepRatio == 1) { + getMinimumDstShape(&srcSize, dstSize, divisible); + } + + getResizeMatrix(&srcSize, dstSize, warpMatrix, keepRatio); + cv::warpPerspective(*src, *dst, *warpMatrix, *dstSize); +} + +/** + * Helper function to determine the device of jit model and tensors. + */ +torch::DeviceType torchDevice(char *deviceName, int verbose = 0) { + torch::DeviceType device; + if (std::string(deviceName) == "cuda") { + if (verbose == 1) + printf("to cuda\n"); + device = torch::kCUDA; + } else { + if (verbose == 1) + printf("to cpu\n"); + device = torch::kCPU; + } + return device; +} + +void loadNanodetModel(char *modelPath, char *device, int height, int width, float scoreThreshold, NanodetModelT *model) { + // Initialize model + model->inputSizes[0] = width; + model->inputSizes[1] = height; + + model->scoreThreshold = scoreThreshold; + model->keepRatio = 1; + + const std::vector labels{ + "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", + "truck", "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", + "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", + "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", + "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite", "baseball bat", + "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", + "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", + 
"orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", + "chair", "couch", "potted plant", "bed", "dining table", "toilet", "tv", + "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave", "oven", + "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", + "teddy bear", "hair drier", "toothbrush"}; + + int **colorList = new int *[labels.size()]; + for (int i = 0; i < labels.size(); i++) { + colorList[i] = new int[3]; + } + // seed the random number generator + std::srand(1); + for (int i = 0; i < labels.size(); i++) { + for (int j = 0; j < 3; j++) { + colorList[i][j] = std::rand() % 256; + } + } + + // mean and standard deviation tensors for normalization of input + torch::Tensor meanTensor = torch::tensor({{{-103.53f}}, {{-116.28f}}, {{-123.675f}}}); + torch::Tensor stdValues = torch::tensor({{{0.017429f}}, {{0.017507f}}, {{0.017125f}}}); + + // initialization of jit model and class as holder of c++ values. + torch::DeviceType initDevice = torchDevice(device, 1); + torch::jit::script::Module network = torch::jit::load(modelPath, initDevice); + network.eval(); + + NanoDet *detector = new NanoDet(network, meanTensor, stdValues, initDevice, labels); + + model->network = static_cast(detector); + model->colorList = colorList; + model->numberOfClasses = labels.size(); +} + +void ffNanodet(NanoDet *model, torch::Tensor *inputTensor, cv::Mat *warpMatrix, cv::Size *originalSize, + torch::Tensor *outputs) { + // Make all the inputs as tensors to use in jit model + torch::Tensor srcHeight = torch::tensor(originalSize->height); + torch::Tensor srcWidth = torch::tensor(originalSize->width); + torch::Tensor warpMat = torch::from_blob(warpMatrix->data, {3, 3}); + + // Model inference + *outputs = (model->network()).forward({*inputTensor, srcWidth, srcHeight, warpMat}).toTensor(); + *outputs = outputs->to(torch::Device(torch::kCPU, 0)); +} + +OpendrDetectionVectorTargetT inferNanodet(NanodetModelT *model, OpendrImageT *image) { + NanoDet 
*networkPTR = static_cast(model->network); + OpendrDetectionVectorTargetT detectionsVector; + initDetectionsVector(&detectionsVector); + + cv::Mat *opencvImage = static_cast(image->data); + if (!opencvImage) { + std::cerr << "Cannot load image for inference." << std::endl; + return detectionsVector; + } + + // Preprocess image and keep values as input in jit model + cv::Mat resizedImg; + cv::Size dstSize = cv::Size(model->inputSizes[0], model->inputSizes[1]); + cv::Mat warpMatrix = cv::Mat::eye(3, 3, CV_32FC1); + preprocess(opencvImage, &resizedImg, &dstSize, &warpMatrix, model->keepRatio); + torch::Tensor input = networkPTR->preProcess(&resizedImg); + cv::Size originalSize(opencvImage->cols, opencvImage->rows); + + torch::Tensor outputs; + ffNanodet(networkPTR, &input, &warpMatrix, &originalSize, &outputs); + + std::vector detections; + // Postprocessing, find which outputs have better score than threshold and keep them. + for (int label = 0; label < outputs.size(0); label++) { + for (int box = 0; box < outputs.size(1); box++) { + if (outputs[label][box][4].item() > model->scoreThreshold) { + OpendrDetectionTargetT detection; + detection.name = label; + detection.left = outputs[label][box][0].item(); + detection.top = outputs[label][box][1].item(); + detection.width = outputs[label][box][2].item() - outputs[label][box][0].item(); + detection.height = outputs[label][box][3].item() - outputs[label][box][1].item(); + detection.score = outputs[label][box][4].item(); + detections.push_back(detection); + } + } + } + + // Put vector detection as C pointer and size + if (static_cast(detections.size()) > 0) + loadDetectionsVector(&detectionsVector, detections.data(), static_cast(detections.size())); + + return detectionsVector; +} + +void drawBboxes(OpendrImageT *image, NanodetModelT *model, OpendrDetectionVectorTargetT *detectionsVector) { + int **colorList = model->colorList; + + std::vector classNames = (static_cast(model->network))->labels(); + + cv::Mat *opencvImage = 
static_cast(image->data); + if (!opencvImage) { + std::cerr << "Cannot load image for inference." << std::endl; + return; + } + + cv::Mat imageWithDetections = (*opencvImage).clone(); + for (size_t i = 0; i < detectionsVector->size; i++) { + const OpendrDetectionTarget bbox = (detectionsVector->startingPointer)[i]; + float score = bbox.score > 1 ? 1 : bbox.score; + if (score > model->scoreThreshold) { + cv::Scalar color = cv::Scalar(colorList[bbox.name][0], colorList[bbox.name][1], colorList[bbox.name][2]); + cv::rectangle(imageWithDetections, + cv::Rect(cv::Point(bbox.left, bbox.top), cv::Point((bbox.left + bbox.width), (bbox.top + bbox.height))), + color); + + char text[256]; + + sprintf(text, "%s %.1f%%", (classNames)[bbox.name].c_str(), score * 100); + + int baseLine = 0; + cv::Size labelSize = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.4, 1, &baseLine); + + int x = (int)bbox.left; + int y = (int)bbox.top; + if (y < 0) + y = 0; + if (x + labelSize.width > imageWithDetections.cols) + x = imageWithDetections.cols - labelSize.width; + + cv::rectangle(imageWithDetections, cv::Rect(cv::Point(x, y), cv::Size(labelSize.width, labelSize.height + baseLine)), + color, -1); + cv::putText(imageWithDetections, text, cv::Point(x, y + labelSize.height), cv::FONT_HERSHEY_SIMPLEX, 0.4, + cv::Scalar(255, 255, 255)); + } + } + + cv::imshow("image", imageWithDetections); + cv::waitKey(0); +} + +void freeNanodetModel(NanodetModelT *model) { + if (model->network) { + NanoDet *networkPTR = static_cast(model->network); + delete networkPTR; + } + + for (int i = 0; i < model->numberOfClasses; i++) { + delete[] model->colorList[i]; + } + delete[] model->colorList; +} diff --git a/src/c_api/opendr_utils.cpp b/src/c_api/opendr_utils.cpp index 55249bc728..42563e58b9 100644 --- a/src/c_api/opendr_utils.cpp +++ b/src/c_api/opendr_utils.cpp @@ -20,18 +20,124 @@ #include #include -void load_image(const char *path, opendr_image_t *image) { - cv::Mat opencv_image = cv::imread(path, 
cv::IMREAD_COLOR); - if (opencv_image.empty()) { +#include +#include +#include + +float jsonGetKeyFromInferenceParams(const char *json, const char *key, const int index) { + rapidjson::Document doc; + doc.Parse(json); + if ((!doc.IsObject()) || (!doc.HasMember("inference_params"))) { + return 0.0f; + } + const rapidjson::Value &inferenceParams = doc["inference_params"]; + if ((!inferenceParams.IsObject()) || (!inferenceParams.HasMember(key))) { + return 0.0f; + } + const rapidjson::Value &value = inferenceParams[key]; + if (value.IsArray()) { + if (value.Size() <= index) { + return 0.0f; + } + if (!value[index].IsFloat()) { + return 0.0f; + } + return value[index].GetFloat(); + } + if (!value.IsFloat()) { + return 0.0f; + } + return value.GetFloat(); +} + +const char *jsonGetKeyString(const char *json, const char *key, const int index) { + rapidjson::Document doc; + doc.Parse(json); + if ((!doc.IsObject()) || (!doc.HasMember(key))) { + return ""; + } + const rapidjson::Value &value = doc[key]; + if (value.IsArray()) { + if (value.Size() <= index) { + return ""; + } + if (!value[index].IsString()) { + return ""; + } + return value[index].GetString(); + } + if (!value.IsString()) { + return ""; + } + return value.GetString(); +} + +float jsonGetKeyFloat(const char *json, const char *key, const int index) { + rapidjson::Document doc; + doc.Parse(json); + if ((!doc.IsObject()) || (!doc.HasMember(key))) { + return 0.0f; + } + const rapidjson::Value &value = doc[key]; + if (value.IsArray()) { + if (value.Size() <= index) { + return 0.0f; + } + if (!value[index].IsFloat()) { + return 0.0f; + } + return value[index].GetFloat(); + } + if (!value.IsFloat()) { + return 0.0f; + } + return value.GetFloat(); +} + +void loadImage(const char *path, OpendrImageT *image) { + cv::Mat opencvImage = cv::imread(path, cv::IMREAD_COLOR); + if (opencvImage.empty()) { image->data = NULL; } else { - image->data = new cv::Mat(opencv_image); + image->data = new cv::Mat(opencvImage); } } -void
free_image(opendr_image_t *image) { +void freeImage(OpendrImageT *image) { if (image->data) { - cv::Mat *opencv_image = static_cast(image->data); - delete opencv_image; + cv::Mat *opencvImage = static_cast(image->data); + delete opencvImage; } } + +void initDetectionsVector(OpendrDetectionVectorTargetT *detectionVector) { + detectionVector->startingPointer = NULL; + + std::vector detections; + OpendrDetectionTargetT detection; + + detection.name = -1; + detection.left = 0.0; + detection.top = 0.0; + detection.width = 0.0; + detection.height = 0.0; + detection.score = 0.0; + + detections.push_back(detection); + + loadDetectionsVector(detectionVector, detections.data(), static_cast(detections.size())); +} + +void loadDetectionsVector(OpendrDetectionVectorTargetT *detectionVector, OpendrDetectionTargetT *detection, int vectorSize) { + freeDetectionsVector(detectionVector); + + detectionVector->size = vectorSize; + int sizeOfOutput = (vectorSize) * sizeof(OpendrDetectionTargetT); + detectionVector->startingPointer = static_cast(malloc(sizeOfOutput)); + std::memcpy(detectionVector->startingPointer, detection, sizeOfOutput); +} + +void freeDetectionsVector(OpendrDetectionVectorTargetT *detectionVector) { + if (detectionVector->startingPointer != NULL) + free(detectionVector->startingPointer); +} diff --git a/src/opendr/perception/object_detection_2d/nanodet/README.md b/src/opendr/perception/object_detection_2d/nanodet/README.md index 409e07a847..777a4eb0e6 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/README.md +++ b/src/opendr/perception/object_detection_2d/nanodet/README.md @@ -10,4 +10,4 @@ Large parts of the implementation are taken from [Nanodet Github](https://github Usage ------ - For VOC and COCO like datasets, an ```ExternalDataset``` with the root path and dataset name (```voc```, ```coco```) must be passed to the fit function. -- The ```temp_path``` folder is used to save checkpoints during training. 
\ No newline at end of file +- The ```temp``` folder is used to save checkpoints during training. \ No newline at end of file diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite0_320.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite0_320.yml index cdddc320cb..d47708a05f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite0_320.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite0_320.yml @@ -5,7 +5,7 @@ # AP_small = 0.079 # AP_m = 0.243 # AP_l = 0.406 -save_dir: ./workspace/efficient0_320 +save_dir: ./temp/efficient0_320 check_point_name: EfficientNet_Lite0_320 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite1_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite1_416.yml index a189662a77..859dbe00e1 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite1_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite1_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.122 # AP_m = 0.321 # AP_l = 0.432 -save_dir: ./workspace/efficient1_416_SGD +save_dir: ./temp/efficient1_416_SGD check_point_name: EfficientNet_Lite1_416 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite2_512.yml 
b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite2_512.yml index 20664fe7ca..a4248e7eda 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite2_512.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/EfficientNet_Lite/nanodet_EfficientNet_Lite2_512.yml @@ -5,7 +5,7 @@ # AP_small = 0.152 # AP_m = 0.342 # AP_l = 0.481 -save_dir: ./workspace/efficientlite2_512 +save_dir: ./temp/efficientlite2_512 check_point_name: EfficientNet_Lite2_512 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/RepVGG/nanodet_RepVGG_A0_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/RepVGG/nanodet_RepVGG_A0_416.yml index 8a0d8debeb..fa93e55896 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/RepVGG/nanodet_RepVGG_A0_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/RepVGG/nanodet_RepVGG_A0_416.yml @@ -1,4 +1,4 @@ -save_dir: ./workspace/RepVGG_A0_416 +save_dir: ./temp/RepVGG_A0_416 check_point_name: RepVGG_A0_416 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml index a8c312cd61..aa1986f0c3 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/Transformer/nanodet_t.yml @@ -6,7 +6,7 @@ # AP_m = 0.214 # AP_l = 0.364 -save_dir: ./workspace/nanodet_t +save_dir: ./temp/nanodet_t check_point_name: t model: arch: @@ 
-54,7 +54,7 @@ model: data: train: input_size: [320,320] #[w,h] - keep_ratio: True + keep_ratio: False pipeline: perspective: 0.0 scale: [0.6, 1.4] @@ -69,7 +69,8 @@ data: normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] val: input_size: [320,320] #[w,h] - keep_ratio: True + keep_ratio: False # feature_hw: [20,20]. Size for position embedding is hard coded and can not have various values, + # Please use images with standard ratio and change the value accordingly if you want to keep_ratio. pipeline: normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] device: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_g.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_g.yml index 0d09c335ab..8d2ae3cd91 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_g.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_g.yml @@ -3,7 +3,7 @@ # Flops = 4.2B # Params = 3.8M # COCO pre-trained weight link: https://drive.google.com/file/d/10uW7oqZKw231l_tr4C1bJWkbCXgBf7av/view?usp=sharing -save_dir: ./workspace/nanodet_g +save_dir: ./temp/nanodet_g check_point_name: g model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m.yml index 876168e7ad..7bd0d075ab 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m.yml @@ -1,5 +1,5 @@ #Config File example -save_dir: ./workspace/nanodet_m +save_dir: ./temp/nanodet_m check_point_name: m model: arch: diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_0.5x.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_0.5x.yml index 2a38388336..c067a1535f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_0.5x.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_0.5x.yml @@ -5,7 +5,7 @@ # AP_small = 0.036 # AP_m = 0.119 # AP_l = 0.232 -save_dir: ./workspace/nanodet_m_0.5x +save_dir: ./temp/nanodet_m_0.5x check_point_name: m_0.5x model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x.yml index a54268f70a..90c2c34d3b 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x.yml @@ -5,7 +5,7 @@ # AP_small = 0.069 # AP_m = 0.235 # AP_l = 0.389 -save_dir: ./workspace/nanodet_m_1.5x +save_dir: ./temp/nanodet_m_1.5x check_point_name: m_1.5x model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x_416.yml index b8274403b1..b6332a5aa1 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_1.5x_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.098 # AP_m = 0.277 # AP_l = 0.420 -save_dir: ./workspace/nanodet_m_1.5x_416 +save_dir: ./temp/nanodet_m_1.5x_416 check_point_name: m_1.5x_416 model: arch: 
diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_416.yml index eb30de1e0d..bd8b4e2907 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/legacy_v0.x_configs/nanodet_m_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.082 # AP_m = 0.240 # AP_l = 0.375 -save_dir: ./workspace/nanodet_m_416 +save_dir: ./temp/nanodet_m_416 check_point_name: m_416 model: arch: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_custom.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_custom.yml deleted file mode 100644 index bf58986a48..0000000000 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_custom.yml +++ /dev/null @@ -1,125 +0,0 @@ -# nanodet-plus-m-1.5x_416 -# COCO mAP(0.5:0.95) = 0.341 -# AP_50 = 0.506 -# AP_75 = 0.357 -# AP_small = 0.143 -# AP_m = 0.363 -# AP_l = 0.539 -save_dir: ./workspace/nanodet_plus_m_1.5x_416/test_training -check_point_name: plus_m_1.5x_416_default -model: - weight_averager: - name: ExpMovingAverager - decay: 0.9998 - arch: - name: NanoDetPlus - detach_epoch: 10 - backbone: - name: ShuffleNetV2 - model_size: 1.5x - out_stages: [2,3,4] - activation: LeakyReLU - fpn: - name: GhostPAN - in_channels: [176, 352, 704] - out_channels: 128 - kernel_size: 5 - num_extra_level: 1 - use_depthwise: True - activation: LeakyReLU - head: - name: NanoDetPlusHead - num_classes: 80 - input_channel: 128 - feat_channels: 128 - stacked_convs: 2 - kernel_size: 5 - strides: [8, 16, 32, 64] - activation: LeakyReLU - reg_max: 7 - norm_cfg: - type: BN - loss: - loss_qfl: - name: QualityFocalLoss - use_sigmoid: True - beta: 2.0 - loss_weight: 1.0 - loss_dfl: - name: DistributionFocalLoss - 
loss_weight: 0.25 - loss_bbox: - name: GIoULoss - loss_weight: 2.0 - # Auxiliary head, only use in training time. - aux_head: - name: SimpleConvHead - num_classes: 80 - input_channel: 256 - feat_channels: 256 - stacked_convs: 4 - strides: [8, 16, 32, 64] - activation: LeakyReLU - reg_max: 7 -data: - train: - input_size: [416,416] #[w,h] - keep_ratio: False - pipeline: - perspective: 0.0 - scale: [0.6, 1.4] - stretch: [[0.8, 1.2], [0.8, 1.2]] - rotation: 0 - shear: 0 - translate: 0.2 - flip: 0.5 - brightness: 0.2 - contrast: [0.6, 1.4] - saturation: [0.5, 1.2] - normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] - val: - input_size: [416,416] #[w,h] - keep_ratio: False - pipeline: - normalize: [[103.53, 116.28, 123.675], [57.375, 57.12, 58.395]] -device: - gpu_ids: [0] - workers_per_gpu: 10 - batchsize_per_gpu: 12 #96 -schedule: - resume: 0 - optimizer: - name: AdamW - lr: 0.000125 - weight_decay: 0.05 - warmup: - name: linear - steps: 500 - ratio: 0.0001 - total_epochs: 300 - lr_schedule: - name: CosineAnnealingLR - T_max: 300 - eta_min: 0.00005 - val_intervals: 10 -grad_clip: 35 -evaluator: - name: CocoDetectionEvaluator - save_key: mAP -log: - interval: 50 - -class_names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', - 'train', 'truck', 'boat', 'traffic_light', 'fire_hydrant', - 'stop_sign', 'parking_meter', 'bench', 'bird', 'cat', 'dog', - 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports_ball', 'kite', 'baseball_bat', - 'baseball_glove', 'skateboard', 'surfboard', 'tennis_racket', - 'bottle', 'wine_glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', - 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', - 'hot_dog', 'pizza', 'donut', 'cake', 'chair', 'couch', - 'potted_plant', 'bed', 'dining_table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell_phone', 'microwave', - 'oven', 'toaster', 'sink', 
'refrigerator', 'book', 'clock', - 'vase', 'scissors', 'teddy_bear', 'hair_drier', 'toothbrush'] diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_320.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_320.yml index 3dcd1a2973..c2a4a8bdc1 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_320.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_320.yml @@ -5,7 +5,7 @@ # AP_small = 0.102 # AP_m = 0.309 # AP_l = 0.493 -save_dir: ./workspace/nanodet_plus_m_1.5x_320 +save_dir: ./temp/nanodet_plus_m_1.5x_320 check_point_name: plus_m_1.5x_320 model: weight_averager: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_416.yml index 5a76789b50..f999d0c985 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_1.5x_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.143 # AP_m = 0.363 # AP_l = 0.539 -save_dir: ./workspace/nanodet_plus_m_1.5x_416 +save_dir: ./temp/nanodet_plus_m_1.5x_416 check_point_name: plus_m_1.5x_416 model: weight_averager: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_320.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_320.yml index e4b5f58f9c..ee4b5235bc 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_320.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_320.yml @@ -5,7 +5,7 @@ # AP_small = 0.083 # AP_m = 0.278 # AP_l = 0.451 -save_dir: ./workspace/nanodet_plus_m_320 +save_dir: ./temp/nanodet_plus_m_320 check_point_name: 
plus_m_320 model: weight_averager: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_416.yml b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_416.yml index 61a536ad7d..cd8ea9186b 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_416.yml +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/config/nanodet_plus_m_416.yml @@ -5,7 +5,7 @@ # AP_small = 0.106 # AP_m = 0.322 # AP_l = 0.477 -save_dir: ./workspace/nanodet_plus_m_416 +save_dir: ./temp/nanodet_plus_m_416 check_point_name: plus_m_416 model: weight_averager: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/batch_process.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/batch_process.py index f84170a275..4b0c910d0f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/batch_process.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/batch_process.py @@ -27,11 +27,38 @@ def stack_batch_img( img_widths.append(img.shape[-1]) max_h, max_w = max(img_heights), max(img_widths) if divisible > 0: - max_h = (max_h + divisible - 1) // divisible * divisible - max_w = (max_w + divisible - 1) // divisible * divisible + max_h = torch.div((max_h + divisible - 1), divisible, rounding_mode='trunc') * divisible + max_w = torch.div((max_w + divisible - 1), divisible, rounding_mode='trunc') * divisible batch_imgs = [] for img in img_tensors: padding_size = [0, max_w - img.shape[-1], 0, max_h - img.shape[-2]] batch_imgs.append(F.pad(img, padding_size, value=pad_value)) return torch.stack(batch_imgs, dim=0).contiguous() + + +def divisible_padding( + img_tensor: torch.Tensor, divisible: torch.Tensor = torch.tensor(0), pad_value: float = 0.0 +) -> torch.Tensor: + """ + Helper function which uses only pytorch api for scripting and tracing. 
+ Args: + img_tensor (torch.Tensor): + divisible (int): + pad_value (float): value to pad + + Returns: + torch.Tensor. + """ + assert divisible >= 0 + + img_heights = img_tensor.shape[-2] + img_widths = img_tensor.shape[-1] + + if divisible > 0: + img_heights = torch.div((img_heights + divisible - 1), divisible, rounding_mode='trunc') * divisible + img_widths = torch.div((img_widths + divisible - 1), divisible, rounding_mode='trunc') * divisible + + padding_size = [0, img_widths - img_tensor.shape[-1], 0, img_heights - img_tensor.shape[-2]] + batch_img = F.pad(img_tensor, padding_size, value=pad_value) + return batch_img.unsqueeze(0) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py index b68b60e389..6c40da7117 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/dataset/__init__.py @@ -17,7 +17,7 @@ import copy from opendr.engine.datasets import ExternalDataset - +from opendr.perception.object_detection_2d.datasets import XMLBasedDataset from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.dataset.coco import CocoDataset from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.dataset.xml_dataset import XMLDataset @@ -54,5 +54,9 @@ def build_dataset(cfg, dataset, class_names, mode, verbose=True): if verbose: print("ExternalDataset loaded.") return dataset + elif isinstance(dataset, XMLBasedDataset): + dataset = XMLDataset(img_path=dataset.abs_images_dir, ann_path=dataset.abs_annot_dir, mode=mode, + class_names=dataset.classes, **dataset_cfg) + return dataset else: raise ValueError("Dataset type {} not supported".format(type(dataset))) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/pipeline.py 
b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/pipeline.py index 24acdb1880..2b9cfe6d32 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/pipeline.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/pipeline.py @@ -13,30 +13,12 @@ # limitations under the License. import functools -import warnings from typing import Dict, Tuple from torch.utils.data import Dataset from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.color import color_aug_and_norm -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp import ShapeTransform, warp_and_resize - - -class LegacyPipeline: - def __init__(self, cfg, keep_ratio): - warnings.warn( - "Deprecated warning! Pipeline from nanodet v0.x has been deprecated," - "Please use new Pipeline and update your config!" - ) - self.warp = functools.partial( - warp_and_resize, warp_kwargs=cfg, keep_ratio=keep_ratio - ) - self.color = functools.partial(color_aug_and_norm, kwargs=cfg) - - def __call__(self, meta, dst_shape): - meta = self.warp(meta, dst_shape=dst_shape) - meta = self.color(meta=meta) - return meta +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp import ShapeTransform class Pipeline: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/warp.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/warp.py index 6ffd1b66d3..29fa91d242 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/warp.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/data/transform/warp.py @@ -14,10 +14,11 @@ import math import random -from typing import Dict, Optional, Tuple +from typing import Tuple import cv2 import numpy as np +import torch def get_flip_matrix(prob=0.5): @@ -93,7 +94,8 @@ 
def get_shear_matrix(degree): def get_translate_matrix(translate, width, height): """ - + :param width: + :param height: :param translate: :return: """ @@ -136,60 +138,31 @@ def get_resize_matrix(raw_shape, dst_shape, keep_ratio): return Rs -def warp_and_resize( - meta: Dict, - warp_kwargs: Dict, - dst_shape: Tuple[int, int], - keep_ratio: bool = True, -): - # TODO: background, type - raw_img = meta["img"] - height = raw_img.shape[0] # shape(h,w,c) - width = raw_img.shape[1] - - # center - C = np.eye(3) - C[0, 2] = -width / 2 - C[1, 2] = -height / 2 - - # do not change the order of mat mul - if "perspective" in warp_kwargs and random.randint(0, 1): - P = get_perspective_matrix(warp_kwargs["perspective"]) - C = P @ C - if "scale" in warp_kwargs and random.randint(0, 1): - Scl = get_scale_matrix(warp_kwargs["scale"]) - C = Scl @ C - if "stretch" in warp_kwargs and random.randint(0, 1): - Str = get_stretch_matrix(*warp_kwargs["stretch"]) - C = Str @ C - if "rotation" in warp_kwargs and random.randint(0, 1): - R = get_rotation_matrix(warp_kwargs["rotation"]) - C = R @ C - if "shear" in warp_kwargs and random.randint(0, 1): - Sh = get_shear_matrix(warp_kwargs["shear"]) - C = Sh @ C - if "flip" in warp_kwargs: - F = get_flip_matrix(warp_kwargs["flip"]) - C = F @ C - if "translate" in warp_kwargs and random.randint(0, 1): - T = get_translate_matrix(warp_kwargs["translate"], width, height) +def scriptable_warp_boxes(boxes, M, width, height): + """ + Warp boxes function that uses pytorch api, so it can be used with scripting and tracing for optimization. 
+ """ + n = boxes.shape[0] + if n: + # warp points + xy = torch.ones((n * 4, 3), dtype=torch.float32) + xy[:, :2] = boxes[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape( + n * 4, 2 + ) # x1y1, x2y2, x1y2, x2y1 + M = torch.transpose(M, 0, 1).float() + xy = torch.mm(xy, M) # transform + xy = (xy[:, :2] / xy[:, 2:3]).reshape(n, 8) # rescale + # create new boxes + x = xy[:, [0, 2, 4, 6]] + y = xy[:, [1, 3, 5, 7]] + xy = torch.cat((x.min(1).values, y.min(1).values, x.max(1).values, y.max(1).values)).reshape(4, n) + xy = torch.transpose(xy, 0, 1).float() + # clip boxes + xy[:, [0, 2]] = xy[:, [0, 2]].clip(0, width) + xy[:, [1, 3]] = xy[:, [1, 3]].clip(0, height) + return xy else: - T = get_translate_matrix(0, width, height) - M = T @ C - # M = T @ Sh @ R @ Str @ P @ C - ResizeM = get_resize_matrix((width, height), dst_shape, keep_ratio) - M = ResizeM @ M - img = cv2.warpPerspective(raw_img, M, dsize=tuple(dst_shape)) - meta["img"] = img - meta["warp_matrix"] = M - if "gt_bboxes" in meta: - boxes = meta["gt_bboxes"] - meta["gt_bboxes"] = warp_boxes(boxes, M, dst_shape[0], dst_shape[1]) - if "gt_masks" in meta: - for i, mask in enumerate(meta["gt_masks"]): - meta["gt_masks"][i] = cv2.warpPerspective(mask, M, dsize=tuple(dst_shape)) - - return meta + return boxes def warp_boxes(boxes, M, width, height): @@ -217,7 +190,7 @@ def warp_boxes(boxes, M, width, height): def get_minimum_dst_shape( src_shape: Tuple[int, int], dst_shape: Tuple[int, int], - divisible: Optional[int] = None, + divisible: int = 0, ) -> Tuple[int, int]: """Calculate minimum dst shape""" src_w, src_h = src_shape diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py index c408d996a6..797a5e7cbb 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py +++ 
b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/evaluator/coco_detection.py @@ -26,6 +26,7 @@ from pycocotools.cocoeval import COCOeval from tabulate import tabulate +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util import mkdir logger = logging.getLogger("NanoDet") @@ -74,7 +75,7 @@ def results2json(self, results): json_results.append(detection) return json_results - def evaluate(self, results, save_dir): # rank=-1 + def evaluate(self, results, save_dir, rank=-1): results_json = self.results2json(results) if len(results_json) == 0: warnings.warn( @@ -87,9 +88,15 @@ def evaluate(self, results, save_dir): # rank=-1 for key in self.metric_names: empty_eval_results[key] = 0 return empty_eval_results - # json_path = os.path.join(save_dir, "results{}.json".format(rank)) - json_path = os.path.join(save_dir, "results.json") - json.dump(results_json, open(json_path, "w")) + if rank > 0: + json_path = os.path.join(save_dir, "results{}.json".format(rank)) + else: + json_path = os.path.join(save_dir, "results.json") + + mkdir(rank, save_dir) + with open(json_path, 'w') as f: + json.dump(results_json, f) + coco_dets = self.coco_api.loadRes(json_path) coco_eval = COCOeval( copy.deepcopy(self.coco_api), copy.deepcopy(coco_dets), "bbox" diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py index b20b891d58..c4683d8543 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/inferencer/utilities.py @@ -14,23 +14,20 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import os import torch +import torch.nn as nn -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.batch_process import stack_batch_img -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.collate import naive_collate +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.batch_process import divisible_padding from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform import Pipeline from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.arch import build_model -image_ext = [".jpg", ".jpeg", ".webp", ".bmp", ".png"] -video_ext = ["mp4", "mov", "avi", "mkv"] - -class Predictor(object): - def __init__(self, cfg, model, device="cuda"): +class Predictor(nn.Module): + def __init__(self, cfg, model, device="cuda", nms_max_num=100): + super(Predictor, self).__init__() self.cfg = cfg self.device = device - + self.nms_max_num = nms_max_num if self.cfg.model.arch.backbone.name == "RepVGG": deploy_config = self.cfg.model deploy_config.arch.backbone.update({"deploy": True}) @@ -41,9 +38,33 @@ def __init__(self, cfg, model, device="cuda"): self.model = model.to(device).eval() + for para in self.model.parameters(): + para.requires_grad = False + self.pipeline = Pipeline(self.cfg.data.val.pipeline, self.cfg.data.val.keep_ratio) + self.traced_model = None + + def trace_model(self, dummy_input): + self.traced_model = torch.jit.trace(self, dummy_input) + return True + + def script_model(self, img, height, width, warp_matrix): + preds = self.traced_model(img, height, width, warp_matrix) + scripted_model = self.postprocessing(preds, img, height, width, warp_matrix) + return scripted_model - def inference(self, img, verbose=True): + def forward(self, img, height=torch.tensor(0), width=torch.tensor(0), warp_matrix=torch.tensor(0)): + if torch.jit.is_scripting(): + return self.script_model(img, height, width, warp_matrix) + # In tracing (Jit and Onnx optimizations) we must first 
run the pipeline before the graf, + # cv2 is needed, and it is installed with abi cxx11 but torch is in cxx<11 + meta = {"img": img} + meta["img"] = divisible_padding(meta["img"], divisible=torch.tensor(32)) + with torch.no_grad(): + results = self.model.inference(meta) + return results + + def preprocessing(self, img): img_info = {"id": 0} height, width = img.shape[:2] img_info["height"] = height @@ -51,19 +72,16 @@ def inference(self, img, verbose=True): meta = dict(img_info=img_info, raw_img=img, img=img) meta = self.pipeline(None, meta, self.cfg.data.val.input_size) meta["img"] = torch.from_numpy(meta["img"].transpose(2, 0, 1)).to(self.device) - meta = naive_collate([meta]) - meta["img"] = stack_batch_img(meta["img"], divisible=32) - with torch.no_grad(): - results = self.model.inference(meta, verbose) - return meta, results + _input = meta["img"] + _height = torch.tensor(height) + _width = torch.tensor(width) + _warp_matrix = torch.from_numpy(meta["warp_matrix"]) + + return _input, _height, _width, _warp_matrix -def get_image_list(path): - image_names = [] - for maindir, subdir, file_name_list in os.walk(path): - for filename in file_name_list: - apath = os.path.join(maindir, filename) - ext = os.path.splitext(apath)[1] - if ext in image_ext: - image_names.append(apath) - return image_names + def postprocessing(self, preds, input, height, width, warp_matrix): + meta = {"height": height, "width": width, 'img': input, 'warp_matrix': warp_matrix} + meta["img"] = divisible_padding(meta["img"], divisible=torch.tensor(32)) + res = self.model.head.post_process(preds, meta, nms_max_num=self.nms_max_num) + return res diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py index 518c0af01b..a400ecff29 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py +++ 
b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/nanodet_plus.py @@ -42,15 +42,15 @@ def forward_train(self, gt_meta): fpn_feat = self.fpn(feat) if self.epoch >= self.detach_epoch: aux_fpn_feat = self.aux_fpn([f.detach() for f in feat]) - dual_fpn_feat = ( + dual_fpn_feat = [ torch.cat([f.detach(), aux_f], dim=1) for f, aux_f in zip(fpn_feat, aux_fpn_feat) - ) + ] else: aux_fpn_feat = self.aux_fpn(feat) - dual_fpn_feat = ( + dual_fpn_feat = [ torch.cat([f, aux_f], dim=1) for f, aux_f in zip(fpn_feat, aux_fpn_feat) - ) + ] head_out = self.head(fpn_feat) aux_head_out = self.aux_head(dual_fpn_feat) loss, loss_states = self.head.loss(head_out, gt_meta, aux_preds=aux_head_out) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py index e1ce7a650e..425a0a6154 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/arch/one_stage_detector.py @@ -15,6 +15,7 @@ import torch import torch.nn as nn +from typing import Dict from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.backbone import build_backbone from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.fpn import build_fpn from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.head import build_head @@ -43,11 +44,10 @@ def forward(self, x): x = self.head(x) return x - def inference(self, meta, verbose=True): + def inference(self, meta: Dict[str, torch.Tensor]): with torch.no_grad(): preds = self(meta["img"]) - results = self.head.post_process(preds, meta) - return results + return preds def forward_train(self, gt_meta): preds = self(gt_meta["img"]) diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/custom_csp.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/custom_csp.py index 17cd08402e..ef8c1cd368 100755 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/custom_csp.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/custom_csp.py @@ -52,6 +52,7 @@ def __init__( activation=activation, ) + @torch.jit.unused def forward(self, x): x = self.in_conv(x) x1 = self.mid_conv(x) @@ -96,6 +97,7 @@ def __init__( activation=activation, ) + @torch.jit.unused def forward(self, x): x = self.in_conv(x) x1 = self.res_blocks(x) @@ -145,13 +147,14 @@ def __init__( self.stages.append(stage) self._init_weight() + @torch.jit.unused def forward(self, x): output = [] for i, stage in enumerate(self.stages): x = stage(x) if i in self.out_stages: output.append(x) - return tuple(output) + return output def _init_weight(self): for m in self.modules(): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/efficientnet_lite.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/efficientnet_lite.py index 9cd6e41baf..6d2f6d4d55 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/efficientnet_lite.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/efficientnet_lite.py @@ -123,7 +123,8 @@ def __init__( ) self._relu = act_layers(activation) - def forward(self, x, drop_connect_rate=None): + @torch.jit.unused + def forward(self, x, drop_connect_rate: bool = None): """ :param x: input tensor :param drop_connect_rate: drop connect rate (float, between 0 and 1) @@ -148,7 +149,7 @@ def forward(self, x, drop_connect_rate=None): if self.id_skip and self.stride == 1 and self.input_filters == self.output_filters: if drop_connect_rate: x = 
drop_connect(x, drop_connect_rate, training=self.training) - x += identity # skip connection + x = x + identity # skip connection return x @@ -246,6 +247,7 @@ def __init__( self.blocks.append(stage) self._initialize_weights(pretrain) + @torch.jit.unused def forward(self, x): x = self.stem(x) output = [] diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/ghostnet.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/ghostnet.py index 2e8f59f001..923c8acb27 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/ghostnet.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/ghostnet.py @@ -10,7 +10,6 @@ """ import logging import math -import warnings import torch import torch.nn as nn @@ -41,7 +40,7 @@ def _make_divisible(v, divisor, min_value=None): new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) # Make sure that round down does not go down by more than 10%. if new_v < 0.9 * v: - new_v += divisor + new_v = new_v + divisor return new_v @@ -195,6 +194,7 @@ def __init__( nn.BatchNorm2d(out_chs), ) + @torch.jit.unused def forward(self, x): residual = x @@ -213,7 +213,7 @@ def forward(self, x): # 2nd ghost bottleneck x = self.ghost2(x) - x += self.shortcut(residual) + x = x + self.shortcut(residual) return x @@ -224,7 +224,6 @@ def __init__( out_stages=(4, 6, 9), activation="ReLU", pretrain=True, - act=None, ): super(GhostNet, self).__init__() assert set(out_stages).issubset(i for i in range(10)) @@ -262,11 +261,6 @@ def __init__( # ------conv+bn+act----------# 9 1/32 self.activation = activation - if act is not None: - warnings.warn( - "Warning! act argument has been deprecated, " "use activation instead!" 
- ) - self.activation = act # building first layer output_channel = _make_divisible(16 * width_mult, 4) @@ -317,7 +311,7 @@ def forward(self, x): x = self.blocks[i](x) if i in self.out_stages: output.append(x) - return tuple(output) + return output def _initialize_weights(self, pretrain=True): print("init weights...") diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/mobilenetv2.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/mobilenetv2.py index 19fcae379e..a08f4abb38 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/mobilenetv2.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/mobilenetv2.py @@ -1,7 +1,6 @@ from __future__ import absolute_import, division, print_function -import warnings - +import torch.jit import torch.nn as nn from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.activation import act_layers @@ -79,7 +78,6 @@ def __init__( out_stages=(1, 2, 4, 6), last_channel=1280, activation="ReLU", - act=None, ): super(MobileNetV2, self).__init__() # TODO: support load torchvison pretrained weight @@ -89,11 +87,6 @@ def __init__( input_channel = 32 self.last_channel = last_channel self.activation = activation - if act is not None: - warnings.warn( - "Warning! act argument has been deprecated, " "use activation instead!" 
- ) - self.activation = act self.interverted_residual_setting = [ # t, c, n, s [1, 16, 1, 1], @@ -154,6 +147,7 @@ def build_mobilenet_stage(self, stage_num): stage = nn.Sequential(*stage) return stage + @torch.jit.unused def forward(self, x): x = self.first_layer(x) output = [] @@ -162,8 +156,7 @@ def forward(self, x): x = stage(x) if i in self.out_stages: output.append(x) - - return tuple(output) + return output def _initialize_weights(self): for m in self.modules(): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/repvgg.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/repvgg.py index fa30508f13..c6c090276f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/repvgg.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/repvgg.py @@ -176,6 +176,7 @@ def _make_stage(self, planes, num_blocks, stride): self.cur_layer_idx += 1 return nn.Sequential(*blocks) + @torch.jit.unused def forward(self, x): x = self.stage0(x) output = [] @@ -184,7 +185,7 @@ def forward(self, x): x = stage(x) if i in self.out_stages: output.append(x) - return tuple(output) + return output def repvgg_model_convert(model, deploy_model, save_path=None): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/resnet.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/resnet.py index cbd84f7546..d4cdacb0b7 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/resnet.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/resnet.py @@ -1,5 +1,6 @@ from __future__ import absolute_import, division, print_function +import torch.jit import torch.nn as nn import torch.utils.model_zoo as model_zoo @@ -99,8 +100,6 @@ def fill_fc_weights(layers): for m in layers.modules(): if isinstance(m, 
nn.Conv2d): nn.init.normal_(m.weight, std=0.001) - # torch.nn.init.kaiming_normal_(m.weight.data, nonlinearity='relu') - # torch.nn.init.xavier_normal_(m.weight.data) if m.bias is not None: nn.init.constant_(m.bias, 0) @@ -161,6 +160,7 @@ def _make_layer(self, block, planes, blocks, stride=1): return nn.Sequential(*layers) + @torch.jit.unused def forward(self, x): x = self.conv1(x) x = self.bn1(x) @@ -173,7 +173,7 @@ def forward(self, x): if i in self.out_stages: output.append(x) - return tuple(output) + return output def init_weights(self, pretrain=True): if pretrain: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/shufflenetv2.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/shufflenetv2.py index 013f22a8c1..75a322f179 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/shufflenetv2.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/backbone/shufflenetv2.py @@ -14,11 +14,11 @@ def channel_shuffle(x, groups): # type: (torch.Tensor, int) -> torch.Tensor - batchsize, num_channels, height, width = x.data.size() - channels_per_group = num_channels // groups + batchsize, num_channels, height, width = x.size() + channels_per_group = int(num_channels/groups) # reshape - x = x.view(batchsize, groups, channels_per_group, height, width) + x = x.view([batchsize, groups, channels_per_group, height, width]) x = torch.transpose(x, 1, 2).contiguous() @@ -173,6 +173,7 @@ def __init__( self.stage4.add_module("conv5", conv5) self._initialize_weights(pretrain) + @torch.jit.unused def forward(self, x): x = self.conv1(x) x = self.maxpool(x) @@ -182,7 +183,7 @@ def forward(self, x): x = stage(x) if i in self.out_stages: output.append(x) - return tuple(output) + return output def _initialize_weights(self, pretrain=True): print("init weights...") diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/fpn.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/fpn.py index 4549c7409e..f373f9f5d6 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/fpn.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/fpn.py @@ -1,6 +1,6 @@ # Modification 2020 RangiLyu # Copyright 2018-2019 Open-MMLab. - +import torch.jit # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,6 +15,8 @@ import torch.nn as nn import torch.nn.functional as F +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.init_weights import xavier_init @@ -72,7 +74,8 @@ def init_weights(self): if isinstance(m, nn.Conv2d): xavier_init(m, distribution="uniform") - def forward(self, inputs): + @torch.jit.unused + def forward(self, inputs: List[Tensor]): assert len(inputs) == len(self.in_channels) # build laterals @@ -84,17 +87,10 @@ def forward(self, inputs): # build top-down path used_backbone_levels = len(laterals) for i in range(used_backbone_levels - 1, 0, -1): - laterals[i - 1] += F.interpolate( + laterals[i - 1] = laterals[i - 1] + F.interpolate( laterals[i], scale_factor=2, mode="bilinear" ) # build outputs - outs = [ - # self.fpn_convs[i](laterals[i]) for i in range(used_backbone_levels) - laterals[i] - for i in range(used_backbone_levels) - ] - return tuple(outs) - - -# if __name__ == '__main__': + outs = [laterals[i] for i in range(used_backbone_levels)] + return outs diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/ghost_pan.py 
b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/ghost_pan.py index 76e043179c..cf03e3fb4e 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/ghost_pan.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/ghost_pan.py @@ -13,6 +13,8 @@ # limitations under the License. import torch import torch.nn as nn +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.backbone.ghostnet import GhostBottleneck from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule, DepthwiseConvModule @@ -65,6 +67,7 @@ def __init__( ) self.blocks = nn.Sequential(*blocks) + @torch.jit.unused def forward(self, x): out = self.blocks(x) if self.use_res: @@ -118,7 +121,7 @@ def __init__( conv = DepthwiseConvModule if use_depthwise else ConvModule # build top-down blocks - self.upsample = nn.Upsample(**upsample_cfg) + self.upsample = nn.Upsample(**upsample_cfg, align_corners=False) self.reduce_layers = nn.ModuleList() for idx in range(len(in_channels)): self.reduce_layers.append( @@ -198,12 +201,13 @@ def __init__( ) ) - def forward(self, inputs): + @torch.jit.unused + def forward(self, inputs: List[Tensor]): """ Args: - inputs (tuple[Tensor]): input features. + inputs (List[Tensor]): input features. Returns: - tuple[Tensor]: multi level features. + List[Tensor]: multi level features. 
""" assert len(inputs) == len(self.in_channels) inputs = [ @@ -241,4 +245,4 @@ def forward(self, inputs): ): outs.append(extra_in_layer(inputs[-1]) + extra_out_layer(outs[-1])) - return tuple(outs) + return outs diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/pan.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/pan.py index c12482f294..8bb2114b76 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/pan.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/pan.py @@ -13,7 +13,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +import torch.jit import torch.nn.functional as F +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.fpn.fpn import FPN @@ -61,7 +64,8 @@ def __init__( ) self.init_weights() - def forward(self, inputs): + @torch.jit.unused + def forward(self, inputs: List[Tensor]): """Forward function.""" assert len(inputs) == len(self.in_channels) @@ -74,8 +78,8 @@ def forward(self, inputs): # build top-down path used_backbone_levels = len(laterals) for i in range(used_backbone_levels - 1, 0, -1): - laterals[i - 1] += F.interpolate( - laterals[i], scale_factor=2, mode="bilinear" + laterals[i - 1] = laterals[i - 1] + F.interpolate( + laterals[i], scale_factor=2.0, mode="bilinear" ) # build outputs @@ -84,11 +88,10 @@ def forward(self, inputs): # part 2: add bottom-up path for i in range(0, used_backbone_levels - 1): - inter_outs[i + 1] += F.interpolate( + inter_outs[i + 1] = inter_outs[i + 1] + F.interpolate( inter_outs[i], scale_factor=0.5, mode="bilinear" ) - outs = [] - outs.append(inter_outs[0]) + outs = [inter_outs[0]] outs.extend([inter_outs[i] for i in range(1, used_backbone_levels)]) - return tuple(outs) + return outs diff --git 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/tan.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/tan.py index 42efd128b9..b079dde44f 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/tan.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/fpn/tan.py @@ -15,6 +15,8 @@ import torch import torch.nn as nn import torch.nn.functional as F +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.init_weights import normal_init @@ -92,7 +94,8 @@ def init_weights(self): elif isinstance(m, nn.Conv2d): normal_init(m, 0.01) - def forward(self, inputs): + @torch.jit.unused + def forward(self, inputs: List[Tensor]): assert len(inputs) == len(self.in_channels) # build laterals @@ -118,4 +121,4 @@ def forward(self, inputs): laterals[1] + mid_lvl, laterals[2] + F.interpolate(mid_lvl, size=laterals[2].shape[2:], mode="bilinear"), ] - return tuple(outs) + return outs diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py index e26e083b37..79e215ce0e 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/gfl_head.py @@ -5,6 +5,8 @@ import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F +from torch import Tensor +from typing import List, Dict, Tuple from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util import ( bbox2distance, @@ -13,7 +15,8 @@ multi_apply, ) -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp 
import warp_boxes +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp import warp_boxes,\ + scriptable_warp_boxes from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.loss.gfocal_loss\ import DistributionFocalLoss, QualityFocalLoss from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.loss.iou_loss import GIoULoss, bbox_overlaps @@ -62,6 +65,11 @@ def forward(self, x): offsets from the box center in four directions, shape (N, 4). """ shape = x.size() + if torch.jit.is_scripting(): + x = F.softmax(x.reshape(shape[0], shape[1], 4, self.reg_max + 1), dim=-1) + x = F.linear(x, self.project.type_as(x)).reshape(shape[0], shape[1], 4) + return x + x = F.softmax(x.reshape(*shape[:-1], 4, self.reg_max + 1), dim=-1) x = F.linear(x, self.project.type_as(x)).reshape(*shape[:-1], 4) return x @@ -185,13 +193,11 @@ def init_weights(self): normal_init(self.gfl_cls, std=0.01, bias=bias_cls) normal_init(self.gfl_reg, std=0.01) - def forward(self, feats): - if torch.onnx.is_in_onnx_export(): - return self._forward_onnx(feats) + def forward(self, feats: List[Tensor]): outputs = [] - for x, scale in zip(feats, self.scales): - cls_feat = x - reg_feat = x + for idx, scale in enumerate(self.scales): + cls_feat = feats[idx] + reg_feat = feats[idx] for cls_conv in self.cls_convs: cls_feat = cls_conv(cls_feat) for reg_conv in self.reg_convs: @@ -371,7 +377,8 @@ def target_assign( ): """ Assign target for a batch of images. 
- :param batch_size: num of images in one batch + :param cls_preds: predictions of class in images in one batch + :param reg_preds: predictions of bbox in images in one batch :param featmap_sizes: A list of all grid cell boxes in all image :param gt_bboxes_list: A list of ground truth boxes in all image :param gt_bboxes_ignore_list: A list of all ignored boxes in all image @@ -405,8 +412,6 @@ def target_assign( if gt_labels_list is None: gt_labels_list = [None for _ in range(batch_size)] # target assign on all images, get list of tensors - # list length = batch size - # tensor first dim = num of all grid cell ( all_grid_cells, all_labels, @@ -449,6 +454,7 @@ def target_assign( num_total_neg, ) + @torch.no_grad() def target_assign_single_img( self, grid_cells, num_level_cells, gt_bboxes, gt_bboxes_ignore, gt_labels ): @@ -480,7 +486,7 @@ def target_assign_single_img( label_weights = grid_cells.new_zeros(num_cells, dtype=torch.float) if len(pos_inds) > 0: - pos_bbox_targets = pos_gt_bboxes + pos_bbox_targets = pos_gt_bboxes.float() bbox_targets[pos_inds, :] = pos_bbox_targets bbox_weights[pos_inds, :] = 1.0 if gt_labels is None: @@ -505,6 +511,7 @@ def target_assign_single_img( ) def sample(self, assign_result, gt_bboxes): + """Sample positive and negative bboxes.""" pos_inds = ( torch.nonzero(assign_result.gt_inds > 0, as_tuple=False) .squeeze(-1) @@ -527,11 +534,63 @@ def sample(self, assign_result, gt_bboxes): pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds - def post_process(self, preds, meta): + def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer", nms_max_num: int = 100): + """Prediction results postprocessing. Decode bboxes and rescale + to original image size. + Args: + preds (Tensor): Prediction output. + meta (dict): Meta info. + mode (str): Determines if it uses batches and numpy or tensors for scripting. 
+ nms_max_num (int): Determines the maximum number of bounding boxes that will be retained following the nms. + """ + if mode == "eval" and not torch.jit.is_scripting(): + # Inference do not use batches and tries to have + # tensors exclusively for better optimization during scripting. + return self._eval_post_process(preds, meta) + + cls_scores, bbox_preds = preds.split( + [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 + ) + results = self.get_bboxes(cls_scores, bbox_preds, meta["img"], nms_max_num=nms_max_num) + (det_bboxes, det_labels) = results + + det_bboxes[:, :4] = scriptable_warp_boxes( + det_bboxes[:, :4], + torch.linalg.inv(meta["warp_matrix"]), meta["width"], meta["height"] + ) + + # constant output of model every time for tracing + if torch.jit.is_scripting(): + max_count = nms_max_num + else: + _, frequencies = torch.unique(det_labels, return_counts=True) + max_count = frequencies[torch.argmax(frequencies)].item() + + det_result = torch.zeros((self.num_classes, max_count, 5)) + for i in range(self.num_classes): + inds = det_labels == i + det = torch.cat(( + det_bboxes[inds, :4], + det_bboxes[inds, 4:5] + ), + dim=1 + ) + + pad = det.new_zeros((max_count - det.size(0), 5)) + det = torch.cat([det, pad], dim=0) + det_result[i] = det + return det_result + + def most_common_tensor(self, tensor): + _, frequencies = torch.unique(tensor, return_counts=True) + max_count = frequencies[torch.argmax(frequencies)].item() + return max_count + + def _eval_post_process(self, preds, meta): cls_scores, bbox_preds = preds.split( [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 ) - result_list = self.get_bboxes(cls_scores, bbox_preds, meta) + result_list = self.get_bboxes(cls_scores, bbox_preds, meta["img"], mode="eval") det_results = {} warp_matrixes = ( meta["warp_matrix"] @@ -576,67 +635,106 @@ def post_process(self, preds, meta): det_results[img_id] = det_result return det_results - def get_bboxes(self, cls_preds, reg_preds, img_metas): + def get_bboxes(self, 
cls_preds, reg_preds, input_img, mode: str = "infer", nms_max_num: int = 100): """Decode the outputs to bboxes. Args: cls_preds (Tensor): Shape (num_imgs, num_points, num_classes). reg_preds (Tensor): Shape (num_imgs, num_points, 4 * (regmax + 1)). - img_metas (dict): Dict of image info. - + input_img (Tensor): Input image to net. + mode (str): Determines if it uses batches and numpy or tensors for scripting. + nms_max_num (int): Determines the maximum number of bounding boxes that will be retained following the nms. Returns: results_list (list[tuple]): List of detection bboxes and labels. """ device = cls_preds.device b = cls_preds.shape[0] - input_height, input_width = img_metas["img"].shape[2:] + input_height, input_width = input_img.shape[2:] input_shape = (input_height, input_width) featmap_sizes = [ - (math.ceil(input_height / stride), math.ceil(input_width) / stride) + (int(math.ceil(input_height / stride)), int(math.ceil(input_width / stride))) for stride in self.strides ] # get grid cells of one image mlvl_center_priors = [] for i, stride in enumerate(self.strides): - y, x = self.get_single_level_center_point( - featmap_sizes[i], stride, torch.float32, device + proiors = self.get_single_level_center_priors( + b, featmap_sizes[i], stride, torch.float32, device ) - strides = x.new_full((x.shape[0],), stride) - proiors = torch.stack([x, y, strides, strides], dim=-1) - mlvl_center_priors.append(proiors.unsqueeze(0).repeat(b, 1, 1)) + mlvl_center_priors.append(proiors) center_priors = torch.cat(mlvl_center_priors, dim=1) dis_preds = self.distribution_project(reg_preds) * center_priors[..., 2, None] bboxes = distance2bbox(center_priors[..., :2], dis_preds, max_shape=input_shape) - scores = cls_preds.sigmoid() + cls_preds = cls_preds.sigmoid() + # add a dummy background class at the end of all labels + if torch.jit.is_scripting() or mode == "infer": + # for faster inference and jit scripting in most common cases we do not try to go through for statement + score, 
bbox = cls_preds[0], bboxes[0] + padding = score.new_zeros(score.shape[0], 1) + score = torch.cat([score, padding], dim=1) + + return multiclass_nms(bbox, score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), max_num=nms_max_num) + result_list = [] for i in range(b): # add a dummy background class at the end of all labels # same with mmdetection2.0 - score, bbox = scores[i], bboxes[i] + score, bbox = cls_preds[i], bboxes[i] padding = score.new_zeros(score.shape[0], 1) score = torch.cat([score, padding], dim=1) results = multiclass_nms( bbox, score, score_thr=0.05, - nms_cfg=dict(type="nms", iou_threshold=0.6), - max_num=100, + nms_cfg=dict(iou_threshold=0.6), + max_num=nms_max_num, ) result_list.append(results) return result_list - def get_single_level_center_point( - self, featmap_size, stride, dtype, device, flatten=True + def get_single_level_center_priors( + self, + batch_size: int, + featmap_size: Tuple[int, int], + stride: int, + dtype: torch.dtype, + device: torch.device, + flatten: bool = True ): + """Generate centers of a single stage feature map. + Args: + batch_size (int): Number of images in one batch. + featmap_size (tuple[int]): height and width of the feature map + stride (int): down sample stride of the feature map + dtype (obj:`torch.dtype`): data type of the tensors + device (obj:`torch.device`): device of the tensors + flatten (bool): flatten the x and y tensors + Return: + priors (Tensor): center priors of a single level feature map. """ - Generate pixel centers of a single stage feature map. 
- :param featmap_size: height and width of the feature map - :param stride: down sample stride of the feature map - :param dtype: data type of the tensors - :param device: device of the tensors - :param flatten: flatten the x and y tensors - :return: y and x of the center points + x, y = self.get_single_level_center_point(featmap_size, stride, dtype, device, flatten) + strides = x.new_full((x.shape[0],), stride) + proiors = torch.stack([x, y, strides, strides], dim=-1) + return proiors.unsqueeze(0).repeat(batch_size, 1, 1) + + def get_single_level_center_point( + self, + featmap_size: Tuple[int, int], + stride: int, + dtype: torch.dtype, + device: torch.device, + flatten: bool = True + ): + """Generate pixel centers of a single stage feature map. + Args: + featmap_size (tuple[int]): height and width of the feature map + stride (int): down sample stride of the feature map + dtype (obj:`torch.dtype`): data type of the tensors + device (obj:`torch.device`): device of the tensors + flatten (bool): flatten the x and y tensors + Return: + x, y (Tuple[Tensor, Tensor]): y and x of the center points. 
""" h, w = featmap_size x_range = (torch.arange(w, dtype=dtype, device=device) + 0.5) * stride @@ -645,7 +743,7 @@ def get_single_level_center_point( if flatten: y = y.flatten() x = x.flatten() - return y, x + return x, y def get_grid_cells(self, featmap_size, scale, stride, dtype, device): """ @@ -681,20 +779,3 @@ def grid_cells_to_center(self, grid_cells): cells_cx = (grid_cells[:, 2] + grid_cells[:, 0]) / 2 cells_cy = (grid_cells[:, 3] + grid_cells[:, 1]) / 2 return torch.stack([cells_cx, cells_cy], dim=-1) - - def _forward_onnx(self, feats): - """only used for onnx export""" - outputs = [] - for x, scale in zip(feats, self.scales): - cls_feat = x - reg_feat = x - for cls_conv in self.cls_convs: - cls_feat = cls_conv(cls_feat) - for reg_conv in self.reg_convs: - reg_feat = reg_conv(reg_feat) - cls_pred = self.gfl_cls(cls_feat) - reg_pred = scale(self.gfl_reg(reg_feat)) - cls_pred = cls_pred.sigmoid() - out = torch.cat([cls_pred, reg_pred], dim=1) - outputs.append(out.flatten(start_dim=2)) - return torch.cat(outputs, dim=2).permute(0, 2, 1) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_head.py index 01eac4146e..2e50867a21 100755 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_head.py @@ -14,6 +14,8 @@ import torch import torch.nn as nn +from torch import Tensor +from typing import List from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule, DepthwiseConvModule from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.init_weights import normal_init @@ -135,15 +137,14 @@ def init_weights(self): normal_init(self.gfl_reg[i], std=0.01) print("Finish initialize NanoDet Head.") - def forward(self, feats): 
- if torch.onnx.is_in_onnx_export(): - return self._forward_onnx(feats) + @torch.jit.unused + def forward(self, feats: List[Tensor]): outputs = [] - for x, cls_convs, reg_convs, gfl_cls, gfl_reg in zip( - feats, self.cls_convs, self.reg_convs, self.gfl_cls, self.gfl_reg - ): - cls_feat = x - reg_feat = x + for idx, (cls_convs, reg_convs, gfl_cls, gfl_reg) in enumerate(zip( + self.cls_convs, self.reg_convs, self.gfl_cls, self.gfl_reg + )): + cls_feat = feats[idx] + reg_feat = feats[idx] for cls_conv in cls_convs: cls_feat = cls_conv(cls_feat) for reg_conv in reg_convs: @@ -155,31 +156,6 @@ def forward(self, feats): bbox_pred = gfl_reg(reg_feat) output = torch.cat([cls_score, bbox_pred], dim=1) outputs.append(output.flatten(start_dim=2)) + outputs = torch.cat(outputs, dim=2).permute(0, 2, 1) return outputs - - def _forward_onnx(self, feats): - """only used for onnx export""" - outputs = [] - for x, cls_convs, reg_convs, gfl_cls, gfl_reg in zip( - feats, self.cls_convs, self.reg_convs, self.gfl_cls, self.gfl_reg - ): - cls_feat = x - reg_feat = x - for cls_conv in cls_convs: - cls_feat = cls_conv(cls_feat) - for reg_conv in reg_convs: - reg_feat = reg_conv(reg_feat) - if self.share_cls_reg: - output = gfl_cls(cls_feat) - cls_pred, reg_pred = output.split( - [self.num_classes, 4 * (self.reg_max + 1)], dim=1 - ) - else: - cls_pred = gfl_cls(cls_feat) - reg_pred = gfl_reg(reg_feat) - - cls_pred = cls_pred.sigmoid() - out = torch.cat([cls_pred, reg_pred], dim=1) - outputs.append(out.flatten(start_dim=2)) - return torch.cat(outputs, dim=2).permute(0, 2, 1) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py index 5d853d5ecf..5e82255e39 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py +++ 
b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/nanodet_plus_head.py @@ -3,10 +3,13 @@ import numpy as np import torch import torch.nn as nn +from torch import Tensor +from typing import List, Tuple, Dict from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util\ import bbox2distance, distance2bbox, multi_apply -from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp import warp_boxes +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.transform.warp \ + import warp_boxes, scriptable_warp_boxes from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.loss.gfocal_loss \ import DistributionFocalLoss, QualityFocalLoss from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.loss.iou_loss import GIoULoss @@ -133,15 +136,10 @@ def init_weights(self): normal_init(self.gfl_cls[i], std=0.01, bias=bias_cls) print("Finish initialize NanoDet-Plus Head.") - def forward(self, feats): - if torch.onnx.is_in_onnx_export(): - return self._forward_onnx(feats) + def forward(self, feats: List[Tensor]): outputs = [] - for feat, cls_convs, gfl_cls in zip( - feats, - self.cls_convs, - self.gfl_cls, - ): + for idx, (cls_convs, gfl_cls) in enumerate(zip(self.cls_convs, self.gfl_cls)): + feat = feats[idx] for conv in cls_convs: feat = conv(feat) output = gfl_cls(feat) @@ -166,7 +164,7 @@ def loss(self, preds, gt_meta, aux_preds=None): batch_size = preds.shape[0] input_height, input_width = gt_meta["img"].shape[2:] featmap_sizes = [ - (math.ceil(input_height / stride), math.ceil(input_width) / stride) + (math.ceil(input_height / stride), math.ceil(input_width / stride)) for stride in self.strides ] # get grid cells of one image @@ -361,17 +359,58 @@ def sample(self, assign_result, gt_bboxes): pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :] return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds - def post_process(self, preds, meta): - 
"""Prediction results post processing. Decode bboxes and rescale + def post_process(self, preds, meta: Dict[str, Tensor], mode: str = "infer", nms_max_num: int = 100): + """Prediction results postprocessing. Decode bboxes and rescale to original image size. Args: preds (Tensor): Prediction output. meta (dict): Meta info. + mode (str): Determines if it uses batches and numpy or tensors for scripting. + nms_max_num (int): Determines the maximum number of bounding boxes that will be retained following the nms. """ + if mode == "eval" and not torch.jit.is_scripting(): + # Inference do not use batches and tries to have + # tensors exclusively for better optimization during scripting. + return self._eval_post_process(preds, meta) + cls_scores, bbox_preds = preds.split( [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 ) - result_list = self.get_bboxes(cls_scores, bbox_preds, meta) + results = self.get_bboxes(cls_scores, bbox_preds, meta["img"], nms_max_num=nms_max_num) + (det_bboxes, det_labels) = results + + det_bboxes[:, :4] = scriptable_warp_boxes( + det_bboxes[:, :4], + torch.linalg.inv(meta["warp_matrix"]), meta["width"], meta["height"] + ) + + # constant output of model every time for tracing + if torch.jit.is_scripting(): + max_count = nms_max_num + else: + _, frequencies = torch.unique(det_labels, return_counts=True) + max_count = frequencies[torch.argmax(frequencies)].item() + + det_result = torch.zeros((self.num_classes, max_count, 5)) + for i in range(self.num_classes): + inds = det_labels == i + det = torch.cat(( + det_bboxes[inds, :4], + det_bboxes[inds, 4:5] + ), + dim=1 + ) + + pad = det.new_zeros((max_count - det.size(0), 5)) + det = torch.cat([det, pad], dim=0) + det_result[i] = det + return det_result + + def _eval_post_process(self, preds, meta): + cls_scores, bbox_preds = preds.split( + [self.num_classes, 4 * (self.reg_max + 1)], dim=-1 + ) + result_list = self.get_bboxes(cls_scores, bbox_preds, meta["img"], mode="eval") det_results = {} 
warp_matrixes = ( meta["warp_matrix"] @@ -395,7 +434,7 @@ def post_process(self, preds, meta): ) for result, img_width, img_height, img_id, warp_matrix in zip( - result_list, img_widths, img_heights, img_ids, warp_matrixes + result_list, img_widths, img_heights, img_ids, warp_matrixes ): det_result = {} det_bboxes, det_labels = result @@ -416,59 +455,72 @@ def post_process(self, preds, meta): det_results[img_id] = det_result return det_results - def get_bboxes(self, cls_preds, reg_preds, img_metas): + def get_bboxes(self, cls_preds, reg_preds, input_img, mode: str = "infer", nms_max_num: int = 100): """Decode the outputs to bboxes. Args: cls_preds (Tensor): Shape (num_imgs, num_points, num_classes). reg_preds (Tensor): Shape (num_imgs, num_points, 4 * (regmax + 1)). - img_metas (dict): Dict of image info. - + input_img (Tensor): Input image to net. + mode (str): Determines if it uses batches and numpy or tensors for scripting. + nms_max_num (int): Determines the maximum number of bounding boxes that will be retained following the nms. Returns: results_list (list[tuple]): List of detection bboxes and labels. 
""" device = cls_preds.device b = cls_preds.shape[0] - input_height, input_width = img_metas["img"].shape[2:] + input_height, input_width = input_img.shape[2:] input_shape = (input_height, input_width) featmap_sizes = [ - (math.ceil(input_height / stride), math.ceil(input_width) / stride) + (int(math.ceil(input_height / stride)), int(math.ceil(input_width / stride))) for stride in self.strides ] # get grid cells of one image - mlvl_center_priors = [ - self.get_single_level_center_priors( - b, - featmap_sizes[i], - stride, - dtype=torch.float32, - device=device, + mlvl_center_priors = [] + for i, stride in enumerate(self.strides): + proiors = self.get_single_level_center_priors( + b, featmap_sizes[i], stride, torch.float32, device ) - for i, stride in enumerate(self.strides) - ] + mlvl_center_priors.append(proiors) + center_priors = torch.cat(mlvl_center_priors, dim=1) dis_preds = self.distribution_project(reg_preds) * center_priors[..., 2, None] bboxes = distance2bbox(center_priors[..., :2], dis_preds, max_shape=input_shape) - scores = cls_preds.sigmoid() + cls_preds = cls_preds.sigmoid() + # add a dummy background class at the end of all labels + if torch.jit.is_scripting() or mode == "infer": + # for faster inference and jit scripting in most common cases we do not try to go through for statement + score, bbox = cls_preds[0], bboxes[0] + padding = score.new_zeros(score.shape[0], 1) + score = torch.cat([score, padding], dim=1) + + return multiclass_nms(bbox, score, score_thr=0.05, nms_cfg=dict(iou_threshold=0.6), max_num=nms_max_num) + result_list = [] for i in range(b): # add a dummy background class at the end of all labels # same with mmdetection2.0 - score, bbox = scores[i], bboxes[i] + score, bbox = cls_preds[i], bboxes[i] padding = score.new_zeros(score.shape[0], 1) score = torch.cat([score, padding], dim=1) results = multiclass_nms( bbox, score, score_thr=0.05, - nms_cfg=dict(type="nms", iou_threshold=0.6), - max_num=100, + nms_cfg=dict(iou_threshold=0.6), 
+ max_num=nms_max_num, ) result_list.append(results) return result_list def get_single_level_center_priors( - self, batch_size, featmap_size, stride, dtype, device + self, + batch_size: int, + featmap_size: Tuple[int, int], + stride: int, + dtype: torch.dtype, + device: torch.device, + flatten: bool = True ): """Generate centers of a single stage feature map. Args: @@ -477,6 +529,7 @@ def get_single_level_center_priors( stride (int): down sample stride of the feature map dtype (obj:`torch.dtype`): data type of the tensors device (obj:`torch.device`): device of the tensors + flatten (bool): flatten the x and y tensors Return: priors (Tensor): center priors of a single level feature map. """ @@ -484,27 +537,9 @@ def get_single_level_center_priors( x_range = (torch.arange(w, dtype=dtype, device=device)) * stride y_range = (torch.arange(h, dtype=dtype, device=device)) * stride y, x = torch.meshgrid(y_range, x_range) - y = y.flatten() - x = x.flatten() + if flatten: + y = y.flatten() + x = x.flatten() strides = x.new_full((x.shape[0],), stride) proiors = torch.stack([x, y, strides, strides], dim=-1) return proiors.unsqueeze(0).repeat(batch_size, 1, 1) - - def _forward_onnx(self, feats): - """only used for onnx export""" - outputs = [] - for feat, cls_convs, gfl_cls in zip( - feats, - self.cls_convs, - self.gfl_cls, - ): - for conv in cls_convs: - feat = conv(feat) - output = gfl_cls(feat) - cls_pred, reg_pred = output.split( - [self.num_classes, 4 * (self.reg_max + 1)], dim=1 - ) - cls_pred = cls_pred.sigmoid() - out = torch.cat([cls_pred, reg_pred], dim=1) - outputs.append(out.flatten(start_dim=2)) - return torch.cat(outputs, dim=2).permute(0, 2, 1) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/simple_conv_head.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/simple_conv_head.py index b3d4d95ff7..5a8e1a737a 100644 --- 
a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/simple_conv_head.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/head/simple_conv_head.py @@ -1,6 +1,9 @@ import torch import torch.nn as nn +from torch import Tensor +from typing import List + from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.conv import ConvModule from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.init_weights import normal_init from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.scale import Scale @@ -83,11 +86,12 @@ def init_weights(self): normal_init(self.gfl_cls, std=0.01, bias=bias_cls) normal_init(self.gfl_reg, std=0.01) - def forward(self, feats): + @torch.jit.unused + def forward(self, feats: List[Tensor]): outputs = [] - for x, scale in zip(feats, self.scales): - cls_feat = x - reg_feat = x + for idx, scale in enumerate(self.scales): + cls_feat = feats[idx] + reg_feat = feats[idx] for cls_conv in self.cls_convs: cls_feat = cls_conv(cls_feat) for reg_conv in self.reg_convs: diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/gfocal_loss.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/gfocal_loss.py index af0b4251c2..b089a8d1f4 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/gfocal_loss.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/gfocal_loss.py @@ -101,6 +101,7 @@ def __init__(self, use_sigmoid=True, beta=2.0, reduction="mean", loss_weight=1.0 self.reduction = reduction self.loss_weight = loss_weight + @torch.jit.unused def forward( self, pred, target, weight=None, avg_factor=None, reduction_override=None ): @@ -151,6 +152,7 @@ def __init__(self, reduction="mean", loss_weight=1.0): self.reduction = reduction self.loss_weight = loss_weight + @torch.jit.unused def forward( 
self, pred, target, weight=None, avg_factor=None, reduction_override=None ): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/iou_loss.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/iou_loss.py index 7ee9d324a3..73809580da 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/iou_loss.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/loss/iou_loss.py @@ -449,6 +449,7 @@ def __init__(self, eps=1e-6, reduction="mean", loss_weight=1.0): self.reduction = reduction self.loss_weight = loss_weight + @torch.jit.unused def forward( self, pred, diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/conv.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/conv.py index 693e6fd0fe..0e55d157b6 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/conv.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/conv.py @@ -115,9 +115,10 @@ def __init__( # Use msra init by default self.init_weights() + @torch.jit.unused @property def norm(self): - if self.norm_name: + if self.norm_name is not None: return getattr(self, self.norm_name) else: return None @@ -131,13 +132,14 @@ def init_weights(self): if self.with_norm: constant_init(self.norm, 1, bias=0) - def forward(self, x, norm=True): + @torch.jit.unused + def forward(self, x, norm: bool = True): for layer in self.order: if layer == "conv": x = self.conv(x) - elif layer == "norm" and norm and self.with_norm: + elif layer == "norm" and (norm is not None) and (self.with_norm is not None) and (self.norm is not None): x = self.norm(x) - elif layer == "act" and self.activation: + elif layer == "act" and (self.activation is not None): x = self.act(x) return x @@ -211,7 +213,6 @@ def __init__( # norm layer is after conv layer _, self.dwnorm = 
build_norm_layer(norm_cfg, in_channels) _, self.pwnorm = build_norm_layer(norm_cfg, out_channels) - # build activation layer if self.activation: self.act = act_layers(self.activation) @@ -230,12 +231,17 @@ def init_weights(self): constant_init(self.dwnorm, 1, bias=0) constant_init(self.pwnorm, 1, bias=0) - def forward(self, x, norm=True): + def forward(self, x): for layer_name in self.order: - if layer_name != "act": - layer = self.__getattr__(layer_name) - x = layer(x) - elif layer_name == "act" and self.activation: + if layer_name == "depthwise": + x = self.depthwise(x) + elif layer_name == "pointwise": + x = self.pointwise(x) + elif layer_name == "dwnorm" and (self.dwnorm is not None): + x = self.dwnorm(x) + elif layer_name == "pwnorm" and (self.pwnorm is not None): + x = self.pwnorm(x) + elif layer_name == "act" and (self.activation is not None): x = self.act(x) return x diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/nms.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/nms.py index e5fa3e216c..abb97a62ca 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/nms.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/nms.py @@ -1,9 +1,15 @@ import torch from torchvision.ops import nms +from typing import Dict def multiclass_nms( - multi_bboxes, multi_scores, score_thr, nms_cfg, max_num=-1, score_factors=None + multi_bboxes, + multi_scores, + score_thr: float, + nms_cfg: Dict[str, float], + max_num: int = -1, + score_factors: torch.Tensor = torch.empty(0) ): """NMS for multi-class bboxes. @@ -13,7 +19,7 @@ def multiclass_nms( contains scores of the background class, but this will be ignored. score_thr (float): bbox threshold, bboxes with scores lower than it will not be considered. 
- nms_thr (float): NMS IoU threshold + nms_cfg (dictionary): dictionary of the type and threshold of IoU max_num (int): if there are more than max_num bboxes after NMS, only top max_num will be kept. score_factors (Tensor): The factors multiplied to scores before @@ -40,20 +46,19 @@ def multiclass_nms( bboxes = torch.masked_select( bboxes, torch.stack((valid_mask, valid_mask, valid_mask, valid_mask), -1) ).view(-1, 4) - if score_factors is not None: + if not (score_factors.numel() == 0): scores = scores * score_factors[:, None] scores = torch.masked_select(scores, valid_mask) - labels = valid_mask.nonzero(as_tuple=False)[:, 1] + + # for scripting + labels = torch.tensor(0).to(valid_mask.device).long() + torch.nonzero(valid_mask, out=labels) + # labels = valid_mask.nonzero(as_tuple=False)#[:, 1] + labels = labels[:, 1] if bboxes.numel() == 0: bboxes = multi_bboxes.new_zeros((0, 5)) labels = multi_bboxes.new_zeros((0,), dtype=torch.long) - - if torch.onnx.is_in_onnx_export(): - raise RuntimeError( - "[ONNX Error] Can not record NMS " - "as it has not been executed this time" - ) return bboxes, labels dets, keep = batched_nms(bboxes, scores, labels, nms_cfg) @@ -65,7 +70,7 @@ def multiclass_nms( return dets, labels[keep] -def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): +def batched_nms(boxes, scores, idxs, nms_cfg: Dict[str, float], class_agnostic: bool = False): """Performs non-maximum suppression in a batched fashion. Modified from https://github.com/pytorch/vision/blob /505cd6957711af790211896d32b40291bea1bc21/torchvision/ops/boxes.py#L39. @@ -94,27 +99,32 @@ def batched_nms(boxes, scores, idxs, nms_cfg, class_agnostic=False): tuple: kept dets and indice. 
""" nms_cfg_ = nms_cfg.copy() - class_agnostic = nms_cfg_.pop("class_agnostic", class_agnostic) if class_agnostic: boxes_for_nms = boxes else: max_coordinate = boxes.max() offsets = idxs.to(boxes) * (max_coordinate + 1) boxes_for_nms = boxes + offsets[:, None] - nms_cfg_.pop("type", "nms") - split_thr = nms_cfg_.pop("split_thr", 10000) - if len(boxes_for_nms) < split_thr: - keep = nms(boxes_for_nms, scores, **nms_cfg_) + split_thr = nms_cfg_.pop("split_thr", 10000.0) + if boxes_for_nms.shape[0] < split_thr: + keep = nms(boxes_for_nms, scores, nms_cfg_["iou_threshold"]) boxes = boxes[keep] scores = scores[keep] else: total_mask = scores.new_zeros(scores.size(), dtype=torch.bool) for id in torch.unique(idxs): - mask = (idxs == id).nonzero(as_tuple=False).view(-1) - keep = nms(boxes_for_nms[mask], scores[mask], **nms_cfg_) + mask = (idxs == id) + mask_out = torch.tensor(0).to(mask.device).long() + torch.nonzero(mask, out=mask_out) + mask = mask_out.view(-1) + # mask = (idxs == id).nonzero(as_tuple=False).view(-1) + keep = nms(boxes_for_nms[mask], scores[mask], nms_cfg_["iou_threshold"]) total_mask[mask[keep]] = True - keep = total_mask.nonzero(as_tuple=False).view(-1) + keep_out = torch.tensor(0).to(total_mask.device).long() + torch.nonzero(total_mask, out=keep_out) + keep = keep_out.view(-1) + # keep = total_mask.nonzero(as_tuple=False).view(-1) keep = keep[scores[keep].argsort(descending=True)] boxes = boxes[keep] scores = scores[keep] diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/transformer.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/transformer.py index 24e2de458b..c44788d32e 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/transformer.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/model/module/transformer.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # 
limitations under the License. +import torch.jit import torch.nn as nn from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.module.activation import act_layers @@ -128,6 +129,7 @@ def __init__( ] self.encoders = nn.Sequential(*encoders) + @torch.jit.unused def forward(self, x, pos_embed): b, _, h, w = x.shape x = self.conv(x) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py index d2939d22e1..7ec2a04864 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/trainer/task.py @@ -25,7 +25,7 @@ from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.data.batch_process import stack_batch_img from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util\ - import convert_avg_params, gather_results, mkdir + import convert_avg_params, gather_results, mkdir, rank_filter from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.check_point import save_model_state from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.model.weight_averager import build_weight_averager @@ -69,12 +69,17 @@ def forward(self, x): def predict(self, batch, batch_idx=None, dataloader_idx=None): batch = self._preprocess_batch_input(batch) preds = self.forward(batch["img"]) - results = self.model.head.post_process(preds, batch) + results = self.model.head.post_process(preds, batch, "eval") return results - def save_current_model(self, path, logger): - save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, logger=logger) + @rank_filter + def _save_current_model(self, path, verbose): + save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, verbose=verbose) + def save_current_model(self, path, verbose): + 
save_model_state(path=path, model=self.model, weight_averager=self.weight_averager, verbose=verbose) + + @torch.jit.unused def training_step(self, batch, batch_idx): batch = self._preprocess_batch_input(batch) preds, loss, loss_states = self.model.forward_train(batch) @@ -109,7 +114,7 @@ def training_epoch_end(self, outputs: List[Any]) -> None: # save models in schedule epoches if self.current_epoch % self.cfg.schedule.val_intervals == 0: checkpoint_save_path = os.path.join(self.cfg.save_dir, "checkpoints") - mkdir(checkpoint_save_path) + mkdir(self.local_rank, checkpoint_save_path) print("===" * 10) print("checkpoint_save_path: {} \n epoch: {}".format(checkpoint_save_path, self.current_epoch)) print("===" * 10) @@ -142,7 +147,7 @@ def validation_step(self, batch, batch_idx): if self.logger: self.logger.info(log_msg) - dets = self.model.head.post_process(preds, batch) + dets = self.model.head.post_process(preds, batch, "eval") return dets def validation_epoch_end(self, validation_step_outputs): @@ -170,11 +175,15 @@ def validation_epoch_end(self, validation_step_outputs): if metric > self.save_flag: self.save_flag = metric best_save_path = os.path.join(self.cfg.save_dir, "model_best") - mkdir(best_save_path) + mkdir(self.local_rank, best_save_path) self.trainer.save_checkpoint( os.path.join(best_save_path, "model_best.ckpt") ) - self.save_current_model(os.path.join(best_save_path, "nanodet_model_best.pth"), logger=self.logger) + verbose = True if self.logger is not None else False + # TODO: save only if local_rank is < 0 + # self._save_current_model(self.local_rank, os.path.join(best_save_path, "nanodet_model_state_best.pth"), + # verbose=verbose) + self.save_current_model(os.path.join(best_save_path, "nanodet_model_state_best.pth"), verbose=verbose) txt_path = os.path.join(best_save_path, "eval_results.txt") with open(txt_path, "a") as f: f.write("Epoch:{}\n".format(self.current_epoch + 1)) @@ -187,9 +196,8 @@ def validation_epoch_end(self, 
validation_step_outputs): if self.logger: self.logger.log_metrics(eval_results, self.current_epoch + 1) else: - # self.logger.info("Skip val on rank {}".format(self.local_rank)) if self.logger: - self.logger.info("Skip val ") + self.logger.info("Skip val on rank {}".format(self.local_rank)) def test_step(self, batch, batch_idx): dets = self.predict(batch, batch_idx) @@ -207,7 +215,8 @@ def test_epoch_end(self, test_step_outputs): if all_results: if self.cfg.test_mode == "val": eval_results = self.evaluator.evaluate( - all_results, self.cfg.save_dir) + all_results, self.cfg.save_dir, rank=self.local_rank + ) txt_path = os.path.join(self.cfg.save_dir, "eval_results.txt") with open(txt_path, "a") as f: for k, v in eval_results.items(): diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/box_transform.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/box_transform.py index 4b82a8c19f..b954a165db 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/box_transform.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/box_transform.py @@ -1,7 +1,8 @@ import torch +from typing import Tuple -def distance2bbox(points, distance, max_shape=None): +def distance2bbox(points, distance, max_shape: Tuple[int, int]=(0, 0)): """Decode distance prediction to bounding box. 
Args: @@ -17,7 +18,7 @@ def distance2bbox(points, distance, max_shape=None): y1 = points[..., 1] - distance[..., 1] x2 = points[..., 0] + distance[..., 2] y2 = points[..., 1] + distance[..., 3] - if max_shape is not None: + if max_shape != (0, 0): x1 = x1.clamp(min=0, max=max_shape[1]) y1 = y1.clamp(min=0, max=max_shape[0]) x2 = x2.clamp(min=0, max=max_shape[1]) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py index 2ac516167a..fd3487f0f1 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/check_point.py @@ -13,12 +13,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.rank_filter import rank_filter from typing import Any, Dict import torch -def load_model_weight(model, checkpoint, logger=None): +def load_model_weight(model, checkpoint, verbose=None): state_dict = checkpoint["state_dict"].copy() for k in checkpoint["state_dict"]: # convert average model weights @@ -39,8 +39,8 @@ def load_model_weight(model, checkpoint, logger=None): for k in state_dict: if k in model_state_dict: if state_dict[k].shape != model_state_dict[k].shape: - if logger: - logger.log( + if verbose: + print( "Skip loading parameter {}, required shape{}, " "loaded shape{}.".format( k, model_state_dict[k].shape, state_dict[k].shape @@ -48,19 +48,18 @@ def load_model_weight(model, checkpoint, logger=None): ) state_dict[k] = model_state_dict[k] else: - if logger: - logger.log("Drop parameter {}.".format(k)) + if verbose: + print("Drop parameter {}.".format(k)) for k in model_state_dict: if not (k in state_dict): - if logger: - 
logger.log("No param {}.".format(k)) + if verbose: + print("No param {}.".format(k)) state_dict[k] = model_state_dict[k] model.load_state_dict(state_dict, strict=False) return model -# @rank_zero_only -# @rank_filter +@rank_filter def save_model(model, path, epoch, iter, optimizer=None): model_state_dict = ( model.module.state_dict() if hasattr(model, "module") else model.state_dict() @@ -72,11 +71,9 @@ def save_model(model, path, epoch, iter, optimizer=None): torch.save(data, path) -# @rank_zero_only -# @rank_filter -def save_model_state(path, model, weight_averager=None, logger=None): - if logger: - logger.info("Saving model to {}".format(path)) +def save_model_state(path, model, weight_averager=None, verbose=None): + if verbose: + print("Saving model to {}".format(path)) state_dict = ( weight_averager.state_dict() if weight_averager diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/logger.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/logger.py index b883d8f336..bbe5f59c47 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/logger.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/logger.py @@ -22,15 +22,15 @@ from pytorch_lightning.utilities import rank_zero_only from pytorch_lightning.utilities.cloud_io import get_filesystem + from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.path import mkdir class Logger: def __init__(self, local_rank, save_dir="./", use_tensorboard=True): - # mkdir(local_rank, save_dir) - mkdir(save_dir) + mkdir(local_rank, save_dir) self.rank = local_rank - fmt = ("[%(name)s] [%(asctime)s] %(levelname)s: %(message)s") + fmt = "[%(name)s] [%(asctime)s] %(levelname)s: %(message)s" logging.basicConfig( level=logging.INFO, filename=os.path.join(save_dir, "logs.txt"), @@ -61,6 +61,10 @@ def log(self, string): if self.rank < 1: logging.info(string) + def info(self, string): + if self.rank < 
1: + logging.info(string) + def scalar_summary(self, tag, phase, value, step): if self.rank < 1: self.writer.add_scalars(tag, {phase: value}, step) @@ -173,8 +177,7 @@ def _init_logger(self): ch = logging.StreamHandler() ch.setLevel(logging.INFO) # set console formatter - - c_fmt = ("[%(name)s] [%(asctime)s] %(levelname)s: %(message)s") + c_fmt = "[%(name)s] [%(asctime)s] %(levelname)s: %(message)s" console_formatter = logging.Formatter(c_fmt, datefmt="%m-%d %H:%M:%S") ch.setFormatter(console_formatter) diff --git a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/path.py b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/path.py index b0887d41a6..6f101ece69 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/path.py +++ b/src/opendr/perception/object_detection_2d/nanodet/algorithm/nanodet/util/path.py @@ -14,10 +14,10 @@ import os -# from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.rank_filter import rank_filter +from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util.rank_filter import rank_filter -# @rank_filter +@rank_filter def mkdir(path): if not os.path.exists(path): os.makedirs(path) diff --git a/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini b/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini index c8a39d062d..aac7b3d043 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini +++ b/src/opendr/perception/object_detection_2d/nanodet/dependencies.ini @@ -1,7 +1,7 @@ [runtime] # 'python' key expects a value using the Python requirements file format # https://pip.pypa.io/en/stable/reference/pip_install/#requirements-file-format -python=torch>=1.7 +python=torch>=1.9.0 pytorch-lightning==1.2.3 protobuf<=3.20.0 omegaconf>=2.0.1 @@ -10,13 +10,9 @@ python=torch>=1.7 opencv-python pycocotools Cython - matplotlib onnx - onnx-simplifier pyaml tabulate tensorboard - torchmetrics - tqdm 
opendr=opendr-toolkit-engine diff --git a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py index 6eb79c3db6..f087264104 100644 --- a/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py +++ b/src/opendr/perception/object_detection_2d/nanodet/nanodet_learner.py @@ -15,6 +15,7 @@ import os import datetime import json +import warnings from pathlib import Path import pytorch_lightning as pl @@ -30,7 +31,6 @@ from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.inferencer.utilities import Predictor from opendr.perception.object_detection_2d.nanodet.algorithm.nanodet.util import ( NanoDetLightningLogger, - Logger, cfg, load_config, load_model_weight, @@ -44,13 +44,15 @@ from opendr.engine.learners import Learner from urllib.request import urlretrieve +import onnxruntime as ort + _MODEL_NAMES = {"EfficientNet_Lite0_320", "EfficientNet_Lite1_416", "EfficientNet_Lite2_512", "RepVGG_A0_416", "t", "g", "m", "m_416", "m_0.5x", "m_1.5x", "m_1.5x_416", "plus_m_320", "plus_m_1.5x_320", "plus_m_416", "plus_m_1.5x_416", "custom"} class NanodetLearner(Learner): - def __init__(self, model_to_use="plus_m_1.5x_416", iters=None, lr=None, batch_size=None, checkpoint_after_iter=None, + def __init__(self, model_to_use="m", iters=None, lr=None, batch_size=None, checkpoint_after_iter=None, checkpoint_load_iter=None, temp_path='', device='cuda', weight_decay=None, warmup_steps=None, warmup_ratio=None, lr_schedule_T_max=None, lr_schedule_eta_min=None, grad_clip=None): @@ -82,6 +84,11 @@ def __init__(self, model_to_use="plus_m_1.5x_416", iters=None, lr=None, batch_si checkpoint_load_iter=self.checkpoint_load_iter, temp_path=self.temp_path, device=self.device) + self.ort_session = None + self.jit_model = None + self.predictor = None + + self.pipeline = None self.model = build_model(self.cfg.model) self.logger = None self.task = None @@ -173,73 +180,87 @@ def save(self, 
path=None, verbose=True): Method for saving the current model and metadata in the path provided. :param path: path to folder where model will be saved :type path: str, optional - :param verbose: whether to print a success message or not, defaults to False + :param verbose: whether to print a success message or not :type verbose: bool, optional """ + path = path if path is not None else self.cfg.save_dir model = self.cfg.check_point_name + os.makedirs(path, exist_ok=True) - metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", - "has_data": False, "inference_params": {}, "optimized": False, - "optimizer_info": {}, "classes": self.classes} + if self.ort_session: + self._save_onnx(path, verbose=verbose) + return + if self.jit_model: + self._save_jit(path, verbose=verbose) + return - param_filepath = "nanodet_{}.pth".format(model) - metadata["model_paths"].append(param_filepath) + metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", "has_data": False, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}, + "optimized": False, "optimizer_info": {}} + + metadata["model_paths"].append("nanodet_{}.pth".format(model)) - logger = self.logger if verbose else None if self.task is None: - print("You do not have call a task yet, only the state of the loaded or initialized model will be saved") - save_model_state(os.path.join(path, metadata["model_paths"][0]), self.model, None, logger) + print("You haven't called a task yet, only the state of the loaded or initialized model will be saved.") + save_model_state(os.path.join(path, metadata["model_paths"][0]), self.model, None, verbose) else: - self.task.save_current_model(os.path.join(path, metadata["model_paths"][0]), logger) + self.task.save_current_model(os.path.join(path, metadata["model_paths"][0]), verbose) with open(os.path.join(path, "nanodet_{}.json".format(model)), 'w', encoding='utf-8') as f: json.dump(metadata, f, ensure_ascii=False, indent=4) if 
verbose: print("Model metadata saved.") - return True + return def load(self, path=None, verbose=True): """ Loads the model from the path provided. :param path: path of the directory where the model was saved :type path: str, optional - :param verbose: whether to print a success message or not, defaults to False + :param verbose: whether to print a success message or not, defaults to True :type verbose: bool, optional """ + path = path if path is not None else self.cfg.save_dir + model = self.cfg.check_point_name if verbose: - print("Model name:", model, "-->", os.path.join(path, model + ".json")) + print("Model name:", model, "-->", os.path.join(path, "nanodet_" + model + ".json")) with open(os.path.join(path, "nanodet_{}.json".format(model))) as f: metadata = json.load(f) - logger = Logger(-1, path, False) if verbose else None - ckpt = torch.load(os.path.join(path, metadata["model_paths"][0]), map_location=torch.device(self.device)) - self.model = load_model_weight(self.model, ckpt, logger) + if metadata['optimized']: + if metadata['format'] == "onnx": + self._load_onnx(os.path.join(path, metadata["model_paths"][0]), verbose=verbose) + print("Loaded ONNX model.") + else: + self._load_jit(os.path.join(path, metadata["model_paths"][0]), verbose=verbose) + print("Loaded JIT model.") + else: + ckpt = torch.load(os.path.join(path, metadata["model_paths"][0]), map_location=torch.device(self.device)) + self.model = load_model_weight(self.model, ckpt, verbose) if verbose: - logger.log("Loaded model weight from {}".format(path)) + print("Loaded model weights from {}".format(path)) pass - def download(self, path=None, mode="pretrained", verbose=False, + def download(self, path=None, mode="pretrained", verbose=True, url=OPENDR_SERVER_URL + "/perception/object_detection_2d/nanodet/"): """ Downloads all files necessary for inference, evaluation and training. Valid mode options are: ["pretrained", "images", "test_data"]. 
:param path: folder to which files will be downloaded, if None self.temp_path will be used - :type path: str, optional + :type path: str :param mode: one of: ["pretrained", "images", "test_data"], where "pretrained" downloads a pretrained - network depending on the network choosed in config file, "images" downloads example inference data, - and "test_data" downloads additional image,annotation file and pretrained network for training and testing - :type mode: str, optional - :param model: the specific name of the model to download, all pre-configured configs files have their pretrained - model and can be selected, if None self.cfg.check_point_name will be used - :param verbose: if True, additional information is printed on stdout - :type verbose: bool, optional + network depending on the network chosen in the config file, "images" downloads example inference data, + and "test_data" downloads additional images and corresponding annotations files + :type mode: str + :param verbose: if True, additional information is printed on STDOUT + :type verbose: bool :param url: URL to file location on FTP server - :type url: str, optional + :type url: str """ valid_modes = ["pretrained", "images", "test_data"] @@ -278,9 +299,9 @@ def download(self, path=None, mode="pretrained", verbose=False, if verbose: print("Making metadata...") - metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", - "has_data": False, "inference_params": {}, "optimized": False, - "optimizer_info": {}, "classes": self.classes} + metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", "has_data": False, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}, + "optimized": False, "optimizer_info": {}} param_filepath = "nanodet_{}.pth".format(model) metadata["model_paths"].append(param_filepath) @@ -289,13 +310,13 @@ def download(self, path=None, mode="pretrained", verbose=False, except: print("Pretrain weights for this model are not 
provided!!! \n" - "Only the hole ckeckpoint will be download") + "Only the hole checkpoint will be download") if verbose: print("Making metadata...") - metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", - "has_data": False, "inference_params": {}, "optimized": False, - "optimizer_info": {}, "classes": self.classes} + metadata = {"model_paths": [], "framework": "pytorch", "format": "pth", "has_data": False, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}, + "optimized": False, "optimizer_info": {}} param_filepath = "nanodet_{}.ckpt".format(model) metadata["model_paths"].append(param_filepath) @@ -333,11 +354,138 @@ def reset(self): """This method is not used in this implementation.""" return NotImplementedError - def optimize(self): - """This method is not used in this implementation.""" - return NotImplementedError + def __dummy_input(self): + width, height = self.cfg.data.val.input_size + dummy_input = ( + torch.randn((3, width, height), device=self.device, dtype=torch.float32), + torch.tensor(width, device="cpu", dtype=torch.int64), + torch.tensor(height, device="cpu", dtype=torch.int64), + torch.eye(3, device="cpu", dtype=torch.float32), + ) + return dummy_input + + def _save_onnx(self, onnx_path, do_constant_folding=False, verbose=True, nms_max_num=100): + if not self.predictor: + self.predictor = Predictor(self.cfg, self.model, device=self.device, nms_max_num=nms_max_num) + + os.makedirs(onnx_path, exist_ok=True) + export_path = os.path.join(onnx_path, "nanodet_{}.onnx".format(self.cfg.check_point_name)) + + dummy_input = self.__dummy_input() + + torch.onnx.export( + self.predictor, + dummy_input[0], + export_path, + verbose=verbose, + keep_initializers_as_inputs=True, + do_constant_folding=do_constant_folding, + opset_version=11, + input_names=['data'], + output_names=['output'], + dynamic_axes={'data': {1: 'width', + 2: 'height'}} + ) + + metadata = {"model_paths": 
["nanodet_{}.onnx".format(self.cfg.check_point_name)], "framework": "pytorch", + "format": "onnx", "has_data": False, "optimized": True, "optimizer_info": {}, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": self.classes}} + + with open(os.path.join(onnx_path, "nanodet_{}.json".format(self.cfg.check_point_name)), + 'w', encoding='utf-8') as f: + json.dump(metadata, f, ensure_ascii=False, indent=4) + + if verbose: + print("Finished exporting ONNX model.") + + try: + import onnxsim + except: + print("For compression in optimized models, install onnxsim and rerun optimize.") + return + + import onnx + if verbose: + print("Simplifying ONNX model...") + input_data = {"data": dummy_input[0].detach().cpu().numpy()} + model_sim, flag = onnxsim.simplify(export_path, input_data=input_data) + if flag: + onnx.save(model_sim, export_path) + if verbose: + print("ONNX simplified successfully.") + else: + if verbose: + print("ONNX simplified failed.") + + def _load_onnx(self, onnx_path, verbose=True): + if verbose: + print("Loading ONNX runtime inference session from {}".format(onnx_path)) + + self.ort_session = ort.InferenceSession(onnx_path) + + def _save_jit(self, jit_path, verbose=True, nms_max_num=100): + if not self.predictor: + self.predictor = Predictor(self.cfg, self.model, device=self.device, nms_max_num=nms_max_num) + + os.makedirs(jit_path, exist_ok=True) - def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123): + dummy_input = self.__dummy_input() + + with torch.no_grad(): + export_path = os.path.join(jit_path, "nanodet_{}.pth".format(self.cfg.check_point_name)) + self.predictor.trace_model(dummy_input) + model_traced = torch.jit.script(self.predictor) + + metadata = {"model_paths": ["nanodet_{}.pth".format(self.cfg.check_point_name)], "framework": "pytorch", + "format": "pth", "has_data": False, "optimized": True, "optimizer_info": {}, + "inference_params": {"input_size": self.cfg.data.val.input_size, "classes": 
self.classes}} + model_traced.save(export_path) + + with open(os.path.join(jit_path, "nanodet_{}.json".format(self.cfg.check_point_name)), + 'w', encoding='utf-8') as f: + json.dump(metadata, f, ensure_ascii=False, indent=4) + + if verbose: + print("Finished export to TorchScript.") + + def _load_jit(self, jit_path, verbose=True): + if verbose: + print("Loading JIT model from {}.".format(jit_path)) + + self.jit_model = torch.jit.load(jit_path, map_location=self.device) + + def optimize(self, export_path, verbose=True, optimization="jit", nms_max_num=100): + """ + Method for optimizing the model with ONNX or JIT. + :param export_path: The file path to the folder where the optimized model will be saved. If a model already + exists at this path, it will be overwritten. + :type export_path: str + :param verbose: if set to True, additional information is printed to STDOUT + :type verbose: bool, optional + :param optimization: the kind of optimization you want to perform [jit, onnx] + :type optimization: str + :param nms_max_num: determines the maximum number of bounding boxes that will be retained following the nms. 
+ :type nms_max_num: int + """ + + optimization = optimization.lower() + if not os.path.exists(export_path): + if optimization == "jit": + self._save_jit(export_path, verbose=verbose, nms_max_num=nms_max_num) + elif optimization == "onnx": + self._save_onnx(export_path, verbose=verbose, nms_max_num=nms_max_num) + else: + assert NotImplementedError + with open(os.path.join(export_path, "nanodet_{}.json".format(self.cfg.check_point_name))) as f: + metadata = json.load(f) + if optimization == "jit": + self._load_jit(os.path.join(export_path, metadata["model_paths"][0]), verbose) + elif optimization == "onnx": + self._load_onnx(os.path.join(export_path, metadata["model_paths"][0]), verbose) + else: + assert NotImplementedError + + def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, logging=False, seed=123, local_rank=1): """ This method is used to train the detector on the COCO dataset. Validation is performed in a val_dataset if provided, else validation is performed in training dataset. 
@@ -348,27 +496,32 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 :param val_dataset: validation dataset object :type val_dataset: ExternalDataset, DetectionDataset not implemented yet :param logging_path: subdirectory in temp_path to save logger outputs - :type logging_path: str, optional - :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, - defaults to True + :type logging_path: str + :param verbose: if set to True, additional information is printed to STDOUT :type verbose: bool + :param logging: if set to True, text and STDOUT logging will be used + :type logging: bool :param seed: seed for reproducibility :type seed: int + :param local_rank: for distribution learning + :type local_rank: int """ - mkdir(self.cfg.save_dir) + mkdir(local_rank, self.cfg.save_dir) - if verbose: + if logging: self.logger = NanoDetLightningLogger(self.temp_path + "/" + logging_path) self.logger.dump_cfg(self.cfg) if seed != '' or seed is not None: - if verbose: + if logging: self.logger.info("Set random seed to {}".format(seed)) pl.seed_everything(seed) - if verbose: + if logging: self.logger.info("Setting up data...") + elif verbose: + print("Setting up data...") train_dataset = build_dataset(self.cfg.data.val, dataset, self.cfg.class_names, "train") val_dataset = train_dataset if val_dataset is None else \ @@ -381,7 +534,7 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 batch_size=self.batch_size, shuffle=True, num_workers=self.cfg.device.workers_per_gpu, - pin_memory=True, + pin_memory=False, collate_fn=naive_collate, drop_last=True, ) @@ -390,7 +543,7 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 batch_size=self.batch_size, shuffle=False, num_workers=self.cfg.device.workers_per_gpu, - pin_memory=True, + pin_memory=False, collate_fn=naive_collate, drop_last=False, ) @@ -401,14 +554,15 @@ def fit(self, dataset, 
val_dataset=None, logging_path='', verbose=True, seed=123 if self.checkpoint_load_iter > 0 else None ) - if verbose: + if logging: self.logger.info("Creating task...") + elif verbose: + print("Creating task...") self.task = TrainingTask(self.cfg, self.model, evaluator) - if self.device == "cpu": - gpu_ids = None - accelerator = None - elif self.device == "cuda": + gpu_ids = None + accelerator = None + if self.device == "cuda": gpu_ids = self.cfg.device.gpu_ids accelerator = None if len(gpu_ids) <= 1 else "ddp" @@ -421,7 +575,7 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 log_every_n_steps=self.cfg.log.interval, num_sanity_val_steps=0, resume_from_checkpoint=model_resume_path, - callbacks=[ProgressBar(refresh_rate=0)], # disable tqdm bar + callbacks=[ProgressBar(refresh_rate=0)], logger=self.logger, benchmark=True, gradient_clip_val=self.cfg.get("grad_clip", 0.0), @@ -429,27 +583,32 @@ def fit(self, dataset, val_dataset=None, logging_path='', verbose=True, seed=123 trainer.fit(self.task, train_dataloader, val_dataloader) - def eval(self, dataset, verbose=True): + def eval(self, dataset, verbose=True, logging=False, local_rank=1): """ This method performs evaluation on a given dataset and returns a dictionary with the evaluation results. 
:param dataset: dataset object, to perform evaluation on - :type dataset: ExternalDataset, DetectionDataset not implemented yet - :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, - defaults to True + :type dataset: ExternalDataset, XMLBasedDataset + :param verbose: if set to True, additional information is printed to STDOUT :type verbose: bool + :param logging: if set to True, text and STDOUT logging will be used + :type logging: bool + :param local_rank: for distribution learning + :type local_rank: int """ timestr = datetime.datetime.now().__format__("%Y_%m_%d_%H:%M:%S") save_dir = os.path.join(self.cfg.save_dir, timestr) - mkdir(save_dir) + mkdir(local_rank, save_dir) - if verbose: + if logging: self.logger = NanoDetLightningLogger(save_dir) self.cfg.update({"test_mode": "val"}) - if verbose: + if logging: self.logger.info("Setting up data...") + elif verbose: + print("Setting up data...") val_dataset = build_dataset(self.cfg.data.val, dataset, self.cfg.class_names, "val") @@ -458,20 +617,22 @@ def eval(self, dataset, verbose=True): batch_size=self.batch_size, shuffle=False, num_workers=self.cfg.device.workers_per_gpu, - pin_memory=True, + pin_memory=False, collate_fn=naive_collate, drop_last=False, ) evaluator = build_evaluator(self.cfg.evaluator, val_dataset) - if verbose: + if logging: self.logger.info("Creating task...") + elif verbose: + print("Creating task...") + self.task = TrainingTask(self.cfg, self.model, evaluator) - if self.device == "cpu": - gpu_ids = None - accelerator = None - elif self.device == "cuda": + gpu_ids = None + accelerator = None + if self.device == "cuda": gpu_ids = self.cfg.device.gpu_ids accelerator = None if len(gpu_ids) <= 1 else "ddp" @@ -483,35 +644,51 @@ def eval(self, dataset, verbose=True): num_sanity_val_steps=0, logger=self.logger, ) - if verbose: + if self.logger: self.logger.info("Starting testing...") - return trainer.test(self.task, val_dataloader, verbose=verbose) + 
elif verbose: + print("Starting testing...") - def infer(self, input, threshold=0.35, verbose=True): + test_results = (verbose or logging) + return trainer.test(self.task, val_dataloader, verbose=test_results) + + def infer(self, input, threshold=0.35, nms_max_num=100): """ Performs inference - :param input: input can be an Image type image to perform inference - :type input: str, optional + :param input: input image to perform inference on + :type input: opendr.data.Image :param threshold: confidence threshold :type threshold: float, optional - :param verbose: if set to True, additional information is printed to STDOUT and logger txt output, - defaults to True - :type verbose: bool + :param nms_max_num: determines the maximum number of bounding boxes that will be retained following the nms. + :type nms_max_num: int :return: list of bounding boxes of last image of input or last frame of the video - :rtype: BoundingBoxList + :rtype: opendr.engine.target.BoundingBoxList """ + if not self.predictor: + self.predictor = Predictor(self.cfg, self.model, device=self.device, nms_max_num=nms_max_num) - if verbose: - self.logger = Logger(0, use_tensorboard=False) - predictor = Predictor(self.cfg, self.model, device=self.device) if not isinstance(input, Image): input = Image(input) _input = input.opencv() - meta, res = predictor.inference(_input, verbose) - bounding_boxes = BoundingBoxList([]) - for label in res[0]: - for box in res[0][label]: + _input, *metadata = self.predictor.preprocessing(_input) + + if self.ort_session: + if self.jit_model: + warnings.warn( + "Warning: Both JIT and ONNX models are initialized, inference will run in ONNX mode by default.\n" + "To run in JIT please delete the self.ort_session like: detector.ort_session = None.") + preds = self.ort_session.run(['output'], {'data': _input.cpu().detach().numpy()}) + res = self.predictor.postprocessing(torch.from_numpy(preds[0]), _input, *metadata) + elif self.jit_model: + res = self.jit_model(_input, 
*metadata).cpu() + else: + preds = self.predictor(_input, *metadata) + res = self.predictor.postprocessing(preds, _input, *metadata) + + bounding_boxes = [] + for label in range(len(res)): + for box in res[label]: score = box[-1] if score > threshold: bbox = BoundingBox(left=box[0], top=box[1], @@ -519,7 +696,8 @@ def infer(self, input, threshold=0.35, verbose=True): height=box[3] - box[1], name=label, score=score) - bounding_boxes.data.append(bbox) + bounding_boxes.append(bbox) + bounding_boxes = BoundingBoxList(bounding_boxes) bounding_boxes.data.sort(key=lambda v: v.confidence) return bounding_boxes diff --git a/tests/Makefile b/tests/Makefile index aea3019248..5fdff5383d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -46,6 +46,11 @@ download: $(MV) opendrdata.csd.auth.gr/perception/face_recognition/test_data/images $(DATA_DIR)/database; \ $(WGET) ftp://opendrdata.csd.auth.gr/perception/face_recognition/optimized_model/*; \ $(MV) opendrdata.csd.auth.gr/perception/face_recognition/optimized_model $(DATA_DIR)/optimized_model; \ + $(MKDIR_P) $(DATA_DIR)/object_detection_2d/nanodet; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/images $(DATA_DIR)/object_detection_2d/nanodet/database; \ + $(WGET) ftp://opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model/*; \ + $(MV) opendrdata.csd.auth.gr/perception/object_detection_2d/nanodet/optimized_model $(DATA_DIR)/object_detection_2d/nanodet/optimized_model; \ $(RM) -r opendrdata.csd.auth.gr; \ fi; @@ -57,22 +62,34 @@ $(BUILD_DIR)/test_face_recognition: @+echo "Building face recognition test..." $(CC) $(CFLAGS) -o $(BUILD_DIR)/test_face_recognition sources/c_api/test_face_recognition.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) +$(BUILD_DIR)/test_nanodet: + @+echo "Building Nanodet object detection test..." 
+ $(CC) $(CFLAGS) -o $(BUILD_DIR)/test_nanodet sources/c_api/test_nanodet.c $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) + FMP_INC = -I$(OPENDR_HOME)/projects/python/perception/slam/full_map_posterior_gmapping/src/openslam_gmapping/include $(BUILD_DIR)/test_fmp_gmapping: @+echo "Building Full-Map-Posterior GMapping test..." $(CPP) $(CFLAGS) -o $(BUILD_DIR)/test_fmp_gmapping sources/c_api/test_fmp_gmapping.cpp -lboost_unit_test_framework $(INC) $(OPENDR_INC) $(OPENDR_LD) $(LD) $(FMP_INC) -tests: $(BUILD_DIR)/test_opendr_utils $(BUILD_DIR)/test_face_recognition $(BUILD_DIR)/test_fmp_gmapping + +tests: utils face_recognition nanodet fmp_gmapping + +utils: $(BUILD_DIR)/test_opendr_utils +face_recognition: $(BUILD_DIR)/test_face_recognition +nanodet: $(BUILD_DIR)/test_nanodet +fmp_gmapping: $(BUILD_DIR)/test_fmp_gmapping runtests: download tests @+$(LD_RUN) $(BUILD_DIR)/test_opendr_utils 2>/dev/null @+$(LD_RUN) $(BUILD_DIR)/test_face_recognition 2>/dev/null + @+$(LD_RUN) $(BUILD_DIR)/test_nanodet 2>/dev/null @+$(LD_RUN) $(BUILD_DIR)/test_fmp_gmapping clean: @+echo "Cleaning C tests binaries and temporary files..." @+$(RM) $(BUILD_DIR)/test_opendr_utils @+$(RM) $(BUILD_DIR)/test_face_recognition + @+$(RM) $(BUILD_DIR)/test_nanodet @+$(RM) $(BUILD_DIR)/test_fmp_gmapping @+$(RM) -rf $(DATA_DIR) @+echo "Done!" 
diff --git a/tests/sources/c_api/test_face_recognition.c b/tests/sources/c_api/test_face_recognition.c index c2adc17e37..cd25b277b2 100644 --- a/tests/sources/c_api/test_face_recognition.c +++ b/tests/sources/c_api/test_face_recognition.c @@ -22,79 +22,79 @@ START_TEST(model_creation_test) { // Create a face recognition model - face_recognition_model_t model; + FaceRecognitionModelT model; // Load a pretrained model - load_face_recognition_model("data/optimized_model", &model); + loadFaceRecognitionModel("data/optimized_model", &model); - ck_assert(model.onnx_session); + ck_assert(model.onnxSession); ck_assert(model.env); - ck_assert(model.session_options); + ck_assert(model.sessionOptions); // Release the resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); // Load a model that does not exist - load_face_recognition_model("data/optimized_model_not_existant", &model); - ck_assert(!model.onnx_session); + loadFaceRecognitionModel("data/optimized_model_not_existant", &model); + ck_assert(!model.onnxSession); ck_assert(!model.env); - ck_assert(!model.session_options); + ck_assert(!model.sessionOptions); // Release the resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); } END_TEST START_TEST(database_creation_test) { - face_recognition_model_t model; - load_face_recognition_model("data/optimized_model", &model); + FaceRecognitionModelT model; + loadFaceRecognitionModel("data/optimized_model", &model); // Check that we can create and load a database that exists - build_database_face_recognition("data/database", "data/database.dat", &model); - load_database_face_recognition("data/database.dat", &model); + buildDatabaseFaceRecognition("data/database", "data/database.dat", &model); + loadDatabaseFaceRecognition("data/database.dat", &model); ck_assert(model.database); - ck_assert(model.database_ids); - ck_assert(model.database_ids); + ck_assert(model.databaseIds); + ck_assert(model.databaseIds); // Check that 
we can handle errors in the process - build_database_face_recognition("data/database_not_existant", "data/database.dat", &model); - load_database_face_recognition("data/database_not_existant.dat", &model); + buildDatabaseFaceRecognition("data/database_not_existant", "data/database.dat", &model); + loadDatabaseFaceRecognition("data/database_not_existant.dat", &model); ck_assert(!model.database); - ck_assert(!model.database_ids); + ck_assert(!model.databaseIds); // Release the resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); } END_TEST START_TEST(inference_creation_test) { // Create a face recognition model - face_recognition_model_t model; + FaceRecognitionModelT model; // Load a pretrained model (see instructions for downloading the data) - load_face_recognition_model("data/optimized_model", &model); + loadFaceRecognitionModel("data/optimized_model", &model); // Build and load the database - build_database_face_recognition("data/database", "data/database.dat", &model); - load_database_face_recognition("data/database.dat", &model); + buildDatabaseFaceRecognition("data/database", "data/database.dat", &model); + loadDatabaseFaceRecognition("data/database.dat", &model); // Load an image and performance inference - opendr_image_t image; - load_image("data/database/1/1.jpg", &image); - opendr_category_target_t res = infer_face_recognition(&model, &image); - free_image(&image); + OpendrImageT image; + loadImage("data/database/1/1.jpg", &image); + OpendrCategoryTargetT res = inferFaceRecognition(&model, &image); + freeImage(&image); char buff[512]; - decode_category_face_recognition(&model, res, buff); + decodeCategoryFaceRecognition(&model, res, buff); ck_assert(!strcmp(buff, "1")); // Load another image - load_image("data/database/5/1.jpg", &image); - res = infer_face_recognition(&model, &image); - free_image(&image); - decode_category_face_recognition(&model, res, buff); + loadImage("data/database/5/1.jpg", &image); + res = 
inferFaceRecognition(&model, &image); + freeImage(&image); + decodeCategoryFaceRecognition(&model, res, buff); ck_assert(!strcmp(buff, "5")); // Free the model resources - free_face_recognition_model(&model); + freeFaceRecognitionModel(&model); } END_TEST diff --git a/tests/sources/c_api/test_nanodet.c b/tests/sources/c_api/test_nanodet.c new file mode 100644 index 0000000000..22a8e8e695 --- /dev/null +++ b/tests/sources/c_api/test_nanodet.c @@ -0,0 +1,85 @@ +/* + * Copyright 2020-2022 OpenDR European Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include +#include +#include +#include +#include "object_detection_2d_nanodet_jit.h" +#include "opendr_utils.h" + +START_TEST(model_creation_test) { + // Create a nanodet libtorch model + NanodetModelT model; + // Load a pretrained model + loadNanodetModel("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + ck_assert_msg(model.network != 0, "net is NULL"); + + // Release the resources + freeNanodetModel(&model); + + // Check if memory steel exist + ck_assert_msg(model.network, "net is NULL"); +} +END_TEST + +START_TEST(inference_creation_test) { + // Create a nanodet model + NanodetModelT model; + + // Load a pretrained model + loadNanodetModel("data/object_detection_2d/nanodet/optimized_model/nanodet_m.pth", "cpu", 320, 320, 0.35, &model); + + // Load an image and performance inference + OpendrImageT image; + loadImage("data/object_detection_2d/nanodet/database/000000000036.jpg", &image); + OpendrDetectionVectorTargetT res = inferNanodet(&model, &image); + freeImage(&image); + + ck_assert(res.size != 0); + + // Free the model resources + freeDetectionsVector(&res); + freeNanodetModel(&model); +} +END_TEST + +Suite *nanodet_suite(void) { + Suite *s; + TCase *tc_core; + + s = suite_create("Nanodet"); + tc_core = tcase_create("Core"); + + tcase_add_test(tc_core, model_creation_test); + tcase_add_test(tc_core, inference_creation_test); + suite_add_tcase(s, tc_core); + + return s; +} + +int main() { + int no_failed = 0; + Suite *s; + SRunner *runner; + + s = nanodet_suite(); + runner = srunner_create(s); + + srunner_run_all(runner, CK_NORMAL); + no_failed = srunner_ntests_failed(runner); + srunner_free(runner); + return (no_failed == 0) ? 
EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/tests/sources/c_api/test_opendr_utils.c b/tests/sources/c_api/test_opendr_utils.c index f872726199..fb68437f44 100644 --- a/tests/sources/c_api/test_opendr_utils.c +++ b/tests/sources/c_api/test_opendr_utils.c @@ -21,16 +21,16 @@ START_TEST(image_load_test) { // Load an image and performance inference - opendr_image_t image; + OpendrImageT image; // An example of an image that exist - load_image("data/database/1/1.jpg", &image); + loadImage("data/database/1/1.jpg", &image); ck_assert(image.data); // An example of an image that does not exist - load_image("images/not_existant/1.jpg", &image); + loadImage("images/not_existant/1.jpg", &image); ck_assert(image.data == 0); // Free the resources - free_image(&image); + freeImage(&image); } END_TEST diff --git a/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py b/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py index 6b613df539..e4a212fe5d 100644 --- a/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py +++ b/tests/sources/tools/perception/object_detection_2d/nanodet/test_nanodet.py @@ -17,13 +17,15 @@ import gc import shutil import os +import warnings +from torch.jit import TracerWarning import numpy as np from opendr.perception.object_detection_2d import NanodetLearner from opendr.engine.datasets import ExternalDataset device = os.getenv('TEST_DEVICE') if os.getenv('TEST_DEVICE') else 'cpu' -_DEFAULT_MODEL = "plus_m_416" +_DEFAULT_MODEL = "m" def rmfile(path): @@ -47,14 +49,13 @@ def setUpClass(cls): print("\n\n**********************************\nTEST Nanodet Learner\n" "**********************************") - cls.temp_dir = os.path.join(".", "tests", "sources", "tools", "perception", "object_detection_2d", - "nanodet", "nanodet_temp") + cls.temp_dir = os.path.join(".", "nanodet_temp") cls.detector = NanodetLearner(model_to_use=_DEFAULT_MODEL, device=device, temp_path=cls.temp_dir, batch_size=1, iters=1, 
checkpoint_after_iter=2, lr=1e-4) # Download all required files for testing - cls.detector.download(path=cls.temp_dir, mode="pretrained") - cls.detector.download(path=cls.temp_dir, mode="images") - cls.detector.download(path=cls.temp_dir, mode="test_data") + cls.detector.download(path=cls.temp_dir, mode="pretrained", verbose=False) + cls.detector.download(path=cls.temp_dir, mode="images", verbose=False) + cls.detector.download(path=cls.temp_dir, mode="test_data", verbose=False) @classmethod def tearDownClass(cls): @@ -104,13 +105,15 @@ def test_infer(self): print('Starting inference test for Nanodet...') self.detector.load(os.path.join(self.temp_dir, "nanodet_{}".format(_DEFAULT_MODEL)), verbose=False) img = cv2.imread(os.path.join(self.temp_dir, "000000000036.jpg")) - self.assertIsNotNone(self.detector.infer(input=img, verbose=False), + self.assertIsNotNone(self.detector.infer(input=img), msg="Returned empty BoundingBoxList.") gc.collect() print('Finished inference test for Nanodet...') def test_save_load(self): print('Starting save/load test for Nanodet...') + self.detector.ort_session = None + self.detector.jit_model = None self.detector.save(path=os.path.join(self.temp_dir, "test_model"), verbose=False) starting_param_1 = list(self.detector._model.parameters())[0].detach().clone().to(device) self.detector.model = None @@ -120,12 +123,40 @@ def test_save_load(self): new_param = list(detector2._model.parameters())[0].detach().clone().to(device) self.assertTrue(starting_param_1.allclose(new_param)) + del starting_param_1, new_param # Cleanup rmfile(os.path.join(self.temp_dir, "test_model", "nanodet_{}.json".format(_DEFAULT_MODEL))) rmfile(os.path.join(self.temp_dir, "test_model", "nanodet_{}.pth".format(_DEFAULT_MODEL))) rmdir(os.path.join(self.temp_dir, "test_model")) print('Finished save/load test for Nanodet...') + def test_optimize(self): + # Tracing will issue TracerWarnings, but these can be ignored safely + # because we use this function to create tensors 
out of constant + # variables that are the same every time we call this function. + warnings.simplefilter("ignore", TracerWarning) + warnings.simplefilter("ignore", RuntimeWarning) + + self.detector.ort_session = None + self.detector.jit_model = None + + self.detector.optimize(os.path.join(self.temp_dir, "onnx"), verbose=False, optimization="onnx") + self.assertIsNotNone(self.detector.ort_session) + + self.detector.optimize(os.path.join(self.temp_dir, "jit"), verbose=False, optimization="jit") + self.assertIsNotNone(self.detector.jit_model) + + # Cleanup + rmfile(os.path.join(self.temp_dir, "onnx", "nanodet_{}.onnx".format(_DEFAULT_MODEL))) + rmfile(os.path.join(self.temp_dir, "onnx", "nanodet_{}.json".format(_DEFAULT_MODEL))) + rmfile(os.path.join(self.temp_dir, "jit", "nanodet_{}.pth".format(_DEFAULT_MODEL))) + rmfile(os.path.join(self.temp_dir, "jit", "nanodet_{}.json".format(_DEFAULT_MODEL))) + rmdir(os.path.join(self.temp_dir, "onnx")) + rmdir(os.path.join(self.temp_dir, "jit")) + + warnings.simplefilter("default", TracerWarning) + warnings.simplefilter("default", RuntimeWarning) + if __name__ == "__main__": unittest.main()