From 8a3ef608433c173d8dc64459ca431380d22c9ab1 Mon Sep 17 00:00:00 2001 From: Ben Hoff Date: Mon, 21 Oct 2019 08:04:40 -0400 Subject: [PATCH] added mask RCNN script --- CHANGELOG.md | 1 + .../README.md | 32 +++++++ .../interp.py | 64 ++++++++++++++ .../mapping.json | 84 +++++++++++++++++++ 4 files changed, 181 insertions(+) create mode 100644 utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/README.md create mode 100644 utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/interp.py create mode 100644 utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/mapping.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 0c5789ec4933..7a2a78b64a23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Ability to [get basic information about users without admin permissions]( https://github.com/opencv/cvat/issues/750). - Changed REST API: removed PUT and added DELETE methods for /api/v1/users/ID. +- Added Mask-RCNN Auto Annotation Script ### Changed - diff --git a/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/README.md b/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/README.md new file mode 100644 index 000000000000..be7a82121bf0 --- /dev/null +++ b/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/README.md @@ -0,0 +1,32 @@ +# mask_rcnn_inception_resnet_v2_atrous_coco + +## Use Case and High-Level Description + +Mask R-CNN Inception Resnet V2 Atrous is trained on COCO dataset and used for object instance segmentation. +For details, see a [paper](https://arxiv.org/pdf/1703.06870.pdf). 
"""Interpretation script for Mask R-CNN auto annotation.

The script expects two names to be present in its global namespace when it
is executed: ``detections`` (an iterable of per-frame dicts with the keys
``frame_id``, ``frame_height``, ``frame_width`` and ``detections``) and
``results`` (an object exposing ``add_polygon``). For every detection whose
score reaches ``PROBABILITY_THRESHOLD`` one polygon is emitted.
"""
import numpy as np
import cv2


MASK_THRESHOLD = .5
PROBABILITY_THRESHOLD = 0.2


# Ref: https://software.intel.com/en-us/forums/computer-vision/topic/804895
def segm_postprocess(box: list, raw_cls_mask, im_h, im_w, threshold):
    """Paste a low-resolution class mask into a full-frame binary mask.

    Args:
        box: ``(x_min, y_min, x_max, y_max)`` of the detection in pixel
            coordinates of the frame (this is the order the caller in this
            script passes).
        raw_cls_mask: 2-D mask of scores for the detected class; it is resized
            to the box size with cubic interpolation.
        im_h: frame height in pixels.
        im_w: frame width in pixels.
        threshold: resized scores strictly greater than this become 255,
            everything else stays 0.

    Returns:
        ``np.uint8`` array of shape ``(im_h, im_w)`` holding 0 or 255.
        An all-zero mask is returned when the box has no area or lies
        completely outside the frame.
    """
    x_min, y_min, x_max, y_max = box

    box_w = int(abs(x_max - x_min))
    box_h = int(abs(y_max - y_min))

    result = np.zeros((im_h, im_w), dtype=np.uint8)

    # cv2.resize rejects an empty destination size, so bail out early.
    if box_w == 0 or box_h == 0:
        return result

    # cv2 takes dsize as (width, height); the returned array is (box_h, box_w).
    resized_mask = cv2.resize(raw_cls_mask,
                              dsize=(box_w, box_h),
                              interpolation=cv2.INTER_CUBIC)
    binary_mask = (resized_mask > threshold).astype(np.uint8) * 255

    left = int(round(x_min))
    top = int(round(y_min))

    # Clip the destination window to the frame. Without this, a box reaching
    # outside the frame yields a shape mismatch (or negative-index wraparound).
    dst_left = max(left, 0)
    dst_top = max(top, 0)
    dst_right = min(left + box_w, im_w)
    dst_bottom = min(top + box_h, im_h)
    if dst_left >= dst_right or dst_top >= dst_bottom:
        return result

    # Rows are indexed by y, columns by x.
    result[dst_top:dst_bottom, dst_left:dst_right] = binary_mask[
        dst_top - top:dst_bottom - top,
        dst_left - left:dst_right - left,
    ]

    return result


for frame in detections:
    frame_number = frame['frame_id']
    height = frame['frame_height']
    width = frame['frame_width']
    outputs = frame['detections']

    masks = outputs['masks']
    boxes = outputs['reshape_do_2d']

    for index, box in enumerate(boxes):
        label = int(box[1])
        obj_value = box[2]
        if obj_value < PROBABILITY_THRESHOLD:
            continue

        # Box coordinates are scaled by the frame size to get pixels.
        x = box[3] * width
        y = box[4] * height
        right = box[5] * width
        bottom = box[6] * height

        # NOTE(review): the label is used directly as the class-channel index.
        # If labels are 1-based while the mask channels are 0-based this
        # should be ``label - 1`` -- confirm against the model output layout.
        mask = masks[index][label]

        mask = segm_postprocess((x, y, right, bottom),
                                mask,
                                height,
                                width,
                                MASK_THRESHOLD)

        # findContours returns (contours, hierarchy) or
        # (image, contours, hierarchy) depending on the OpenCV version;
        # the contours are always the second-to-last element.
        contours = cv2.findContours(mask,
                                    cv2.RETR_EXTERNAL,
                                    cv2.CHAIN_APPROX_TC89_KCOS)[-2]

        # Nothing survived the mask threshold: there is no shape to report.
        if len(contours) == 0:
            continue

        # Keep the dominant region instead of an arbitrary first contour.
        largest = max(contours, key=cv2.contourArea)
        polygon = [point[0] for point in largest.tolist()]

        # Fewer than three vertices cannot form a polygon.
        if len(polygon) < 3:
            continue

        # NOTE: if you want to see the boxes, uncomment next line
        # results.add_box(x, y, right, bottom, label, frame_number)
        results.add_polygon(polygon, label, frame_number)
+ "37": "sports_ball", + "38": "kite", + "39": "baseball_bat", + "40": "baseball_glove", + "41": "skateboard", + "42": "surfboard", + "43": "tennis_racket", + "44": "bottle", + "46": "wine_glass", + "47": "cup", + "48": "fork", + "49": "knife", + "50": "spoon", + "51": "bowl", + "52": "banana", + "53": "apple", + "54": "sandwich", + "55": "orange", + "56": "broccoli", + "57": "carrot", + "58": "hot_dog", + "59": "pizza", + "60": "donut", + "61": "cake", + "62": "chair", + "63": "couch", + "64": "potted_plant", + "65": "bed", + "67": "dining_table", + "70": "toilet", + "72": "tv", + "73": "laptop", + "74": "mouse", + "75": "remote", + "76": "keyboard", + "77": "cell_phone", + "78": "microwave", + "79": "oven", + "80": "toaster", + "81": "sink", + "83": "refrigerator", + "84": "book", + "85": "clock", + "86": "vase", + "87": "scissors", + "88": "teddy_bear", + "89": "hair_drier", + "90": "toothbrush" + } +}