From 8a3ef608433c173d8dc64459ca431380d22c9ab1 Mon Sep 17 00:00:00 2001
From: Ben Hoff <hoff.benjamin.k@gmail.com>
Date: Mon, 21 Oct 2019 08:04:40 -0400
Subject: [PATCH] added mask RCNN script

---
 CHANGELOG.md                                  |  1 +
 .../README.md                                 | 32 +++++++
 .../interp.py                                 | 64 ++++++++++++++
 .../mapping.json                              | 84 +++++++++++++++++++
 4 files changed, 181 insertions(+)
 create mode 100644 utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/README.md
 create mode 100644 utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/interp.py
 create mode 100644 utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/mapping.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0c5789ec4933..7a2a78b64a23 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Ability to [get basic information about users without admin permissions](
 https://github.com/opencv/cvat/issues/750).
 - Changed REST API: removed PUT and added DELETE methods for /api/v1/users/ID.
+- Added Mask-RCNN Auto Annotation Script
 
 ### Changed
 -
diff --git a/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/README.md b/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/README.md
new file mode 100644
index 000000000000..be7a82121bf0
--- /dev/null
+++ b/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/README.md
@@ -0,0 +1,32 @@
+# mask_rcnn_inception_resnet_v2_atrous_coco
+
+## Use Case and High-Level Description
+
+Mask R-CNN Inception ResNet V2 Atrous is trained on the COCO dataset and is used for object instance segmentation.
+For details, see the [paper](https://arxiv.org/pdf/1703.06870.pdf).
+
+## Specification
+
+| Metric                          | Value                                     |
+|---------------------------------|-------------------------------------------|
+| Type                            | Instance segmentation                     |
+| GFlops                          | 675.314                                   |
+| MParams                         | 92.368                                    |
+| Source framework                | TensorFlow\*                              |
+
+## Legal Information
+
+[https://raw.githubusercontent.com/tensorflow/models/master/LICENSE](https://raw.githubusercontent.com/tensorflow/models/master/LICENSE)
+
+## OpenVINO Conversion Notes
+
+To convert the model into the OpenVINO format, please see the [following link](https://docs.openvinotoolkit.org/latest/_docs_MO_DG_prepare_model_convert_model_tf_specific_Convert_Object_Detection_API_Models.html#mask_r_cnn_topologies).
+
+The conversion command, run from the command line, looks similar to the following:
+
+```shell
+$ python /opt/intel/openvino/deployment_tools/model_optimizer/mo_tf.py \
+		--input_model /path/to/frozen_inference_graph.pb \
+		--tensorflow_use_custom_operations_config /opt/intel/openvino/deployment_tools/model_optimizer/extensions/front/tf/mask_rcnn_support.json \
+		--tensorflow_object_detection_api_pipeline_config /path/to/pipeline.config 
+```
diff --git a/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/interp.py b/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/interp.py
new file mode 100644
index 000000000000..1017779dde91
--- /dev/null
+++ b/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/interp.py
@@ -0,0 +1,64 @@
+import numpy as np
+import cv2
+
+
+MASK_THRESHOLD = .5
+PROBABILITY_THRESHOLD = 0.2
+
+
+# Ref: https://software.intel.com/en-us/forums/computer-vision/topic/804895
+def segm_postprocess(box: list, raw_cls_mask, im_h, im_w, threshold):
+    ymin, xmin, ymax, xmax = box
+
+    width = int(abs(xmax - xmin))
+    height = int(abs(ymax - ymin))
+
+    result = np.zeros((im_h, im_w), dtype=np.uint8)
+    resized_mask = cv2.resize(raw_cls_mask, dsize=(height, width), interpolation=cv2.INTER_CUBIC)
+
+    # extract the ROI of the image
+    ymin = int(round(ymin))
+    xmin = int(round(xmin))
+    ymax = ymin + height
+    xmax = xmin + width
+    result[xmin:xmax, ymin:ymax] = (resized_mask>threshold).astype(np.uint8) * 255
+
+    return result
+
+
+for detection in detections:
+    frame_number = detection['frame_id']
+    height = detection['frame_height']
+    width = detection['frame_width']
+    detection = detection['detections']
+
+    masks = detection['masks']
+    boxes = detection['reshape_do_2d']
+
+    for index, box in enumerate(boxes):
+        label = int(box[1])
+        obj_value = box[2]
+        if obj_value >= PROBABILITY_THRESHOLD:
+            x = box[3] * width
+            y = box[4] * height
+            right = box[5] * width
+            bottom = box[6] * height
+            mask = masks[index][label]
+
+            mask = segm_postprocess((x, y, right, bottom),
+                                    mask,
+                                    height,
+                                    width,
+                                    MASK_THRESHOLD)
+
+            contour, _ = cv2.findContours(mask,
+                                          cv2.RETR_EXTERNAL,
+                                          cv2.CHAIN_APPROX_TC89_KCOS)
+
+            contour = contour[0]
+            contour = contour.tolist()
+            contour = [x[0] for x in contour]
+
+            # NOTE: if you want to see the boxes, uncomment next line
+            # results.add_box(x, y, right, bottom, label, frame_number)
+            results.add_polygon(contour, label, frame_number)
diff --git a/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/mapping.json b/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/mapping.json
new file mode 100644
index 000000000000..3efdb307565f
--- /dev/null
+++ b/utils/open_model_zoo/mask_rcnn_inception_resnet_v2_atrous_coco/mapping.json
@@ -0,0 +1,84 @@
+{
+    "label_map": {
+        "1": "person",
+        "2": "bicycle",
+        "3": "car",
+        "4": "motorcycle",
+        "5": "airplane",
+        "6": "bus",
+        "7": "train",
+        "8": "truck",
+        "9": "boat",
+        "10": "traffic_light",
+        "11": "fire_hydrant",
+        "13": "stop_sign",
+        "14": "parking_meter",
+        "15": "bench",
+        "16": "bird",
+        "17": "cat",
+        "18": "dog",
+        "19": "horse",
+        "20": "sheep",
+        "21": "cow",
+        "22": "elephant",
+        "23": "bear",
+        "24": "zebra",
+        "25": "giraffe",
+        "27": "backpack",
+        "28": "umbrella",
+        "31": "handbag",
+        "32": "tie",
+        "33": "suitcase",
+        "34": "frisbee",
+        "35": "skis",
+        "36": "snowboard",
+        "37": "sports_ball",
+        "38": "kite",
+        "39": "baseball_bat",
+        "40": "baseball_glove",
+        "41": "skateboard",
+        "42": "surfboard",
+        "43": "tennis_racket",
+        "44": "bottle",
+        "46": "wine_glass",
+        "47": "cup",
+        "48": "fork",
+        "49": "knife",
+        "50": "spoon",
+        "51": "bowl",
+        "52": "banana",
+        "53": "apple",
+        "54": "sandwich",
+        "55": "orange",
+        "56": "broccoli",
+        "57": "carrot",
+        "58": "hot_dog",
+        "59": "pizza",
+        "60": "donut",
+        "61": "cake",
+        "62": "chair",
+        "63": "couch",
+        "64": "potted_plant",
+        "65": "bed",
+        "67": "dining_table",
+        "70": "toilet",
+        "72": "tv",
+        "73": "laptop",
+        "74": "mouse",
+        "75": "remote",
+        "76": "keyboard",
+        "77": "cell_phone",
+        "78": "microwave",
+        "79": "oven",
+        "80": "toaster",
+        "81": "sink",
+        "83": "refrigerator",
+        "84": "book",
+        "85": "clock",
+        "86": "vase",
+        "87": "scissors",
+        "88": "teddy_bear",
+        "89": "hair_drier",
+        "90": "toothbrush"
+    }
+}