From f8a91695b1f48e2ee8fa53d3aa6a46a38e250c53 Mon Sep 17 00:00:00 2001
From: Ben Hoff
Date: Thu, 10 Oct 2019 08:45:05 -0400
Subject: [PATCH] adding in Security instance segmentation interp script

---
 cvat/apps/auto_annotation/model_loader.py    | 34 +++++---
 .../instance_segmentation/0050/interp.py     | 77 +++++++++++++++++
 .../instance_segmentation/0050/mapping.json  | 84 +++++++++++++++++++
 3 files changed, 185 insertions(+), 10 deletions(-)
 create mode 100644 utils/open_model_zoo/Security/instance_segmentation/0050/interp.py
 create mode 100644 utils/open_model_zoo/Security/instance_segmentation/0050/mapping.json

diff --git a/cvat/apps/auto_annotation/model_loader.py b/cvat/apps/auto_annotation/model_loader.py
index 73d33d81b4fd..86712a9d2668 100644
--- a/cvat/apps/auto_annotation/model_loader.py
+++ b/cvat/apps/auto_annotation/model_loader.py
@@ -31,14 +31,19 @@ def __init__(self, model, weights):
         iter_inputs = iter(network.inputs)
         self._input_blob_name = next(iter_inputs)
+        self._input_info_name = ''
         self._output_blob_name = next(iter(network.outputs))
 
         self._require_image_info = False
 
+        info_names = ('image_info', 'im_info')
+
         # NOTE: handling for the inclusion of `image_info` in OpenVINO 2019
-        if 'image_info' in network.inputs:
+        if any(s in network.inputs for s in info_names):
             self._require_image_info = True
-            if self._input_blob_name == 'image_info':
+            self._input_info_name = set(network.inputs).intersection(info_names)
+            self._input_info_name = self._input_info_name.pop()
+            if self._input_blob_name in info_names:
                 self._input_blob_name = next(iter_inputs)
 
         self._net = plugin.load(network=network, num_requests=2)
 
@@ -47,22 +52,31 @@ def infer(self, image):
         _, _, h, w = self._input_layout
-        in_frame = image if image.shape[:-1] == (h, w) else cv2.resize(image, (w, h))
+        scale = min(h / image.shape[0], w / image.shape[1])
+        in_frame = image if image.shape[:-1] == (h, w) else cv2.resize(image, None, fx=scale, fy=scale)
+
+        in_frame_size = in_frame.shape[:2]
+        in_frame = np.pad(in_frame, ((0, h - in_frame_size[0]),
+                                     (0, w - in_frame_size[1]),
+                                     (0, 0)),
+                          mode='constant', constant_values=0)
+
         in_frame = in_frame.transpose((2, 0, 1))  # Change data layout from HWC to CHW
         inputs = {self._input_blob_name: in_frame}
         if self._require_image_info:
-            info = np.zeros([1, 3])
-            info[0, 0] = h
-            info[0, 1] = w
-            # frame number
-            info[0, 2] = 1
-            inputs['image_info'] = info
+            # [height, width, scale] of the resized (pre-padding) frame.
+            info = np.asarray([[in_frame_size[0],
+                                in_frame_size[1],
+                                scale]],
+                              dtype=np.float32)
+
+            inputs[self._input_info_name] = info
 
         results = self._net.infer(inputs)
+
         if len(results) == 1:
             return results[self._output_blob_name].copy()
         else:
-            return results.copy()
+            return results
 
 
 def load_labelmap(labels_path):
diff --git a/utils/open_model_zoo/Security/instance_segmentation/0050/interp.py b/utils/open_model_zoo/Security/instance_segmentation/0050/interp.py
new file mode 100644
index 000000000000..15bc5973109c
--- /dev/null
+++ b/utils/open_model_zoo/Security/instance_segmentation/0050/interp.py
@@ -0,0 +1,77 @@
+import numpy as np
+import cv2
+
+
+THRESHOLD = 0.5
+
+# See: https://github.com/opencv/open_model_zoo/blob/master/demos/python_demos/instance_segmentation_demo/main.py
+
+def segm_postprocess(box, raw_cls_mask, im_h, im_w):
+    # Add zero border to prevent upsampling artifacts on segment borders.
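+    # The border grows an S x S mask to (S + 2) x (S + 2), so the box is
+    # expanded by the matching ratio below before the mask is resized and
+    # pasted back into the frame.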
+    raw_cls_mask = np.pad(raw_cls_mask, ((1, 1), (1, 1)), 'constant', constant_values=0)
+    scale = raw_cls_mask.shape[0] / (raw_cls_mask.shape[0] - 2.0)
+    w_half = (box[2] - box[0]) * .5
+    h_half = (box[3] - box[1]) * .5
+    x_c = (box[2] + box[0]) * .5
+    y_c = (box[3] + box[1]) * .5
+    w_half *= scale
+    h_half *= scale
+    box_exp = np.zeros(box.shape)
+    box_exp[0] = x_c - w_half
+    box_exp[2] = x_c + w_half
+    box_exp[1] = y_c - h_half
+    box_exp[3] = y_c + h_half
+
+    extended_box = box_exp.astype(int)
+
+    w, h = np.maximum(extended_box[2:] - extended_box[:2] + 1, 1)
+    x0, y0 = np.clip(extended_box[:2], a_min=0, a_max=[im_w, im_h])
+    x1, y1 = np.clip(extended_box[2:] + 1, a_min=0, a_max=[im_w, im_h])
+
+    raw_cls_mask = cv2.resize(raw_cls_mask, (w, h)) > 0.5
+    mask = raw_cls_mask.astype(np.uint8)
+    # Put an object mask in an image mask.
+    im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
+    im_mask[y0:y1, x0:x1] = mask[(y0 - extended_box[1]):(y1 - extended_box[1]),
+                                 (x0 - extended_box[0]):(x1 - extended_box[0])]
+
+    return im_mask
+
+
+for detection in detections:
+    frame_number = detection['frame_id']
+    height = detection['frame_height']
+    width = detection['frame_width']
+    detection = detection['detections']
+
+    # Network input size for instance-segmentation-security-0050; must match
+    # the letterbox preprocessing in model_loader.py.
+    blob_height = 480
+    blob_width = 480
+
+    scale = min(blob_height / height, blob_width / width)
+
+    boxes = detection['boxes'] / scale
+    scores = detection['scores']
+    classes = detection['classes'].astype(np.uint32)
+    masks = []
+    for box, cls, raw_mask in zip(boxes, classes, detection['raw_masks']):
+        raw_cls_mask = raw_mask[cls, ...]
+        mask = segm_postprocess(box, raw_cls_mask, height, width)
+        masks.append(mask)
+
+    # Filter out detections with low confidence.
+    detections_filter = scores > THRESHOLD
+    scores = scores[detections_filter]
+    classes = classes[detections_filter]
+    boxes = boxes[detections_filter]
+    masks = [segm for segm, is_valid in zip(masks, detections_filter) if is_valid]
+
+    for mask, label in zip(masks, classes):
+        # cv2.findContours returns (contours, hierarchy) in OpenCV 4.
+        contour, _ = cv2.findContours(mask,
+                                      cv2.RETR_EXTERNAL,
+                                      cv2.CHAIN_APPROX_TC89_KCOS)
+
+        contour = contour[0]
+        contour = contour.tolist()
+        contour = [x[0] for x in contour]
+
+        results.add_polygon(contour, label, frame_number)
diff --git a/utils/open_model_zoo/Security/instance_segmentation/0050/mapping.json b/utils/open_model_zoo/Security/instance_segmentation/0050/mapping.json
new file mode 100644
index 000000000000..3efdb307565f
--- /dev/null
+++ b/utils/open_model_zoo/Security/instance_segmentation/0050/mapping.json
@@ -0,0 +1,84 @@
+{
+  "label_map": {
+    "1": "person",
+    "2": "bicycle",
+    "3": "car",
+    "4": "motorcycle",
+    "5": "airplane",
+    "6": "bus",
+    "7": "train",
+    "8": "truck",
+    "9": "boat",
+    "10": "traffic_light",
+    "11": "fire_hydrant",
+    "13": "stop_sign",
+    "14": "parking_meter",
+    "15": "bench",
+    "16": "bird",
+    "17": "cat",
+    "18": "dog",
+    "19": "horse",
+    "20": "sheep",
+    "21": "cow",
+    "22": "elephant",
+    "23": "bear",
+    "24": "zebra",
+    "25": "giraffe",
+    "27": "backpack",
+    "28": "umbrella",
+    "31": "handbag",
+    "32": "tie",
+    "33": "suitcase",
+    "34": "frisbee",
+    "35": "skis",
+    "36": "snowboard",
+    "37": "sports_ball",
+    "38": "kite",
+    "39": "baseball_bat",
+    "40": "baseball_glove",
+    "41": "skateboard",
+    "42": "surfboard",
+    "43": "tennis_racket",
+    "44": "bottle",
+    "46": "wine_glass",
+    "47": "cup",
+    "48": "fork",
+    "49": "knife",
+    "50": "spoon",
+    "51": "bowl",
+    "52": "banana",
+    "53": "apple",
+    "54": "sandwich",
+    "55": "orange",
+    "56": "broccoli",
+    "57": "carrot",
+    "58": "hot_dog",
"hot_dog", + "59": "pizza", + "60": "donut", + "61": "cake", + "62": "chair", + "63": "couch", + "64": "potted_plant", + "65": "bed", + "67": "dining_table", + "70": "toilet", + "72": "tv", + "73": "laptop", + "74": "mouse", + "75": "remote", + "76": "keyboard", + "77": "cell_phone", + "78": "microwave", + "79": "oven", + "80": "toaster", + "81": "sink", + "83": "refrigerator", + "84": "book", + "85": "clock", + "86": "vase", + "87": "scissors", + "88": "teddy_bear", + "89": "hair_drier", + "90": "toothbrush" + } +}