Skip to content

Commit 85bf95e

Browse files
sungchul2 and wonjuleee authored
OTX deploy for visual prompting task (#2311)
* Enable `otx deploy`
* (WIP) integration test
* Docstring
* Update args for create_model
* Manually set image embedding layout
* Enable to use model api for preprocessing
  - `fit_to_window` does not work as expected, so `VisualPromptingOpenvinoAdapter` was newly implemented to use the new resize function
* Remove skipped test
* Updated
* Update unit tests on model wrappers
* Update
* Update configuration
* Fix not to patch pretrained path
* pylint & update model api version in docstring

---------

Co-authored-by: Wonju Lee <wonju.lee@intel.com>
1 parent 91ad751 commit 85bf95e

File tree

16 files changed

+660
-264
lines changed

16 files changed

+660
-264
lines changed

CHANGELOG.md

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@ All notable changes to this project will be documented in this file.
1212
- Add per-class XAI saliency maps for Mask R-CNN model (https://github.com/openvinotoolkit/training_extensions/pull/2227)
1313
- Add new object detector Deformable DETR (<https://github.com/openvinotoolkit/training_extensions/pull/2249>)
1414
- Add new object detector DINO (<https://github.com/openvinotoolkit/training_extensions/pull/2266>)
15-
- Add new visual prompting task (https://github.com/openvinotoolkit/training_extensions/pull/2203), (https://github.com/openvinotoolkit/training_extensions/pull/2274)
15+
- Add new visual prompting task: train/eval (https://github.com/openvinotoolkit/training_extensions/pull/2203)
16+
- Add new visual prompting task: export (https://github.com/openvinotoolkit/training_extensions/pull/2274)
17+
- Add new visual prompting task: deploy (https://github.com/openvinotoolkit/training_extensions/pull/2311)
1618
- Add new object detector ResNeXt101-ATSS (<https://github.com/openvinotoolkit/training_extensions/pull/2309>)
1719

1820
### Enhancements

src/otx/algorithms/visual_prompting/adapters/openvino/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,5 @@
1313
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1414
# See the License for the specific language governing permissions
1515
# and limitations under the License.
16+
17+
from .model_wrappers import * # noqa: F403
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
"""Wrapper Initialization of OTX Visual Prompting."""
2+
3+
# Copyright (C) 2022 Intel Corporation
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions
15+
# and limitations under the License.
16+
17+
from .openvino_adapters import VisualPromptingOpenvinoAdapter # noqa: F401
18+
from .openvino_models import Decoder, ImageEncoder # noqa: F401
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
"""Openvino Adapter Wrappers of OTX Visual Prompting.
2+
3+
There is a bug in the `fit_to_window` resize module of the model API.
4+
VisualPromptingOpenvinoAdapter is temporarily implemented to use updated `fit_to_window` resize function.
5+
When model API version in otx is upgraded, it can be removed.
6+
7+
Issue: https://github.com/openvinotoolkit/model_api/issues/99
8+
Updated PR: https://github.com/openvinotoolkit/model_api/pull/100
9+
"""
10+
11+
# Copyright (C) 2023 Intel Corporation
12+
# SPDX-License-Identifier: Apache-2.0
13+
#
14+
15+
from functools import partial
16+
from typing import Tuple
17+
18+
import numpy as np
19+
import openvino.runtime as ov
20+
from openvino.model_api.adapters import OpenvinoAdapter
21+
from openvino.preprocess import ColorFormat, PrePostProcessor
22+
from openvino.runtime import Output, Type
23+
from openvino.runtime import opset10 as opset
24+
from openvino.runtime.utils.decorators import custom_preprocess_function
25+
26+
27+
def resize_image_with_aspect_pad(input: Output, size, keep_aspect_ratio, interpolation, pad_value):
    """Build a sub-graph that resizes an image keeping its aspect ratio, then pads it to ``size``.

    Adapted from
    https://github.com/openvinotoolkit/model_api/blob/0.1.3/model_api/python/openvino/model_api/adapters/utils.py#L273-L341.

    Args:
        input (Output): Node output producing the image. Height is read from axis 1 and
            width from axis 2 of its shape.
        size: Target size, unpacked as ``(width, height)``.
        keep_aspect_ratio: Not used by this function; the aspect ratio is always kept.
            Retained so the signature stays compatible with existing callers.
        interpolation: Interpolation mode forwarded to ``opset.interpolate``.
        pad_value: Value used to fill the padded area (emitted as a ``uint8`` constant).

    Returns:
        The ``opset.pad`` node whose spatial size equals the requested ``size``.
    """
    h_axis = 1
    w_axis = 2
    w, h = size

    # Read the runtime height/width of the incoming image as floats.
    image_shape = opset.shape_of(input, name="shape")
    iw = opset.convert(
        opset.gather(image_shape, opset.constant(w_axis), axis=0),
        destination_type="f32",
    )
    ih = opset.convert(
        opset.gather(image_shape, opset.constant(h_axis), axis=0),
        destination_type="f32",
    )

    # The smaller of the two ratios keeps the aspect ratio and makes the image fit inside the target.
    w_ratio = opset.divide(np.float32(w), iw)
    h_ratio = opset.divide(np.float32(h), ih)
    scale = opset.minimum(w_ratio, h_ratio)
    nw = opset.convert(opset.round(opset.multiply(iw, scale), "half_to_even"), destination_type="i32")
    nh = opset.convert(opset.round(opset.multiply(ih, scale), "half_to_even"), destination_type="i32")
    new_size = opset.concat([opset.unsqueeze(nh, 0), opset.unsqueeze(nw, 0)], axis=0)
    image = opset.interpolate(
        input,
        new_size,
        scales=np.array([0.0, 0.0], dtype=np.float32),
        axes=[h_axis, w_axis],
        mode=interpolation,
        shape_calculation_mode="sizes",
    )

    # Pad only at the end of the height/width axes, so the resized image stays
    # anchored at the top-left corner.
    dx_border = opset.subtract(opset.constant(w, dtype=np.int32), nw)
    dy_border = opset.subtract(opset.constant(h, dtype=np.int32), nh)
    pads_begin = np.array([0, 0, 0, 0], np.int32)
    pads_end = opset.concat(
        [
            opset.constant([0], dtype=np.int32),
            opset.unsqueeze(dy_border, 0),
            opset.unsqueeze(dx_border, 0),
            opset.constant([0], dtype=np.int32),
        ],
        axis=0,
    )
    return opset.pad(
        image,
        pads_begin,
        pads_end,
        "constant",
        opset.constant(pad_value, dtype=np.uint8),
    )
79+
80+
81+
def resize_image_with_aspect(size, interpolation, pad_value):
    """Create a custom preprocessing step that resizes with kept aspect ratio and pads.

    See https://github.com/openvinotoolkit/model_api/blob/0.1.3/model_api/python/openvino/model_api/adapters/utils.py#L356-L365.

    Args:
        size: Target size forwarded to ``resize_image_with_aspect_pad``.
        interpolation: Interpolation mode forwarded to ``resize_image_with_aspect_pad``.
        pad_value: Padding value forwarded to ``resize_image_with_aspect_pad``.

    Returns:
        ``resize_image_with_aspect_pad`` with every argument except the input node bound,
        wrapped by ``custom_preprocess_function``.
    """
    fixed_kwargs = dict(
        size=size,
        keep_aspect_ratio=True,
        interpolation=interpolation,
        pad_value=pad_value,
    )
    bound_resize = partial(resize_image_with_aspect_pad, **fixed_kwargs)
    return custom_preprocess_function(bound_resize)
92+
93+
94+
class VisualPromptingOpenvinoAdapter(OpenvinoAdapter):
    """Openvino Adapter Wrappers of OTX Visual Prompting.

    This class is to use fixed `fit_to_window` resize module.
    When model API version in otx is upgraded, it can be removed.
    """

    def embed_preprocessing(
        self,
        layout,
        resize_mode: str,
        interpolation_mode,
        target_shape: Tuple[int, ...],
        pad_value,
        dtype=type(int),
        brg2rgb=False,
        mean=None,
        scale=None,
        input_idx=0,
    ):
        """Embed image preprocessing into the model graph, then rebuild and reload the model.

        Adapted from
        https://github.com/openvinotoolkit/model_api/blob/0.1.3/model_api/python/openvino/model_api/adapters/openvino_adapter.py#L340-L411.

        Args:
            layout: Layout of the model input, wrapped into ``ov.Layout``.
            resize_mode (str): Resize type. Only ``"fit_to_window"`` is supported here.
            interpolation_mode: One of ``"LINEAR"``, ``"CUBIC"`` or ``"NEAREST"``.
            target_shape (Tuple[int, ...]): Target size forwarded to the resize function.
            pad_value: Padding value forwarded to the resize function.
            dtype: When equal to the default, the input tensor element type is set to u8.
            brg2rgb (bool): Whether to convert the BGR input tensor to RGB.
            mean: Optional values passed to the ``mean`` preprocessing step.
            scale: Optional values passed to the ``scale`` preprocessing step.
            input_idx (int): Index of the model input the preprocessing is attached to.

        Raises:
            ValueError: If ``resize_mode`` and ``target_shape`` are set but the resize mode
                is not supported.
        """
        ppp = PrePostProcessor(self.model)  # type: ignore[has-type]

        # Change the input type to the 8-bit image
        # NOTE(review): `type(int)` evaluates to `type`, so this matches only the default
        # value; an explicit `dtype=int` does not enter this branch. Kept as in upstream.
        if dtype == type(int):
            ppp.input(input_idx).tensor().set_element_type(Type.u8)

        ppp.input(input_idx).tensor().set_layout(ov.Layout("NHWC")).set_color_format(ColorFormat.BGR)

        INTERPOLATION_MODE_MAP = {
            "LINEAR": "linear",
            "CUBIC": "cubic",
            "NEAREST": "nearest",
        }

        RESIZE_MODE_MAP = {"fit_to_window": resize_image_with_aspect}

        # Handle resize
        # Change to dynamic shape to handle various image size
        # TODO: check the number of input channels and rank of input shape
        if resize_mode and target_shape:
            if resize_mode in RESIZE_MODE_MAP:
                input_shape = [1, -1, -1, 3]
                ppp.input(input_idx).tensor().set_shape(input_shape)
                ppp.input(input_idx).preprocess().custom(
                    RESIZE_MODE_MAP[resize_mode](
                        target_shape,
                        INTERPOLATION_MODE_MAP[interpolation_mode],
                        pad_value,
                    )
                )

            else:
                raise ValueError(f"Unsupported resize type in model preprocessing: {resize_mode}")

        # Handle layout
        ppp.input(input_idx).model().set_layout(ov.Layout(layout))

        # Handle color format
        if brg2rgb:
            ppp.input(input_idx).preprocess().convert_color(ColorFormat.RGB)

        # Normalization below runs on float values, after the resize step above.
        ppp.input(input_idx).preprocess().convert_element_type(Type.f32)

        if mean:
            ppp.input(input_idx).preprocess().mean(mean)
        if scale:
            ppp.input(input_idx).preprocess().scale(scale)

        # Replace the model with the one that has preprocessing embedded, then reload it.
        self.model = ppp.build()
        self.load_model()

src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers.py src/otx/algorithms/visual_prompting/adapters/openvino/model_wrappers/openvino_models.py

+76-47
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Model Wrapper of OTX Visual Prompting."""
1+
"""Openvino Model Wrappers of OTX Visual Prompting."""
22

33
# Copyright (C) 2023 Intel Corporation
44
#
@@ -14,16 +14,15 @@
1414
# See the License for the specific language governing permissions
1515
# and limitations under the License.
1616

17-
from typing import Any, Dict, Tuple
17+
from copy import deepcopy
18+
from typing import Any, Dict, List, Optional, Tuple, Union
1819

1920
import cv2
2021
import numpy as np
21-
from openvino.model_api.models import ImageModel
22-
from openvino.model_api.models.types import NumericalValue
22+
from openvino.model_api.adapters.inference_adapter import InferenceAdapter
23+
from openvino.model_api.models import ImageModel, SegmentationModel
24+
from openvino.model_api.models.types import NumericalValue, StringValue
2325

24-
from otx.algorithms.segmentation.adapters.openvino.model_wrappers.blur import (
25-
BlurSegmentation,
26-
)
2726
from otx.api.utils.segmentation_utils import create_hard_prediction_from_soft_prediction
2827

2928

@@ -32,63 +31,93 @@ class ImageEncoder(ImageModel):
3231

3332
__model__ = "image_encoder"
3433

34+
def __init__(self, inference_adapter, configuration=None, preload=False):
    """Initialize the image encoder by forwarding all arguments to the parent constructor."""
    super().__init__(inference_adapter, configuration, preload)
36+
3537
@classmethod
3638
def parameters(cls) -> Dict[str, Any]: # noqa: D102
3739
parameters = super().parameters()
38-
parameters["resize_type"].default_value = "fit_to_window"
39-
parameters["mean_values"].default_value = [123.675, 116.28, 103.53]
40-
parameters["scale_values"].default_value = [58.395, 57.12, 57.375]
40+
parameters.update(
41+
{
42+
"resize_type": StringValue(default_value="fit_to_window"),
43+
}
44+
)
4145
return parameters
4246

47+
def preprocess(self, inputs: np.ndarray) -> Tuple[Dict[str, np.ndarray], Dict[str, Any]]:
    """Run the parent preprocessing and record the resize type in the returned meta.

    Args:
        inputs (np.ndarray): Image passed to the parent ``preprocess``.

    Returns:
        The parent's model inputs and its meta with ``"resize_type"`` added.
    """
    model_inputs, image_meta = super().preprocess(inputs)
    image_meta.update(resize_type=self.resize_type)
    return model_inputs, image_meta
4352

44-
class Decoder(BlurSegmentation):
45-
"""Decoder class for visual prompting of openvino model wrapper.
4653

47-
TODO (sungchul): change parent class
48-
"""
54+
class Decoder(SegmentationModel):
55+
"""Decoder class for visual prompting of openvino model wrapper."""
4956

5057
__model__ = "decoder"
5158

52-
def preprocess(self, bbox: np.ndarray, original_size: Tuple[int]) -> Dict[str, Any]:
53-
"""Ready decoder inputs."""
54-
point_coords = bbox.reshape((-1, 2, 2))
55-
point_labels = np.array([2, 3], dtype=np.float32).reshape((-1, 2))
56-
inputs_decoder = {
57-
"point_coords": point_coords,
58-
"point_labels": point_labels,
59-
# TODO (sungchul): how to generate mask_input and has_mask_input
60-
"mask_input": np.zeros((1, 1, 256, 256), dtype=np.float32),
61-
"has_mask_input": np.zeros((1, 1), dtype=np.float32),
62-
"orig_size": np.array(original_size, dtype=np.float32).reshape((-1, 2)),
63-
}
64-
return inputs_decoder
59+
def __init__(
    self,
    model_adapter: InferenceAdapter,
    configuration: Optional[dict] = None,
    preload: bool = False,
):
    """Initialize the decoder through the parent constructor and fix its output name.

    Args:
        model_adapter (InferenceAdapter): Adapter forwarded to the parent constructor.
        configuration (Optional[dict]): Configuration forwarded to the parent constructor.
        preload (bool): Preload flag forwarded to the parent constructor.
    """
    super().__init__(model_adapter, configuration, preload)
    # Assigned after the parent constructor; `postprocess` reads the model output under this name.
    self.output_blob_name = "low_res_masks"
6567

6668
@classmethod
def parameters(cls):  # noqa: D102
    """Return the parent parameters extended with the ``image_size`` entry."""
    params = super().parameters()
    params["image_size"] = NumericalValue(value_type=int, default_value=1024, min=0, max=2048)
    return params
7173

74+
def preprocess(self, inputs: Dict[str, Any], meta: Dict[str, Any]):
    """Turn every box prompt into a dictionary of decoder inputs.

    Args:
        inputs (Dict[str, Any]): Prompts; the keys ``"bboxes"``, ``"labels"`` and
            ``"original_size"`` are read.
        meta (Dict[str, Any]): Not used by this method.

    Returns:
        A list with one dictionary per (bbox, label) pair.
    """

    def _to_decoder_input(box, label):
        # TODO (sungchul): add condition to check whether using bbox or point
        return {
            "point_coords": self._apply_coords(box.reshape(-1, 2, 2), inputs["original_size"]),
            "point_labels": np.array([2, 3], dtype=np.float32).reshape((-1, 2)),
            # TODO (sungchul): how to generate mask_input and has_mask_input
            "mask_input": np.zeros((1, 1, 256, 256), dtype=np.float32),
            "has_mask_input": np.zeros((1, 1), dtype=np.float32),
            "orig_size": np.array(inputs["original_size"], dtype=np.float32).reshape((-1, 2)),
            "label": label,
        }

    # TODO (sungchul): process points
    return [_to_decoder_input(box, label) for box, label in zip(inputs["bboxes"], inputs["labels"])]
94+
95+
def _apply_coords(self, coords: np.ndarray, original_size: Union[List[int], Tuple[int, int]]) -> np.ndarray:
96+
"""Process coords according to preprocessed image size using image meta."""
97+
old_h, old_w = original_size
98+
new_h, new_w = self._get_preprocess_shape(original_size[0], original_size[1], self.image_size)
99+
coords = deepcopy(coords).astype(np.float32)
100+
coords[..., 0] = coords[..., 0] * (new_w / old_w)
101+
coords[..., 1] = coords[..., 1] * (new_h / old_h)
102+
return coords
103+
104+
def _get_preprocess_shape(self, old_h: int, old_w: int, image_size: int) -> Tuple[int, int]:
105+
"""Compute the output size given input size and target image size."""
106+
scale = image_size / max(old_h, old_w)
107+
new_h, new_w = old_h * scale, old_w * scale
108+
new_w = int(new_w + 0.5)
109+
new_h = int(new_h + 0.5)
110+
return (new_h, new_w)
111+
112+
def _check_io_number(self, number_of_inputs, number_of_outputs):
    """Accept any number of inputs and outputs; no validation is performed.

    NOTE(review): presumably overrides a check in the parent class - confirm.
    """
    pass
114+
72115
def _get_inputs(self):
73116
"""Get input layer name and shape."""
74117
image_blob_names = [name for name in self.inputs.keys()]
75118
image_info_blob_names = []
76119
return image_blob_names, image_info_blob_names
77120

78-
def _get_outputs(self):
79-
"""Get output layer name and shape."""
80-
layer_name = "low_res_masks"
81-
layer_shape = self.outputs[layer_name].shape
82-
83-
if len(layer_shape) == 3:
84-
self.out_channels = 0
85-
elif len(layer_shape) == 4:
86-
self.out_channels = layer_shape[1]
87-
else:
88-
raise Exception(f"Unexpected output layer shape {layer_shape}. Only 4D and 3D output layers are supported")
89-
90-
return layer_name
91-
92121
def postprocess(self, outputs: Dict[str, np.ndarray], meta: Dict[str, Any]) -> Tuple[np.ndarray, np.ndarray]:
93122
"""Postprocess to convert soft prediction to hard prediction.
94123
@@ -102,10 +131,10 @@ def postprocess(self, outputs: Dict[str, np.ndarray], meta: Dict[str, Any]) -> T
102131
"""
103132

104133
def sigmoid(x):
105-
return 1 / (1 + np.exp(-x))
134+
return np.tanh(x * 0.5) * 0.5 + 0.5 # to avoid overflow
106135

107136
soft_prediction = outputs[self.output_blob_name].squeeze()
108-
soft_prediction = self.resize_and_crop(soft_prediction, meta["original_size"])
137+
soft_prediction = self.resize_and_crop(soft_prediction, meta["original_size"][0])
109138
soft_prediction = sigmoid(soft_prediction)
110139
meta["soft_prediction"] = soft_prediction
111140

@@ -134,18 +163,18 @@ def resize_and_crop(self, soft_prediction: np.ndarray, original_size: np.ndarray
134163
soft_prediction, (self.image_size, self.image_size), 0, 0, interpolation=cv2.INTER_LINEAR
135164
)
136165

137-
prepadded_size = self.resize_longest_image_size(original_size, self.image_size).astype(np.int64)
166+
prepadded_size = self.get_padded_size(original_size, self.image_size).astype(np.int64)
138167
resized_cropped_soft_prediction = resized_soft_prediction[..., : prepadded_size[0], : prepadded_size[1]]
139168

140169
original_size = original_size.astype(np.int64)
141-
h, w = original_size[0], original_size[1]
170+
h, w = original_size
142171
final_soft_prediction = cv2.resize(
143172
resized_cropped_soft_prediction, (w, h), 0, 0, interpolation=cv2.INTER_LINEAR
144173
)
145174
return final_soft_prediction
146175

147-
def resize_longest_image_size(self, original_size: np.ndarray, longest_side: int) -> np.ndarray:
148-
"""Resizes the longest side of the image to the given size.
176+
def get_padded_size(self, original_size: np.ndarray, longest_side: int) -> np.ndarray:
177+
"""Get padded size from original size and longest side of the image.
149178
150179
Args:
151180
original_size (np.ndarray): The original image size with shape Bx2.

0 commit comments

Comments
 (0)