Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions include/tvm/relax/attrs/vision.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file tvm/relax/attrs/vision.h
* \brief Auxiliary attributes for vision operators.
*/
#ifndef TVM_RELAX_ATTRS_VISION_H_
#define TVM_RELAX_ATTRS_VISION_H_

#include <tvm/ffi/string.h>
#include <tvm/ir/attrs.h>
#include <tvm/ir/type.h>
#include <tvm/relax/expr.h>
#include <tvm/runtime/object.h>

namespace tvm {
namespace relax {

/*! \brief Attributes used in AllClassNonMaximumSuppression operator */
// Attribute node attached to the all-class NMS operator call. It carries a single
// string field that selects how the operator lays out its outputs.
struct AllClassNonMaximumSuppressionAttrs
: public AttrsNodeReflAdapter<AllClassNonMaximumSuppressionAttrs> {
/*! \brief Output layout selector; the registered description names "onnx" and "tensorflow". */
ffi::String output_format;

/*! \brief Register the fields of this node with the FFI reflection system. */
static void RegisterReflection() {
namespace refl = tvm::ffi::reflection;
// Expose `output_format` through def_ro under the name "output_format", together with
// its human-readable description string.
refl::ObjectDef<AllClassNonMaximumSuppressionAttrs>().def_ro(
"output_format", &AllClassNonMaximumSuppressionAttrs::output_format,
"Output format, onnx or tensorflow. Returns outputs in a way that can be easily "
"consumed by each frontend.");
}
// The type key string below is the same key the Python-side attrs class registers
// via tvm_ffi.register_object.
TVM_FFI_DECLARE_OBJECT_INFO_FINAL("relax.attrs.AllClassNonMaximumSuppressionAttrs",
AllClassNonMaximumSuppressionAttrs, BaseAttrsNode);
}; // struct AllClassNonMaximumSuppressionAttrs

} // namespace relax
} // namespace tvm

#endif // TVM_RELAX_ATTRS_VISION_H_
179 changes: 178 additions & 1 deletion python/tvm/relax/frontend/onnx/onnx_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3386,6 +3386,182 @@ def _impl_v11(cls, bb, inputs, attr, params):
return input_sequence[position]


def _nms_scalar_input(value, params, cast, default):
    """Resolve an optional scalar NMS input to a plain Python number.

    Parameters
    ----------
    value : Optional[relax.Expr]
        The converted ONNX input, or None when the node omitted it.
    params : list
        Converter params. ``params[1]`` is looked up by variable name and is
        expected to yield a ``(var, ndarray)`` pair for graph initializers that
        were kept as inputs.
    cast : Callable
        ``int`` or ``float``; applied to the extracted scalar.
    default : int or float
        Value returned when ``value`` cannot be resolved at conversion time.

    Returns
    -------
    int or float
        The compile-time value of the input, or ``default``.
    """
    if isinstance(value, relax.Constant):
        # `.item()` accepts both 0-d and shape-(1,) tensors; calling int()/float()
        # directly on a non-0-d array relies on a deprecated NumPy conversion.
        return cast(value.data.numpy().item())
    if isinstance(value, relax.Var) and value.name_hint in params[1]:
        _, param_value = params[1][value.name_hint]
        return cast(param_value.numpy().item())
    # Missing input, or a value that is only known at runtime: best-effort fallback.
    return default


def _center_boxes_to_corners(boxes):
    """Convert ``[x_center, y_center, width, height]`` boxes to ``[y1, x1, y2, x2]``.

    The box coordinates are split and re-concatenated along axis 2, so ``boxes``
    must have its 4 coordinates on the third axis.
    """
    parts = relax.op.split(boxes, 4, axis=2)
    x_center = parts[0]
    y_center = parts[1]
    width = parts[2]
    height = parts[3]
    half_width = width / relax.const(2.0, boxes.struct_info.dtype)
    half_height = height / relax.const(2.0, boxes.struct_info.dtype)
    corners = [
        y_center - half_height,  # y1
        x_center - half_width,  # x1
        y_center + half_height,  # y2
        x_center + half_width,  # x2
    ]
    return relax.op.concat(corners, axis=2)


def _convert_all_class_nms(bb, inputs, attr, params):
    """Build the normalized ``all_class_non_max_suppression`` call shared by the
    NonMaxSuppression and AllClassNMS converters.

    The three scalar inputs are folded to Python numbers at conversion time and
    re-emitted as constants (int64 / float32 / float32).

    NOTE(review): the fallbacks 100 / 0.5 / 0.0 are this converter's own choices.
    The ONNX NonMaxSuppression spec appears to list 0 as the default for
    ``max_output_boxes_per_class`` and ``iou_threshold`` -- confirm which is intended.
    """
    boxes = inputs[0]
    scores = inputs[1]

    def optional_input(index):
        return inputs[index] if len(inputs) > index else None

    max_output_boxes_per_class = _nms_scalar_input(optional_input(2), params, int, 100)
    # The original code ignored a parameter-bound Var here and always used 0.5;
    # it is now resolved the same way as the other two scalar inputs.
    iou_threshold = _nms_scalar_input(optional_input(3), params, float, 0.5)
    score_threshold = _nms_scalar_input(optional_input(4), params, float, 0.0)

    if attr.get("center_point_box", 0) != 0:
        boxes = _center_boxes_to_corners(boxes)

    return bb.normalize(
        relax.op.vision.all_class_non_max_suppression(
            boxes,
            scores,
            relax.const(max_output_boxes_per_class, dtype="int64"),
            relax.const(iou_threshold, dtype="float32"),
            relax.const(score_threshold, dtype="float32"),
            output_format="onnx",
        )
    )


class NonMaxSuppression(OnnxOpConverter):
    """Converts an onnx NonMaxSuppression node into an equivalent Relax expression."""

    @classmethod
    def _impl_v10(cls, bb, inputs, attr, params):
        """
        NonMaxSuppression performs non-maximum suppression (NMS) on all classes.

        Inputs:
        - boxes: (batch_size, num_boxes, 4) tensor of bounding boxes. With
          center_point_box == 0 the boxes are passed through unchanged; otherwise
          they are read as [x_center, y_center, width, height] and converted to
          [y1, x1, y2, x2].
        - scores: (batch_size, num_classes, num_boxes) tensor of scores
        - max_output_boxes_per_class: maximum number of boxes to keep per class
        - iou_threshold: IoU threshold for NMS
        - score_threshold: score threshold for filtering

        Outputs:
        - selected_indices: first element of the operator's tuple output, whose
          rows are [batch_idx, class_idx, box_idx]. The companion
          num_total_detection output is dropped here, so the tensor is not
          trimmed to the valid rows.
        """
        nms_out = _convert_all_class_nms(bb, inputs, attr, params)
        selected_indices = bb.emit(relax.TupleGetItem(nms_out, 0))
        return selected_indices


class AllClassNMS(OnnxOpConverter):
    """Converts an onnx AllClassNMS node into an equivalent Relax expression."""

    @classmethod
    def _impl_v1(cls, bb, inputs, attr, params):
        """
        AllClassNMS performs non-maximum suppression (NMS) on all classes.

        Inputs:
        - boxes: (batch_size, num_boxes, 4) tensor of bounding boxes. With
          center_point_box == 0 the boxes are passed through unchanged; otherwise
          they are read as [x_center, y_center, width, height] and converted to
          [y1, x1, y2, x2].
        - scores: (batch_size, num_classes, num_boxes) tensor of scores
        - max_output_boxes_per_class: maximum number of boxes to keep per class
        - iou_threshold: IoU threshold for NMS
        - score_threshold: score threshold for filtering

        Outputs:
        - The full tuple produced by all_class_non_max_suppression in "onnx"
          format (selected indices plus the total detection count).
        """
        return _convert_all_class_nms(bb, inputs, attr, params)


def _get_convert_map():
return {
# defs/experimental
Expand Down Expand Up @@ -3536,7 +3712,8 @@ def _get_convert_map():
# "LRN": LRN,
# "MaxRoiPool": MaxRoiPool,
# "RoiAlign": RoiAlign,
# "NonMaxSuppression": NonMaxSuppression,
"NonMaxSuppression": NonMaxSuppression,
"AllClassNMS": AllClassNMS,
# "GridSample": GridSample,
"Upsample": Upsample,
# others
Expand Down
1 change: 1 addition & 0 deletions python/tvm/relax/op/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@
tanh,
trunc,
)
from .vision import all_class_non_max_suppression


def _register_op_make():
Expand Down
5 changes: 5 additions & 0 deletions python/tvm/relax/op/op_attrs.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,11 @@ class AttentionAttrs(Attrs):
"""Attributes used in attention operator"""


@tvm_ffi.register_object("relax.attrs.AllClassNonMaximumSuppressionAttrs")
class AllClassNonMaximumSuppressionAttrs(Attrs):
    """Attributes for vision.all_class_non_max_suppression.

    Registered under the type key "relax.attrs.AllClassNonMaximumSuppressionAttrs",
    the same key declared by the C++ attrs node, which carries a single
    ``output_format`` string field ("onnx" or "tensorflow").
    """


@tvm_ffi.register_object("relax.attrs.Conv1DAttrs")
class Conv1DAttrs(Attrs):
"""Attributes for nn.conv1d"""
Expand Down
18 changes: 18 additions & 0 deletions python/tvm/relax/op/vision/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""VISION operators."""
from .nms import *
20 changes: 20 additions & 0 deletions python/tvm/relax/op/vision/_ffi_api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Constructor APIs"""
import tvm_ffi

# Bind this module to the "relax.op.vision" FFI namespace. Presumably this exposes the
# functions registered under that prefix as attributes of this module (nms.py calls
# `_ffi_api.all_class_non_max_suppression`) -- TODO confirm against tvm_ffi.
tvm_ffi.init_ffi_api("relax.op.vision", __name__)
75 changes: 75 additions & 0 deletions python/tvm/relax/op/vision/nms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Non-maximum suppression operator"""
# from tvm import relax # Unused import
from . import _ffi_api


def all_class_non_max_suppression(
    boxes,
    scores,
    max_output_boxes_per_class,
    iou_threshold,
    score_threshold,
    output_format="onnx",
):
    """Per-class non-maximum suppression for object detection.

    Corresponds to ONNX NonMaxSuppression and TensorFlow
    combined_non_max_suppression. NMS is performed for each class separately.

    Parameters
    ----------
    boxes : relax.Expr
        3-D tensor with shape (batch_size, num_boxes, 4).
    scores : relax.Expr
        3-D tensor with shape (batch_size, num_classes, num_boxes).
    max_output_boxes_per_class : relax.Expr
        The maximum number of selected boxes to output per class.
    iou_threshold : relax.Expr
        IoU test threshold.
    score_threshold : relax.Expr
        Score threshold used to filter out low-score boxes early.
    output_format : str, optional
        "onnx" or "tensorflow"; selects the output layout described below.

    Returns
    -------
    out : relax.Expr
        If `output_format` is "onnx", the output is two tensors. The first is
        `indices` of size `(batch_size * num_class * num_boxes, 3)` and the second
        is a tensor `num_total_detection` of shape `(1,)` holding the total number
        of selected boxes. The three values in each row of `indices` encode the
        batch, class, and box indices. Rows are ordered so that selected boxes
        from batch 0, class 0 come first, in descending order of score, followed
        by boxes from batch 0, class 1, and so on. Of the
        `batch_size * num_class * num_boxes` rows, only the first
        `num_total_detection` rows are valid.

        If `output_format` is "tensorflow", the output is three tensors. The first
        is `indices` of size `(batch_size, num_class * num_boxes, 2)`, the second
        is `scores` of size `(batch_size, num_class * num_boxes)`, and the third is
        `num_total_detection` of size `(batch_size,)` holding the number of
        selected boxes per batch. The two values in `indices` encode the class and
        box indices. Of the `num_class * num_boxes` entries at batch `b`, only the
        first `num_total_detection[b]` are valid. The second axis of `indices` and
        `scores` is sorted by box score within each class, but not across
        classes: entries for class 0 come first in sorted order, followed by
        class 1, and so on.

    Notes
    -----
    TODO: Implement true dynamic output shapes to match ONNX Runtime behavior
    exactly. This would eliminate the need for manual trimming and improve memory
    efficiency.
    """
    nms_call = _ffi_api.all_class_non_max_suppression(
        boxes,
        scores,
        max_output_boxes_per_class,
        iou_threshold,
        score_threshold,
        output_format,
    )
    return nms_call
1 change: 1 addition & 0 deletions python/tvm/relax/transform/legalize_ops/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@
from . import search
from . import statistical
from . import unary
from . import vision
Loading
Loading