initial import
commit 5ff0985625ec75f117af37017ebf4089dafb8a46
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 10:02:45 2021 +0900

    cleanup

commit 199f9b6
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 10:00:15 2021 +0900

    Revert "add gather_nd shape func"

    This reverts commit 1ff4d53.

commit 47a05c4
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 09:53:00 2021 +0900

    format

commit 9dcd0f0
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 09:48:43 2021 +0900

    make it static

commit eb06393
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 09:14:31 2021 +0900

    restore old impl and use it for q != 1 case

commit 115a5df
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 09:00:40 2021 +0900

    fixed score gathering

commit d203562
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 08:53:14 2021 +0900

    minimum fixed

commit 3fe91e8
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 06:59:39 2021 +0900

    batch issue fixed

commit 19e3e84
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 04:29:15 2021 +0900

    zero padding working

    This reverts commit 58c3413.

commit ce7848b
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 28 13:12:47 2021 +0900

    pylint, do not use -1 for default value

commit 968f3bd
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 28 13:07:31 2021 +0900

    rename to index_rank and make it Optional

commit 9e06b84
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 21 18:01:59 2021 +0900

    fix pylint

commit 81dc605
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 21 17:57:03 2021 +0900

    minor fix

commit 54297b6
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 21 17:54:16 2021 +0900

    support dynamic scatter nd

commit e25c225
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 21 17:33:19 2021 +0900

    gather_dim -> num_indices_per_tuple

commit aaa6211
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 21 17:23:46 2021 +0900

    add dynamic gather_nd test

commit 3a9fe5d
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 21 17:18:26 2021 +0900

    refactor gather_nd ref funcs

commit 1ff4d53
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 21 14:36:34 2021 +0900

    add gather_nd shape func

commit b020064
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 04:01:11 2021 +0900

    working on zero padding

commit 4567417
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 03:21:52 2021 +0900

    working

commit 7f5c76d
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Sat May 29 02:37:50 2021 +0900

    relay type inference works, debugging topi

commit 4a4b8df
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 28 15:08:16 2021 +0900

    add max_total_size to attributes

commit 7218b2f
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 28 14:50:58 2021 +0900

    tf frontend update

commit cde4a1f
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 28 14:17:14 2021 +0900

    all class nms tf mode first cut

commit 5f349f7
Author: Masahiro Masuda <masahi129@gmail.com>
Date:   Fri May 28 06:54:34 2021 +0900

    begin supporting per batch output

commit 0044365
Author: Trevor Morris <trevmorr@amazon.com>
Date:   Mon May 3 19:46:28 2021 +0000

    initial

commit 168a617
Author: Trevor Morris <trevmorr@amazon.com>
Date:   Fri Apr 16 20:31:32 2021 +0000

    initial
masahi committed May 29, 2021
1 parent a4fb12d commit 7b87922
Showing 12 changed files with 310 additions and 38 deletions.
8 changes: 7 additions & 1 deletion include/tvm/relay/attrs/vision.h
@@ -117,8 +117,14 @@ struct NonMaximumSuppressionAttrs : public tvm::AttrsNode<NonMaximumSuppressionA
/*! \brief Attributes used in non_maximum_suppression operator */
struct AllClassNonMaximumSuppressionAttrs
: public tvm::AttrsNode<AllClassNonMaximumSuppressionAttrs> {
Optional<Integer> max_total_size;
std::string output_format;

TVM_DECLARE_ATTRS(AllClassNonMaximumSuppressionAttrs,
"relay.attrs.AllClassNonMaximumSuppressionAttrs") {}
"relay.attrs.AllClassNonMaximumSuppressionAttrs") {
TVM_ATTR_FIELD(max_total_size).set_default(NullValue<Integer>()).describe("TODO");
TVM_ATTR_FIELD(output_format).set_default("onnx").describe("Output format. onnx or tensorflow");
}
};

/*! \brief Attributes used in roi_align operators */
79 changes: 72 additions & 7 deletions python/tvm/relay/frontend/tensorflow.py
@@ -794,22 +794,76 @@ def _impl(inputs, attr, params, mod):


def _combined_nms():
def all_class_impl(
boxes,
scores,
max_output_boxes_per_class,
iou_threshold,
score_threshold,
max_total_size,
clip_boxes,
mod,
):
indices, num_detections = _op.vision.all_class_non_max_suppression(
boxes,
scores,
max_output_boxes_per_class,
iou_threshold,
score_threshold,
max_total_size,
output_format="tensorflow",
)
nmsed_box_indices = _op.take(indices, _op.const(1), axis=2)
nmsed_classes = _op.cast(_op.take(indices, _op.const(0), axis=2), "float32")
nmsed_boxes = _op.gather_nd(boxes, _op.expand_dims(nmsed_box_indices, axis=0), batch_dims=1)

indices_shape = _infer_shape(indices, mod)
indices_dims = len(indices_shape)
indices = _op.transpose(indices, axes=[-1] + list(range(indices_dims - 1)))
nmsed_scores = _op.gather_nd(scores, indices, batch_dims=1)

if clip_boxes:
nmsed_boxes = _op.maximum(nmsed_boxes, _expr.const(0, dtype="float32"))
nmsed_boxes = _op.minimum(nmsed_boxes, _expr.const(1, dtype="float32"))

# Fill in invalid entries with 0
box_range = _op.arange(
_op.const(0, dtype="int64"), _op.const(max_total_size, dtype="int64"), dtype="int64"
)
batch_size = indices_shape[0]

if isinstance(batch_size, tvm.tir.Any):
box_range_2d = _op.tile(box_range, _op.concatenate([batch_size, 1]))
else:
box_range_2d = _op.tile(box_range, _op.const([batch_size, 1]))

valid_mask = _op.cast(
_op.less(box_range_2d, _op.expand_dims(num_detections, axis=1)), "float32"
)
nmsed_scores = nmsed_scores * valid_mask
nmsed_classes = nmsed_classes * valid_mask
nmsed_boxes = nmsed_boxes * _op.expand_dims(valid_mask, axis=2)

return _expr.TupleWrapper(
_expr.Tuple([nmsed_boxes, nmsed_scores, nmsed_classes, num_detections]), 4
)

def _impl(inputs, attr, params, mod):
# Get parameter values
boxes = inputs[0]
scores = inputs[1]
try:
max_output_size = int(np.atleast_1d(inputs[2].data.numpy().astype("int64"))[0])
max_output_size = int(np.atleast_1d(inputs[2].data.asnumpy().astype("int64"))[0])
except Exception:
try:
max_output_size = (
_infer_value(inputs[2], params, mod).numpy().astype("int64").tolist()[0]
_infer_value(inputs[2], params, mod).asnumpy().astype("int64").tolist()[0]
)
except Exception:
max_output_size = inputs[2]
max_total_size = inputs[3]
iou_threshold = np.atleast_1d(inputs[4].data.numpy())[0]
score_threshold = np.atleast_1d(inputs[5].data.numpy())[0]
iou_threshold = np.atleast_1d(inputs[4].data.asnumpy())[0]
score_threshold = np.atleast_1d(inputs[5].data.asnumpy())[0]
if attr["pad_per_class"]:
raise tvm.error.OpAttributeUnImplemented(
"pad_per_class for CombinedNonMaxSuppression is not supported"
@@ -821,9 +875,20 @@ def _impl(inputs, attr, params, mod):
q = boxes_shape[2]
num_classes = scores_shape[2]

if q != num_classes:
# When q is 1, it means same box coords are used for all classes.
boxes = _op.broadcast_to(boxes, (batch_size, num_anchors, num_classes, 4))
if q == 1:
boxes = _op.squeeze(boxes, axis=[2])
scores_trans = _op.transpose(scores, [0, 2, 1])
return all_class_impl(
boxes,
scores_trans,
max_output_size,
iou_threshold,
score_threshold,
max_total_size.data.numpy().item(),
attr["clip_boxes"],
mod,
)

boxes = _op.reshape(boxes, newshape=[batch_size, num_anchors * num_classes, 4])
scores = _op.reshape(scores, newshape=[batch_size, num_anchors * num_classes, 1])

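The frontend path above pads the TensorFlow-style output out to max_total_size entries per image and zeroes the invalid tail. A rough NumPy sketch of that masking step (illustrative only; the array values are made up, and the variable names mirror the Relay code above):

import numpy as np

# Two images in the batch, each padded to max_total_size = 4 output slots.
max_total_size = 4
num_detections = np.array([3, 1])                       # valid detections per image
nmsed_scores = np.array([[0.9, 0.8, 0.7, 0.2],
                         [0.6, 0.5, 0.4, 0.3]], dtype="float32")

# Mirrors _op.tile(box_range, [batch_size, 1]) in the frontend code.
box_range_2d = np.tile(np.arange(max_total_size), (2, 1))

# Slots whose position is >= num_detections for that image are invalid.
valid_mask = (box_range_2d < num_detections[:, None]).astype("float32")
print(nmsed_scores * valid_mask)
# [[0.9 0.8 0.7 0. ]
#  [0.6 0.  0.  0. ]]

The same mask is broadcast over the box coordinates and class ids, so everything past num_detections[i] reads as zero, matching TensorFlow's combined_non_max_suppression padding behavior.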
12 changes: 11 additions & 1 deletion python/tvm/relay/op/strategy/generic.py
@@ -1095,7 +1095,17 @@ def _compute_nms(attrs, inputs, out_type):
max_output_size = inputs[2]
iou_threshold = inputs[3]
score_threshold = inputs[4]
return topi_compute(inputs[0], inputs[1], max_output_size, iou_threshold, score_threshold)
max_total_size = attrs.max_total_size
output_format = attrs.output_format
return topi_compute(
inputs[0],
inputs[1],
max_output_size,
iou_threshold,
score_threshold,
max_total_size,
output_format,
)

return _compute_nms

17 changes: 15 additions & 2 deletions python/tvm/relay/op/vision/nms.py
@@ -152,7 +152,13 @@ def non_max_suppression(


def all_class_non_max_suppression(
boxes, scores, max_output_boxes_per_class=-1, iou_threshold=-1.0, score_threshold=-1.0
boxes,
scores,
max_output_boxes_per_class=-1,
iou_threshold=-1.0,
score_threshold=-1.0,
max_total_size=None,
output_format="onnx",
):
"""Non-maximum suppression operator for object detection, corresponding to ONNX
NonMaxSuppression and TensorFlow combined_non_max_suppression.
@@ -185,6 +191,7 @@ def all_class_non_max_suppression(
in descending of scores, followed by boxes from batch 0, class 1 etc. Out of
`batch_size * num_class* num_boxes` rows of indices, only the first `num_total_detection`
rows are valid.
TODO(trvmorr): explain tf mode
"""
if not isinstance(max_output_boxes_per_class, expr.Expr):
max_output_boxes_per_class = expr.const(max_output_boxes_per_class, "int32")
@@ -194,6 +201,12 @@ def all_class_non_max_suppression(
score_threshold = expr.const(score_threshold, "float32")

out = _make.all_class_non_max_suppression(
boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold
boxes,
scores,
max_output_boxes_per_class,
iou_threshold,
score_threshold,
max_total_size,
output_format,
)
return expr.TupleWrapper(out, 2)
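For reference, here is one way the extended Python API could be called with the new arguments; this is a hypothetical sketch based only on the signature added above (shapes and thresholds are made up, not taken from the patch's tests):

from tvm import relay

batch, num_boxes, num_classes = 1, 10, 3
boxes = relay.var("boxes", shape=(batch, num_boxes, 4), dtype="float32")
# This operator expects scores laid out as (batch, num_classes, num_boxes).
scores = relay.var("scores", shape=(batch, num_classes, num_boxes), dtype="float32")

# output_format="tensorflow" together with max_total_size selects the combined-NMS
# style output introduced by this commit; the default "onnx" keeps the old behavior.
nms_out = relay.vision.all_class_non_max_suppression(
    boxes,
    scores,
    max_output_boxes_per_class=4,
    iou_threshold=0.5,
    score_threshold=0.05,
    max_total_size=10,
    output_format="tensorflow",
)
print(nms_out.astuple())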
138 changes: 129 additions & 9 deletions python/tvm/topi/cuda/nms.py
@@ -23,11 +23,13 @@
from tvm.contrib.thrust import can_use_thrust, can_use_rocthrust
from tvm.ir import register_intrin_lowering
from tvm.tir import if_then_else
from .sort import argsort, argsort_thrust
from .sort import argsort, argsort_thrust, topk
from .scan import exclusive_scan
from ..utils import ceil_div
from ..math import cast
from ..transform import reshape
from .. import reduction
from ..broadcast import minimum
from ..transform import reshape, strided_slice, gather_nd, expand_dims, squeeze
from ..vision.nms_util import (
calculate_overlap,
binary_search,
@@ -988,8 +990,97 @@ def _collect_selected_indices_ir(num_class, selected_indices, num_detections, ro
return ib.get()


def _collect_selected_indices_tf_ir(
num_class,
selected_indices,
selected_scores,
num_detections,
row_offsets,
collected_indices,
collected_scores,
):
batch_size, num_class = row_offsets.shape
num_boxes = selected_indices.shape[1]

ib = tvm.tir.ir_builder.create()

selected_indices = ib.buffer_ptr(selected_indices)
selected_scores = ib.buffer_ptr(selected_scores)
num_detections = ib.buffer_ptr(num_detections)
row_offsets = ib.buffer_ptr(row_offsets)
collected_indices = ib.buffer_ptr(collected_indices)
collected_scores = ib.buffer_ptr(collected_scores)

max_threads = int(tvm.target.Target.current(allow_none=False).max_num_threads)
nthread_tx = max_threads
nthread_bx = ceil_div(num_boxes, nthread_tx)
nthread_by = batch_size * num_class
tx = te.thread_axis("threadIdx.x")
bx = te.thread_axis("blockIdx.x")
by = te.thread_axis("blockIdx.y")
ib.scope_attr(tx, "thread_extent", nthread_tx)
ib.scope_attr(bx, "thread_extent", nthread_bx)
ib.scope_attr(by, "thread_extent", nthread_by)
zero = cast(0, "int64")

with ib.new_scope():
idx = bx * nthread_tx + tx
idy = cast(by, "int64")
batch_id = idy // num_class
class_id = idy % num_class
offset = row_offsets[batch_id, class_id] + idx

with ib.if_scope(idx < num_detections[batch_id, class_id]):
collected_indices[batch_id, offset, 0] = class_id
collected_indices[batch_id, offset, 1] = cast(selected_indices[idy, idx], "int64")
collected_scores[batch_id, offset] = selected_scores[idy, idx]
with ib.else_scope():
with ib.if_scope(idx < num_boxes):
collected_indices[batch_id, offset, 0] = zero
collected_indices[batch_id, offset, 1] = zero
collected_scores[batch_id, offset] = -1.0

return ib.get()


def collect_selected_indices_tf(selected_indices, selected_scores, num_detections, row_offsets):
batch_size, num_class = row_offsets.shape
num_boxes = selected_indices.shape[1]

selected_indices_buf = tvm.tir.decl_buffer(
selected_indices.shape, selected_indices.dtype, "selected_indices_buf", data_alignment=8
)
selected_scores_buf = tvm.tir.decl_buffer(
selected_scores.shape, selected_scores.dtype, "selected_scores_buf", data_alignment=8
)
num_detections_buf = tvm.tir.decl_buffer(
num_detections.shape, num_detections.dtype, "num_detections_buf", data_alignment=8
)
row_offsets_buf = tvm.tir.decl_buffer(
row_offsets.shape, row_offsets.dtype, "row_offsets_buf", data_alignment=8
)

return te.extern(
[(batch_size, num_class * num_boxes, 2), (batch_size, num_class * num_boxes)],
[selected_indices, selected_scores, num_detections, row_offsets],
lambda ins, outs: _collect_selected_indices_tf_ir(
num_class, ins[0], ins[1], ins[2], ins[3], outs[0], outs[1]
),
dtype=["int64", "float32"],
in_buffers=[selected_indices_buf, selected_scores_buf, num_detections_buf, row_offsets_buf],
name="collect_indices",
tag="collect_indices",
)


def all_class_non_max_suppression(
boxes, scores, max_output_boxes_per_class, iou_threshold, score_threshold
boxes,
scores,
max_output_boxes_per_class,
iou_threshold,
score_threshold,
max_total_size,
output_format="onnx",
):
"""Non-maximum suppression operator for object detection, corresponding to ONNX
NonMaxSuppression and TensorFlow combined_non_max_suppression.
@@ -1012,6 +1103,8 @@ def all_class_non_max_suppression(
score_threshold : float or tvm.te.Tensor, optional
Score threshold to filter out low score boxes early
output_format : str
Returns
-------
out : [tvm.te.Tensor, tvm.te.Tensor]
@@ -1029,22 +1122,49 @@
sorted_scores, sorted_indices = _dispatch_sort(scores, ret_type="both")
valid_count = _get_valid_box_count(sorted_scores, score_threshold)

selected_indices, num_detections = run_all_class_nms(
if output_format == "onnx":
selected_indices, num_detections = run_all_class_nms(
boxes,
sorted_scores,
sorted_indices,
valid_count,
max_output_boxes_per_class,
iou_threshold,
_nms_loop,
)

row_offsets, num_total_detections = exclusive_scan(
num_detections, return_reduction=True, output_dtype="int64"
)
selected_indices = collect_selected_indices(
num_class, selected_indices, num_detections, row_offsets, _collect_selected_indices_ir
)
return [selected_indices, num_total_detections]

max_detection_per_batch = max_total_size

selected_indices, selected_scores, num_detections = run_all_class_nms(
boxes,
sorted_scores,
sorted_indices,
valid_count,
max_output_boxes_per_class,
iou_threshold,
_nms_loop,
return_scores=True,
)

# tf mode, return (batch_size, max_total_size, 2)
num_detections_per_batch = reshape(num_detections, (batch, num_class))
row_offsets, num_total_detections = exclusive_scan(
num_detections, return_reduction=True, output_dtype="int64"
num_detections_per_batch, return_reduction=True, output_dtype="int64", axis=1
)

selected_indices = collect_selected_indices(
num_class, selected_indices, num_detections, row_offsets, _collect_selected_indices_ir
selected_indices, selected_scores = collect_selected_indices_tf(
selected_indices, selected_scores, num_detections_per_batch, row_offsets
)
topk_indices = topk(selected_scores, k=max_detection_per_batch, axis=1, ret_type="indices")[0]
topk_indices = expand_dims(topk_indices, axis=0)
final_indices = gather_nd(selected_indices, topk_indices, batch_dims=1)
num_detections = minimum(num_total_detections, max_detection_per_batch)

return [selected_indices, num_total_detections]
return [final_indices, num_detections]
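The TensorFlow branch above first runs per-class NMS, collects the surviving (class, box) pairs and their scores into per-batch buffers, and then keeps only the max_total_size best-scoring entries. A rough NumPy rendering of that final selection step (illustrative only; the buffer contents are made up, while the real kernel works on TE tensors via topk, gather_nd, and minimum):

import numpy as np

max_total_size = 3
# collected_scores: (batch, num_class * num_boxes), invalid slots filled with -1.
collected_scores = np.array([[0.9, 0.2, -1.0, 0.8, 0.4, -1.0]])
# collected_indices: matching (class_id, box_id) pairs.
collected_indices = np.array([[[0, 5], [0, 1], [0, 0],
                               [1, 7], [1, 2], [0, 0]]])
num_total_detections = np.array([4])

# Keep the max_total_size highest-scoring entries per batch, then gather
# the corresponding (class_id, box_id) pairs.
topk_idx = np.argsort(-collected_scores, axis=1)[:, :max_total_size]
final_indices = np.take_along_axis(collected_indices, topk_idx[..., None], axis=1)
num_detections = np.minimum(num_total_detections, max_total_size)

print(final_indices)   # [[[0 5] [1 7] [1 2]]]
print(num_detections)  # [3]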
4 changes: 3 additions & 1 deletion python/tvm/topi/cuda/vision.py
@@ -39,7 +39,9 @@ def traverse(op):
traverse(tensor.op)
scheduled_ops.append(op)

traverse(outs[0].op)
for o in outs:
traverse(o.op)

return s


4 changes: 2 additions & 2 deletions python/tvm/topi/transform.py
@@ -483,7 +483,7 @@ def gather(data, axis, indices):
return cpp.gather(data, axis, indices)


def gather_nd(a, indices):
def gather_nd(a, indices, batch_dims=0):
"""Gather elements from a n-dimension array..
Parameters
Expand All @@ -498,7 +498,7 @@ def gather_nd(a, indices):
-------
ret : tvm.te.Tensor
"""
return cpp.gather_nd(a, indices)
return cpp.gather_nd(a, indices, batch_dims)


def matmul(a, b, transp_a=False, transp_b=False):
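The batch_dims argument threaded through TOPI gather_nd above is what lets the frontend gather per-image boxes with indices of shape (1, batch, max_total_size). A small NumPy sketch of the batch_dims=1 semantics (made-up data, not from the patch):

import numpy as np

# data: (batch=2, num_boxes=3, 4); indices: (index_rank=1, batch=2, k=2),
# i.e. one index per tuple, with the leading tuple axis as in TVM's gather_nd.
data = np.arange(2 * 3 * 4).reshape(2, 3, 4)
indices = np.array([[[2, 0],     # image 0: pick boxes 2 and 0
                     [1, 1]]])   # image 1: pick box 1 twice

# With batch_dims=1, the first data axis is paired with the batch axis of
# indices, and the remaining index tuple selects within that image.
out = np.stack([data[b, indices[0, b]] for b in range(2)])
print(out.shape)  # (2, 2, 4), like gather_nd(data, indices, batch_dims=1)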
