From d75ee0a62b8e2fb8912ff226ea8bedb8ed78764d Mon Sep 17 00:00:00 2001 From: Masahiro Masuda Date: Mon, 28 Dec 2020 19:13:04 +0900 Subject: [PATCH] temp disable write by only thread 0 --- python/tvm/topi/cuda/nms.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/python/tvm/topi/cuda/nms.py b/python/tvm/topi/cuda/nms.py index 65f7e3950e1c..210d5a5b1c76 100644 --- a/python/tvm/topi/cuda/nms.py +++ b/python/tvm/topi/cuda/nms.py @@ -530,12 +530,15 @@ def calculate_overlap(out_tensor, box_a_idx, box_b_idx): def nms_inner_loop(ib, j): # the box j is valid, invalidate other boxes that overlap with j above iou_threshold - # When return_indices is False, no need to populate box_indices - if return_indices: - # Only one thread needs to this write - with ib.if_scope(tx == 0): - orig_idx = sorted_index[i * num_anchors + j] - box_indices[i, num_valid_boxes_local[0]] = indices[i, orig_idx] + # # When return_indices is False, no need to populate box_indices + # if return_indices: + # # Only one thread needs to this write + # with ib.if_scope(tx == 0): + # orig_idx = sorted_index[i * num_anchors + j] + # box_indices[i, num_valid_boxes_local[0]] = indices[i, orig_idx] + + orig_idx = sorted_index[i * num_anchors + j] + box_indices[i, num_valid_boxes_local[0]] = indices[i, orig_idx] num_valid_boxes_local[0] += 1 @@ -593,8 +596,8 @@ def nms_inner_loop(ib, j): with ib.else_scope(): nms_inner_loop(ib, j) - with ib.if_scope(tx == 0): - num_valid_boxes[i] = num_valid_boxes_local[0] + # with ib.if_scope(tx == 0): + num_valid_boxes[i] = num_valid_boxes_local[0] with ib.else_scope(): num_valid_boxes[i] = 0