This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

support broadcasting on the indexed axis
haojin2 committed Jan 8, 2020
1 parent c1f6d64 commit d64e1f1
Showing 3 changed files with 43 additions and 23 deletions.
30 changes: 20 additions & 10 deletions src/operator/numpy/np_boolean_mask_assign.cc
@@ -91,14 +91,21 @@ struct BooleanAssignCPUKernel {
                   const size_t middle,
                   const size_t valid_num,
                   const size_t trailing,
-                  DType* tensor) {
+                  DType* tensor,
+                  const bool broadcast = false) {
     // binary search for the turning point
     size_t mid = bin_search(idx, idx_size, i);
     // final answer is in mid
     for (size_t l = 0; l < leading; ++l) {
       for (size_t t = 0; t < trailing; ++t) {
-        data[(l * middle + mid) * trailing + t] =
-          (scalar) ? tensor[0] : tensor[(l * valid_num + i) * trailing + t];
+        if (scalar) {
+          data[(l * middle + mid) * trailing + t] = tensor[0];
+        } else {
+          data[(l * middle + mid) * trailing + t] =
+            (broadcast) ?
+            tensor[l * trailing + t] :
+            tensor[(l * valid_num + i) * trailing + t];
+        }
       }
     }
   }
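This hunk adds a `broadcast` flag to the CPU kernel and splits the scalar and tensor paths. A minimal pure-Python sketch of the addressing it implements follows; the function and argument names are illustrative, not taken from the source, and `true_positions` stands in for the prefix-sum/binary-search machinery that produces `mid`.

def boolean_assign_cpu_sketch(data, true_positions, leading, middle, trailing,
                              tensor, scalar=False, broadcast=False):
    # `data` is the destination buffer viewed as (leading, middle, trailing), row-major.
    # `tensor` is the flat value buffer; `true_positions` lists the True indices on the
    # masked (middle) axis, so valid_num == len(true_positions).
    valid_num = len(true_positions)
    for i, mid in enumerate(true_positions):   # the kernel finds `mid` by binary search
        for l in range(leading):
            for t in range(trailing):
                dst = (l * middle + mid) * trailing + t
                if scalar:
                    data[dst] = tensor[0]
                elif broadcast:
                    # size-1 indexed axis: every True position reuses the same slice
                    data[dst] = tensor[l * trailing + t]
                else:
                    # one value slice per True position
                    data[dst] = tensor[(l * valid_num + i) * trailing + t]
    return data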
@@ -215,14 +222,17 @@ void NumpyBooleanAssignForwardCPU(const nnvm::NodeAttrs& attrs,
   // If there's no True in mask, return directly
   if (valid_num == 0) return;
 
+  const TShape& vshape = inputs[2].shape_;
+
   if (inputs.size() == 3U) {
-    const TShape& vshape = inputs[2].shape_;
     if (inputs[2].shape_.Size() != 1) {
-      // tensor case, check tensor size with the valid_num
-      CHECK_EQ(static_cast<size_t>(valid_num), vshape[start_axis])
-        << "boolean array indexing assignment cannot assign " << vshape
-        << " input values to the " << valid_num << " output values where the mask is true"
-        << std::endl;
+      if (vshape[start_axis] != 1) {
+        // tensor case, check tensor size equal to or broadcastable with valid_num
+        CHECK_EQ(static_cast<size_t>(valid_num), vshape[start_axis])
+          << "boolean array indexing assignment cannot assign " << vshape
+          << " input values to the " << valid_num << " output values where the mask is true"
+          << std::endl;
+      }
     }
   }
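The relaxed check now accepts a value tensor whose extent on the indexed axis is either valid_num or 1; any other extent still fails the CHECK. As a hedged illustration of the intended semantics, plain NumPy (which mxnet.numpy is designed to follow) treats the two accepted shapes like this:

import numpy as _np

data = _np.zeros((2, 3, 4))
mask = _np.array([True, False, True])          # indexed axis is axis 1, valid_num == 2

# extent valid_num on the indexed axis: one value slice per True position
data[:, mask, :] = _np.random.uniform(size=(2, 2, 4))

# extent 1 on the indexed axis: the same slice is broadcast to every True position
data[:, mask, :] = _np.random.uniform(size=(2, 1, 4))

# any other extent, e.g. (2, 3, 4), raises a shape-mismatch error, matching the CHECK above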

@@ -248,7 +258,7 @@ void NumpyBooleanAssignForwardCPU(const nnvm::NodeAttrs& attrs,
     } else {
       Kernel<BooleanAssignCPUKernel<false>, cpu>::Launch(
         s, valid_num, data.dptr<DType>(), prefix_sum.data(), prefix_sum.size(),
-        leading, middle, valid_num, trailing, inputs[2].dptr<DType>());
+        leading, middle, valid_num, trailing, inputs[2].dptr<DType>(), (vshape[start_axis] == 1));
     }
   });
 } else {
22 changes: 13 additions & 9 deletions src/operator/numpy/np_boolean_mask_assign.cu
@@ -93,7 +93,8 @@ struct BooleanAssignGPUKernel {
                   const size_t middle,
                   const size_t valid_num,
                   const size_t trailing,
-                  DType* tensor) {
+                  DType* tensor,
+                  const bool broadcast = false) {
     // binary search for the turning point
     size_t m = i / trailing % valid_num;
     size_t l = i / trailing / valid_num;
@@ -103,7 +104,7 @@
     if (scalar) {
       data[dst] = tensor[0];
     } else {
-      data[dst] = tensor[i];
+      data[dst] = broadcast ? tensor[l * trailing + i % trailing] : tensor[i];
     }
   }
 };
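On the GPU, one thread is launched per output element instead of looping inside the kernel, so supporting broadcast only changes how the source index is derived from the flat thread id. A rough Python sketch of that mapping; names are illustrative, and `true_positions[m]` stands in for the binary search over the prefix sum performed in the elided context.

def gpu_thread_sketch(i, true_positions, leading, middle, valid_num, trailing,
                      data, tensor, scalar=False, broadcast=False):
    # `i` ranges over leading * valid_num * trailing, one call per GPU thread.
    t = i % trailing
    m = i // trailing % valid_num        # which True position this thread writes
    l = i // trailing // valid_num       # index on the leading axes
    mid = true_positions[m]              # position of that True on the masked axis
    dst = (l * middle + mid) * trailing + t
    if scalar:
        data[dst] = tensor[0]
    elif broadcast:
        # size-1 indexed axis: every True position reads the same (l, 0, t) element
        data[dst] = tensor[l * trailing + t]
    else:
        # values laid out as (leading, valid_num, trailing), so the flat id matches
        data[dst] = tensor[i]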
@@ -200,14 +201,17 @@ void NumpyBooleanAssignForwardGPU(const nnvm::NodeAttrs& attrs,
   // If there's no True in mask, return directly
   if (valid_num == 0) return;
 
+  const TShape& vshape = inputs[2].shape_;
+
   if (inputs.size() == 3U) {
-    const TShape& vshape = inputs[2].shape_;
     if (inputs[2].shape_.Size() != 1) {
-      // tensor case, check tensor size with the valid_num
-      CHECK_EQ(static_cast<size_t>(valid_num), vshape[start_axis])
-        << "boolean array indexing assignment cannot assign " << vshape
-        << " input values to the " << valid_num << " output values where the mask is true"
-        << std::endl;
+      if (vshape[start_axis] != 1) {
+        // tensor case, check tensor size equal to or broadcastable with valid_num
+        CHECK_EQ(static_cast<size_t>(valid_num), vshape[start_axis])
+          << "boolean array indexing assignment cannot assign " << vshape
+          << " input values to the " << valid_num << " output values where the mask is true"
+          << std::endl;
+      }
     }
   }

@@ -235,7 +239,7 @@ void NumpyBooleanAssignForwardGPU(const nnvm::NodeAttrs& attrs,
     MSHADOW_TYPE_SWITCH(data.type_flag_, DType, {
       Kernel<BooleanAssignGPUKernel<false>, gpu>::Launch(
         s, leading * valid_num * trailing, data.dptr<DType>(), prefix_sum, mask_size + 1,
-        leading, middle, valid_num, trailing, inputs[2].dptr<DType>());
+        leading, middle, valid_num, trailing, inputs[2].dptr<DType>(), (vshape[start_axis] == 1));
     });
   }
 } else {
14 changes: 10 additions & 4 deletions tests/python/unittest/test_numpy_op.py
@@ -1296,22 +1296,28 @@ def hybrid_forward(self, F, a, mask, value):
 
     for hybridize in [False]:
         for config in configs:
-            print(config)
             dshape, mshape, start_axis = config
             test_data = np.random.uniform(size=dshape)
-            mx_mask = np.around(np.random.uniform(size=mshape))
-            valid_num = int(mx_mask.sum())
+            valid_num = 0
+            while test_data.size != 0 and valid_num == 0:
+                mx_mask = np.around(np.random.uniform(size=mshape))
+                valid_num = int(mx_mask.sum())
             np_mask = mx_mask.asnumpy().astype(_np.bool)
             vshape = []
+            vshape_broadcast = []
             for i in range(len(dshape)):
                 if i < start_axis:
                     vshape.append(dshape[i])
+                    vshape_broadcast.append(dshape[i])
                 elif i == start_axis:
                     vshape.append(valid_num)
+                    vshape_broadcast.append(1)
                 elif i >= start_axis + len(mshape):
                     vshape.append(dshape[i])
+                    vshape_broadcast.append(dshape[i])
             vshape = tuple(vshape)
-            for val in [42.0, np.array(42.), np.array([42.]), np.random.uniform(size=vshape)]:
+            vshape_broadcast = tuple(vshape_broadcast)
+            for val in [42.0, np.array(42.), np.array([42.]), np.random.uniform(size=vshape), np.random.uniform(size=vshape_broadcast)]:
                 test_block = TestBooleanAssignScalar(val, start_axis) if isinstance(val, float) else TestBooleanAssignTensor(start_axis)
                 if hybridize:
                     test_block.hybridize()
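For reference, a worked instance of the vshape / vshape_broadcast construction in this test, using assumed example values rather than one of the committed configs:

# Assumed values: dshape=(3, 4, 5), mshape=(4,), start_axis=1, and a drawn mask with valid_num == 2.
dshape, mshape, start_axis, valid_num = (3, 4, 5), (4,), 1, 2
vshape, vshape_broadcast = [], []
for i in range(len(dshape)):
    if i < start_axis:                       # axes before the mask are kept
        vshape.append(dshape[i])
        vshape_broadcast.append(dshape[i])
    elif i == start_axis:                    # the indexed axis collapses to valid_num (or 1)
        vshape.append(valid_num)
        vshape_broadcast.append(1)
    elif i >= start_axis + len(mshape):      # axes after the mask are kept
        vshape.append(dshape[i])
        vshape_broadcast.append(dshape[i])
print(tuple(vshape))             # (3, 2, 5): one value slice per True position
print(tuple(vshape_broadcast))   # (3, 1, 5): size 1 on the indexed axis, broadcast by the operator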
