Defined a common base class for TensorComputeOp and ComputeOp #2587

Merged: 3 commits, Mar 1, 2019
59 changes: 31 additions & 28 deletions include/tvm/operation.h
@@ -184,22 +184,45 @@ class PlaceholderOpNode : public OperationNode {

/*!
* \brief A Compute op that compute a tensor on certain domain.
Contributor: stressing that it is the base class of ComputeOp and TensorComputeOp

Contributor (Author): done.

* This is the base class for ComputeOp (operating on a scalar at a time) and
* TensorComputeOp (operating on a TensorSlice at a time)
*/
class TVM_DLL ComputeOpNode : public OperationNode {
class TVM_DLL BaseComputeOpNode : public OperationNode {
public:
/*! \brief IterVar on each axis */
Array<IterVar> axis;
/*! \brief IterVar on each reduction axis, if the body is a Reduce */
Array<IterVar> reduce_axis;
// override functions
Array<IterVar> root_iter_vars() const final;
Array<Expr> output_shape(size_t idx) const final;
void GatherBound(
const Operation& self,
const std::unordered_map<Tensor, TensorDom>& tensor_dom,
std::unordered_map<IterVar, Range>* out_dom_map) const final;
Contributor: Since ComputeOpNode is an exposed structure, can we keep it as it is, but add another BasicComputeOpNode that ComputeOpNode and TensorComputeOpNode both inherit from? In this way we can avoid so many modifications, and we also save modifications for the in-house projects based on TVM.

Contributor (Author): The way I see it is that TVM has defined a Node class, then derived OperationNode, then derived <X>OpNode, and so on. Therefore, before this PR, one could find out the class hierarchy just by looking at the names of the classes. I believe this was a good decision and I have seen it in other code bases. That's why I thought to keep it that way after this new class is introduced.

Regarding the amount of modification needed both inside and outside TVM:

  • inside TVM: I agree that quite a few files have been affected. However, the changes are trivial, and they have already been done fairly quickly using automatic refactoring tools, so no additional effort is needed.
  • outside TVM: even if there is a lot of code out there using ComputeOpNode (which would need to be renamed to ScalarComputeOpNode), I don't see why that is an inconvenient thing to do. Maybe I'm missing something?
    Moreover, I think keeping the consistency of class naming in TVM (which I hope thrives for many years to come) is more important than that level of inconvenience in external projects that depend on it.

That's my two cents.

Contributor (@xqdan, Feb 14, 2019):

> The way I see it is that TVM has defined a Node class, then derived OperationNode, then derived <X>OpNode, and so on. Therefore, before this PR, one could find out the class hierarchy just by looking at the names of the classes. I believe this was a good decision and I have seen it in other code bases. That's why I thought to keep it that way after this new class is introduced.

Agree; what I'm suggesting has no conflict with this point.

> Regarding the amount of modification needed both inside and outside TVM:
>
>   • inside TVM: I agree that quite a few files have been affected. However, the changes are trivial, and they have already been done fairly quickly using automatic refactoring tools, so no additional effort is needed.
>   • outside TVM: even if there is a lot of code out there using ComputeOpNode (which would need to be renamed to ScalarComputeOpNode), I don't see why that is an inconvenient thing to do. Maybe I'm missing something?

We do have some customized passes (C++ and Python) and customized schedule templates (Python) that reference ComputeOpNode, and that code is not public; if this is merged, we will need to replace those references with ScalarComputeOpNode when syncing with upstream. I believe other in-house projects may have a similar issue.

> Moreover, I think keeping the consistency of class naming in TVM (which I hope thrives for many years to come) is more important than that level of inconvenience in external projects that depend on it.

I don't see much difference if we change ComputeOpNode to ScalarComputeOpNode, since everybody who uses ComputeOpNode already knows it is a scalar compute; for those who don't, a comment on the ComputeOpNode class will be enough.

> That's my two cents.

Contributor (Author): Thanks @xqdan. I followed the naming scheme you recommended, using BaseComputeOpNode for the base class.

Stmt BuildRealize(
const Stage& stage,
const std::unordered_map<IterVar, Range>& realize_map,
const Stmt& body) const final;
virtual size_t num_schedulable_dims() const = 0;

static constexpr const char* _type_key = "BaseComputeOp";
TVM_DECLARE_BASE_NODE_INFO(BaseComputeOpNode, OperationNode);
};


/*!
* \brief A Compute op that compute a tensor on certain domain.
*/
class TVM_DLL ComputeOpNode : public BaseComputeOpNode {
public:
/*! \brief the compute expression */
Array<Expr> body;
/*! \brief constructor */
ComputeOpNode() {}
// override functions
int num_outputs() const final;
Array<IterVar> root_iter_vars() const final;
Type output_dtype(size_t i) const final;
Array<Expr> output_shape(size_t i) const final;
Array<Tensor> InputTensors() const final;
Operation ReplaceInputs(
const Operation& self,
@@ -208,18 +231,11 @@ class TVM_DLL ComputeOpNode : public OperationNode {
const Operation& self,
const std::unordered_map<const Variable*, IntSet>& dom_map,
std::unordered_map<Tensor, TensorDom>* out_dom_map) const final;
void GatherBound(
const Operation& self,
const std::unordered_map<Tensor, TensorDom>& tensor_dom,
std::unordered_map<IterVar, Range>* out_dom_map) const final;
Stmt BuildRealize(
const Stage& stage,
const std::unordered_map<IterVar, Range>& realize_map,
const Stmt& body) const final;
Stmt BuildProvide(
const Stage& stage,
const std::unordered_map<IterVar, Range>& dom_map,
bool debug_keep_trivial_loop) const final;
size_t num_schedulable_dims() const final;

void VisitAttrs(AttrVisitor* v) final {
v->Visit("name", &name);
@@ -236,18 +252,14 @@ class TVM_DLL ComputeOpNode : public OperationNode {
Array<Expr> body);

static constexpr const char* _type_key = "ComputeOp";
TVM_DECLARE_NODE_TYPE_INFO(ComputeOpNode, OperationNode);
TVM_DECLARE_NODE_TYPE_INFO(ComputeOpNode, BaseComputeOpNode);
};

/*!
* \brief A TensorCompute op that computes a tensor with a tensor intrinsic.
*/
class TensorComputeOpNode : public OperationNode {
class TensorComputeOpNode : public BaseComputeOpNode {
public:
/*! \brief IterVar on each axis */
Array<IterVar> axis;
/*! \brief IterVar on each reduction axis, if the intrin will use the reduce axis */
Array<IterVar> reduce_axis;
/*! \brief number of axes that can be scheduled */
int schedulable_ndim;
/*! \brief TensorIntrin used to compute */
@@ -260,9 +272,7 @@ class TensorComputeOpNode : public OperationNode {
TensorComputeOpNode() {}
// override functions
int num_outputs() const final;
Array<IterVar> root_iter_vars() const final;
Type output_dtype(size_t i) const final;
Array<Expr> output_shape(size_t i) const final;
Array<Tensor> InputTensors() const final;
Operation ReplaceInputs(
const Operation& self,
@@ -271,18 +281,11 @@ class TensorComputeOpNode : public OperationNode {
const Operation& self,
const std::unordered_map<const Variable*, IntSet>& dom_map,
std::unordered_map<Tensor, TensorDom>* out_dom_map) const final;
void GatherBound(
const Operation& self,
const std::unordered_map<Tensor, TensorDom>& tensor_dom,
std::unordered_map<IterVar, Range>* out_dom_map) const final;
Stmt BuildRealize(
const Stage& stage,
const std::unordered_map<IterVar, Range>& realize_map,
const Stmt& body) const final;
Stmt BuildProvide(
const Stage& stage,
const std::unordered_map<IterVar, Range>& dom_map,
bool debug_keep_trivial_loop) const final;
size_t num_schedulable_dims() const final;

void VisitAttrs(AttrVisitor* v) final {
v->Visit("name", &name);
@@ -304,7 +307,7 @@ class TensorComputeOpNode : public OperationNode {
Array<Region> regions);

static constexpr const char* _type_key = "TensorComputeOp";
TVM_DECLARE_NODE_TYPE_INFO(TensorComputeOpNode, OperationNode);
TVM_DECLARE_NODE_TYPE_INFO(TensorComputeOpNode, BaseComputeOpNode);
};

/*!
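To make the point of the review thread above concrete: once axis, reduce_axis, GatherBound, BuildRealize, and num_schedulable_dims live in BaseComputeOpNode, a pass can handle both compute flavours through one pointer instead of special-casing each node type. Below is a minimal sketch of that usage, assuming NodeRef::as<T>() also matches node types derived from T (as set up by TVM_DECLARE_BASE_NODE_INFO); the helper name SchedulableDims is made up for illustration, not part of the PR.

```cpp
#include <tvm/operation.h>

// Hypothetical helper: query schedulable dimensions without caring whether
// the op is a ComputeOp (scalar body) or a TensorComputeOp (tensor intrinsic).
size_t SchedulableDims(const tvm::Operation& op) {
  // as<BaseComputeOpNode>() matches both ComputeOpNode and TensorComputeOpNode,
  // since both now derive from the shared base class.
  if (const tvm::BaseComputeOpNode* base = op.as<tvm::BaseComputeOpNode>()) {
    return base->num_schedulable_dims();
  }
  return 0;  // e.g. a PlaceholderOp exposes no schedulable compute axes
}
```

This mirrors what the PR does internally: ComputeLoopNest::make now takes a const BaseComputeOpNode* (see src/op/compute_op.h below), so the loop-nest construction is shared by both ops.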
10 changes: 8 additions & 2 deletions python/tvm/tensor.py
@@ -146,7 +146,7 @@ class PlaceholderOp(Operation):


@register_node
class ComputeOp(Operation):
class BaseComputeOp(Operation):
"""Compute operation."""
@property
def axis(self):
@@ -160,7 +160,13 @@ def reduce_axis(self):


@register_node
class TensorComputeOp(Operation):
class ComputeOp(BaseComputeOp):
"""Scalar operation."""
pass


@register_node
class TensorComputeOp(BaseComputeOp):
"""Tensor operation."""


34 changes: 19 additions & 15 deletions src/op/compute_op.cc
@@ -40,7 +40,7 @@ int ComputeOpNode::num_outputs() const {
return body.size();
}

Array<IterVar> ComputeOpNode::root_iter_vars() const {
Array<IterVar> BaseComputeOpNode::root_iter_vars() const {
if (reduce_axis.size() == 0) return axis;
Array<IterVar> ret = axis;
for (IterVar iv : reduce_axis) {
@@ -54,15 +54,15 @@ Type ComputeOpNode::output_dtype(size_t idx) const {
return body[idx].type();
}

Array<Expr> ComputeOpNode::output_shape(size_t idx) const {
Array<Expr> BaseComputeOpNode::output_shape(size_t idx) const {
CHECK_LT(idx, num_outputs());
// for now, all outputs of ComputeOp have the same shape
std::vector<Expr> shape;
for (size_t i = 0; i < axis.size(); ++i) {
const Range& r = axis[i]->dom;
// for now, all outputs of a BaseComputeOp have the same shape
Array<Expr> shape;
for (const auto& ivar : this->axis) {
const Range& r = ivar->dom;
shape.push_back(r->extent);
}
return Array<Expr>(shape);
return shape;
}

Tensor compute(Array<Expr> shape,
@@ -208,7 +208,7 @@ void ComputeOpNode::PropBoundToInputs(
for (auto& e : body) ir::PostOrderVisit(e, fvisit);
}

void ComputeOpNode::GatherBound(
void BaseComputeOpNode::GatherBound(
const Operation& self,
const std::unordered_map<Tensor, TensorDom>& tensor_dom,
std::unordered_map<IterVar, Range>* out_dom_map) const {
@@ -225,22 +225,22 @@ void ComputeOpNode::GatherBound(
}
}

Stmt ComputeOpNode::BuildRealize(
Stmt BaseComputeOpNode::BuildRealize(
const Stage& stage,
const std::unordered_map<IterVar, Range>& realize_map,
const Stmt& realize_body) const {
const Stmt& body) const {
CHECK_EQ(stage->op.get(), this);
HalideIR::Internal::Region bounds;
for (IterVar iv : this->axis) {
bounds.push_back(realize_map.at(iv));
}
Stmt realize = realize_body;
Stmt realize = body;
for (int i = this->num_outputs(); i > 0; --i) {
Tensor t = stage->op.output(i-1);
realize = ir::Realize::make(t->op, t->value_index,
t->dtype, bounds, const_true(), realize);
// alignment requirement, only useful for compute
for (size_t i = 0; i < this->axis.size(); ++i) {
for (size_t i = 0; i < num_schedulable_dims(); ++i) {
auto it = stage->iter_var_attrs.find(this->axis[i]);
if (it != stage->iter_var_attrs.end()) {
IterVarAttr attr = (*it).second;
@@ -259,6 +259,10 @@ Stmt ComputeOpNode::BuildRealize(
return realize;
}

size_t ComputeOpNode::num_schedulable_dims() const {
return axis.size();
}

// Build a reduction body.
void MakeReduction(const ComputeOpNode* op,
const Array<Tensor>& tensors,
@@ -414,7 +418,7 @@ Stmt ComputeOpNode::BuildProvide(
}

ComputeLoopNest ComputeLoopNest::make(
const ComputeOpNode* self,
const BaseComputeOpNode* self,
const Stage& stage,
const std::unordered_map<IterVar, Range>& dom_map,
bool debug_keep_trivial_loop) {
@@ -440,8 +444,8 @@ ComputeLoopNest ComputeLoopNest::make(
for (IterVar iv : self->reduce_axis) {
update_state[iv] = 2;
}
for (IterVar iv : self->axis) {
update_state[iv] = 1;
for (size_t i = 0; i < self->num_schedulable_dims(); ++i) {
update_state[self->axis[i]] = 1;
}
// find which iter var is related to reduction and which is related to axis.
schedule::PassDownBitMaskOr(stage, &update_state);
2 changes: 1 addition & 1 deletion src/op/compute_op.h
@@ -41,7 +41,7 @@ struct ComputeLoopNest {
* \return The constructed loop nest
*/
static ComputeLoopNest make(
const ComputeOpNode* self,
const BaseComputeOpNode* self,
const Stage& stage,
const std::unordered_map<IterVar, Range>& dom_map,
bool debug_keep_trivial_loop);