
Commit

Merge branch 'wyushun_dev_cumsum' of github.com:Oneflow-Inc/oneflow into wyushun_dev_cumsum

* 'wyushun_dev_cumsum' of github.com:Oneflow-Inc/oneflow:
  auto format by CI
  Support inplace for lazy consistent (#7112)
  fix fold op (#7156)
  cmake: fix THIRD_PARTY build (#7146)
  reduce memory usage caused by slice grad (#7144)
  Fea/nhwc (#6811)
wyushun committed Dec 31, 2021
2 parents 5c1f2f9 + 32b676a commit 9daba3e
Showing 24 changed files with 439 additions and 162 deletions.
5 changes: 2 additions & 3 deletions cmake/third_party/json.cmake
@@ -12,6 +12,5 @@ FetchContent_Declare(
URL_HASH MD5=${JSON_URL_HASH}
)

if(THIRD_PARTY)
FetchContent_MakeAvailable(json)
endif()

FetchContent_MakeAvailable(json)
2 changes: 1 addition & 1 deletion oneflow/api/python/job_build/job_build_and_infer.cpp
@@ -47,7 +47,7 @@ ONEFLOW_API_PYBIND11_MODULE("", m) {
m.def("JobBuildAndInferCtx_GetDataType", &JobBuildAndInferCtx_GetDataType);
m.def("JobBuildAndInferCtx_IsDynamic", &JobBuildAndInferCtx_IsDynamic);

m.def("JobBuildAndInferCtx_DisableBoxing", &JobBuildAndInferCtx_DisableBoxing);
m.def("JobBuildAndInferCtx_IsDisableBoxing", &JobBuildAndInferCtx_IsDisableBoxing);

m.def("JobBuildAndInferCtx_GetSplitAxisFromProducerView",
&JobBuildAndInferCtx_GetSplitAxisFromProducerView);
6 changes: 3 additions & 3 deletions oneflow/api/python/job_build/job_build_and_infer.h
@@ -114,10 +114,10 @@ inline Maybe<bool> JobBuildAndInferCtx_IsDynamic(const std::string& job_name,
return ctx->IsDynamic(lbn);
}

inline Maybe<bool> JobBuildAndInferCtx_DisableBoxing(const std::string& job_name,
const std::string& lbn) {
inline Maybe<bool> JobBuildAndInferCtx_IsDisableBoxing(const std::string& job_name,
const std::string& lbn) {
auto* ctx = JUST(GetJobBuildAndInferCtx(job_name));
return ctx->DisableBoxing(lbn);
return ctx->IsDisableBoxing(lbn);
}

inline Maybe<std::string> JobBuildAndInferCtx_GetSplitAxisFromProducerView(
5 changes: 3 additions & 2 deletions oneflow/api/python/job_build/job_build_and_infer_api.h
@@ -90,8 +90,9 @@ inline bool JobBuildAndInferCtx_IsDynamic(const std::string& job_name, const std
return oneflow::JobBuildAndInferCtx_IsDynamic(job_name, lbn).GetOrThrow();
}

inline bool JobBuildAndInferCtx_DisableBoxing(const std::string& job_name, const std::string& lbn) {
return oneflow::JobBuildAndInferCtx_DisableBoxing(job_name, lbn).GetOrThrow();
inline bool JobBuildAndInferCtx_IsDisableBoxing(const std::string& job_name,
const std::string& lbn) {
return oneflow::JobBuildAndInferCtx_IsDisableBoxing(job_name, lbn).GetOrThrow();
}

inline std::string JobBuildAndInferCtx_GetSplitAxisFromProducerView(const std::string& job_name,
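The three hunks above only rename the query: JobBuildAndInferCtx_DisableBoxing becomes JobBuildAndInferCtx_IsDisableBoxing in the pybind binding and both C++ wrappers. A plausible reading (an assumption, not stated in the commit) is that this reserves the plain name for the setter the lazy interpreter calls further down (infer_ctx->DisableBoxing(lbn)). A minimal standalone sketch of that query/mutator split, with a hypothetical JobBuildAndInferCtx reduced to just this flag:

#include <iostream>
#include <set>
#include <string>

// Hypothetical, stripped-down context: only the boxing flag is modeled here.
class JobBuildAndInferCtx {
 public:
  // Mutator: mark a logical blob name (lbn) so no boxing is inserted for it.
  void DisableBoxing(const std::string& lbn) { disabled_.insert(lbn); }
  // Query: has boxing been disabled for this lbn?
  bool IsDisableBoxing(const std::string& lbn) const { return disabled_.count(lbn) > 0; }

 private:
  std::set<std::string> disabled_;
};

int main() {
  JobBuildAndInferCtx ctx;
  ctx.DisableBoxing("op_a/out_0");
  std::cout << std::boolalpha << ctx.IsDisableBoxing("op_a/out_0") << "\n";  // true
  return 0;
}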
18 changes: 9 additions & 9 deletions oneflow/core/autograd/gradient_funcs/deconv.cpp
@@ -88,21 +88,21 @@ Maybe<void> DeConvolutionNd::Apply(const DeConvolutionNdCaptureState* ctx,
}
const auto& weight = ctx->SavedTensors().at(0);
if (ctx->ndims == 1) {
std::shared_ptr<Tensor> result =
JUST(functional::Conv1d(out_grads.at(0), weight, Optional<Tensor>(), ctx->strides,
ctx->padding_before, ctx->dilation_rate, ctx->groups));
std::shared_ptr<Tensor> result = JUST(functional::Conv1d(
out_grads.at(0), weight, Optional<Tensor>(), ctx->strides, ctx->padding_before,
ctx->dilation_rate, ctx->groups, ctx->data_format));
result = JUST(functional::Slice(result, start, stop, step));
in_grads->at(0) = result;
} else if (ctx->ndims == 2) {
std::shared_ptr<Tensor> result =
JUST(functional::Conv2d(out_grads.at(0), weight, Optional<Tensor>(), ctx->strides,
ctx->padding_before, ctx->dilation_rate, ctx->groups));
std::shared_ptr<Tensor> result = JUST(functional::Conv2d(
out_grads.at(0), weight, Optional<Tensor>(), ctx->strides, ctx->padding_before,
ctx->dilation_rate, ctx->groups, ctx->data_format));
result = JUST(functional::Slice(result, start, stop, step));
in_grads->at(0) = result;
} else if (ctx->ndims == 3) {
std::shared_ptr<Tensor> result =
JUST(functional::Conv3d(out_grads.at(0), weight, Optional<Tensor>(), ctx->strides,
ctx->padding_before, ctx->dilation_rate, ctx->groups));
std::shared_ptr<Tensor> result = JUST(functional::Conv3d(
out_grads.at(0), weight, Optional<Tensor>(), ctx->strides, ctx->padding_before,
ctx->dilation_rate, ctx->groups, ctx->data_format));
result = JUST(functional::Slice(result, start, stop, step));
in_grads->at(0) = result;
} else {
9 changes: 4 additions & 5 deletions oneflow/core/autograd/gradient_funcs/slice.cpp
@@ -24,6 +24,7 @@ namespace one {

struct SliceCaptureState : public AutoGradCaptureState {
bool requires_grad;
Shape like_shape;
std::vector<int64_t> start;
std::vector<int64_t> stop;
std::vector<int64_t> step;
@@ -49,17 +50,15 @@ class Slice : public OpExprGradFunction<SliceCaptureState> {
ctx->start = JUST(composed_attrs.GetAttr<std::vector<int64_t>>("start"));
ctx->stop = JUST(composed_attrs.GetAttr<std::vector<int64_t>>("stop"));
ctx->step = JUST(composed_attrs.GetAttr<std::vector<int64_t>>("step"));
ctx->SaveTensorForBackward(inputs.at(0));
ctx->like_shape = *(inputs.at(0)->shape());
return Maybe<void>::Ok();
}

Maybe<void> Apply(const SliceCaptureState* ctx, const TensorTuple& out_grads,
TensorTuple* in_grads) const override {
const auto& like = ctx->SavedTensors().at(0);

in_grads->resize(1);
in_grads->at(0) =
JUST(functional::SliceGrad(out_grads.at(0), like, ctx->start, ctx->stop, ctx->step));
in_grads->at(0) = JUST(
functional::SliceGrad(out_grads.at(0), ctx->like_shape, ctx->start, ctx->stop, ctx->step));
return Maybe<void>::Ok();
}

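The hunk above is the core of "reduce memory usage caused by slice grad (#7144)": instead of SaveTensorForBackward(inputs.at(0)), the capture state stores only the input's shape, so the forward input no longer has to stay alive until backward just to provide its dimensions. A minimal standalone sketch of the idea (hypothetical Tensor/capture types, not OneFlow's):

#include <cstdint>
#include <iostream>
#include <memory>
#include <vector>

// Hypothetical tensor: a shape plus a potentially large buffer.
struct Tensor {
  std::vector<int64_t> shape;
  std::vector<float> data;
};

// Before: the capture keeps the whole tensor (and its buffer) alive until backward.
struct CaptureWithTensor {
  std::shared_ptr<Tensor> like;
};

// After: the capture keeps only the shape; the buffer can be freed right away.
struct CaptureWithShape {
  std::vector<int64_t> like_shape;
};

int main() {
  auto x = std::make_shared<Tensor>();
  x->shape = {1024, 1024};
  x->data.assign(1024 * 1024, 0.f);  // ~4 MB that backward never actually reads

  CaptureWithShape ctx;
  ctx.like_shape = x->shape;  // copy a few integers instead of holding the tensor
  x.reset();                  // the large buffer is released before backward runs

  std::cout << "captured " << ctx.like_shape.size() << " dims\n";
  return 0;
}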
31 changes: 20 additions & 11 deletions oneflow/core/framework/op_interpreter/lazy_op_interpreter.cpp
@@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/common/cpp_attribute.h"
#include "oneflow/core/common/maybe.h"
#include "oneflow/core/common/cpp_attribute.h"
#include "oneflow/core/framework/consistency_check.h"
@@ -74,12 +75,9 @@ Maybe<Tensor> BuildTensor(const OpAttribute& op_attribute, const std::string& bn
Maybe<void> CheckTensorMatchAttr(const std::shared_ptr<Tensor>& tensor,
const OpAttribute& op_attribute, const std::string& bn_in_op,
const std::shared_ptr<ParallelDesc>& parallel_desc,
const bool is_lazy, const bool is_local, const bool requires_grad,
const bool is_leaf) {
const bool is_lazy, const bool is_local) {
CHECK_EQ_OR_RETURN(tensor->is_lazy(), is_lazy);
CHECK_EQ_OR_RETURN(tensor->is_local(), is_local);
CHECK_EQ_OR_RETURN(tensor->requires_grad(), requires_grad);
CHECK_EQ_OR_RETURN(tensor->is_leaf(), is_leaf);

CHECK_OR_RETURN(op_attribute.has_logical_blob_desc_signature());
const auto& blob_desc_sign_map = op_attribute.logical_blob_desc_signature().bn_in_op2blob_desc();
@@ -101,7 +99,8 @@ Maybe<void> CheckTensorMatchAttr(const std::shared_ptr<Tensor>& tensor,
CHECK_OR_RETURN(nd_sbp_it != nd_sbp_sign_map.end())
<< "nd_sbp of " << bn_in_op << " not found in op " << op_attribute.op_conf().name();
cfg::NdSbp nd_sbp(nd_sbp_it->second);
CHECK_OR_RETURN(JUST(tensor->nd_sbp()) == SymbolOf(nd_sbp));
CHECK_OR_RETURN(JUST(tensor->nd_sbp()) == SymbolOf(nd_sbp))
<< "The input sbp is not valid for an inplace operation, please try to use non-inplace.";
CHECK_OR_RETURN(JUST(tensor->parallel_desc()) == SymbolOf(*parallel_desc));
}
return Maybe<void>::Ok();
@@ -654,6 +653,21 @@ Maybe<void> LazyInterpreter::ApplyImpl(const UserOpExpr& op_expr, const TensorTu
}
}

// Check outputs num and setup output tensor properties.
CHECK_EQ_OR_RETURN(outputs->size(), op_expr.output_size());

// Disable boxing if the computation is inplace.
for (int i = 0; i < op_expr.output_size(); ++i) {
const auto& output = outputs->at(i);
if (output) {
const std::string& lbn = TensorNameScope::Global()->Lookup(output);
CHECK_OR_RETURN(!lbn.empty()) << "The output which index is " << i
<< " has no tensor name, please check whether the inplaced "
"output is also an input of the operation "
<< new_op_name;
JUST(infer_ctx->DisableBoxing(lbn));
}
}
VLOG(2) << "Lazy nn.Graph name " << graph_name << " try to add op: \n"
<< op_conf->DebugString() << std::endl;
OpAttribute op_attr = *JUST(infer_ctx->AddAndInferConsistentOp(*op_conf));
@@ -664,9 +678,6 @@ Maybe<void> LazyInterpreter::ApplyImpl(const UserOpExpr& op_expr, const TensorTu

int64_t parallel_desc_sym_id = JUST(scope->GetParallelDescSymbolId(*op_conf));
auto blob_parallel_desc = JUST(GetSymbol<cfg::ParallelConf, ParallelDesc>(parallel_desc_sym_id));

// Check outputs num and setup output tensor properties.
CHECK_EQ_OR_RETURN(outputs->size(), op_expr.output_size());
for (int i = 0; i < op_expr.output_size(); ++i) {
const std::string& obn = op_expr.indexed_obns().at(i);
if (!(*outputs)[i]) {
@@ -675,9 +686,7 @@ Maybe<void> LazyInterpreter::ApplyImpl(const UserOpExpr& op_expr, const TensorTu
} else {
const std::shared_ptr<Tensor>& inplace_out = (*outputs)[i];
JUST(CheckTensorMatchAttr(inplace_out, op_attr, obn, blob_parallel_desc, /* is_lazy= */ true,
is_local,
/* requires_grad */ false,
/* is_leaf */ true));
is_local));
}
TensorNameScope::Global()->Record((*outputs)[i], GenLogicalBlobName(new_op_name, obn));
}
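This file carries the "Support inplace for lazy consistent (#7112)" part: an output written in place aliases one of the inputs, so it must already have a logical blob name recorded in TensorNameScope, and boxing is then disabled for that blob so the graph builder will not insert a re-layout between the aliased input and output. A standalone sketch of that lookup-then-disable flow (hypothetical scope and flag set, not the real classes):

#include <iostream>
#include <set>
#include <string>
#include <unordered_map>

// Hypothetical name scope: maps a tensor object to the logical blob name (lbn)
// recorded when the tensor was produced.
struct TensorNameScope {
  std::unordered_map<const void*, std::string> names;
  std::string Lookup(const void* tensor) const {
    auto it = names.find(tensor);
    return it == names.end() ? std::string() : it->second;
  }
};

int main() {
  int input_tensor = 0;  // stands in for a real tensor object
  TensorNameScope scope;
  scope.names[&input_tensor] = "op_a/out_0";  // recorded when it was produced upstream

  std::set<std::string> boxing_disabled;       // stands in for infer_ctx state
  const void* inplace_output = &input_tensor;  // inplace: the output aliases the input

  const std::string lbn = scope.Lookup(inplace_output);
  if (lbn.empty()) {
    // Mirrors the CHECK above: an inplace output must also be an input of the op.
    std::cerr << "inplace output has no tensor name\n";
    return 1;
  }
  boxing_disabled.insert(lbn);  // corresponds to JUST(infer_ctx->DisableBoxing(lbn))
  std::cout << "boxing disabled for " << lbn << "\n";
  return 0;
}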
25 changes: 14 additions & 11 deletions oneflow/core/functional/functional_api.yaml
@@ -629,13 +629,22 @@
- name: "conv1d"
signature:
"Tensor (Tensor x, Tensor weight, Tensor bias=None, Int32List stride,
Int32List padding, Int32List dilation, Int32 groups=1) => Conv1d"
Int32List padding, Int32List dilation, Int32 groups=1,
String channel_pos) => Conv1d"
bind_python: True

- name: "conv2d"
signature:
"Tensor (Tensor x, Tensor weight, Tensor bias=None, Int32List stride,
Int32List padding, Int32List dilation, Int32 groups=1) => Conv2d"
Int32List padding, Int32List dilation, Int32 groups=1,
String channel_pos) => Conv2d"
bind_python: True

- name: "conv3d"
signature:
"Tensor (Tensor x, Tensor weight, Tensor bias=None, Int32List stride,
Int32List padding, Int32List dilation, Int32 groups=1,
String channel_pos) => Conv3d"
bind_python: True

- name: "fake_quantization"
@@ -663,12 +672,6 @@
Int32 quantization_bit, String quantization_scheme, Float momentum) => MovingAverageMinMaxObserver"
bind_python: True

- name: "conv3d"
signature:
"Tensor (Tensor x, Tensor weight, Tensor bias=None, Int32List stride,
Int32List padding, Int32List dilation, Int32 groups=1) => Conv3d"
bind_python: True

- name: "conv_data_grad"
signature:
'Tensor (Tensor dy, Tensor weight, Tensor x, Int32 num_spatial_dims,
@@ -954,14 +957,14 @@
signature: "TensorTuple (Tensor dy, Tensor gamma, Tensor normalized, Int64 begin_params_axis, Double epsilon) => LayerNormAffineParamGrad"
bind_python: False

- name: "avg_pool_2d"
- name: "avg_pool2d_nhwc"
signature:
'Tensor (Tensor x, Int32List kernel_size, Int32List stride, String padding,
Int32List padding_before, Int32List padding_after,
String data_format="channels_first", Bool ceil_mode=False) => AvgPool2D'
bind_python: True

- name: "max_pool_2d"
- name: "max_pool2d_nhwc"
signature:
'Tensor (Tensor x, Int32List kernel_size, Int32List stride, String padding,
Int32List padding_before, Int32List padding_after,
Expand Down Expand Up @@ -1044,7 +1047,7 @@
bind_python: True

- name: "slice_grad"
signature: "Tensor (Tensor dy, Tensor like, Int64List start, Int64List stop, Int64List step) => SliceGrad"
signature: "Tensor (Tensor dy, Shape like, Int64List start, Int64List stop, Int64List step) => SliceGrad"
bind_python: False

- name: "narrow"
8 changes: 4 additions & 4 deletions oneflow/core/functional/impl/array_functor.cpp
@@ -969,15 +969,15 @@ class SliceGradBaseFunctor {
public:
SliceGradBaseFunctor() = default;
virtual ~SliceGradBaseFunctor() = default;
Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& dy,
const std::shared_ptr<one::Tensor>& like,
Maybe<Tensor> operator()(const std::shared_ptr<one::Tensor>& dy, const Shape& like,
const std::vector<int64_t>& start, const std::vector<int64_t>& stop,
const std::vector<int64_t>& step) const {
MutableAttrMap attrs;
JUST(attrs.SetAttr<Shape>("like_shape", like));
JUST(attrs.SetAttr<std::vector<int64_t>>("start", start));
JUST(attrs.SetAttr<std::vector<int64_t>>("stop", stop));
JUST(attrs.SetAttr<std::vector<int64_t>>("step", step));
return OpInterpUtil::Dispatch<Tensor>(*op_, {dy, like}, attrs);
return OpInterpUtil::Dispatch<Tensor>(*op_, {dy}, attrs);
}

protected:
@@ -992,7 +992,7 @@ class SliceFunctor : public SliceBaseFunctor {
class SliceGradFunctor : public SliceGradBaseFunctor {
public:
SliceGradFunctor() {
op_ = CHECK_JUST(one::OpBuilder("slice_grad").Input("dy").Input("like").Output("dx").Build());
op_ = CHECK_JUST(one::OpBuilder("slice_grad").Input("dy").Output("dx").Build());
}
};

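Together with the YAML change above (Tensor like → Shape like), the functor now passes the shape as a like_shape attribute and drops the like input from the op entirely, which works because slice_grad only needs the extent of the original tensor, not its values. A 1-D standalone sketch of what the grad computes from that shape alone (not the real kernel):

#include <cstdint>
#include <iostream>
#include <vector>

// dx has the "like" shape, starts as zeros, and dy is scattered back into the
// positions the forward slice read from.
std::vector<float> SliceGrad1D(const std::vector<float>& dy, int64_t like_dim,
                               int64_t start, int64_t stop, int64_t step) {
  std::vector<float> dx(like_dim, 0.0f);
  int64_t j = 0;
  for (int64_t i = start; i < stop && j < static_cast<int64_t>(dy.size()); i += step) {
    dx.at(i) = dy.at(j++);
  }
  return dx;
}

int main() {
  // Forward: x[1:6:2] on a length-8 tensor produced 3 elements.
  std::vector<float> dy = {1.f, 2.f, 3.f};
  for (float v : SliceGrad1D(dy, /*like_dim=*/8, /*start=*/1, /*stop=*/6, /*step=*/2)) {
    std::cout << v << " ";
  }
  std::cout << "\n";  // 0 1 0 2 0 3 0 0
  return 0;
}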
14 changes: 10 additions & 4 deletions oneflow/core/functional/impl/nn_functor.cpp
@@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

#include "oneflow/core/common/data_type.pb.h"
#include "oneflow/core/common/optional.h"
#include "oneflow/core/common/scalar.h"
#include "oneflow/core/framework/attr_map.h"
@@ -71,19 +72,23 @@ class ConvBaseFunctor {
const std::shared_ptr<one::Tensor>& weight,
const Optional<one::Tensor>& bias, const std::vector<int32_t>& stride,
const std::vector<int32_t>& padding,
const std::vector<int32_t>& dilation, const int32_t& groups) const {
const std::vector<int32_t>& dilation, const int32_t& groups,
const std::string& channel_pos) const {
MutableAttrMap conv_attrs;
std::vector<int32_t> kernel_size_vec(num_spatial_dims_);
int32_t kernel_idx_offset = 2;
if (channel_pos == "channels_last") { kernel_idx_offset = 1; }

for (int i = 0; i < num_spatial_dims_; i++) {
kernel_size_vec.at(i) = ((weight->shape())->At(i + 2));
kernel_size_vec.at(i) = ((weight->shape())->At(i + kernel_idx_offset));
}
JUST(conv_attrs.SetAttr<int32_t>("filters", (weight->shape())->At(0)));
JUST(conv_attrs.SetAttr<std::vector<int32_t>>("padding_before", padding));
JUST(conv_attrs.SetAttr<std::vector<int32_t>>("kernel_size", kernel_size_vec));
JUST(conv_attrs.SetAttr<std::vector<int32_t>>("strides", stride));
JUST(conv_attrs.SetAttr<std::vector<int32_t>>("dilation_rate", dilation));
JUST(conv_attrs.SetAttr<int32_t>("groups", groups));
JUST(conv_attrs.SetAttr<std::string>("data_format", std::string("channels_first")));
JUST(conv_attrs.SetAttr<std::string>("data_format", channel_pos));
const std::shared_ptr<one::Tensor>& conv_out =
JUST(OpInterpUtil::Dispatch<Tensor>(*conv_op_, {x, weight}, conv_attrs));
if (bias) {
Expand Down Expand Up @@ -1627,10 +1632,11 @@ class OneHotFunctor {
} else {
JUST(attrs.SetAttr<int64_t>("depth", num_classes));
}
// Refer to: https://github.com/Oneflow-Inc/oneflow/pull/5315/files#r755823506
bool is_on_value_double = on_value.IsFloatingPoint();
bool is_off_value_double = off_value.IsFloatingPoint();
if (is_on_value_double || is_off_value_double) {
JUST(attrs.SetAttr<DataType>("dtype", kDouble));
JUST(attrs.SetAttr<DataType>("dtype", kFloat));
JUST(attrs.SetAttr<double>("floating_on_value", JUST(on_value.As<double>())));
JUST(attrs.SetAttr<double>("floating_off_value", JUST(off_value.As<double>())));
JUST(attrs.SetAttr<int64_t>("integer_on_value", 0));
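The kernel_idx_offset change above is the NHWC piece (Fea/nhwc, #6811): for channels_first the convolution weight is laid out as (filters, channels/groups, spatial...), so spatial extents start at index 2, while for channels_last they start at index 1. A standalone sketch of that kernel-size extraction (the example weight layouts are assumptions for illustration):

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

std::vector<int32_t> KernelSizeFromWeight(const std::vector<int64_t>& weight_shape,
                                          int num_spatial_dims,
                                          const std::string& channel_pos) {
  // Spatial dims start right after (filters, channels) for channels_first,
  // and right after (filters) for channels_last.
  const int kernel_idx_offset = (channel_pos == "channels_last") ? 1 : 2;
  std::vector<int32_t> kernel_size(num_spatial_dims);
  for (int i = 0; i < num_spatial_dims; ++i) {
    kernel_size[i] = static_cast<int32_t>(weight_shape.at(i + kernel_idx_offset));
  }
  return kernel_size;
}

int main() {
  // A Conv2d weight of shape (64, 3, 5, 5) in channels_first ...
  auto nchw = KernelSizeFromWeight({64, 3, 5, 5}, 2, "channels_first");
  // ... and (64, 5, 5, 3) in channels_last: both yield a 5x5 kernel.
  auto nhwc = KernelSizeFromWeight({64, 5, 5, 3}, 2, "channels_last");
  std::cout << nchw[0] << "x" << nchw[1] << " / " << nhwc[0] << "x" << nhwc[1] << "\n";
  return 0;
}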
