From 124374233f7622f82deff97bd3f9fd85252f78c0 Mon Sep 17 00:00:00 2001 From: clackhan Date: Fri, 7 Jan 2022 17:58:16 +0800 Subject: [PATCH 01/12] support_symmetric_cyclic_nd_sbp_boxing --- oneflow/core/boxing/boxing_dividor_util.cpp | 28 +++++++++++++++++++ oneflow/core/boxing/boxing_dividor_util.h | 1 + .../boxing/eager_boxing_interpreter_mgr.cpp | 27 ++++++++++++------ ...pp => symmetric_acyclic_nd_sbp_boxing.cpp} | 13 +++++---- oneflow/core/framework/placement_sbp_util.cpp | 26 +++++++++++++++++ oneflow/core/framework/placement_sbp_util.h | 4 +++ 6 files changed, 84 insertions(+), 15 deletions(-) rename oneflow/core/boxing/{symmetric_nd_sbp_boxing.cpp => symmetric_acyclic_nd_sbp_boxing.cpp} (88%) diff --git a/oneflow/core/boxing/boxing_dividor_util.cpp b/oneflow/core/boxing/boxing_dividor_util.cpp index fffa308ace1..81b70142fab 100644 --- a/oneflow/core/boxing/boxing_dividor_util.cpp +++ b/oneflow/core/boxing/boxing_dividor_util.cpp @@ -266,4 +266,32 @@ decltype(InFirstDeviceAndAllBroadcast) InFirstDeviceAndAllBroadcast = DECORATE(&RawInFirstDeviceAndAllBroadcast, ThreadLocal); decltype(OutFirstDeviceAndAllBroadcast) OutFirstDeviceAndAllBroadcast = DECORATE(&RawOutFirstDeviceAndAllBroadcast, ThreadLocal); + +namespace { + +Maybe> RawPlacementAndRepeatFirstSbp(Symbol placed_nd_sbp) { + const auto& first_sbp_parallel = placed_nd_sbp->nd_sbp()->sbp_parallel(0); + cfg::NdSbp out_nd_sbp; + for (int64_t i = 0; i < placed_nd_sbp->nd_sbp()->sbp_parallel_size(); ++i) { + out_nd_sbp.mutable_sbp_parallel()->Add()->CopyFrom(first_sbp_parallel); + } + return JUST(PlacedNdSbp::New(SymbolOf(out_nd_sbp), placed_nd_sbp->placement())); +} + +static constexpr auto* PlacementAndRepeatFirstSbp = + DECORATE(&RawPlacementAndRepeatFirstSbp, ThreadLocal); + +Maybe RawInPlacementAndRepeatFirstSbp() { + return std::make_shared( + "InPlacementAndRepeatFirstSbp", + [](Symbol in, Symbol out) -> Maybe> { + return PlacementAndRepeatFirstSbp(in); + }); +} + +} // namespace + +decltype(InPlacementAndRepeatFirstSbp) InPlacementAndRepeatFirstSbp = + DECORATE(&RawInPlacementAndRepeatFirstSbp, ThreadLocal); + } // namespace oneflow diff --git a/oneflow/core/boxing/boxing_dividor_util.h b/oneflow/core/boxing/boxing_dividor_util.h index 18a279c284c..8ec6fd1f5bc 100644 --- a/oneflow/core/boxing/boxing_dividor_util.h +++ b/oneflow/core/boxing/boxing_dividor_util.h @@ -33,6 +33,7 @@ extern Maybe (*InPlacementAndSplit)(int64_t axis); extern Maybe (*OutPlacementAndSplit)(int64_t axis); extern Maybe (*InFirstDeviceAndAllBroadcast)(); extern Maybe (*OutFirstDeviceAndAllBroadcast)(); +extern Maybe (*InPlacementAndRepeatFirstSbp)(); } // namespace oneflow diff --git a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp index cde1f497772..69e5eaf87e5 100644 --- a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp +++ b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp @@ -62,16 +62,25 @@ Maybe OneToNBoxingExpr() { } Maybe SymmetricOnedToNdBoxingExpr() { - return JUST( - BoxingExpr(JUST(UnflattenInHierarchy()), JUST(BoxingExpr("unflatten-hierarchy")), - JUST(BoxingExpr("symmetric-nd-sbp-to-nd-sbp")) | JUST(BoxingExpr("identity")))); + return JUST(BoxingExpr( + JUST(UnflattenInHierarchy()), JUST(BoxingExpr("unflatten-hierarchy")), + JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) | JUST(BoxingExpr("identity")))); } Maybe SymmetricNdToOnedBoxingExpr() { - return JUST( - BoxingExpr(JUST(UnflattenOutHierarchy()), - JUST(BoxingExpr("symmetric-nd-sbp-to-nd-sbp")) | JUST(BoxingExpr("identity")), - JUST(BoxingExpr("flatten-hierarchy")))); + return JUST(BoxingExpr( + JUST(UnflattenOutHierarchy()), + JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) | JUST(BoxingExpr("identity")), + JUST(BoxingExpr("flatten-hierarchy")))); +} + +Maybe SymmetricNdToNdBoxingExpr() { + return JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) + | JUST(BoxingExpr( + JUST(InPlacementAndRepeatFirstSbp()), + JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) | JUST(BoxingExpr("identity")), + JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) + | JUST(BoxingExpr("identity")))); } Maybe GenericBoxingExpr() { @@ -99,13 +108,13 @@ Maybe RawMainBoxingExpr() { | JUST(BoxingExpr("ccl-s-to-s")) | JUST(BoxingExpr("nccl-p-to-s")) | JUST(BoxingExpr("ccl-p-to-s")) | JUST(BoxingExpr("symmetric-b-to-p")) | JUST(BoxingExpr("symmetric-b-to-s")) | JUST(BoxingExpr("symmetric-s-to-p")) - | JUST(BoxingExpr("symmetric-nd-sbp-to-nd-sbp")) | JUST(BoxingExpr("asymmetric-x-to-b")) | JUST(BoxingExpr("naive-s-to-s")) | JUST(BoxingExpr("naive-1-to-1")) | JUST(BoxingExpr("naive-s-to-b")) | JUST(BoxingExpr("naive-b-to-s")) | JUST(BoxingExpr("naive-p-to-b")) | JUST(BoxingExpr("naive-p-to-s")) | JUST(OneToNBoxingExpr()) | JUST(NToOneBoxingExpr()) | JUST(GenericBoxingExpr()) - | JUST(SymmetricOnedToNdBoxingExpr()) | JUST(SymmetricNdToOnedBoxingExpr()); + | JUST(SymmetricNdToNdBoxingExpr()) | JUST(SymmetricOnedToNdBoxingExpr()) + | JUST(SymmetricNdToOnedBoxingExpr()); return core | JUST(OptionalCudaCopy(core)); } diff --git a/oneflow/core/boxing/symmetric_nd_sbp_boxing.cpp b/oneflow/core/boxing/symmetric_acyclic_nd_sbp_boxing.cpp similarity index 88% rename from oneflow/core/boxing/symmetric_nd_sbp_boxing.cpp rename to oneflow/core/boxing/symmetric_acyclic_nd_sbp_boxing.cpp index 02abeb28146..52baa4aec75 100644 --- a/oneflow/core/boxing/symmetric_nd_sbp_boxing.cpp +++ b/oneflow/core/boxing/symmetric_acyclic_nd_sbp_boxing.cpp @@ -66,17 +66,18 @@ Maybe Apply1DBoxing(const std::shared_ptr& input, } // namespace -Maybe CheckSymmetricNdSbpBoxing(Symbol in, Symbol out, - const Shape& logical_shape) { +Maybe CheckSymmetricAcyclicNdSbpBoxing(Symbol in, Symbol out, + const Shape& logical_shape) { CHECK_OR_RETURN(in->placement() == out->placement()); CHECK_OR_RETURN(in->nd_sbp() != out->nd_sbp()); CHECK_EQ_OR_RETURN(in->nd_sbp()->sbp_parallel_size(), out->nd_sbp()->sbp_parallel_size()); CHECK_GT_OR_RETURN(in->nd_sbp()->sbp_parallel_size(), 1); + JUST(CheckIsNdSbpBoxingAcyclicAfterDecompose(in, out, logical_shape)); return Maybe::Ok(); } -Maybe SymmetricNdSbpBoxing(const std::shared_ptr& input, - Symbol in, Symbol out) { +Maybe SymmetricAcyclicNdSbpBoxing(const std::shared_ptr& input, + Symbol in, Symbol out) { const auto& out_nd_sbp = out->nd_sbp(); const auto& in_parallel_desc = in->placement(); const auto& out_parallel_desc = out->placement(); @@ -97,7 +98,7 @@ Maybe SymmetricNdSbpBoxing(const std::shared_ptr& inpu return JUST(ReinterpterConsistentTensor(tensor, *input->shape(), out_parallel_desc, out_nd_sbp)); } -COMMAND(RegisterBoxingFunction("symmetric-nd-sbp-to-nd-sbp", CheckSymmetricNdSbpBoxing, - &SymmetricNdSbpBoxing)); +COMMAND(RegisterBoxingFunction("symmetric-acyclic-nd-sbp-to-nd-sbp", + CheckSymmetricAcyclicNdSbpBoxing, &SymmetricAcyclicNdSbpBoxing)); } // namespace oneflow diff --git a/oneflow/core/framework/placement_sbp_util.cpp b/oneflow/core/framework/placement_sbp_util.cpp index ccbc1011d63..15f151b1ae5 100644 --- a/oneflow/core/framework/placement_sbp_util.cpp +++ b/oneflow/core/framework/placement_sbp_util.cpp @@ -629,11 +629,37 @@ Maybe RawCheckIsNdSbpBoxingAcyclic(Symbol in, Symbol::Ok(); } +Maybe RawCheckIsNdSbpBoxingAcyclicAfterDecompose(Symbol in, + Symbol out, + const Shape& logical_shape) { + using namespace private_details; + Symbol src_nd_sbp = in->nd_sbp(); + Symbol dst_nd_sbp = out->nd_sbp(); + const auto& hierarchy = in->placement()->hierarchy(); + std::shared_ptr shape; + + std::tie(shape, src_nd_sbp, dst_nd_sbp) = *JUST(CalcDecomposableEquivalentShapeAndNdSbpPair( + logical_shape, *hierarchy, src_nd_sbp, dst_nd_sbp)); + + std::function>(int64_t)> ExclusiveSrcNdSbpAxis4DstNdSbpAxis; + JUST(MakeExclusiveSrcNdSbpAxis4DstNdSbpAxis(&ExclusiveSrcNdSbpAxis4DstNdSbpAxis, src_nd_sbp, + dst_nd_sbp)); + bool is_acyclic = JUST( + IsNdSbpBoxingAcyclic(src_nd_sbp->sbp_parallel_size(), ExclusiveSrcNdSbpAxis4DstNdSbpAxis)); + CHECK_OR_RETURN(is_acyclic) << Error::UnimplementedError() + << GetCyclicBoxingDebugString(src_nd_sbp, dst_nd_sbp, + ExclusiveSrcNdSbpAxis4DstNdSbpAxis); + return Maybe::Ok(); +} + } // namespace decltype(CheckIsNdSbpBoxingAcyclic) CheckIsNdSbpBoxingAcyclic = DECORATE(&RawCheckIsNdSbpBoxingAcyclic, ThreadLocal); +decltype(CheckIsNdSbpBoxingAcyclicAfterDecompose) CheckIsNdSbpBoxingAcyclicAfterDecompose = + DECORATE(&RawCheckIsNdSbpBoxingAcyclicAfterDecompose, ThreadLocalCopiable); + Maybe>> GetBroadcastGroup( Symbol src_parallel_desc, Symbol dst_parallel_desc) { return CachedBroadcastGroup(src_parallel_desc, dst_parallel_desc, true); diff --git a/oneflow/core/framework/placement_sbp_util.h b/oneflow/core/framework/placement_sbp_util.h index adf883e1b63..dd59a9efcfe 100644 --- a/oneflow/core/framework/placement_sbp_util.h +++ b/oneflow/core/framework/placement_sbp_util.h @@ -76,6 +76,10 @@ Maybe> CalcSubConsistentTensorMeta( extern Maybe (*CheckIsNdSbpBoxingAcyclic)(Symbol in, Symbol out); +extern Maybe (*CheckIsNdSbpBoxingAcyclicAfterDecompose)(Symbol in, + Symbol out, + const Shape& logical_shape); + static constexpr auto* GetSubConsistentTensorMeta = DECORATE(&private_details::CalcSubConsistentTensorMeta, ThreadLocal); From b179fce4bf59f72a735f1eb0b772cfffa634d1d8 Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 09:46:39 +0800 Subject: [PATCH 02/12] rename func --- .../core/boxing/symmetric_acyclic_nd_sbp_boxing.cpp | 2 +- oneflow/core/framework/placement_sbp_util.cpp | 10 +++++----- oneflow/core/framework/placement_sbp_util.h | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/oneflow/core/boxing/symmetric_acyclic_nd_sbp_boxing.cpp b/oneflow/core/boxing/symmetric_acyclic_nd_sbp_boxing.cpp index 52baa4aec75..58d13f8eb46 100644 --- a/oneflow/core/boxing/symmetric_acyclic_nd_sbp_boxing.cpp +++ b/oneflow/core/boxing/symmetric_acyclic_nd_sbp_boxing.cpp @@ -72,7 +72,7 @@ Maybe CheckSymmetricAcyclicNdSbpBoxing(Symbol in, Symbolnd_sbp() != out->nd_sbp()); CHECK_EQ_OR_RETURN(in->nd_sbp()->sbp_parallel_size(), out->nd_sbp()->sbp_parallel_size()); CHECK_GT_OR_RETURN(in->nd_sbp()->sbp_parallel_size(), 1); - JUST(CheckIsNdSbpBoxingAcyclicAfterDecompose(in, out, logical_shape)); + JUST(CheckIsNdSbpBoxingAcyclicWithDecompose(in, out, logical_shape)); return Maybe::Ok(); } diff --git a/oneflow/core/framework/placement_sbp_util.cpp b/oneflow/core/framework/placement_sbp_util.cpp index 15f151b1ae5..b6f29d86f5c 100644 --- a/oneflow/core/framework/placement_sbp_util.cpp +++ b/oneflow/core/framework/placement_sbp_util.cpp @@ -629,9 +629,9 @@ Maybe RawCheckIsNdSbpBoxingAcyclic(Symbol in, Symbol::Ok(); } -Maybe RawCheckIsNdSbpBoxingAcyclicAfterDecompose(Symbol in, - Symbol out, - const Shape& logical_shape) { +Maybe RawCheckIsNdSbpBoxingAcyclicWithDecompose(Symbol in, + Symbol out, + const Shape& logical_shape) { using namespace private_details; Symbol src_nd_sbp = in->nd_sbp(); Symbol dst_nd_sbp = out->nd_sbp(); @@ -657,8 +657,8 @@ Maybe RawCheckIsNdSbpBoxingAcyclicAfterDecompose(Symbol in, decltype(CheckIsNdSbpBoxingAcyclic) CheckIsNdSbpBoxingAcyclic = DECORATE(&RawCheckIsNdSbpBoxingAcyclic, ThreadLocal); -decltype(CheckIsNdSbpBoxingAcyclicAfterDecompose) CheckIsNdSbpBoxingAcyclicAfterDecompose = - DECORATE(&RawCheckIsNdSbpBoxingAcyclicAfterDecompose, ThreadLocalCopiable); +decltype(CheckIsNdSbpBoxingAcyclicWithDecompose) CheckIsNdSbpBoxingAcyclicWithDecompose = + DECORATE(&RawCheckIsNdSbpBoxingAcyclicWithDecompose, ThreadLocalCopiable); Maybe>> GetBroadcastGroup( Symbol src_parallel_desc, Symbol dst_parallel_desc) { diff --git a/oneflow/core/framework/placement_sbp_util.h b/oneflow/core/framework/placement_sbp_util.h index dd59a9efcfe..6444ccc5426 100644 --- a/oneflow/core/framework/placement_sbp_util.h +++ b/oneflow/core/framework/placement_sbp_util.h @@ -76,9 +76,9 @@ Maybe> CalcSubConsistentTensorMeta( extern Maybe (*CheckIsNdSbpBoxingAcyclic)(Symbol in, Symbol out); -extern Maybe (*CheckIsNdSbpBoxingAcyclicAfterDecompose)(Symbol in, - Symbol out, - const Shape& logical_shape); +extern Maybe (*CheckIsNdSbpBoxingAcyclicWithDecompose)(Symbol in, + Symbol out, + const Shape& logical_shape); static constexpr auto* GetSubConsistentTensorMeta = DECORATE(&private_details::CalcSubConsistentTensorMeta, ThreadLocal); From ec1923653218d3c57b0f6f1910a0173ce107c372 Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 09:48:33 +0800 Subject: [PATCH 03/12] minor fix --- .../boxing/eager_boxing_interpreter_mgr.cpp | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp index ad98d71897a..fe07175a2e4 100644 --- a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp +++ b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp @@ -61,7 +61,7 @@ Maybe OneToNBoxingExpr() { | JUST(BoxingExpr("identity")))); } -Maybe SymmetricNdToNdBoxingExpr() { +Maybe SymmetricNDimToNDimBoxingExpr() { return JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) | JUST(BoxingExpr( JUST(InPlacementAndRepeatFirstSbp()), @@ -72,12 +72,12 @@ Maybe SymmetricNdToNdBoxingExpr() { Maybe SymmetricOneDimToNDimBoxingExpr() { return JUST(BoxingExpr(JUST(UnflattenInHierarchy()), JUST(BoxingExpr("unflatten-hierarchy")), - JUST(SymmetricNdToNdBoxingExpr()) | JUST(BoxingExpr("identity")))); + JUST(SymmetricNDimToNDimBoxingExpr()) | JUST(BoxingExpr("identity")))); } Maybe SymmetricNDimToOneDimBoxingExpr() { return JUST(BoxingExpr(JUST(UnflattenOutHierarchy()), - JUST(SymmetricNdToNdBoxingExpr()) | JUST(BoxingExpr("identity")), + JUST(SymmetricNDimToNDimBoxingExpr()) | JUST(BoxingExpr("identity")), JUST(BoxingExpr("flatten-hierarchy")))); } @@ -113,21 +113,21 @@ Maybe GenericBoxingExpr() { } Maybe RawMainBoxingExpr() { - const auto& core = JUST(BoxingExpr("identity")) | JUST(BoxingExpr("cuda-copy-h2d")) - | JUST(BoxingExpr("cuda-copy-d2h")) | JUST(BoxingExpr("nccl-p-to-b")) - | JUST(BoxingExpr("ccl-p-to-b")) | JUST(BoxingExpr("nccl-s-to-b")) - | JUST(BoxingExpr("ccl-s-to-b")) | JUST(BoxingExpr("nccl-s-to-s")) - | JUST(BoxingExpr("ccl-s-to-s")) | JUST(BoxingExpr("nccl-p-to-s")) - | JUST(BoxingExpr("ccl-p-to-s")) | JUST(BoxingExpr("symmetric-b-to-p")) - | JUST(BoxingExpr("symmetric-b-to-s")) | JUST(BoxingExpr("symmetric-s-to-p")) - | JUST(SymmetricOneDimXToBBoxingExpr()) - | JUST(ASymmetricOneDimXToBBoxingExpr()) | JUST(BoxingExpr("naive-s-to-s")) - | JUST(BoxingExpr("naive-1-to-1")) | JUST(BoxingExpr("naive-s-to-b")) - | JUST(BoxingExpr("naive-b-to-s")) | JUST(BoxingExpr("naive-p-to-b")) - | JUST(BoxingExpr("naive-p-to-s")) | JUST(OneToNBoxingExpr()) - | JUST(NToOneBoxingExpr()) | JUST(GenericBoxingExpr()) - | JUST(SymmetricNdToNdBoxingExpr()) | JUST(SymmetricOneDimToNDimBoxingExpr()) - | JUST(SymmetricNDimToOneDimBoxingExpr()); + const auto& core = + JUST(BoxingExpr("identity")) | JUST(BoxingExpr("cuda-copy-h2d")) + | JUST(BoxingExpr("cuda-copy-d2h")) | JUST(BoxingExpr("nccl-p-to-b")) + | JUST(BoxingExpr("ccl-p-to-b")) | JUST(BoxingExpr("nccl-s-to-b")) + | JUST(BoxingExpr("ccl-s-to-b")) | JUST(BoxingExpr("nccl-s-to-s")) + | JUST(BoxingExpr("ccl-s-to-s")) | JUST(BoxingExpr("nccl-p-to-s")) + | JUST(BoxingExpr("ccl-p-to-s")) | JUST(BoxingExpr("symmetric-b-to-p")) + | JUST(BoxingExpr("symmetric-b-to-s")) | JUST(BoxingExpr("symmetric-s-to-p")) + | JUST(SymmetricOneDimXToBBoxingExpr()) | JUST(ASymmetricOneDimXToBBoxingExpr()) + | JUST(BoxingExpr("naive-s-to-s")) | JUST(BoxingExpr("naive-1-to-1")) + | JUST(BoxingExpr("naive-s-to-b")) | JUST(BoxingExpr("naive-b-to-s")) + | JUST(BoxingExpr("naive-p-to-b")) | JUST(BoxingExpr("naive-p-to-s")) + | JUST(OneToNBoxingExpr()) | JUST(NToOneBoxingExpr()) | JUST(GenericBoxingExpr()) + | JUST(SymmetricNDimToNDimBoxingExpr()) | JUST(SymmetricOneDimToNDimBoxingExpr()) + | JUST(SymmetricNDimToOneDimBoxingExpr()); return core | JUST(OptionalCudaCopy(core)); } From bc6d616ee7518bf934a085fd9df77f8e5085941f Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 10:15:50 +0800 Subject: [PATCH 04/12] solve comment --- .../boxing/eager_boxing_interpreter_mgr.cpp | 57 ++++++++++++------- 1 file changed, 37 insertions(+), 20 deletions(-) diff --git a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp index fe07175a2e4..8c3f7b91315 100644 --- a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp +++ b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp @@ -61,13 +61,15 @@ Maybe OneToNBoxingExpr() { | JUST(BoxingExpr("identity")))); } +Maybe SymmetricCyclicNDimToNDimBoxingExpr() { + return JUST(BoxingExpr(JUST(InPlacementAndRepeatFirstSbp()), + JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")), + JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")))); +} + Maybe SymmetricNDimToNDimBoxingExpr() { return JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) - | JUST(BoxingExpr( - JUST(InPlacementAndRepeatFirstSbp()), - JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) | JUST(BoxingExpr("identity")), - JUST(BoxingExpr("symmetric-acyclic-nd-sbp-to-nd-sbp")) - | JUST(BoxingExpr("identity")))); + | JUST(SymmetricCyclicNDimToNDimBoxingExpr()); } Maybe SymmetricOneDimToNDimBoxingExpr() { @@ -113,21 +115,36 @@ Maybe GenericBoxingExpr() { } Maybe RawMainBoxingExpr() { - const auto& core = - JUST(BoxingExpr("identity")) | JUST(BoxingExpr("cuda-copy-h2d")) - | JUST(BoxingExpr("cuda-copy-d2h")) | JUST(BoxingExpr("nccl-p-to-b")) - | JUST(BoxingExpr("ccl-p-to-b")) | JUST(BoxingExpr("nccl-s-to-b")) - | JUST(BoxingExpr("ccl-s-to-b")) | JUST(BoxingExpr("nccl-s-to-s")) - | JUST(BoxingExpr("ccl-s-to-s")) | JUST(BoxingExpr("nccl-p-to-s")) - | JUST(BoxingExpr("ccl-p-to-s")) | JUST(BoxingExpr("symmetric-b-to-p")) - | JUST(BoxingExpr("symmetric-b-to-s")) | JUST(BoxingExpr("symmetric-s-to-p")) - | JUST(SymmetricOneDimXToBBoxingExpr()) | JUST(ASymmetricOneDimXToBBoxingExpr()) - | JUST(BoxingExpr("naive-s-to-s")) | JUST(BoxingExpr("naive-1-to-1")) - | JUST(BoxingExpr("naive-s-to-b")) | JUST(BoxingExpr("naive-b-to-s")) - | JUST(BoxingExpr("naive-p-to-b")) | JUST(BoxingExpr("naive-p-to-s")) - | JUST(OneToNBoxingExpr()) | JUST(NToOneBoxingExpr()) | JUST(GenericBoxingExpr()) - | JUST(SymmetricNDimToNDimBoxingExpr()) | JUST(SymmetricOneDimToNDimBoxingExpr()) - | JUST(SymmetricNDimToOneDimBoxingExpr()); + // clang-format off + const auto& core = JUST(BoxingExpr("identity")) + | JUST(BoxingExpr("cuda-copy-h2d")) + | JUST(BoxingExpr("cuda-copy-d2h")) + | JUST(BoxingExpr("nccl-p-to-b")) + | JUST(BoxingExpr("ccl-p-to-b")) + | JUST(BoxingExpr("nccl-s-to-b")) + | JUST(BoxingExpr("ccl-s-to-b")) + | JUST(BoxingExpr("nccl-s-to-s")) + | JUST(BoxingExpr("ccl-s-to-s")) + | JUST(BoxingExpr("nccl-p-to-s")) + | JUST(BoxingExpr("ccl-p-to-s")) + | JUST(BoxingExpr("symmetric-b-to-p")) + | JUST(BoxingExpr("symmetric-b-to-s")) + | JUST(BoxingExpr("symmetric-s-to-p")) + | JUST(SymmetricOneDimXToBBoxingExpr()) + | JUST(ASymmetricOneDimXToBBoxingExpr()) + | JUST(BoxingExpr("naive-s-to-s")) + | JUST(BoxingExpr("naive-1-to-1")) + | JUST(BoxingExpr("naive-s-to-b")) + | JUST(BoxingExpr("naive-b-to-s")) + | JUST(BoxingExpr("naive-p-to-b")) + | JUST(BoxingExpr("naive-p-to-s")) + | JUST(OneToNBoxingExpr()) + | JUST(NToOneBoxingExpr()) + | JUST(GenericBoxingExpr()) + | JUST(SymmetricNDimToNDimBoxingExpr()) + | JUST(SymmetricOneDimToNDimBoxingExpr()) + | JUST(SymmetricNDimToOneDimBoxingExpr()); + // clang-format off return core | JUST(OptionalCudaCopy(core)); } From 7d6b72a7dbbd459c8fac489b73d787b758047f47 Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 10:31:25 +0800 Subject: [PATCH 05/12] minor fix --- oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp index 8c3f7b91315..46ce575050e 100644 --- a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp +++ b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp @@ -144,7 +144,7 @@ Maybe RawMainBoxingExpr() { | JUST(SymmetricNDimToNDimBoxingExpr()) | JUST(SymmetricOneDimToNDimBoxingExpr()) | JUST(SymmetricNDimToOneDimBoxingExpr()); - // clang-format off + // clang-format on return core | JUST(OptionalCudaCopy(core)); } From e2156900f22aed557b68d2773a41b1400bb76fca Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 10:47:00 +0800 Subject: [PATCH 06/12] support_nd_sbp_dim_reduce --- .../boxing/eager_boxing_interpreter_mgr.cpp | 3 +- .../core/boxing/nd_sbp_dim_reduce_boxing.cpp | 99 +++++++++++++++++++ 2 files changed, 101 insertions(+), 1 deletion(-) create mode 100644 oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp diff --git a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp index 46ce575050e..961a550c85c 100644 --- a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp +++ b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp @@ -143,7 +143,8 @@ Maybe RawMainBoxingExpr() { | JUST(GenericBoxingExpr()) | JUST(SymmetricNDimToNDimBoxingExpr()) | JUST(SymmetricOneDimToNDimBoxingExpr()) - | JUST(SymmetricNDimToOneDimBoxingExpr()); + | JUST(SymmetricNDimToOneDimBoxingExpr()) + | JUST(BoxingExpr("nd-sbp-dim-reduce")); // clang-format on return core | JUST(OptionalCudaCopy(core)); } diff --git a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp new file mode 100644 index 00000000000..f6e646d0acc --- /dev/null +++ b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp @@ -0,0 +1,99 @@ +/* +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "oneflow/core/boxing/eager_boxing_interpreter_mgr.h" +#include "oneflow/core/control/global_process_ctx.h" +#include "oneflow/core/framework/nd_sbp.h" +#include "oneflow/core/framework/device.h" +#include "oneflow/core/functional/functional.h" +#include "oneflow/core/graph/boxing/hierarchical_sub_task_graph_builder_impl.h" +#include "oneflow/core/common/decorator.h" + +namespace oneflow { + +namespace { + +Maybe, Symbol>> RawInOutPlacedNdSbpDimReduce( + Symbol in, Symbol out) { + // reduce hierarchy + ParallelDesc reduced_in_placement = *in->placement(); + ParallelDesc reduced_out_placement = *out->placement(); + cfg::NdSbp reduced_in_nd_sbp; + cfg::NdSbp reduced_out_nd_sbp; + InOutParallelDimReduce(*in->placement(), *out->placement(), *in->nd_sbp(), *out->nd_sbp(), + &reduced_in_placement, &reduced_out_placement, &reduced_in_nd_sbp, + &reduced_out_nd_sbp); + return std::make_tuple( + JUST(PlacedNdSbp::New(SymbolOf(reduced_in_nd_sbp), SymbolOf(reduced_in_placement))), + JUST(PlacedNdSbp::New(SymbolOf(reduced_out_nd_sbp), SymbolOf(reduced_out_placement)))); +} + +constexpr auto* InOutPlacedNdSbpDimReduce = DECORATE(&RawInOutPlacedNdSbpDimReduce, ThreadLocal); + +Maybe RawCheckParallelDimReduce(Symbol in, Symbol out, + const Shape& logical_shape) { + Symbol reduced_in; + Symbol reduced_out; + std::tie(reduced_in, reduced_out) = *JUST(InOutPlacedNdSbpDimReduce(in, out)); + + if (reduced_in->nd_sbp()->sbp_parallel_size() == 1 + && reduced_in->nd_sbp()->sbp_parallel_size() == 1) { + return Maybe::Ok(); + } + if (reduced_in->placement() == reduced_out->placement()) { return Maybe::Ok(); } + return Error::CheckFailedError(); +} + +static constexpr auto* CheckParallelDimReduce = + DECORATE(&RawCheckParallelDimReduce, ThreadLocalCopiable); + +} // namespace + +Maybe ParallelDimReduce(const std::shared_ptr& tensor, + Symbol in, Symbol out) { + const auto& tensor_nd_sbp = JUST(tensor->nd_sbp()); + CHECK_OR_RETURN(tensor_nd_sbp == in->nd_sbp()); + const auto& tensor_placement = JUST(tensor->parallel_desc()); + CHECK_OR_RETURN(tensor_placement == in->placement()); + + Symbol reduced_in; + Symbol reduced_out; + std::tie(reduced_in, reduced_out) = *JUST(InOutPlacedNdSbpDimReduce(in, out)); + + const std::shared_ptr& local_tensor = JUST(tensor->cur_rank_phy_tensor()); + + std::shared_ptr reduced_in_tensor = JUST(one::functional::LocalToConsistent( + local_tensor, reduced_out->placement(), *JUST(GetSbpList(reduced_out->nd_sbp())), + *tensor->shape(), tensor->dtype())); + + const auto& boxing_interpreter = + JUST(Global::Get()->GetEagerBoxingInterpreter( + reduced_in->nd_sbp(), reduced_out->nd_sbp(), reduced_in->placement(), + reduced_out->placement(), *tensor->shape())); + std::shared_ptr reduced_out_tensor = JUST( + boxing_interpreter->Interpret(reduced_in_tensor, reduced_in->nd_sbp(), reduced_out->nd_sbp(), + reduced_in->placement(), reduced_out->placement())); + + const std::shared_ptr& reduced_out_local_tensor = + JUST(reduced_out_tensor->cur_rank_phy_tensor()); + + return JUST(one::functional::LocalToConsistent(reduced_out_local_tensor, out->placement(), + *JUST(GetSbpList(out->nd_sbp())), *tensor->shape(), + tensor->dtype())); +} + +COMMAND(RegisterBoxingFunction("nd-sbp-dim-reduce", CheckParallelDimReduce, &ParallelDimReduce)); + +} // namespace oneflow From 01537b56f98b8c36d567dc8bc0d0cb5b7e15a1e2 Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 10:51:06 +0800 Subject: [PATCH 07/12] fix_typo --- oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp index f6e646d0acc..6f449ad924a 100644 --- a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp +++ b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp @@ -75,7 +75,7 @@ Maybe ParallelDimReduce(const std::shared_ptr& tensor, const std::shared_ptr& local_tensor = JUST(tensor->cur_rank_phy_tensor()); std::shared_ptr reduced_in_tensor = JUST(one::functional::LocalToConsistent( - local_tensor, reduced_out->placement(), *JUST(GetSbpList(reduced_out->nd_sbp())), + local_tensor, reduced_in->placement(), *JUST(GetSbpList(reduced_in->nd_sbp())), *tensor->shape(), tensor->dtype())); const auto& boxing_interpreter = From 5be37bb73078a291c94a2954ce1f0b302d301042 Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 11:10:40 +0800 Subject: [PATCH 08/12] add test case --- .../oneflow/test/modules/test_eager_boxing.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/python/oneflow/test/modules/test_eager_boxing.py b/python/oneflow/test/modules/test_eager_boxing.py index da710df1d12..27044bd3a4c 100644 --- a/python/oneflow/test/modules/test_eager_boxing.py +++ b/python/oneflow/test/modules/test_eager_boxing.py @@ -3272,5 +3272,38 @@ def test_eager_consistent_cast_1d_uneven_split(test_case): _test_eager_consistent_cast_1d_uneven_split(test_case, *arg) +def _test_eager_consistent_n_dim_reduce(test_case, device_type, sbp): + np.random.seed(10) + np_arr = np.random.uniform(-1e-05, 1e-05, (16, 32)) + placement0 = flow.placement(device_type, {0: [0]}, (1, 1)) + placement1 = flow.placement(device_type, {0: range(4)}, (2, 2)) + + # oneflow.placement(device_type="cuda", machine_device_ids={0 : [0]}, hierarchy=(1, 1)) + # (sbp, sbp) + x = flow.tensor( + np_arr, placement=placement1, sbp=[sbp, sbp], requires_grad=False, + ) + + # oneflow.placement(device_type="cuda", machine_device_ids={0 : [0, 1, 2, 3]}, hierarchy=(2, 2)) + # (sbp, sbp) + y = x.to_consistent(placement=placement1, sbp=[sbp, sbp]) + + z = y.to_consistent(placement=placement1, sbp=[flow.sbp.broadcast, flow.sbp.broadcast]) + test_case.assertEqual(z.placement, placement1) + + test_case.assertTrue(np.allclose(z.to_local().numpy(), np_arr)) + + +@flow.unittest.skip_unless_1n4d() +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +class TestEagerConsistentCastNDimReduceBoxing(flow.unittest.TestCase): + def test_eager_consistent_n_dim_reduce(test_case): + arg_dict = OrderedDict() + arg_dict["device_type"] = ["cpu", "cuda"] + arg_dict["sbp"] = [flow.broadcast, flow.split(0), flow.split(1)] + for arg in GenArgList(arg_dict): + _test_eager_consistent_n_dim_reduce(test_case, *arg) + + if __name__ == "__main__": unittest.main() From 01232d1572399f1020ebb35c8f68998e4e5d512e Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 12:40:21 +0800 Subject: [PATCH 09/12] fix bug --- .../core/boxing/nd_sbp_dim_reduce_boxing.cpp | 3 ++- .../oneflow/test/modules/test_eager_boxing.py | 21 +++++++++++-------- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp index 6f449ad924a..a5469252c62 100644 --- a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp +++ b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp @@ -14,7 +14,6 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "oneflow/core/boxing/eager_boxing_interpreter_mgr.h" -#include "oneflow/core/control/global_process_ctx.h" #include "oneflow/core/framework/nd_sbp.h" #include "oneflow/core/framework/device.h" #include "oneflow/core/functional/functional.h" @@ -44,6 +43,8 @@ constexpr auto* InOutPlacedNdSbpDimReduce = DECORATE(&RawInOutPlacedNdSbpDimRedu Maybe RawCheckParallelDimReduce(Symbol in, Symbol out, const Shape& logical_shape) { + CHECK_OR_RETURN(in->nd_sbp()->sbp_parallel_size() > 1 || out->nd_sbp()->sbp_parallel_size() > 1); + CHECK_EQ_OR_RETURN(in->placement()->device_tag(), out->placement()->device_tag()); Symbol reduced_in; Symbol reduced_out; std::tie(reduced_in, reduced_out) = *JUST(InOutPlacedNdSbpDimReduce(in, out)); diff --git a/python/oneflow/test/modules/test_eager_boxing.py b/python/oneflow/test/modules/test_eager_boxing.py index 27044bd3a4c..632eb038255 100644 --- a/python/oneflow/test/modules/test_eager_boxing.py +++ b/python/oneflow/test/modules/test_eager_boxing.py @@ -3272,23 +3272,25 @@ def test_eager_consistent_cast_1d_uneven_split(test_case): _test_eager_consistent_cast_1d_uneven_split(test_case, *arg) -def _test_eager_consistent_n_dim_reduce(test_case, device_type, sbp): +def _test_eager_consistent_n_dim_reduce(test_case, device_type, src_sbp, dst_sbp): np.random.seed(10) np_arr = np.random.uniform(-1e-05, 1e-05, (16, 32)) placement0 = flow.placement(device_type, {0: [0]}, (1, 1)) placement1 = flow.placement(device_type, {0: range(4)}, (2, 2)) - + # oneflow.placement(device_type="cuda", machine_device_ids={0 : [0]}, hierarchy=(1, 1)) - # (sbp, sbp) + # (src_sbp, src_sbp) x = flow.tensor( - np_arr, placement=placement1, sbp=[sbp, sbp], requires_grad=False, + np_arr, placement=placement0, sbp=[src_sbp, src_sbp], requires_grad=False, ) # oneflow.placement(device_type="cuda", machine_device_ids={0 : [0, 1, 2, 3]}, hierarchy=(2, 2)) - # (sbp, sbp) - y = x.to_consistent(placement=placement1, sbp=[sbp, sbp]) - - z = y.to_consistent(placement=placement1, sbp=[flow.sbp.broadcast, flow.sbp.broadcast]) + # (dst_sbp, dst_sbp) + y = x.to_consistent(placement=placement1, sbp=[dst_sbp, dst_sbp]) + + z = y.to_consistent( + placement=placement1, sbp=[flow.sbp.broadcast, flow.sbp.broadcast] + ) test_case.assertEqual(z.placement, placement1) test_case.assertTrue(np.allclose(z.to_local().numpy(), np_arr)) @@ -3300,7 +3302,8 @@ class TestEagerConsistentCastNDimReduceBoxing(flow.unittest.TestCase): def test_eager_consistent_n_dim_reduce(test_case): arg_dict = OrderedDict() arg_dict["device_type"] = ["cpu", "cuda"] - arg_dict["sbp"] = [flow.broadcast, flow.split(0), flow.split(1)] + arg_dict["src_sbp"] = [flow.sbp.broadcast, flow.sbp.split(0), flow.sbp.split(1)] + arg_dict["dst_sbp"] = [flow.sbp.broadcast, flow.sbp.split(0), flow.sbp.split(1)] for arg in GenArgList(arg_dict): _test_eager_consistent_n_dim_reduce(test_case, *arg) From c9d42325e37f68da77ae22336bf22a079b5da19e Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 17:39:49 +0800 Subject: [PATCH 10/12] fix bug --- oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp index a5469252c62..69bdaec28a9 100644 --- a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp +++ b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp @@ -50,7 +50,7 @@ Maybe RawCheckParallelDimReduce(Symbol in, Symbolnd_sbp()->sbp_parallel_size() == 1 - && reduced_in->nd_sbp()->sbp_parallel_size() == 1) { + && reduced_out->nd_sbp()->sbp_parallel_size() == 1) { return Maybe::Ok(); } if (reduced_in->placement() == reduced_out->placement()) { return Maybe::Ok(); } From d7c96a1c409a7215d8beab08528b7ea5787b07ce Mon Sep 17 00:00:00 2001 From: clackhan Date: Tue, 11 Jan 2022 17:45:58 +0800 Subject: [PATCH 11/12] refine --- oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp index 961a550c85c..a987d3d4a52 100644 --- a/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp +++ b/oneflow/core/boxing/eager_boxing_interpreter_mgr.cpp @@ -141,10 +141,10 @@ Maybe RawMainBoxingExpr() { | JUST(OneToNBoxingExpr()) | JUST(NToOneBoxingExpr()) | JUST(GenericBoxingExpr()) + | JUST(BoxingExpr("nd-sbp-dim-reduce")) | JUST(SymmetricNDimToNDimBoxingExpr()) | JUST(SymmetricOneDimToNDimBoxingExpr()) - | JUST(SymmetricNDimToOneDimBoxingExpr()) - | JUST(BoxingExpr("nd-sbp-dim-reduce")); + | JUST(SymmetricNDimToOneDimBoxingExpr()); // clang-format on return core | JUST(OptionalCudaCopy(core)); } From af1428803082834f53adf11e315cd2f6155b87b8 Mon Sep 17 00:00:00 2001 From: clackhan Date: Wed, 12 Jan 2022 09:55:10 +0800 Subject: [PATCH 12/12] fix dead loop error --- oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp index 69bdaec28a9..d3b5746cf58 100644 --- a/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp +++ b/oneflow/core/boxing/nd_sbp_dim_reduce_boxing.cpp @@ -53,7 +53,10 @@ Maybe RawCheckParallelDimReduce(Symbol in, Symbolnd_sbp()->sbp_parallel_size() == 1) { return Maybe::Ok(); } - if (reduced_in->placement() == reduced_out->placement()) { return Maybe::Ok(); } + if ((reduced_in->placement() != in->placement() || reduced_out->placement() != out->placement()) + && reduced_in->placement() == reduced_out->placement()) { + return Maybe::Ok(); + } return Error::CheckFailedError(); }