Skip to content

Commit

Permalink
Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into…
Browse files Browse the repository at this point in the history
… fix_reduce_max
  • Loading branch information
shangzhizhou committed Dec 10, 2021
2 parents 0184215 + 11c785a commit af51edd
Show file tree
Hide file tree
Showing 204 changed files with 8,145 additions and 1,585 deletions.
5 changes: 5 additions & 0 deletions paddle/fluid/distributed/fleet_executor/carrier.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ class Carrier final {

bool IsInit() const;

// NOTE: This mutex is locked in the interceptor's RunOps function.
// It prevents forward ops and backward ops from running simultaneously,
// which would otherwise lead to a random hang for some sync ops.
std::mutex run;

DISABLE_COPY_AND_ASSIGN(Carrier);

private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

#include "paddle/fluid/distributed/fleet_executor/compute_interceptor.h"
#include "paddle/fluid/distributed/fleet_executor/carrier.h"

#include "paddle/fluid/distributed/fleet_executor/task_node.h"
#include "paddle/fluid/framework/executor_gc_helper.h"
Expand Down Expand Up @@ -169,6 +170,8 @@ void ComputeInterceptor::ReplyCompletedToUpStream() {
}

void ComputeInterceptor::RunOps() {
Carrier& carrier_instance = Carrier::Instance();
std::unique_lock<std::mutex> lock(carrier_instance.run);
VLOG(3) << "ComputeInterceptor " << interceptor_id_ << " running ops for the "
<< step_ + 1 << " time.";
for (auto op : node_->ops()) {
Expand Down
16 changes: 16 additions & 0 deletions paddle/fluid/eager/accumulation/gradient_accumulation.cc
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,22 @@ class TensorAddFunctor : public boost::static_visitor<> {
}
#endif

// Visitor overload for IPUPlace: gradient accumulation is rejected on this
// place by raising a PermissionDenied error that names the place.
//
// The rejection is the same whether or not PADDLE_WITH_IPU is defined, so the
// overload is defined once, unconditionally, rather than duplicated in both
// arms of a preprocessor conditional.
void operator()(const paddle::platform::IPUPlace& place) {
  PADDLE_THROW(paddle::platform::errors::PermissionDenied(
      "Gradient accumulation on place (%s) "
      "is not supported in imperative mode",
      place));
}

void operator()(const paddle::platform::NPUPinnedPlace& place) {
PADDLE_THROW(paddle::platform::errors::PermissionDenied(
"Gradient accumulation on place (%s) "
Expand Down
Loading

1 comment on commit af51edd

@paddle-bot-old
Copy link

@paddle-bot-old paddle-bot-old bot commented on af51edd Dec 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🕵️ CI failures summary

🔍 PR: #38026 Commit ID: af51edd contains failed CI.

🔹 Failed: PR-CI-CINN

Unknown Failed
2021-12-10 17:33:04 Makefile:140: recipe for target 'all' failed
2021-12-10 17:33:04 make: *** [all] Error 2
2021-12-10 17:33:04 + build_error=2
2021-12-10 17:33:04 + collect_ccache_hits
2021-12-10 17:33:04 ++ ccache -s
2021-12-10 17:33:04 ++ grep 'cache hit rate'
2021-12-10 17:33:04 ++ awk '{print $4}'
2021-12-10 17:33:04 + rate=96.43
2021-12-10 17:33:04 + echo 'ccache hit rate: 96.43%'
2021-12-10 17:33:04 ccache hit rate: 96.43%
2021-12-10 17:33:04 + echo 'ipipe_log_param_Ccache_Hit_Rate: 96.43%'
2021-12-10 17:33:04 + '[' 2 '!=' 0 ']'
2021-12-10 17:33:04 + exit 7
2021-12-10 17:33:04 + EXCODE=7
2021-12-10 17:33:04 + '[' 7 -eq 0 ']'
2021-12-10 17:33:04 + set +x
2021-12-10 17:33:04 Sorry, build failed.
2021-12-10 17:33:04 + exit 7
2021-12-10 17:33:04 {build code state=7}

Please sign in to comment.