diff --git a/oneflow/core/common/error_util.cpp b/oneflow/core/common/error_util.cpp index 86e4b0bee92..89c8fe0e9d6 100644 --- a/oneflow/core/common/error_util.cpp +++ b/oneflow/core/common/error_util.cpp @@ -16,6 +16,7 @@ limitations under the License. #include #include "oneflow/core/common/error_util.h" #include "oneflow/core/common/util.h" +#include "oneflow/core/job/graph_scope_vars.h" namespace oneflow { @@ -97,7 +98,9 @@ std::string FormatFunctionOfStackFrame(const std::string& function) { // msg in stack frame Maybe FormatMsgOfStackFrame(std::string error_msg, bool is_last_stack_frame) { - if (!is_last_stack_frame) { error_msg = *JUST(ShortenMsg(error_msg)); } + const bool debug_mode = GetGraphDebugMode(); + // only shorten the message if it is not the last stack frame AND not in debug mode + if (!is_last_stack_frame && !debug_mode) { error_msg = *JUST(ShortenMsg(error_msg)); } // error_msg of last stack frame come from "<<" if (is_last_stack_frame) { error_msg = StripSpace(error_msg); } std::stringstream ss; diff --git a/oneflow/core/graph/exec_graph.cpp b/oneflow/core/graph/exec_graph.cpp index 6fa2777a1e8..b530f135b28 100644 --- a/oneflow/core/graph/exec_graph.cpp +++ b/oneflow/core/graph/exec_graph.cpp @@ -14,6 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. */ #include "oneflow/core/graph/exec_graph.h" +#include +#include "oneflow/core/common/just.h" #include "oneflow/core/graph/op_graph.h" namespace oneflow { @@ -92,9 +94,10 @@ Maybe CheckPhysicalBlobDesc( continue; } if (*JUST(op.GetParallelDesc4BnInOp(bn)) == *op_parallel_desc) { - JUST(CheckPhysicalBlobDesc(*JUST(GetLogicalBlobDesc(bn)), - nd_sbp_signature->bn_in_op2nd_sbp().at(bn), *op_parallel_desc, - parallel_ctx, *physical_blob_desc)); + JUST_MSG(CheckPhysicalBlobDesc(*JUST(GetLogicalBlobDesc(bn)), + nd_sbp_signature->bn_in_op2nd_sbp().at(bn), *op_parallel_desc, + parallel_ctx, *physical_blob_desc), + std::stringstream() << " check physical shape failed, op name " << op.op_loc()); } } return Maybe::Ok(); @@ -114,15 +117,18 @@ void ExecNode::InferBlobDescs(const ParallelContext* parallel_ctx) { std::bind(&Operator::GetLogicalBlobDesc4Ibn, op().get(), std::placeholders::_1), nd_sbp_signature, parallel_ctx, GetBlobDesc4BnInOp)); } - CHECK_JUST(op_->InferBlobDescsIf(GetBlobDesc4BnInOp, parallel_ctx, &GlobalJobDesc())); + CHECK_JUST_MSG(op_->InferBlobDescsIf(GetBlobDesc4BnInOp, parallel_ctx, &GlobalJobDesc()), + std::stringstream() << " infer blob descs if failed, op name " << op_->op_loc()); if (op_node != nullptr && parallel_ctx->parallel_num() > 1 && nd_sbp_signature != nullptr) { CHECK_JUST(CheckPhysicalBlobDesc( *op(), op()->output_bns(), std::bind(&Operator::GetLogicalBlobDesc4Obn, op().get(), std::placeholders::_1), nd_sbp_signature, parallel_ctx, GetBlobDesc4BnInOp)); } - CHECK_JUST(op_->InferInplaceObn2IbnIf(&mut_inplace_obn2ibn_, &con_inplace_obn2ibn_, - GetBlobDesc4BnInOp, parallel_ctx)); + CHECK_JUST_MSG(op_->InferInplaceObn2IbnIf(&mut_inplace_obn2ibn_, &con_inplace_obn2ibn_, + GetBlobDesc4BnInOp, parallel_ctx), + std::stringstream() + << " infer inplace obn to ibn if failed, op name " << op_->op_loc()); } std::function ExecNode::GetBlobDesc4BnInOpFunc() const {