Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add exe graph physical shape check msg #8002

Merged
merged 55 commits into master from fea/graph_check_msg
Jun 13, 2022
Merged
Show file tree
Hide file tree
Changes from 51 commits
Commits
Show all changes
55 commits
Select commit Hold shift + click to select a range
e6eb419
fix index select op in graph
strint Apr 11, 2022
781b3e3
add exe graph physical shape check msg
strint Apr 11, 2022
1e979fc
improve the debug information for the python stack trace
xiacijie Apr 15, 2022
e5fc2f1
remove parens
xiacijie Apr 17, 2022
b53d43b
update
xiacijie Apr 18, 2022
93679c5
resolve PR comments
xiacijie Apr 18, 2022
8680a94
update
xiacijie Apr 18, 2022
12d881e
update graph debug test file.
xiacijie Apr 19, 2022
3382c8d
Merge branch 'improve_python_stack_debug_info' into fea/graph_check_msg
xiacijie Apr 21, 2022
dcb50f5
restore self._debug in class Graph and class ModuleBlock
xiacijie Apr 21, 2022
8c57b94
Merge branch 'improve_python_stack_debug_info' into fea/graph_check_msg
xiacijie Apr 21, 2022
40d9bc4
Do not shorten the stack frame string if it is in debug mode
xiacijie Apr 21, 2022
07a8d77
delete TODOs
xiacijie Apr 21, 2022
f5a1482
disable conv3d test (#7969)
daquexian Apr 7, 2022
8469b8e
skip layernorm random_data_warp test (#7941)
guo-ran Apr 7, 2022
b542ff8
Lock click version (#7967)
jackalcooper Apr 7, 2022
30fcf11
add global avgpool unittest (#7585)
hjchen2 Apr 7, 2022
3f821da
fix (#7978)
guo-ran Apr 7, 2022
07e6312
Support negative dim in scatter op (#7934)
mosout Apr 8, 2022
5fac7ce
run barrier callback in BarrierPhyInstrOperand::~BarrierPhyInstrOpera…
lixinqi Apr 8, 2022
ca1aee2
Fix one hot scalar tensor bug (#7975)
BBuf Apr 8, 2022
d256143
support ctor np array from of tensor (#7970)
liufengwei0103 Apr 8, 2022
58a6246
add_manual_seed_all_api (#7957)
clackhan Apr 9, 2022
c3a3f0c
one_embedding add doc string (#7902)
guo-ran Apr 9, 2022
fc455bb
Support numpy scalar parameters (#7935)
wyg1997 Apr 9, 2022
ef6dbb1
fix tensor_scatter_nd_update (#7953)
simonJJJ Apr 9, 2022
a1f23bb
fix one_embedding adam (#7974)
guo-ran Apr 10, 2022
0fae143
speed test with score (#7990)
daquexian Apr 10, 2022
ab1eed9
Feat/graph del by ref (#7857)
strint Apr 10, 2022
273e44c
[PersistentTable] Fix num blocks (#7986)
liujuncheng Apr 10, 2022
54d7b6b
Add auto benchmark for flowvision (#7806)
jackalcooper Apr 10, 2022
a77403d
[PersistentTable] Async write (#7946)
liujuncheng Apr 11, 2022
1337d81
save log in separate dir by default (#7825)
daquexian Apr 11, 2022
ff7b294
fix index select op in graph
strint Apr 11, 2022
c76b2e3
add exe graph physical shape check msg
strint Apr 11, 2022
805ce70
improve the debug information for the python stack trace
xiacijie Apr 15, 2022
a94ff63
remove parens
xiacijie Apr 17, 2022
318b783
update
xiacijie Apr 18, 2022
49d8407
resolve PR comments
xiacijie Apr 18, 2022
a15771c
update
xiacijie Apr 18, 2022
f00b857
update graph debug test file.
xiacijie Apr 19, 2022
36fb2a8
restore self._debug in class Graph and class ModuleBlock
xiacijie Apr 21, 2022
a408d74
Do not shorten the stack frame string if it is in debug mode
xiacijie Apr 21, 2022
7c13d37
delete TODOs
xiacijie Apr 21, 2022
baadf60
Merge branch 'fea/graph_check_msg' of github.com:Oneflow-Inc/oneflow …
xiacijie Apr 24, 2022
28833b7
Merge branch 'master' into fea/graph_check_msg
xiacijie Apr 26, 2022
1d5e196
Revert "Merge branch 'master' into fea/graph_check_msg"
xiacijie Apr 26, 2022
83055c0
Revert "Revert "Merge branch 'master' into fea/graph_check_msg""
xiacijie Apr 26, 2022
69a0ffa
update
xiacijie Apr 26, 2022
2161a39
resolve conflicts
xiacijie Apr 26, 2022
6e708e3
resolve conflicts
xiacijie Apr 26, 2022
0acf7a7
print op loc if check failed
xiacijie May 5, 2022
b5954e4
Merge branch 'master' into fea/graph_check_msg
xiacijie May 5, 2022
cfeec40
Merge branch 'master' into fea/graph_check_msg
strint Jun 13, 2022
f1da60f
Merge branch 'master' into fea/graph_check_msg
mergify[bot] Jun 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion oneflow/core/common/error_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ limitations under the License.
#include <sstream>
#include "oneflow/core/common/error_util.h"
#include "oneflow/core/common/util.h"
#include "oneflow/core/job/graph_scope_vars.h"

namespace oneflow {

Expand Down Expand Up @@ -97,7 +98,9 @@ std::string FormatFunctionOfStackFrame(const std::string& function) {

// msg in stack frame
Maybe<std::string> FormatMsgOfStackFrame(std::string error_msg, bool is_last_stack_frame) {
if (!is_last_stack_frame) { error_msg = *JUST(ShortenMsg(error_msg)); }
const bool debug_mode = GetGraphDebugMode();
// only shorten the message if it is not the last stack frame AND not in debug mode
if (!is_last_stack_frame && !debug_mode) { error_msg = *JUST(ShortenMsg(error_msg)); }
// error_msg of last stack frame come from "<<"
if (is_last_stack_frame) { error_msg = StripSpace(error_msg); }
std::stringstream ss;
Expand Down
6 changes: 4 additions & 2 deletions oneflow/core/graph/exec_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ See the License for the specific language governing permissions and
limitations under the License.
*/
#include "oneflow/core/graph/exec_graph.h"
#include <sstream>
#include "oneflow/core/common/just.h"
#include "oneflow/core/graph/op_graph.h"

namespace oneflow {
Expand Down Expand Up @@ -92,9 +94,9 @@ Maybe<void> CheckPhysicalBlobDesc(
continue;
}
if (*JUST(op.GetParallelDesc4BnInOp(bn)) == *op_parallel_desc) {
JUST(CheckPhysicalBlobDesc(*JUST(GetLogicalBlobDesc(bn)),
JUST_MSG(CheckPhysicalBlobDesc(*JUST(GetLogicalBlobDesc(bn)),
nd_sbp_signature->bn_in_op2nd_sbp().at(bn), *op_parallel_desc,
parallel_ctx, *physical_blob_desc));
parallel_ctx, *physical_blob_desc), std::stringstream() << " check physical shape failed, op name " << op.op_loc());
xiacijie marked this conversation as resolved.
Show resolved Hide resolved
}
}
return Maybe<void>::Ok();
Expand Down