diff --git a/oneflow/core/framework/op_interpreter/op_interpreter.cpp b/oneflow/core/framework/op_interpreter/op_interpreter.cpp index 88de66d6b44..1af45137f31 100644 --- a/oneflow/core/framework/op_interpreter/op_interpreter.cpp +++ b/oneflow/core/framework/op_interpreter/op_interpreter.cpp @@ -23,6 +23,7 @@ limitations under the License. #include "oneflow/core/framework/op_expr_grad_function.h" #include "oneflow/core/framework/tensor.h" #include "oneflow/core/framework/tensor_tuple.h" +#include "oneflow/core/job/lazy_mode.h" namespace oneflow { namespace one { @@ -94,7 +95,8 @@ Maybe AutogradInterpreter::Apply(const OpExpr& op_expr, const TensorTuple& autograd::AutoGradMode mode(false); JUST(internal_->Apply(op_expr, inputs, outputs, ctx)); } - if (requires_grad) { + // In lazy mode the backward compute graph is constructed in passes, so autograd is skipped when lazy mode is enabled. + if (requires_grad && !LazyMode::is_enabled()) { const auto& grad_closure = JUST(op_expr.GetOrCreateOpGradClosure()); JUST(grad_closure->Capture(inputs, *outputs, ctx));