
nn.Graph: reuse eager lbn without creating duplicate variable op #6981

Closed
wants to merge 7 commits
Changes from 2 commits
4 changes: 4 additions & 0 deletions oneflow/core/framework/nn_graph.cpp
@@ -22,6 +22,7 @@ limitations under the License.
#include "oneflow/core/framework/instructions_builder.h"
#include "oneflow/core/framework/multi_client_session_context.h"
#include "oneflow/core/framework/nd_sbp.h"
#include "oneflow/core/framework/tensor_name_scope.h"
#include "oneflow/core/functional/functional.h"
#include "oneflow/core/graph/op_graph.h"
#include "oneflow/core/job/compiler.h"
@@ -253,6 +254,9 @@ Maybe<void> NNGraph::CompileAndInitRuntime() {
// TODO(chengcheng): CHECK job valid for each rank.
JUST(CreateAndRegisterNewVariableOpInJobPass());

// NOTE(chengcheng): TensorNameScope needs to be cleared after the current graph build.
one::TensorNameScope::Global()->Clear();

// NOTE(chengcheng): Global<JobDesc> need be clear before GlobalJobDescScope construct.
if (Global<JobDesc>::Get() != nullptr) { Global<JobDesc>::Delete(); }

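The NOTE above is the heart of this file's change: one::TensorNameScope is a process-wide registry, so an entry recorded while building one nn.Graph would otherwise still be visible when the next graph is built. Below is a minimal, runnable sketch of that failure mode, using a plain map as a stand-in for the real scope; the Tensor struct and the lbn string are invented for illustration.

#include <cassert>
#include <memory>
#include <string>
#include <unordered_map>

struct Tensor {};  // hypothetical stand-in for an eager tensor

// Stand-in for the process-global map inside one::TensorNameScope.
std::unordered_map<const Tensor*, std::string> g_scope;

int main() {
  auto t = std::make_shared<Tensor>();

  // Building graph A records an lbn for the eager tensor it captured.
  g_scope[t.get()] = "graph_a.variable_0/out";  // invented lbn format

  // This mirrors the Clear() call added to CompileAndInitRuntime; without it,
  // a later graph build would still find graph A's stale entry for `t`.
  g_scope.clear();
  assert(g_scope.find(t.get()) == g_scope.end());
  return 0;
}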
24 changes: 18 additions & 6 deletions oneflow/core/framework/op_interpreter/lazy_op_interpreter.cpp
@@ -252,6 +252,24 @@ Maybe<void> LazyInterpreter::ApplyImpl(const FeedVariableOpExpr& op_expr, const
const std::shared_ptr<Tensor>& input_tensor = inputs.at(0);
CHECK_OR_RETURN(input_tensor->is_eager());

auto infer_ctx = JUST(GetCurInferCtx());

// Check outputs num and setup output tensor properties.
CHECK_EQ_OR_RETURN(outputs->size(), 1);
CHECK_EQ_OR_RETURN(op_expr.output_size(), 1);
CHECK_OR_RETURN(!(*outputs)[0]);

const std::string& opt_lbn = TensorNameScope::Global()->Lookup(input_tensor);
if (!opt_lbn.empty()) {
// NOTE(chengcheng): This eager tensor has been fed as a variable op before, so we just use the
// lbn, and will NOT create a duplicate variable op again.
(*outputs)[0] = input_tensor;
Contributor: Remember, the plan was to change this to return a lazy tensor here?
VLOG(2) << "Lazy nn.Graph name " << infer_ctx->job().job_conf().job_name()
<< " try to add variable op name : \n: " << op_expr.op_name()
<< " but it has been created as : " << opt_lbn << ". \n So we just reuse this tensor.";
return Maybe<void>::Ok();
}

std::shared_ptr<Scope> scope = JUST(NewScopeWithParallelDescByTensor(input_tensor));

OperatorConf op_conf;
@@ -275,7 +293,6 @@
if (unlikely(l2 != 0.0)) { var_conf->mutable_regularizer()->mutable_l1_l2_conf()->set_l2(l2); }
}

auto infer_ctx = JUST(GetCurInferCtx());
VLOG(2) << "Lazy nn.Graph name " << infer_ctx->job().job_conf().job_name()
<< " try to add op: \n: " << op_conf.DebugString() << std::endl;
OpAttribute op_attr = *JUST(infer_ctx->AddAndInferConsistentOp(op_conf));
@@ -288,11 +305,6 @@ Maybe<void> LazyInterpreter::ApplyImpl(const FeedVariableOpExpr& op_expr, const
int64_t parallel_desc_sym_id = JUST(scope->GetParallelDescSymbolId(op_conf));
auto blob_parallel_desc = JUST(GetSymbol<cfg::ParallelConf, ParallelDesc>(parallel_desc_sym_id));

// Check outputs num and setup output tensor properties.
CHECK_EQ_OR_RETURN(outputs->size(), 1);
CHECK_EQ_OR_RETURN(op_expr.output_size(), 1);
CHECK_OR_RETURN(!(*outputs)[0]);

const std::string obn = "out"; // NOTE(chengcheng): obn is NOT op_expr.indexed_obns
(*outputs)[0] = JUST(BuildTensor(op_attr, obn, blob_parallel_desc, /* is_lazy= */ true,
/* is_local */ input_tensor->is_local()));
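To make the new control flow concrete, here is a condensed, runnable analogue of the reuse path added above. All names and the lbn format are invented, and the real implementation goes through OperatorConf, AddAndInferConsistentOp, and BuildTensor rather than a bare map.

#include <iostream>
#include <memory>
#include <string>
#include <unordered_map>

struct Tensor {};  // hypothetical stand-in for an eager tensor

std::unordered_map<const Tensor*, std::string> g_tensor_names;  // TensorNameScope analogue
int g_variable_op_count = 0;

// Analogue of the patched FeedVariableOpExpr handling: consult the scope
// first, and only create (and record) a variable op on a miss.
std::string FeedVariable(const std::shared_ptr<Tensor>& tensor) {
  auto it = g_tensor_names.find(tensor.get());
  if (it != g_tensor_names.end()) {
    return it->second;  // hit: reuse the recorded lbn, no duplicate variable op
  }
  std::string lbn = "model.variable_" + std::to_string(g_variable_op_count++) + "/out";
  g_tensor_names[tensor.get()] = lbn;  // like Record(): the next feed hits the cache
  return lbn;
}

int main() {
  auto weight = std::make_shared<Tensor>();
  std::cout << FeedVariable(weight) << "\n";  // first feed creates the op
  std::cout << FeedVariable(weight) << "\n";  // second feed reuses the lbn
  std::cout << "variable ops created: " << g_variable_op_count << "\n";  // prints 1
  return 0;
}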
5 changes: 5 additions & 0 deletions oneflow/core/framework/tensor_name_scope.cpp
@@ -42,5 +42,10 @@ void TensorNameScope::Record(const std::shared_ptr<Tensor>& tensor, const std::s
tensor_names_[key] = name;
}

void TensorNameScope::Clear() {
std::lock_guard<std::mutex> lock(mutex_);
tensor_names_.clear();
}

} // namespace one
} // namespace oneflow
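Clear() guards the map with the same mutex_ that Record() presumably holds, so a clear racing against a concurrent record stays well-defined. The toy class below demonstrates the same guard pattern; it is an invented stand-in, not the real TensorNameScope.

#include <mutex>
#include <string>
#include <thread>
#include <unordered_map>

class ToyScope {
 public:
  void Record(const void* key, const std::string& name) {
    std::lock_guard<std::mutex> lock(mutex_);  // same lock as Clear()
    names_[key] = name;
  }
  void Clear() {
    std::lock_guard<std::mutex> lock(mutex_);  // without this, clear() could
    names_.clear();                            // race Record()'s operator[]
  }

 private:
  std::mutex mutex_;
  std::unordered_map<const void*, std::string> names_;
};

int main() {
  ToyScope scope;
  int key = 0;
  std::thread writer([&] {
    for (int i = 0; i < 10000; ++i) scope.Record(&key, "a/out");
  });
  std::thread clearer([&] {
    for (int i = 0; i < 10000; ++i) scope.Clear();
  });
  writer.join();
  clearer.join();
  return 0;
}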
3 changes: 3 additions & 0 deletions oneflow/core/framework/tensor_name_scope.h
@@ -31,6 +31,9 @@ class TensorNameScope {

void Record(const std::shared_ptr<Tensor>& tensor, const std::string& name);

// NOTE(chengcheng): TensorNameScope needs to be cleared after the current graph build.
Contributor: There should be no need to add a usage-site comment here at the interface.
void Clear();

private:
TensorNameScope() : default_tensor_name_("") {}
virtual ~TensorNameScope() = default;
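The private constructor and destructor visible above mark TensorNameScope as a singleton, and the nn_graph.cpp hunk reaches it through Global(). How that accessor is implemented is not shown in this diff; the sketch below is one common pattern (a Meyers singleton), assumed rather than taken from the OneFlow source.

#include <string>

class TensorNameScope {
 public:
  // Assumed accessor: a lazily-constructed, process-wide instance.
  static TensorNameScope* Global() {
    static TensorNameScope scope;
    return &scope;
  }

  void Clear() { /* see tensor_name_scope.cpp above */ }

 private:
  TensorNameScope() : default_tensor_name_("") {}
  virtual ~TensorNameScope() = default;

  std::string default_tensor_name_;
};

int main() {
  TensorNameScope::Global()->Clear();  // the same call the graph-build path makes
  return 0;
}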