From a17fa00afbad3c2b1e9243c860377c179cfa41b7 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Fri, 10 Dec 2021 19:55:21 +0800
Subject: [PATCH 01/51] add cmake changes for liboneflow_cpp.so

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 CMakeLists.txt             | 21 +++++++++-------
 cmake/oneflow-config.cmake |  2 +-
 cmake/oneflow.cmake        | 49 +++++++++++++++++++++-----------------
 cmake/third_party.cmake    | 14 +++++++----
 4 files changed, 49 insertions(+), 37 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3c963b59e5e..0ce73e00d3e 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,7 @@ endif()
 option(USE_CLANG_FORMAT "" OFF)
 option(USE_CLANG_TIDY "" OFF)
 option(BUILD_PYTHON "" ON)
+option(BUILD_CPP_API "" ON)
 option(BUILD_MONOLITHIC_LIBONEFLOW "" ON)
 option(BUILD_RDMA "" OFF)
 option(BUILD_CUDA "" ON)
@@ -201,19 +202,21 @@ endif()
 
 if(BUILD_PYTHON)
   set(ONEFLOW_INCLUDE_DIR "${ONEFLOW_PYTHON_DIR}/oneflow/include")
-else() # build_python
-  set(ONEFLOW_INCLUDE_DIR "${PROJECT_BINARY_DIR}/liboneflow/include/oneflow")
-  set(ONEFLOW_LIBRARY_DIR "${PROJECT_BINARY_DIR}/liboneflow/lib")
-  set(ONEFLOW_SHARE_DIR "${PROJECT_BINARY_DIR}/liboneflow/share")
-  make_directory(${ONEFLOW_INCLUDE_DIR})
-  make_directory(${ONEFLOW_LIBRARY_DIR})
-  make_directory(${ONEFLOW_SHARE_DIR})
+endif(BUILD_PYTHON)
+
+if(BUILD_CPP_API)
+  set(LIBONEFLOW_INCLUDE_DIR "${PROJECT_BINARY_DIR}/liboneflow_cpp/include/oneflow")
+  set(LIBONEFLOW_LIBRARY_DIR "${PROJECT_BINARY_DIR}/liboneflow_cpp/lib")
+  set(LIBONEFLOW_SHARE_DIR "${PROJECT_BINARY_DIR}/liboneflow_cpp/share")
+  make_directory(${LIBONEFLOW_INCLUDE_DIR})
+  make_directory(${LIBONEFLOW_LIBRARY_DIR})
+  make_directory(${LIBONEFLOW_SHARE_DIR})
 
   if(BUILD_SHARED_LIBS)
     if(BUILD_MONOLITHIC_LIBONEFLOW)
       set(BUILD_SHARED_LIBS OFF)
     else()
-      set(LIBRARY_OUTPUT_PATH ${ONEFLOW_LIBRARY_DIR})
+      set(LIBRARY_OUTPUT_PATH ${LIBONEFLOW_LIBRARY_DIR})
     endif(BUILD_MONOLITHIC_LIBONEFLOW)
     set(BUILD_SHARED_LIBONEFLOW ON)
   else()
@@ -222,7 +225,7 @@ else() # build_python
     endif()
     set(BUILD_SHARED_LIBONEFLOW OFF)
   endif(BUILD_SHARED_LIBS)
-endif(BUILD_PYTHON)
+endif(BUILD_CPP_API)
 
 include(third_party)
 
diff --git a/cmake/oneflow-config.cmake b/cmake/oneflow-config.cmake
index 99edffe81c8..fddb71ea003 100644
--- a/cmake/oneflow-config.cmake
+++ b/cmake/oneflow-config.cmake
@@ -7,7 +7,7 @@ endif()
 
 set(ONEFLOW_INCLUDE_DIRS ${ONEFLOW_INSTALL_PREFIX}/include)
 
-find_library(ONEFLOW_LIBRARY NAMES oneflow PATHS ${ONEFLOW_INSTALL_PREFIX}/lib REQUIRED)
+find_library(ONEFLOW_LIBRARY NAMES oneflow_cpp PATHS ${ONEFLOW_INSTALL_PREFIX}/lib REQUIRED)
 
 if(NOT TARGET OneFlow::liboneflow) 
   add_library(OneFlow::liboneflow INTERFACE IMPORTED)
diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index 66dc39ea786..2903aaa84f6 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -119,19 +119,18 @@ foreach(oneflow_single_file ${oneflow_all_src})
       list(APPEND of_pyext_obj_cc ${oneflow_single_file})
       set(group_this ON)
     endif()
+  endif(BUILD_PYTHON)
 
-  else() # build_python
-
+  if(BUILD_CPP_API)
     if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/cpp/.*\\.(h|cpp)$")
       if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/cpp/.*_test\\.cpp$")
         list(APPEND of_all_test_cc ${oneflow_single_file})
       else()
-        list(APPEND of_all_obj_cc ${oneflow_single_file})
+        list(APPEND of_cpp_api_obj_cc ${oneflow_single_file})
       endif()
       set(group_this ON)
     endif()
-
-  endif(BUILD_PYTHON)
+  endif(BUILD_CPP_API)
 
   if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/(core|user|xrt|maybe)/.*\\.cpp$")
     if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/(core|user|xrt|maybe)/.*_test\\.cpp$")
@@ -251,21 +250,12 @@ include_directories(${PROJECT_SOURCE_DIR})  # TO FIND: third_party/eigen3/..
 include_directories(${PROJECT_BINARY_DIR})
 
 # cc obj lib
-if(BUILD_PYTHON)
-  oneflow_add_library(oneflow ${of_all_obj_cc})
-else() # build_python
-  if(BUILD_SHARED_LIBONEFLOW)
-    oneflow_add_library(oneflow SHARED ${of_all_obj_cc})
-  else()
-    oneflow_add_library(oneflow ${of_all_obj_cc})
-  endif()
-endif(BUILD_PYTHON)
+oneflow_add_library(oneflow ${of_all_obj_cc})
 
 add_dependencies(oneflow of_protoobj)
 add_dependencies(oneflow of_cfgobj)
 add_dependencies(oneflow of_functional_obj)
 add_dependencies(oneflow of_git_version)
-set_target_properties(oneflow PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${ONEFLOW_LIBRARY_DIR}" LIBRARY_OUTPUT_DIRECTORY "${ONEFLOW_LIBRARY_DIR}")
 
 if (USE_CLANG_FORMAT)
   add_dependencies(oneflow of_format)
@@ -356,13 +346,28 @@ if(BUILD_PYTHON)
 
 endif(BUILD_PYTHON)
 
+if (BUILD_CPP_API)
+  if(BUILD_SHARED_LIBONEFLOW)
+    oneflow_add_library(oneflow_cpp SHARED ${of_cpp_api_obj_cc})
+  else()
+    oneflow_add_library(oneflow_cpp ${of_cpp_api_obj_cc})
+  endif()
+  set_target_properties(oneflow_cpp PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}" LIBRARY_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}")
+  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${ONEFLOW_MLIR_LIBS} ${oneflow_third_party_libs})
+endif()
+
 file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR})
 
 # build test
 if(BUILD_TESTING)
   if (of_all_test_cc)
     oneflow_add_executable(oneflow_testexe ${of_all_test_cc})
-    target_link_libraries(oneflow_testexe ${of_libs} ${oneflow_third_party_libs} ${oneflow_exe_third_party_libs})
+    target_link_libraries(oneflow_testexe ${oneflow_test_libs})
+    if (BUILD_CPP_API)
+      target_link_libraries(oneflow_testexe oneflow_cpp)
+    else()
+      target_link_libraries(oneflow_testexe ${of_libs} ${oneflow_third_party_libs} ${oneflow_exe_third_party_libs})
+    endif()
     if (BUILD_CUDA)
       target_link_libraries(oneflow_testexe CUDA::cudart_static)
     endif()
@@ -406,15 +411,15 @@ if(BUILD_PYTHON)
   add_custom_target(oneflow_py ALL)
   add_dependencies(oneflow_py of_include_copy)
 
-else() # build_python
+endif(BUILD_PYTHON)
 
-  add_dependencies(of_include_copy oneflow)
+if (BUILD_CPP_API)
+  add_dependencies(of_include_copy oneflow_cpp)
 
   set(OF_API_DIRS)
   file(GLOB_RECURSE api_h_files "${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.h")
   list(APPEND OF_API_DIRS ${api_h_files})
 
-  copy_files("${OF_API_DIRS}" "${PROJECT_SOURCE_DIR}/oneflow/api/cpp" "${ONEFLOW_INCLUDE_DIR}" of_include_copy)
-  copy_files("${PROJECT_SOURCE_DIR}/cmake/oneflow-config.cmake" "${PROJECT_SOURCE_DIR}/cmake" "${ONEFLOW_SHARE_DIR}" of_include_copy)
-
-endif(BUILD_PYTHON)
+  copy_files("${OF_API_DIRS}" "${PROJECT_SOURCE_DIR}/oneflow/api/cpp" "${LIBONEFLOW_INCLUDE_DIR}" of_include_copy)
+  copy_files("${PROJECT_SOURCE_DIR}/cmake/oneflow-config.cmake" "${PROJECT_SOURCE_DIR}/cmake" "${LIBONEFLOW_SHARE_DIR}" of_include_copy)
+endif(BUILD_CPP_API)
diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index f5c137fefb9..6f87243e035 100644
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -130,6 +130,12 @@ set(oneflow_exe_third_party_libs
     gflags_imported
 )
 
+set(oneflow_test_libs
+    ${GOOGLETEST_STATIC_LIBRARIES}
+    ${GOOGLEMOCK_STATIC_LIBRARIES}
+)
+
+
 set(oneflow_third_party_libs
     ${GOOGLETEST_STATIC_LIBRARIES}
     ${GOOGLEMOCK_STATIC_LIBRARIES}
@@ -308,11 +314,9 @@ add_definitions(-DHALF_ENABLE_CPP11_USER_LITERALS=0)
 
 if (THIRD_PARTY)
   add_custom_target(prepare_oneflow_third_party ALL DEPENDS ${oneflow_third_party_dependencies})
-  if(BUILD_PYTHON)
-    foreach(of_include_src_dir ${ONEFLOW_THIRD_PARTY_INCLUDE_DIRS})
-      copy_all_files_in_dir("${of_include_src_dir}" "${ONEFLOW_INCLUDE_DIR}" prepare_oneflow_third_party)
-    endforeach()
-  endif(BUILD_PYTHON)
+  foreach(of_include_src_dir ${ONEFLOW_THIRD_PARTY_INCLUDE_DIRS})
+    copy_all_files_in_dir("${of_include_src_dir}" "${ONEFLOW_INCLUDE_DIR}" prepare_oneflow_third_party)
+  endforeach()
 else()
   add_custom_target(prepare_oneflow_third_party ALL)
 endif()

From 8cedfaf5e4c5ac292394ac03a5ce216e8452f9fa Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Fri, 10 Dec 2021 23:06:21 +0800
Subject: [PATCH 02/51] add separate target for cpp api test

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 CMakeLists.txt      |  2 +-
 cmake/oneflow.cmake | 48 ++++++++++++++++++++++-----------------------
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0ce73e00d3e..07d42e0b754 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,7 +23,7 @@ endif()
 option(USE_CLANG_FORMAT "" OFF)
 option(USE_CLANG_TIDY "" OFF)
 option(BUILD_PYTHON "" ON)
-option(BUILD_CPP_API "" ON)
+option(BUILD_CPP_API "Option to build OneFlow C++ API (beta)" OFF)
 option(BUILD_MONOLITHIC_LIBONEFLOW "" ON)
 option(BUILD_RDMA "" OFF)
 option(BUILD_CUDA "" ON)
diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index 2903aaa84f6..cfcd6bc4b10 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -121,17 +121,6 @@ foreach(oneflow_single_file ${oneflow_all_src})
     endif()
   endif(BUILD_PYTHON)
 
-  if(BUILD_CPP_API)
-    if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/cpp/.*\\.(h|cpp)$")
-      if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/cpp/.*_test\\.cpp$")
-        list(APPEND of_all_test_cc ${oneflow_single_file})
-      else()
-        list(APPEND of_cpp_api_obj_cc ${oneflow_single_file})
-      endif()
-      set(group_this ON)
-    endif()
-  endif(BUILD_CPP_API)
-
   if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/(core|user|xrt|maybe)/.*\\.cpp$")
     if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/(core|user|xrt|maybe)/.*_test\\.cpp$")
       # test file
@@ -347,10 +336,13 @@ if(BUILD_PYTHON)
 endif(BUILD_PYTHON)
 
 if (BUILD_CPP_API)
+  # Library sources only: files under tests/ are built separately as oneflow_cpp_api_testexe.
+  file(GLOB_RECURSE of_cpp_api_files ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.cpp ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.h)
+  list(FILTER of_cpp_api_files EXCLUDE REGEX "oneflow/api/cpp/tests")
   if(BUILD_SHARED_LIBONEFLOW)
-    oneflow_add_library(oneflow_cpp SHARED ${of_cpp_api_obj_cc})
+    oneflow_add_library(oneflow_cpp SHARED ${of_cpp_api_files})
   else()
-    oneflow_add_library(oneflow_cpp ${of_cpp_api_obj_cc})
+    oneflow_add_library(oneflow_cpp ${of_cpp_api_files})
   endif()
   set_target_properties(oneflow_cpp PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}" LIBRARY_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}")
   target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${ONEFLOW_MLIR_LIBS} ${oneflow_third_party_libs})
@@ -358,21 +350,27 @@ endif()
 
 file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR})
 
+function(oneflow_add_test target_name)
+  cmake_parse_arguments(arg "" "TEST_NAME" "SRCS" ${ARGN})
+  oneflow_add_executable(${target_name} ${arg_SRCS})
+  if (BUILD_CUDA)
+    target_link_libraries(${target_name} CUDA::cudart_static)
+  endif()
+  set_target_properties(${target_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
+  add_test(NAME ${arg_TEST_NAME} COMMAND ${target_name})
+endfunction()
+
 # build test
 if(BUILD_TESTING)
   if (of_all_test_cc)
-    oneflow_add_executable(oneflow_testexe ${of_all_test_cc})
-    target_link_libraries(oneflow_testexe ${oneflow_test_libs})
-    if (BUILD_CPP_API)
-      target_link_libraries(oneflow_testexe oneflow_cpp)
-    else()
-      target_link_libraries(oneflow_testexe ${of_libs} ${oneflow_third_party_libs} ${oneflow_exe_third_party_libs})
-    endif()
-    if (BUILD_CUDA)
-      target_link_libraries(oneflow_testexe CUDA::cudart_static)
-    endif()
-    set_target_properties(oneflow_testexe PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
-    add_test(NAME oneflow_test COMMAND oneflow_testexe)
+    oneflow_add_test(oneflow_testexe SRCS ${of_all_test_cc} TEST_NAME oneflow_test)
+    target_link_libraries(oneflow_testexe ${of_libs} ${oneflow_third_party_libs} ${oneflow_exe_third_party_libs} ${oneflow_test_libs})
+  endif()
+
+  if (BUILD_CPP_API)
+    file(GLOB_RECURSE cpp_api_test_files ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/tests/*.cpp)
+    oneflow_add_test(oneflow_cpp_api_testexe SRCS ${cpp_api_test_files} TEST_NAME oneflow_cpp_api_test)
+    target_link_libraries(oneflow_cpp_api_testexe oneflow_cpp ${oneflow_test_libs})
   endif()
 endif()
 

From 9f9378bfcf629f13d19446f22ff3800cfd48d020 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Sat, 11 Dec 2021 16:40:58 +0800
Subject: [PATCH 03/51] add cpp api test in ci

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/test.yml        | 2 ++
 cmake/caches/ci/canary/cuda.cmake | 1 +
 cmake/caches/ci/cuda.cmake        | 1 +
 3 files changed, 4 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2dd327f2410..e228da784bb 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -538,6 +538,8 @@ jobs:
         run: |
           chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe
           docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe
+          chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
+          docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
       - name: Build documentation
         timeout-minutes: 10
         if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' && matrix.device == 'cpu' }}
diff --git a/cmake/caches/ci/canary/cuda.cmake b/cmake/caches/ci/canary/cuda.cmake
index 1c8116d32ee..8a9e8b61342 100644
--- a/cmake/caches/ci/canary/cuda.cmake
+++ b/cmake/caches/ci/canary/cuda.cmake
@@ -14,4 +14,5 @@ set(CMAKE_CUDA_HOST_COMPILER "/usr/lib64/ccache/g++" CACHE STRING "")
 set(CMAKE_CUDA_ARCHITECTURES "61-real;70-real;75-real;80-real;86-real" CACHE STRING "")
 set(CUDNN_STATIC OFF CACHE BOOL "")
 set(WITH_MLIR ON CACHE BOOL "")
+set(BUILD_CPP_API ON CACHE BOOL "")
 set(CUDA_NVCC_THREADS_NUMBER 8 CACHE STRING "")
diff --git a/cmake/caches/ci/cuda.cmake b/cmake/caches/ci/cuda.cmake
index ecd4128569e..4b15fc0c1be 100644
--- a/cmake/caches/ci/cuda.cmake
+++ b/cmake/caches/ci/cuda.cmake
@@ -15,4 +15,5 @@ set(CMAKE_CUDA_HOST_COMPILER "/usr/lib64/ccache/g++" CACHE STRING "")
 set(CMAKE_CUDA_ARCHITECTURES "61;75" CACHE STRING "")
 set(CUDNN_STATIC ON CACHE BOOL "")
 set(WITH_MLIR ON CACHE BOOL "")
+set(BUILD_CPP_API ON CACHE BOOL "")
 set(CUDA_NVCC_THREADS_NUMBER 8 CACHE STRING "")

From c4db1adccbe3fca7384f2faec1dd3c75f5b20652 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Sat, 11 Dec 2021 16:49:06 +0800
Subject: [PATCH 04/51] graph run

---
 oneflow/api/common/job_build_and_infer_ctx.h |  36 +++
 oneflow/api/common/scope.h                   |  54 ++++
 oneflow/api/cpp/env.cpp                      |  12 +
 oneflow/api/cpp/framework/device.h           |   1 +
 oneflow/api/cpp/framework/graph.cpp          | 267 +++++++++++++++++++
 oneflow/api/cpp/framework/graph.h            | 101 +++++++
 oneflow/api/cpp/framework/tensor.h           |   2 +
 oneflow/api/cpp/tests/graph_test.cpp         |  99 +++++++
 8 files changed, 572 insertions(+)
 create mode 100644 oneflow/api/common/job_build_and_infer_ctx.h
 create mode 100644 oneflow/api/common/scope.h
 create mode 100644 oneflow/api/cpp/framework/graph.cpp
 create mode 100644 oneflow/api/cpp/framework/graph.h
 create mode 100644 oneflow/api/cpp/tests/graph_test.cpp

diff --git a/oneflow/api/common/job_build_and_infer_ctx.h b/oneflow/api/common/job_build_and_infer_ctx.h
new file mode 100644
index 00000000000..8b475f8a2db
--- /dev/null
+++ b/oneflow/api/common/job_build_and_infer_ctx.h
@@ -0,0 +1,36 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef ONEFLOW_API_COMMON_JOB_BUILD_AND_INFER_CTX_H_
+#define ONEFLOW_API_COMMON_JOB_BUILD_AND_INFER_CTX_H_
+
+#include "oneflow/core/job/job.pb.h"
+#include "oneflow/core/job/job_build_and_infer_ctx_mgr.h"
+
+namespace oneflow {
+
+inline Maybe<Job> GetCurrentJob() {
+  auto* job_ctx_mgr = Global<LazyJobBuildAndInferCtxMgr>::Get();
+  CHECK_NOTNULL_OR_RETURN(job_ctx_mgr);
+  auto* job_ctx =
+      JUST(job_ctx_mgr->FindJobBuildAndInferCtx(*JUST(job_ctx_mgr->GetCurrentJobName())));
+  CHECK_NOTNULL_OR_RETURN(job_ctx);
+  return job_ctx->job();
+}
+
+}  // namespace oneflow
+
+#endif  // ONEFLOW_API_COMMON_JOB_BUILD_AND_INFER_CTX_H_
diff --git a/oneflow/api/common/scope.h b/oneflow/api/common/scope.h
new file mode 100644
index 00000000000..f0626e3ada1
--- /dev/null
+++ b/oneflow/api/common/scope.h
@@ -0,0 +1,54 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef ONEFLOW_API_COMMON_SCOPE_H_
+#define ONEFLOW_API_COMMON_SCOPE_H_
+
+#include <memory>
+#include <string>
+#include "oneflow/core/common/just.h"
+#include "oneflow/core/framework/device.h"
+#include "oneflow/core/framework/instructions_builder.h"
+#include "oneflow/core/framework/session_util.h"
+#include "oneflow/core/job/job_conf.cfg.h"
+#include "oneflow/core/job/job_conf.pb.h"
+#include "oneflow/core/job/scope.h"
+
+namespace oneflow {
+
+inline Maybe<Scope> MakeScope(const JobConfigProto& config_proto, const Device& device) {
+  std::shared_ptr<Scope> scope;
+  std::shared_ptr<cfg::JobConfigProto> cfg_config_proto =
+      std::make_shared<cfg::JobConfigProto>(config_proto);
+  JUST(LogicalRun([&](InstructionsBuilder* builder) -> Maybe<void> {
+    int64_t session_id = 0;
+    std::string device_tag = "cpu";
+    std::string machine_ids = "0";
+    std::string device_ids = "0";
+    if (device.type() == "cuda") {
+      device_tag = "gpu";
+      device_ids = std::to_string(device.device_id());
+    }
+    scope = JUST(builder->BuildInitialScope(session_id, cfg_config_proto, device_tag,
+                                            {machine_ids + ":" + device_ids}, nullptr, false));
+    return Maybe<void>::Ok();
+  }));
+  return scope;
+}
+
+}  // namespace oneflow
+
+#endif  // ONEFLOW_API_COMMON_SCOPE_H_
diff --git a/oneflow/api/cpp/env.cpp b/oneflow/api/cpp/env.cpp
index 6919948af02..15b4a65891f 100644
--- a/oneflow/api/cpp/env.cpp
+++ b/oneflow/api/cpp/env.cpp
@@ -24,14 +24,17 @@ limitations under the License.
 #include <random>
 #include <type_traits>
 #include "oneflow/api/cpp/env.h"
+#include "oneflow/core/common/global.h"
 #include "oneflow/core/common/just.h"
 #include "oneflow/core/common/multi_client.h"
 #include "oneflow/core/common/optional.h"
+#include "oneflow/core/framework/multi_client_session_context.h"
 #include "oneflow/core/framework/shut_down_util.h"
 #include "oneflow/core/job/cluster_instruction.h"
 #include "oneflow/core/job/env.pb.h"
 #include "oneflow/core/job/env_global_objects_scope.h"
 #include "oneflow/core/control/ctrl_bootstrap.h"
+#include "oneflow/core/job/session.h"
 #include "oneflow/core/rpc/include/base.h"
 #include "oneflow/core/vm/vm_util.h"
 #include "oneflow/core/thread/thread_consistent_id.h"
@@ -114,6 +117,12 @@ of::Maybe<void> initEnv() {
   CompleteEnvProto(env_proto);
   of::Global<of::EnvGlobalObjectsScope>::SetAllocated(new of::EnvGlobalObjectsScope());
   JUST(of::Global<of::EnvGlobalObjectsScope>::Get()->Init(env_proto));
+
+  of::ConfigProto config_proto;
+  config_proto.mutable_resource()->set_cpu_device_num(1);  // placeholder; overwritten by TryInit
+  config_proto.set_session_id(of::NewSessionId());
+  of::Global<of::MultiClientSessionContext>::New();
+  JUST(of::Global<of::MultiClientSessionContext>::Get()->TryInit(config_proto));
   return of::Maybe<void>::Ok();
 }
 
@@ -136,6 +145,9 @@ void release() {
       of::ClusterInstruction::MasterSendHalt();
     }
     of::Global<of::EnvGlobalObjectsScope>::Delete();
+    // TODO(zzk0): segmentation fault
+    // of::Global<of::MultiClientSessionContext>::Get()->TryClose().GetOrThrow();
+    of::Global<of::MultiClientSessionContext>::Delete();
   }
   // TODO close session
   of::SetShuttingDown();
diff --git a/oneflow/api/cpp/framework/device.h b/oneflow/api/cpp/framework/device.h
index e45c7efb0f7..2a7e79b2a23 100644
--- a/oneflow/api/cpp/framework/device.h
+++ b/oneflow/api/cpp/framework/device.h
@@ -32,6 +32,7 @@ namespace oneflow_api {
 
 class Device final {
   friend class Tensor;
+  friend class Graph;
 
  public:
   explicit Device(const std::string& type_or_type_with_device_id);
diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
new file mode 100644
index 00000000000..e119a311687
--- /dev/null
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -0,0 +1,267 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include "oneflow/api/common/scope.h"
+#include "oneflow/api/cpp/framework/device.h"
+#include "oneflow/api/cpp/framework/graph.h"
+#include "oneflow/api/cpp/framework/shape.h"
+#include "oneflow/api/cpp/framework/tensor.h"
+#include "oneflow/api/common/job_build_and_infer_ctx.h"
+#include <cstdio>
+#include <fstream>
+#include <istream>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+#include "oneflow/api/python/job_build/job_build_and_infer.h"
+#include "oneflow/core/common/data_type.pb.h"
+#include "oneflow/core/common/global.h"
+#include "oneflow/core/common/hash_container.h"
+#include "oneflow/core/common/just.h"
+#include "oneflow/core/common/shape.h"
+#include "oneflow/core/common/symbol.h"
+#include "oneflow/core/framework/device.h"
+#include "oneflow/core/framework/dtype.h"
+#include "oneflow/core/framework/multi_client_session_context.h"
+#include "oneflow/core/framework/nn_graph.h"
+#include "oneflow/core/framework/scope_util.h"
+#include "oneflow/core/framework/tensor.h"
+#include "oneflow/core/framework/tensor_tuple.h"
+#include "oneflow/core/functional/functional_api.yaml.h"
+#include "oneflow/core/graph/op_graph.h"
+#include "oneflow/core/job/job.pb.h"
+#include "oneflow/core/job/job_build_and_infer_ctx.h"
+#include "oneflow/core/job/job_build_and_infer_ctx_mgr.h"
+#include "oneflow/core/job/job_conf.cfg.h"
+#include "oneflow/core/job/job_conf.pb.h"
+#include "oneflow/core/job/job_set.pb.h"
+#include "oneflow/core/job/lazy_mode.h"
+#include "oneflow/core/job/parallel_desc.h"
+#include "oneflow/core/job/scope.h"
+#include "oneflow/core/job/session.h"
+#include "oneflow/core/operator/interface_blob_conf.pb.h"
+#include "oneflow/core/operator/op_conf.pb.h"
+
+namespace oneflow_api {
+
+namespace of = oneflow;
+
+namespace {
+
+class CompileScope {
+ public:
+  CompileScope(const of::JobConfigProto& job_config, const of::Device& device, XrtKind kind) {
+    std::shared_ptr<of::Scope> scope = CHECK_JUST(of::MakeScope(job_config, device));
+    CHECK_JUST(of::ThreadLocalScopeStackPush(scope));
+
+    of::cfg::JobConfigProto job_config_cfg(job_config);
+#ifdef WITH_OPENVINO
+    if (kind == XrtKind::kOpenvino) {
+      *(job_config_cfg.mutable_xrt_config()->mutable_use_openvino()) = true;
+    }
+#endif
+#ifdef WITH_TENSORRT
+    if (kind == XrtKind::kTensorrt) {
+      *(job_config_cfg.mutable_xrt_config()->mutable_use_tensorrt()) = true;
+    }
+#endif
+    CHECK_JUST(of::JobBuildAndInferCtx_Open(job_config.job_name()));
+    CHECK_JUST(of::CurJobBuildAndInferCtx_SetJobConf(job_config_cfg));
+  }
+
+  ~CompileScope() {
+    CHECK_JUST(of::JobBuildAndInferCtx_Close());
+    CHECK_JUST(of::ThreadLocalScopeStackPop());
+  }
+
+ private:
+  of::LazyMode::Guard lazy_mode_enabled_guard{true};
+};
+
+std::shared_ptr<of::one::TensorTuple> ConvertToTensorTuple(
+    const std::vector<std::shared_ptr<of::one::Tensor>>& tensors) {
+  auto tensor_tuple = std::make_shared<of::one::TensorTuple>();
+  for (const auto& tensor : tensors) { tensor_tuple->emplace_back(tensor); }
+  return tensor_tuple;
+}
+
+// Maps an API device to an op device tag: "cpu" stays "cpu", every other type becomes "gpu".
+std::string GetDeviceTag(const Device& device) {
+  const std::string& device_type = device.type();
+  const bool is_cpu = (device_type == "cpu");
+  if (is_cpu) { return device_type; }
+  return "gpu";
+}
+
+template<class T1, class T2>
+std::pair<std::vector<T1>, std::vector<T2>> Unzip(const of::HashMap<T1, T2>& hash_map) {
+  // Keys go to .first and values to .second, both in the map's iteration order.
+  std::pair<std::vector<T1>, std::vector<T2>> unzipped;
+  for (const auto& entry : hash_map) {
+    unzipped.first.emplace_back(entry.first);
+    unzipped.second.emplace_back(entry.second);
+  }
+  return unzipped;
+}
+
+}  // namespace
+
+Graph::Graph(const std::string& model_path, const Device& device) : device_(device) {
+  // Loads the serialized job from `model_path` and registers an NNGraph named after it.
+  // TODO(zzk0): model_path is a directory; the (mlir) model file name must be appended to it.
+  {
+    std::ifstream input(model_path, std::ios::binary);
+    CHECK(job_.ParseFromIstream(&input)) << "Failed to load job from " << model_path;
+  }
+  graph_ = std::make_shared<of::NNGraph>(job_.job_conf().job_name());
+  of::Global<of::MultiClientSessionContext>::Get()->AddCGraph(graph_).GetOrThrow();
+}
+
+Graph::Graph(const std::string& model_path) : Graph(model_path, Device("cpu")) {}
+
+std::vector<Tensor> Graph::Forward(const std::vector<Tensor>& inputs) {
+  if (!is_compiled_) {
+    Compile(inputs).GetOrThrow();
+    is_compiled_ = true;
+  }
+  return Run(inputs).GetOrThrow();
+}
+
+of::Maybe<void> Graph::Compile(const std::vector<Tensor>& inputs) {
+  JUST(BuildGraph(inputs));
+  JUST(LoadCheckpoint());
+  JUST(RegisterTensors());
+  JUST(graph_->CompileAndInitRuntime());
+  return of::Maybe<void>::Ok();
+}
+
+of::Maybe<std::vector<Tensor>> Graph::Run(const std::vector<Tensor>& inputs) const {
+  auto input_tensor_tuple = std::make_shared<of::one::TensorTuple>();
+  for (const auto& tensor : inputs) { input_tensor_tuple->emplace_back(tensor.tensor_); }
+
+  JUST(of::RunLazyNNGraph(*input_tensor_tuple, *output_tensor_tuple_, *parameter_tensor_tuple_,
+                          graph_));
+  JUST(of::SoftSyncNNGraphBuffers(*output_tensor_tuple_, graph_));
+
+  std::vector<Tensor> outputs;
+  for (const auto& tensor : *output_tensor_tuple_) { outputs.emplace_back(Tensor(tensor)); }
+  return outputs;
+}
+
+of::Maybe<void> Graph::AddOp(of::OperatorConf op_conf) {
+  {
+    std::shared_ptr<of::Scope> scope = JUST(of::GetCurrentScope());
+    op_conf.set_scope_symbol_id(scope->symbol_id().value_or(0));
+  }
+  op_conf.set_device_tag(GetDeviceTag(device_));
+  if (batch_size_ > 0 && op_conf.has_input_conf()) {
+    op_conf.mutable_input_conf()->mutable_blob_conf()->mutable_shape()->mutable_dim()->Set(
+        0, batch_size_);
+    std::cout << "Print input conf" << std::endl;
+    std::cout << op_conf.ShortDebugString() << std::endl;
+  }
+  auto* ctx = JUST(of::GetCurInferCtx());
+  JUST(ctx->AddAndInferConsistentOp(op_conf));
+  return of::Maybe<void>::Ok();
+}
+
+of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
+  CompileScope build_graph_scope(job_.job_conf(), *device_.device_->shared_from_symbol(), xrt_kind_);
+  {
+    // TODO(zzk0): remove this; used for input tensor order
+    int input_tensor_order = 0;
+    of::OpGraph op_graph(job_);
+    JUST(op_graph.ForEachOpNode([&](const of::OpNode& node) -> of::Maybe<void> {
+      const of::OperatorConf& op_conf = node.op().op_conf();
+      JUST(AddOp(op_conf));
+      if (op_conf.has_input_conf()) {
+        // TODO(zzk0): input tensor order
+        input_name_to_tensor_[op_conf.name()] = inputs.at(input_tensor_order++).tensor_;
+      } else if (op_conf.has_variable_conf()) {
+        // TODO(zzk0): load from local path, this branch maybe removed
+        of::LazyMode::Guard lazy_mode_disabled_guard{false};
+
+        of::VariableOpConf variable_conf = op_conf.variable_conf();
+        variable_op_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Rand(
+            of::Shape(variable_conf.shape()),
+            JUST(of::DType::Get(static_cast<of::DataType>(variable_conf.data_type()))),
+            *device_.device_, nullptr, false));
+        PrintTensor(Tensor(variable_op_name_to_tensor_[op_conf.name()]));
+      }
+      return of::Maybe<void>::Ok();
+    }));
+  }
+  JUST(of::CurJobBuildAndInferCtx_Complete());
+  JUST(of::CurJobBuildAndInferCtx_Rebuild());
+  {
+    std::shared_ptr<of::Job> complete_job = JUST(of::GetCurrentJob());
+    of::OpGraph complete_graph(*complete_job);
+    JUST(complete_graph.ForEachOpNode([&](const of::OpNode& node) -> of::Maybe<void> {
+      of::LazyMode::Guard lazy_mode_disabled_guard{false};
+      const of::OperatorConf& op_conf = node.op().op_conf();
+      if (op_conf.has_output_conf()) {
+        of::InterfaceBlobConf blob_conf = op_conf.output_conf().blob_conf();
+        output_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Empty(
+            of::Shape(blob_conf.shape()),
+            JUST(of::DType::Get(static_cast<of::DataType>(blob_conf.data_type()))),
+            *device_.device_));
+        std::cout << "Print output conf" << std::endl;
+        PrintTensor(Tensor(output_name_to_tensor_[op_conf.name()]));
+      }
+      return of::Maybe<void>::Ok();
+    }));
+  }
+  return of::Maybe<void>::Ok();
+}
+
+of::Maybe<void> Graph::LoadCheckpoint() { return of::Maybe<void>::Ok(); }
+
+of::Maybe<void> Graph::RegisterTensors() {
+  {
+    auto pair = Unzip(input_name_to_tensor_);
+    const std::vector<std::string>& input_op_names = pair.first;
+    const std::vector<std::shared_ptr<of::one::Tensor>>& input_tensors = pair.second;
+    JUST(graph_->RegisterInputOpNamesAndTensors(input_op_names, input_tensors));
+  }
+  {
+    auto pair = Unzip(output_name_to_tensor_);
+    const std::vector<std::string>& output_op_names = pair.first;
+    std::vector<std::shared_ptr<of::one::Tensor>>& output_tensors = pair.second;
+    JUST(graph_->RegisterOutputOpNamesAndTensors(output_op_names, output_tensors));
+    output_tensor_tuple_ = ConvertToTensorTuple(output_tensors);
+  }
+  {
+    auto pair = Unzip(variable_op_name_to_tensor_);
+    const std::vector<std::string>& variable_op_names = pair.first;
+    const std::vector<std::shared_ptr<of::one::Tensor>>& variable_tensors = pair.second;
+    JUST(graph_->RegisterVariableOpNamesAndTensors(variable_op_names, variable_tensors));
+    parameter_tensor_tuple_ = ConvertToTensorTuple(variable_tensors);
+  }
+  return of::Maybe<void>::Ok();
+}
+
+Graph Load(const std::string& model_path, const Device& device) {
+  Graph graph(model_path, device);
+  return graph;
+}
+
+Graph Load(const std::string& model_path) {
+  Device device = Device("cpu");
+  return Load(model_path, device);
+}
+
+}  // namespace oneflow_api
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
new file mode 100644
index 00000000000..ffd6661799d
--- /dev/null
+++ b/oneflow/api/cpp/framework/graph.h
@@ -0,0 +1,101 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifndef ONEFLOW_API_CPP_GRAPH_H_
+#define ONEFLOW_API_CPP_GRAPH_H_
+
+#include <memory>
+#include <string>
+#include <vector>
+#include "oneflow/api/cpp/framework/device.h"
+#include "oneflow/api/cpp/framework/shape.h"
+#include "oneflow/api/cpp/framework/tensor.h"
+#include "oneflow/core/common/hash_container.h"
+#include "oneflow/core/framework/tensor.h"
+#include "oneflow/core/framework/tensor_tuple.h"
+#include "oneflow/core/job/job.pb.h"
+#include "oneflow/core/operator/op_conf.pb.h"
+
+namespace oneflow {
+
+class NNGraph;
+
+}  // namespace oneflow
+
+namespace oneflow_api {
+
+enum class XrtKind : int { kOneflow = 0, kTensorrt = 1, kOpenvino = 2 };
+
+class Graph final {
+ public:
+  explicit Graph(const std::string& model_path, const Device& device);
+  explicit Graph(const std::string& model_path);
+  std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
+  void set_batch_size(int batch_size) {
+    batch_size_ = batch_size;
+  }
+  void enable_openvino() {
+    xrt_kind_ = XrtKind::kOpenvino;
+  }
+  void enable_tensorrt() {
+    xrt_kind_ = XrtKind::kTensorrt;
+  }
+
+  // not must, better if provided
+  // void To(const Device& device);
+
+ private:
+  oneflow::Maybe<void> Compile(const std::vector<Tensor>& inputs);
+  oneflow::Maybe<std::vector<Tensor>> Run(const std::vector<Tensor>& inputs) const;
+  oneflow::Maybe<void> AddOp(oneflow::OperatorConf op_conf);
+  oneflow::Maybe<void> BuildGraph(const std::vector<Tensor>& inputs);
+  oneflow::Maybe<void> LoadCheckpoint();
+  oneflow::Maybe<void> RegisterTensors();
+
+  std::shared_ptr<oneflow::NNGraph> graph_ = nullptr;
+  bool is_compiled_ = false;
+  int batch_size_ = 0;
+  XrtKind xrt_kind_ = XrtKind::kOneflow;
+  Device device_;
+  oneflow::Job job_;
+
+  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> input_name_to_tensor_;
+  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> output_name_to_tensor_;
+  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> variable_op_name_to_tensor_;
+  std::shared_ptr<oneflow::one::TensorTuple> output_tensor_tuple_;
+  std::shared_ptr<oneflow::one::TensorTuple> parameter_tensor_tuple_;
+};
+
+Graph Load(const std::string& model_path, const Device& device);
+
+Graph Load(const std::string& model_path);
+
+// TODO(zzk0): only for debug, remove this
+inline void PrintTensor(const Tensor& tensor) {
+  std::cout << tensor.shape().elem_cnt() << " " << tensor.device().type() << " "
+            << tensor.device().device_id() << " ";
+  for (int i = 0; i < tensor.shape().NumAxes(); ++i) { std::cout << tensor.shape().At(i) << " "; }
+  std::cout << std::endl;
+  // float* data = new float[tensor.shape().elem_cnt() * 4];
+  // tensor.copy_to(data);
+  // for (int i = 0; i < tensor.shape().elem_cnt(); ++i) { std::cout << data[i] << " "; }
+  // std::cout << std::endl;
+  // delete[] data;
+}
+
+}  // namespace oneflow_api
+
+#endif  // ONEFLOW_API_CPP_GRAPH_H_
diff --git a/oneflow/api/cpp/framework/tensor.h b/oneflow/api/cpp/framework/tensor.h
index c25cc494325..aab38d8db7a 100644
--- a/oneflow/api/cpp/framework/tensor.h
+++ b/oneflow/api/cpp/framework/tensor.h
@@ -33,6 +33,8 @@ class Tensor;
 namespace oneflow_api {
 
 class Tensor final {
+  friend class Graph;
+
  public:
   explicit Tensor(const Shape& shape = Shape(), const Device& device = Device("cpu"),
                   const DType& dtype = DType::kFloat);
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
new file mode 100644
index 00000000000..85154409f06
--- /dev/null
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -0,0 +1,99 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#include <gtest/gtest.h>
+#include <cstdint>
+#include "oneflow/api/cpp/framework/device.h"
+#include "oneflow/api/cpp/framework/dtype.h"
+#include "oneflow/api/cpp/framework/graph.h"
+#include "oneflow/api/cpp/framework/shape.h"
+#include "oneflow/api/cpp/framework/tensor.h"
+#include "oneflow/api/cpp/nn/functional/activation.h"
+#include "oneflow/api/cpp/tests/api_test.h"
+#include "oneflow/core/common/shape.h"
+#include "oneflow/core/framework/device.h"
+#include "oneflow/core/framework/dtype.h"
+#include "oneflow/core/functional/functional_api.yaml.h"
+
+namespace oneflow_api {
+
+namespace {
+
+inline Graph LoadGraph(const Device& device) {
+  Graph graph = Load("/home/zhouzekai/models/job_linear/saved_model.pb", device);
+  return graph;
+}
+
+inline void Forward(Graph& graph, const Device& device, int expected_batch_dim = 1) {
+  std::vector<Tensor> inputs{Tensor(
+      oneflow::one::functional::Rand(
+          oneflow::Shape({1, 5000}), oneflow::DType::Float(),
+          oneflow::Device::New(device.type(), device.device_id()).GetOrThrow(), nullptr, false)
+          .GetPtrOrThrow())};
+  std::vector<Tensor> outputs = graph.Forward(inputs);
+  Shape shape = outputs.at(0).shape();
+  ASSERT_EQ(outputs.size(), 1);
+  ASSERT_EQ(shape.At(0), expected_batch_dim);
+  ASSERT_EQ(shape.At(1), 100000);
+}
+
+}  // namespace
+
+TEST(Api, graph_cpu_test) {
+  EnvScope scope;
+  Device device("cpu");
+  Graph graph = LoadGraph(device);
+  Forward(graph, device);
+}
+
+TEST(Api, graph_gpu_test) {
+  EnvScope scope;
+  Device device("cuda", 0);
+  Graph graph = LoadGraph(device);
+  Forward(graph, device);
+}
+
+TEST(Api, graph_openvino_test) {
+  EnvScope scope;
+  Device device("cpu");
+  Graph graph = LoadGraph(device);
+  graph.enable_openvino();
+  Forward(graph, device);
+}
+
+TEST(Api, graph_trt_test) {
+  EnvScope scope;
+  Device device("cuda:0");
+  Graph graph = LoadGraph(device);
+  graph.enable_tensorrt();
+  Forward(graph, device);
+}
+
+TEST(Api, graph_cpu_batching_test) {
+  EnvScope scope;
+  Device device("cpu");
+  Graph graph = LoadGraph(device);
+  Forward(graph, device, 10);
+}
+
+TEST(Api, graph_gpu_batching_test) {
+  EnvScope scope;
+  Device device("cuda", 0);
+  Graph graph = LoadGraph(device);
+  Forward(graph, device, 10);
+}
+
+}  // namespace oneflow_api

From 4298d3b027c7420fc6e3e736e07ea73489aaeb67 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Sat, 11 Dec 2021 19:51:42 +0800
Subject: [PATCH 05/51] reverse the order of cudnn and cuda library

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 cmake/third_party.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cmake/third_party.cmake b/cmake/third_party.cmake
index f08b0b8bb7d..8a8edd31ba9 100644
--- a/cmake/third_party.cmake
+++ b/cmake/third_party.cmake
@@ -248,9 +248,9 @@ if (BUILD_CUDA)
   endif()
   include(nccl)
 
-  list(APPEND oneflow_third_party_libs ${VENDOR_CUDA_LIBRARIES})
-  list(APPEND oneflow_third_party_libs ${CUDNN_LIBRARIES})
   list(APPEND oneflow_third_party_libs ${NCCL_LIBRARIES})
+  list(APPEND oneflow_third_party_libs ${CUDNN_LIBRARIES})
+  list(APPEND oneflow_third_party_libs ${VENDOR_CUDA_LIBRARIES})
 
   list(APPEND oneflow_third_party_dependencies nccl)
 

From 9dce5ce5ca3a21a4b1c6f01491a9ff524803c6aa Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Sat, 11 Dec 2021 20:17:51 +0800
Subject: [PATCH 06/51] update logic of BUILD_MONOLITHIC_LIBONEFLOW

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 CMakeLists.txt      | 7 +------
 cmake/oneflow.cmake | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4b43984068a..854c5fcd194 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -215,16 +215,11 @@ if(BUILD_CPP_API)
 
   if(BUILD_SHARED_LIBS)
     if(BUILD_MONOLITHIC_LIBONEFLOW)
+      message(WARNING "BUILD_SHARED_LIBS will be overrided to OFF because BUILD_MONOLITHIC_LIBONEFLOW is ON")
       set(BUILD_SHARED_LIBS OFF)
     else()
       set(LIBRARY_OUTPUT_PATH ${LIBONEFLOW_LIBRARY_DIR})
     endif(BUILD_MONOLITHIC_LIBONEFLOW)
-    set(BUILD_SHARED_LIBONEFLOW ON)
-  else()
-    if(BUILD_MONOLITHIC_LIBONEFLOW)
-      message(WARNING "BUILD_MONOLITHIC_LIBONEFLOW=ON is meaningless when BUILD_SHARED_LIBS=OFF")
-    endif()
-    set(BUILD_SHARED_LIBONEFLOW OFF)
   endif(BUILD_SHARED_LIBS)
 endif(BUILD_CPP_API)
 
diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index cfcd6bc4b10..53c52502a49 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -339,7 +339,7 @@ if (BUILD_CPP_API)
   file(GLOB_RECURSE of_cpp_api_files 
     ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.cpp
     ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.h)
-  if(BUILD_SHARED_LIBONEFLOW)
+  if(BUILD_MONOLITHIC_LIBONEFLOW)
     oneflow_add_library(oneflow_cpp SHARED ${of_cpp_api_files})
   else()
     oneflow_add_library(oneflow_cpp ${of_cpp_api_files})

From 56152a2efd711eede4935286e1f00ee890c463e3 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Sat, 11 Dec 2021 20:27:44 +0800
Subject: [PATCH 07/51] rename BUILD_MONOLITHIC_LIBONEFLOW to
 BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 CMakeLists.txt      | 8 ++++----
 cmake/oneflow.cmake | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 854c5fcd194..9ef552be101 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -24,7 +24,7 @@ option(USE_CLANG_FORMAT "" OFF)
 option(USE_CLANG_TIDY "" OFF)
 option(BUILD_PYTHON "" ON)
 option(BUILD_CPP_API "Option to build OneFlow C++ API (beta)" OFF)
-option(BUILD_MONOLITHIC_LIBONEFLOW "" ON)
+option(BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO "Option to build a monolithic liboneflow_cpp.so (only meaningful when BUILD_CPP_API is ON)" ON)
 option(BUILD_RDMA "" OFF)
 option(BUILD_CUDA "" ON)
 option(WITH_ONEDNN "" OFF)
@@ -214,12 +214,12 @@ if(BUILD_CPP_API)
   make_directory(${LIBONEFLOW_SHARE_DIR})
 
   if(BUILD_SHARED_LIBS)
-    if(BUILD_MONOLITHIC_LIBONEFLOW)
-      message(WARNING "BUILD_SHARED_LIBS will be overrided to OFF because BUILD_MONOLITHIC_LIBONEFLOW is ON")
+    if(BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO)
+      message(WARNING "BUILD_SHARED_LIBS will be overrided to OFF because BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO is ON")
       set(BUILD_SHARED_LIBS OFF)
     else()
       set(LIBRARY_OUTPUT_PATH ${LIBONEFLOW_LIBRARY_DIR})
-    endif(BUILD_MONOLITHIC_LIBONEFLOW)
+    endif(BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO)
   endif(BUILD_SHARED_LIBS)
 endif(BUILD_CPP_API)
 
diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index 53c52502a49..9d99d4a7884 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -339,7 +339,7 @@ if (BUILD_CPP_API)
   file(GLOB_RECURSE of_cpp_api_files 
     ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.cpp
     ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.h)
-  if(BUILD_MONOLITHIC_LIBONEFLOW)
+  if(BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO)
     oneflow_add_library(oneflow_cpp SHARED ${of_cpp_api_files})
   else()
     oneflow_add_library(oneflow_cpp ${of_cpp_api_files})

From bf1c340337456fc519d86275cabd75bc425e5d87 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Sun, 12 Dec 2021 09:35:06 +0800
Subject: [PATCH 08/51] refine

---
 oneflow/api/cpp/framework/graph.cpp | 46 ++++++++++++++---------------
 oneflow/api/cpp/framework/graph.h   | 19 +++++-------
 2 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index e119a311687..cbe4b80bf31 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -65,17 +65,17 @@ namespace {
 class CompileScope {
  public:
   CompileScope(const of::JobConfigProto& job_config, const of::Device& device, XrtKind kind) {
-    std::shared_ptr<of::Scope> scope = CHECK_JUST(of::MakeScope(job_config, device));
+    const std::shared_ptr<of::Scope> scope = CHECK_JUST(of::MakeScope(job_config, device));
     CHECK_JUST(of::ThreadLocalScopeStackPush(scope));
 
     of::cfg::JobConfigProto job_config_cfg(job_config);
 #ifdef WITH_OPENVINO
-    if (kind == XrtKind::kOpenvino) {
+    if (kind == XrtKind::kOpenVINO) {
       *(job_config_cfg.mutable_xrt_config()->mutable_use_openvino()) = true;
     }
 #endif
 #ifdef WITH_TENSORRT
-    if (kind == XrtKind::kTensorrt) {
+    if (kind == XrtKind::kTensorRT) {
       *(job_config_cfg.mutable_xrt_config()->mutable_use_tensorrt()) = true;
     }
 #endif
@@ -100,15 +100,15 @@ std::shared_ptr<of::one::TensorTuple> ConvertToTensorTuple(
 }
 
 std::string GetDeviceTag(const Device& device) {
-  if (device.type() == "cpu") {
-    return device.type();
-  } else {
+  if (device.type() == "cuda") {
     return "gpu";
+  } else {
+    return "cpu";
   }
 }
 
 template<class T1, class T2>
-std::pair<std::vector<T1>, std::vector<T2>> Unzip(const of::HashMap<T1, T2>& hash_map) {
+const std::pair<std::vector<T1>, std::vector<T2>> Unzip(const of::HashMap<T1, T2>& hash_map) {
   std::vector<T1> vec1;
   std::vector<T2> vec2;
   for (const auto& entry : hash_map) {
@@ -150,7 +150,7 @@ of::Maybe<void> Graph::Compile(const std::vector<Tensor>& inputs) {
 }
 
 of::Maybe<std::vector<Tensor>> Graph::Run(const std::vector<Tensor>& inputs) const {
-  auto input_tensor_tuple = std::make_shared<of::one::TensorTuple>();
+  const auto input_tensor_tuple = std::make_shared<of::one::TensorTuple>();
   for (const auto& tensor : inputs) { input_tensor_tuple->emplace_back(tensor.tensor_); }
 
   JUST(of::RunLazyNNGraph(*input_tensor_tuple, *output_tensor_tuple_, *parameter_tensor_tuple_,
@@ -164,7 +164,7 @@ of::Maybe<std::vector<Tensor>> Graph::Run(const std::vector<Tensor>& inputs) con
 
 of::Maybe<void> Graph::AddOp(of::OperatorConf op_conf) {
   {
-    std::shared_ptr<of::Scope> scope = JUST(of::GetCurrentScope());
+    const std::shared_ptr<of::Scope> scope = JUST(of::GetCurrentScope());
     op_conf.set_scope_symbol_id(scope->symbol_id().value_or(0));
   }
   op_conf.set_device_tag(GetDeviceTag(device_));
@@ -180,11 +180,12 @@ of::Maybe<void> Graph::AddOp(of::OperatorConf op_conf) {
 }
 
 of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
-  CompileScope build_graph_scope(job_.job_conf(), *device_.device_->shared_from_symbol(), xrt_kind_);
+  CompileScope build_graph_scope(job_.job_conf(), *device_.device_->shared_from_symbol(),
+                                 xrt_kind_);
   {
     // TODO(zzk0): remove this; used for input tensor order
     int input_tensor_order = 0;
-    of::OpGraph op_graph(job_);
+    const of::OpGraph op_graph(job_);
     JUST(op_graph.ForEachOpNode([&](const of::OpNode& node) -> of::Maybe<void> {
       const of::OperatorConf& op_conf = node.op().op_conf();
       JUST(AddOp(op_conf));
@@ -193,9 +194,8 @@ of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
         input_name_to_tensor_[op_conf.name()] = inputs.at(input_tensor_order++).tensor_;
       } else if (op_conf.has_variable_conf()) {
         // TODO(zzk0): load from local path, this branch maybe removed
-        of::LazyMode::Guard lazy_mode_disabled_guard{false};
-
-        of::VariableOpConf variable_conf = op_conf.variable_conf();
+        const of::LazyMode::Guard lazy_mode_disabled_guard{false};
+        const of::VariableOpConf variable_conf = op_conf.variable_conf();
         variable_op_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Rand(
             of::Shape(variable_conf.shape()),
             JUST(of::DType::Get(static_cast<of::DataType>(variable_conf.data_type()))),
@@ -208,13 +208,13 @@ of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
   JUST(of::CurJobBuildAndInferCtx_Complete());
   JUST(of::CurJobBuildAndInferCtx_Rebuild());
   {
-    std::shared_ptr<of::Job> complete_job = JUST(of::GetCurrentJob());
-    of::OpGraph complete_graph(*complete_job);
+    const std::shared_ptr<of::Job> complete_job = JUST(of::GetCurrentJob());
+    const of::OpGraph complete_graph(*complete_job);
     JUST(complete_graph.ForEachOpNode([&](const of::OpNode& node) -> of::Maybe<void> {
-      of::LazyMode::Guard lazy_mode_disabled_guard{false};
+      const of::LazyMode::Guard lazy_mode_disabled_guard{false};
       const of::OperatorConf& op_conf = node.op().op_conf();
       if (op_conf.has_output_conf()) {
-        of::InterfaceBlobConf blob_conf = op_conf.output_conf().blob_conf();
+        const of::InterfaceBlobConf blob_conf = op_conf.output_conf().blob_conf();
         output_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Empty(
             of::Shape(blob_conf.shape()),
             JUST(of::DType::Get(static_cast<of::DataType>(blob_conf.data_type()))),
@@ -232,20 +232,20 @@ of::Maybe<void> Graph::LoadCheckpoint() { return of::Maybe<void>::Ok(); }
 
 of::Maybe<void> Graph::RegisterTensors() {
   {
-    auto pair = Unzip(input_name_to_tensor_);
+    const auto pair = Unzip(input_name_to_tensor_);
     const std::vector<std::string>& input_op_names = pair.first;
     const std::vector<std::shared_ptr<of::one::Tensor>>& input_tensors = pair.second;
     JUST(graph_->RegisterInputOpNamesAndTensors(input_op_names, input_tensors));
   }
   {
-    auto pair = Unzip(output_name_to_tensor_);
+    const auto pair = Unzip(output_name_to_tensor_);
     const std::vector<std::string>& output_op_names = pair.first;
-    std::vector<std::shared_ptr<of::one::Tensor>>& output_tensors = pair.second;
+    const std::vector<std::shared_ptr<of::one::Tensor>>& output_tensors = pair.second;
     JUST(graph_->RegisterOutputOpNamesAndTensors(output_op_names, output_tensors));
     output_tensor_tuple_ = ConvertToTensorTuple(output_tensors);
   }
   {
-    auto pair = Unzip(variable_op_name_to_tensor_);
+    const auto pair = Unzip(variable_op_name_to_tensor_);
     const std::vector<std::string>& variable_op_names = pair.first;
     const std::vector<std::shared_ptr<of::one::Tensor>>& variable_tensors = pair.second;
     JUST(graph_->RegisterVariableOpNamesAndTensors(variable_op_names, variable_tensors));
@@ -260,7 +260,7 @@ Graph Load(const std::string& model_path, const Device& device) {
 }
 
 Graph Load(const std::string& model_path) {
-  Device device = Device("cpu");
+  const Device device = Device("cpu");
   return Load(model_path, device);
 }
 
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index ffd6661799d..90573d2a3f1 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -17,8 +17,10 @@ limitations under the License.
 #ifndef ONEFLOW_API_CPP_GRAPH_H_
 #define ONEFLOW_API_CPP_GRAPH_H_
 
+#include <functional>
 #include <memory>
 #include <string>
+#include <unordered_map>
 #include <vector>
 #include "oneflow/api/cpp/framework/device.h"
 #include "oneflow/api/cpp/framework/shape.h"
@@ -27,6 +29,7 @@ limitations under the License.
 #include "oneflow/core/framework/tensor.h"
 #include "oneflow/core/framework/tensor_tuple.h"
 #include "oneflow/core/job/job.pb.h"
+#include "oneflow/core/job/job_conf.cfg.h"
 #include "oneflow/core/operator/op_conf.pb.h"
 
 namespace oneflow {
@@ -37,22 +40,16 @@ class NNGraph;
 
 namespace oneflow_api {
 
-enum class XrtKind : int { kOneflow = 0, kTensorrt = 1, kOpenvino = 2 };
+enum class XrtKind : int { kNone = 0, kTensorRT = 1, kOpenVINO = 2 };
 
 class Graph final {
  public:
   explicit Graph(const std::string& model_path, const Device& device);
   explicit Graph(const std::string& model_path);
   std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
-  void set_batch_size(int batch_size) {
-    batch_size_ = batch_size;
-  }
-  void enable_openvino() {
-    xrt_kind_ = XrtKind::kOpenvino;
-  }
-  void enable_tensorrt() {
-    xrt_kind_ = XrtKind::kTensorrt;
-  }
+  void set_batch_size(int batch_size) { batch_size_ = batch_size; }
+  void enable_openvino() { xrt_kind_ = XrtKind::kOpenVINO; }
+  void enable_tensorrt() { xrt_kind_ = XrtKind::kTensorRT; }
 
   // not must, better if provided
   // void To(const Device& device);
@@ -68,7 +65,7 @@ class Graph final {
   std::shared_ptr<oneflow::NNGraph> graph_ = nullptr;
   bool is_compiled_ = false;
   int batch_size_ = 0;
-  XrtKind xrt_kind_ = XrtKind::kOneflow;
+  XrtKind xrt_kind_ = XrtKind::kNone;
   Device device_;
   oneflow::Job job_;
 

From 91ebd5f014bf53fceb6de9fb5fb48dbcaf855b21 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Sun, 12 Dec 2021 09:52:37 +0800
Subject: [PATCH 09/51] [draft] implement graph parameter load and save (#7010)

* implement parameter save (python) and load (c++)

Signed-off-by: daquexian <daquexian566@gmail.com>

* revert accidental changes

Signed-off-by: daquexian <daquexian566@gmail.com>

* fix circular reference

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 oneflow/api/cpp/framework/graph.cpp           |  35 ++++++++++++++++--
 oneflow/api/cpp/framework/graph.h             |   1 +
 oneflow/api/cpp/tests/graph_test.cpp          |  23 ++++++++++++
 .../api/cpp/tests/graph_test_model/model.pb   | Bin 0 -> 576 bytes
 .../tests/graph_test_model/model.weight/meta  |   5 +++
 .../tests/graph_test_model/model.weight/out   | Bin 0 -> 16 bytes
 .../core/framework/random_generator_impl.cpp  |  12 +-----
 oneflow/core/framework/tensor_util.cpp        |  21 +++++++++++
 oneflow/core/framework/tensor_util.h          |  14 +++++++
 oneflow/core/functional/impl/nn_functor.cpp   |  12 +-----
 oneflow/core/functional/tensor_index.cpp      |  12 +-----
 python/oneflow/framework/check_point_v2.py    |  33 ++++++++++++-----
 .../framework/register_class_method_util.py   |   2 +
 python/oneflow/framework/tensor.py            |   5 +--
 python/oneflow/ops/initializer_util.py        |  13 +++++++
 15 files changed, 137 insertions(+), 51 deletions(-)
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/model.pb
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/model.weight/meta
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/model.weight/out
 create mode 100644 oneflow/core/framework/tensor_util.cpp
 create mode 100644 oneflow/core/framework/tensor_util.h

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index cbe4b80bf31..4cef22a9520 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 */
 
+#include "oneflow/api/common/ofblob.h"
 #include "oneflow/api/common/scope.h"
 #include "oneflow/api/cpp/framework/device.h"
 #include "oneflow/api/cpp/framework/graph.h"
@@ -41,6 +42,7 @@ limitations under the License.
 #include "oneflow/core/framework/scope_util.h"
 #include "oneflow/core/framework/tensor.h"
 #include "oneflow/core/framework/tensor_tuple.h"
+#include "oneflow/core/framework/tensor_util.h"
 #include "oneflow/core/functional/functional_api.yaml.h"
 #include "oneflow/core/graph/op_graph.h"
 #include "oneflow/core/job/job.pb.h"
@@ -120,12 +122,14 @@ const std::pair<std::vector<T1>, std::vector<T2>> Unzip(const of::HashMap<T1, T2
 
 }  // namespace
 
-Graph::Graph(const std::string& model_path, const Device& device) : device_(device) {
+Graph::Graph(const std::string& model_path, const Device& device)
+    : model_path_(model_path), device_(device) {
   // TODO(zzk0): model_path is a directory, need to concatenate filename
   // we need a mlir model name.
   {
-    std::ifstream input(model_path);
-    job_.ParseFromIstream(&input);
+    std::ifstream input(model_path + "/model.pb");
+    CHECK(input.is_open());
+    CHECK(job_.ParseFromIstream(&input));
   }
   graph_ = std::make_shared<of::NNGraph>(job_.job_conf().job_name());
   of::Global<of::MultiClientSessionContext>::Get()->AddCGraph(graph_).GetOrThrow();
@@ -228,7 +232,30 @@ of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
   return of::Maybe<void>::Ok();
 }
 
-of::Maybe<void> Graph::LoadCheckpoint() { return of::Maybe<void>::Ok(); }
+of::Maybe<void> Graph::LoadCheckpoint() {
+  for (const auto& variable_op_name_and_tensor : variable_op_name_to_tensor_) {
+    const auto& variable_op_name = variable_op_name_and_tensor.first;
+    const auto& variable_tensor = variable_op_name_and_tensor.second;
+    const std::string variable_filename = model_path_ + "/" + variable_op_name + "/out";
+    const std::string buffer = [&variable_filename]() {
+      std::ifstream variable_file(variable_filename, std::ios::binary);
+      CHECK(variable_file.is_open());
+      std::stringstream ss;
+      ss << variable_file.rdbuf();
+      return ss.str();
+    }();
+    const auto& callback =
+        std::make_shared<std::function<void(uint64_t)>>([&](uint64_t of_blob_ptr) {
+          CHECK_JUST(of::BlobBufferCopyUtil<void>::From(
+              of_blob_ptr, buffer.data(),
+              variable_tensor->shape()->elem_cnt()
+                  * of::GetSizeOfDataType(variable_tensor->dtype()->data_type())));
+        });
+    JUST(of::one::SyncAccessTensorWithTimeOut(variable_tensor, callback, "mut"));
+  }
+
+  return of::Maybe<void>::Ok();
+}
 
 of::Maybe<void> Graph::RegisterTensors() {
   {
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 90573d2a3f1..6a588bfedd4 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -63,6 +63,7 @@ class Graph final {
   oneflow::Maybe<void> RegisterTensors();
 
   std::shared_ptr<oneflow::NNGraph> graph_ = nullptr;
+  const std::string model_path_;
   bool is_compiled_ = false;
   int batch_size_ = 0;
   XrtKind xrt_kind_ = XrtKind::kNone;
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 85154409f06..740e239902d 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -30,6 +30,29 @@ limitations under the License.
 
 namespace oneflow_api {
 
+TEST(Api, graph_test) {
+  EnvScope scope;
+
+  const std::string file_name = __FILE__;
+  const std::string directory = file_name.substr(0, file_name.rfind('/'));
+
+  Graph graph = Load(directory + "/graph_test_model");
+  std::vector<Tensor> inputs;
+  inputs.emplace_back(Shape{2, 2});
+  inputs[0].zeros_();
+
+  Tensor output = graph.Forward(inputs).at(0);
+  Shape shape = output.shape();
+  ASSERT_EQ(shape.At(0), 2);
+  ASSERT_EQ(shape.At(1), 2);
+  std::array<float, 4> buf{};
+  output.copy_to(buf.data());
+  ASSERT_EQ(buf[0], 1);
+  ASSERT_EQ(buf[1], 1);
+  ASSERT_EQ(buf[2], 1);
+  ASSERT_EQ(buf[3], 1);
+}
+
 namespace {
 
 inline Graph LoadGraph(const Device& device) {
diff --git a/oneflow/api/cpp/tests/graph_test_model/model.pb b/oneflow/api/cpp/tests/graph_test_model/model.pb
new file mode 100644
index 0000000000000000000000000000000000000000..540ead82c37e43e7044863fc48022943c595eb98
GIT binary patch
literal 576
zcmd<O&ctQQB^2*l>0XprkP&a7o0(ToS`u%d#GG7E>hS-5Qv(P%oZ=7{V$LrukrLoy
z;b7um5@3>GP-5X?5@G;q=gG}aNzKtKPt8ovD1mC+-we@siCq?=5v)ytNsB3oA&8xe
zRg?h;k{F%18X38G!TNO*Q&Qsd5H>>%*u|5;#R`@ZGT>s$%oEb$62o+^KG1Dk0?6(F
ziAc$CfgCL)$|a0q1xPO5KnrM^5lEgZm`fPRNT3L|Ah^V#gVh)u;UaX2kt>qR4O`fv
z1{j+6u{uSFpNqx8z{)@f6f06p9E?JxT$0#~20MmJ7}W-dpctA(Lag9W;F7{_BRJsD
Qt(D^C;)Ev^B?cu101>#R0ssI2

literal 0
HcmV?d00001

diff --git a/oneflow/api/cpp/tests/graph_test_model/model.weight/meta b/oneflow/api/cpp/tests/graph_test_model/model.weight/meta
new file mode 100644
index 00000000000..873a1836360
--- /dev/null
+++ b/oneflow/api/cpp/tests/graph_test_model/model.weight/meta
@@ -0,0 +1,5 @@
+shape {
+  dim: 2
+  dim: 2
+}
+data_type: kFloat
diff --git a/oneflow/api/cpp/tests/graph_test_model/model.weight/out b/oneflow/api/cpp/tests/graph_test_model/model.weight/out
new file mode 100644
index 0000000000000000000000000000000000000000..dcce8bfb97e5327dd298643776af46107f980856
GIT binary patch
literal 16
NcmZQzXs~BM!T=WZ0{s90

literal 0
HcmV?d00001

diff --git a/oneflow/core/framework/random_generator_impl.cpp b/oneflow/core/framework/random_generator_impl.cpp
index a99b93bf686..888cccaf5d6 100644
--- a/oneflow/core/framework/random_generator_impl.cpp
+++ b/oneflow/core/framework/random_generator_impl.cpp
@@ -18,6 +18,7 @@ limitations under the License.
 #include "oneflow/core/common/util.h"
 #include "oneflow/core/framework/device.h"
 #include "oneflow/core/framework/instructions_builder.h"
+#include "oneflow/core/framework/tensor_util.h"
 #include "oneflow/core/functional/functional.h"
 #include "oneflow/core/job/env_global_objects_scope.h"
 #include "oneflow/core/register/ofblob.h"
@@ -33,17 +34,6 @@ namespace one {
 
 namespace {
 
-Maybe<void> SyncAccessTensorWithTimeOut(
-    const std::shared_ptr<Tensor>& tensor,
-    const std::shared_ptr<std::function<void(uint64_t)>>& callback, const std::string& modifier) {
-  return SpinCounter::SpinWait(1, [&](const std::shared_ptr<SpinCounter>& sc) -> Maybe<void> {
-    return PhysicalRun([&](InstructionsBuilder* builder) -> Maybe<void> {
-      return builder->SyncAccessBlobByCallback(JUST(tensor->AsMirroredTensor()), sc, callback,
-                                               modifier);
-    });
-  });
-}
-
 Maybe<void> CPUSynchronize() {
   if (Global<EnvGlobalObjectsScope>::Get() != nullptr) { return vm::CurrentRankSync(); }
   return Maybe<void>::Ok();
diff --git a/oneflow/core/framework/tensor_util.cpp b/oneflow/core/framework/tensor_util.cpp
new file mode 100644
index 00000000000..2bff9134a16
--- /dev/null
+++ b/oneflow/core/framework/tensor_util.cpp
@@ -0,0 +1,21 @@
+#include "oneflow/core/framework/tensor_util.h"
+
+#include "oneflow/core/common/spin_counter.h"
+#include "oneflow/core/framework/instructions_builder.h"
+
+namespace oneflow {
+namespace one {
+
+Maybe<void> SyncAccessTensorWithTimeOut(
+    const std::shared_ptr<Tensor>& tensor,
+    const std::shared_ptr<std::function<void(uint64_t)>>& callback, const std::string& modifier) {
+  return SpinCounter::SpinWait(1, [&](const std::shared_ptr<SpinCounter>& sc) -> Maybe<void> {
+    return PhysicalRun([&](InstructionsBuilder* builder) -> Maybe<void> {
+      return builder->SyncAccessBlobByCallback(JUST(tensor->AsMirroredTensor()), sc, callback,
+                                               modifier);
+    });
+  });
+}
+
+}  // namespace one
+}  // namespace oneflow
diff --git a/oneflow/core/framework/tensor_util.h b/oneflow/core/framework/tensor_util.h
new file mode 100644
index 00000000000..92915843cbf
--- /dev/null
+++ b/oneflow/core/framework/tensor_util.h
@@ -0,0 +1,14 @@
+#include <string>
+
+#include "oneflow/core/common/maybe.h"
+
+namespace oneflow {
+namespace one {
+
+class Tensor;
+
+Maybe<void> SyncAccessTensorWithTimeOut(
+    const std::shared_ptr<Tensor>& tensor,
+    const std::shared_ptr<std::function<void(uint64_t)>>& callback, const std::string& modifier);
+}
+}
diff --git a/oneflow/core/functional/impl/nn_functor.cpp b/oneflow/core/functional/impl/nn_functor.cpp
index d1e33c4ed7f..acc97e4407a 100644
--- a/oneflow/core/functional/impl/nn_functor.cpp
+++ b/oneflow/core/functional/impl/nn_functor.cpp
@@ -22,6 +22,7 @@ limitations under the License.
 #include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
 #include "oneflow/core/framework/tensor.h"
 #include "oneflow/core/framework/tensor_tuple.h"
+#include "oneflow/core/framework/tensor_util.h"
 #include "oneflow/core/framework/op_interpreter.h"
 #include "oneflow/core/framework/random_generator.h"
 #include "oneflow/core/functional/functional.h"
@@ -1597,17 +1598,6 @@ class FoldFunctor {
   std::shared_ptr<OpExpr> fold_op_;
 };
 
-Maybe<void> SyncAccessTensorWithTimeOut(
-    const std::shared_ptr<Tensor>& tensor,
-    const std::shared_ptr<std::function<void(uint64_t)>>& callback, const std::string& modifier) {
-  return SpinCounter::SpinWait(1, [&](const std::shared_ptr<SpinCounter>& sc) -> Maybe<void> {
-    return PhysicalRun([&](InstructionsBuilder* builder) -> Maybe<void> {
-      return builder->SyncAccessBlobByCallback(JUST(tensor->AsMirroredTensor()), sc, callback,
-                                               modifier);
-    });
-  });
-}
-
 class OneHotFunctor {
  public:
   OneHotFunctor() {
diff --git a/oneflow/core/functional/tensor_index.cpp b/oneflow/core/functional/tensor_index.cpp
index be99333e61b..48dabbb7668 100644
--- a/oneflow/core/functional/tensor_index.cpp
+++ b/oneflow/core/functional/tensor_index.cpp
@@ -19,6 +19,7 @@ limitations under the License.
 #include "oneflow/core/framework/device.h"
 #include "oneflow/core/framework/instructions_builder.h"
 #include "oneflow/core/framework/tensor_tuple.h"
+#include "oneflow/core/framework/tensor_util.h"
 #include "oneflow/core/framework/nd_sbp.h"
 #include "oneflow/core/functional/functional.h"
 #include "oneflow/core/job/sbp_parallel.h"
@@ -30,17 +31,6 @@ namespace functional {
 
 namespace {
 
-Maybe<void> SyncAccessTensorWithTimeOut(
-    const std::shared_ptr<Tensor>& tensor,
-    const std::shared_ptr<std::function<void(uint64_t)>>& callback, const std::string& modifier) {
-  return SpinCounter::SpinWait(1, [&](const std::shared_ptr<SpinCounter>& sc) -> Maybe<void> {
-    return PhysicalRun([&](InstructionsBuilder* builder) -> Maybe<void> {
-      return builder->SyncAccessBlobByCallback(JUST(tensor->AsMirroredTensor()), sc, callback,
-                                               modifier);
-    });
-  });
-}
-
 int64_t CountSpecifiedDims(const TensorIndex& index) {
   int64_t specified_ndims = 0;
   for (int i = 0; i < index.size(); ++i) {
diff --git a/python/oneflow/framework/check_point_v2.py b/python/oneflow/framework/check_point_v2.py
index 211dfc1459c..d6197fe2332 100644
--- a/python/oneflow/framework/check_point_v2.py
+++ b/python/oneflow/framework/check_point_v2.py
@@ -29,6 +29,8 @@
 import oneflow.core.framework.variable_meta_info_pb2 as variable_meta_info_pb
 import oneflow.framework.dtype as dtype_util
 import oneflow.framework.id_util as id_util
+from oneflow.framework.tensor import Tensor
+import oneflow.nn.graph.graph as graph_util
 import pickle
 
 SNAPSHOT_DONE_FILENAME = "snapshot_done"
@@ -120,10 +122,6 @@ def _save_tensor_to_disk(tensor: "oneflow.Tensor", dir_name: Union[str, Path]) -
 ValueContainer = Union[FileBackendVariableBlob, np.ndarray, "oneflow.Tensor"]
 
 
-def _ElemCnt(shape):
-    return np.prod(shape).astype(int).item()
-
-
 def _LoadSingleVariable(
     path: Optional[str], consistent_src_rank: Optional[int] = None
 ) -> "flow.Tensor":
@@ -205,6 +203,11 @@ def tensor_setstate(self, pickle_dict):
         )
 
 
+def RegisterMethods():
+    Tensor.__setstate__ = tensor_setstate
+    Tensor.__getstate__ = tensor_getstate
+
+
 def legacy_load(
     path: Union[str, Path], consistent_src_rank: Optional[int] = None,
 ) -> Dict[str, "flow.Tensor"]:
@@ -302,6 +305,22 @@ def save(
             disk I/O.
     """
     path: Path = Path(path)
+
+    if isinstance(obj, graph_util.Graph):
+        graph: graph_util.Graph = obj
+        if not graph._is_compiled:
+            raise RuntimeError("graph must be compiled first.")
+
+        path.mkdir(exist_ok=True)
+
+        model_pb_path = path / 'model.pb'
+        model_pb_path.write_bytes(graph._graph_proto.SerializeToString())
+
+        for x in graph._state():
+            _save_tensor_to_disk(x.origin, path / f'{x.name_prefix}{x.name}')
+
+        return
+
     obj = {"protocol_version": PROTOCOL_VERSION, "data": obj}
     with tensor_pickling_context(path, consistent_dst_rank):
         pickled_bytes = pickle.dumps(obj)
@@ -312,11 +331,5 @@ def save(
         pickle_path.write_bytes(pickled_bytes)
 
 
-def generate_values_by_initializer(initializer, shape, dtype):
-    np_dtype = np.dtype(dtype_util.convert_oneflow_dtype_to_numpy_dtype(dtype))
-    length = _ElemCnt(shape)
-    return np.array(initializer(length)).astype(np_dtype).reshape(shape)
-
-
 save_load_path = None
 consistent_src_dsk_rank = None
diff --git a/python/oneflow/framework/register_class_method_util.py b/python/oneflow/framework/register_class_method_util.py
index fce74ed0ca5..3bc4f78d00e 100644
--- a/python/oneflow/framework/register_class_method_util.py
+++ b/python/oneflow/framework/register_class_method_util.py
@@ -14,6 +14,7 @@
 limitations under the License.
 """
 import oneflow._oneflow_internal
+import oneflow.framework.check_point_v2 as check_point_v2
 import oneflow.framework.generator as generator
 import oneflow.framework.op_expr_util as op_expr_util
 import oneflow.framework.tensor as tensor_util
@@ -21,4 +22,5 @@
 
 def RegisterMethod4Class():
     tensor_util.RegisterMethods()
+    check_point_v2.RegisterMethods()
     op_expr_util.RegisterMethod4UserOpExpr()
diff --git a/python/oneflow/framework/tensor.py b/python/oneflow/framework/tensor.py
index b519a0e84b3..fc3c460021c 100644
--- a/python/oneflow/framework/tensor.py
+++ b/python/oneflow/framework/tensor.py
@@ -15,7 +15,6 @@
 """
 import oneflow as flow
 from oneflow._oneflow_internal.exception import IndexException
-import oneflow.framework.check_point_v2 as check_point_v2
 import oneflow.framework.tensor_str as tensor_str_util
 import oneflow.ops.initializer_util as initializer_util
 import oneflow._oneflow_internal.lazy_mode as lazy_mode
@@ -686,7 +685,7 @@ def _init_by_initializer_conf(tensor, initializer_conf, random_seed=None):
     shape = tuple(tensor.shape)
     initializer = initializer_util.GetInitializer(initializer_conf, random_seed, shape)
 
-    np_arr = check_point_v2.generate_values_by_initializer(
+    np_arr = initializer_util.generate_values_by_initializer(
         initializer, shape, tensor.dtype
     )
     if tensor.is_consistent:
@@ -753,8 +752,6 @@ def RegisterMethods():
     Tensor.backward = _backward
     Tensor.__getitem__ = _getitem
     Tensor.__setitem__ = _setitem
-    Tensor.__setstate__ = check_point_v2.tensor_setstate
-    Tensor.__getstate__ = check_point_v2.tensor_getstate
     Tensor.__str__ = _str
     Tensor.__repr__ = _repr
     Tensor.__eq__ = _eq
diff --git a/python/oneflow/ops/initializer_util.py b/python/oneflow/ops/initializer_util.py
index 36aaf7f4afc..72889c0e31d 100644
--- a/python/oneflow/ops/initializer_util.py
+++ b/python/oneflow/ops/initializer_util.py
@@ -22,6 +22,7 @@
 import oneflow as flow
 import oneflow.core.job.initializer_conf_pb2 as initializer_conf_util
 import oneflow.core.operator.op_conf_pb2 as op_conf_util
+import oneflow.framework.dtype as dtype_util
 
 
 def constant_initializer(
@@ -1206,3 +1207,15 @@ def EmptyInitializerImpl(
     var_blob_shape: Sequence[int],
 ):
     return None
+
+
+def _elem_cnt(shape):
+    return np.prod(shape).astype(int).item()
+
+
+def generate_values_by_initializer(initializer, shape, dtype):
+    np_dtype = np.dtype(dtype_util.convert_oneflow_dtype_to_numpy_dtype(dtype))
+    length = _elem_cnt(shape)
+    return np.array(initializer(length)).astype(np_dtype).reshape(shape)
+
+

From 86004c6bf21a7ef37e4f66ce26084cabdc221d9b Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Sun, 12 Dec 2021 11:06:55 +0800
Subject: [PATCH 10/51] pimpl

---
 oneflow/api/cpp/framework/graph.cpp  | 75 ++++++++++++++++++++++------
 oneflow/api/cpp/framework/graph.h    | 49 ++++--------------
 oneflow/api/cpp/tests/graph_test.cpp |  2 +-
 3 files changed, 71 insertions(+), 55 deletions(-)

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 4cef22a9520..e270a398820 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -62,6 +62,8 @@ namespace oneflow_api {
 
 namespace of = oneflow;
 
+enum class XrtKind : int { kNone = 0, kTensorRT = 1, kOpenVINO = 2 };
+
 namespace {
 
 class CompileScope {
@@ -122,7 +124,56 @@ const std::pair<std::vector<T1>, std::vector<T2>> Unzip(const of::HashMap<T1, T2
 
 }  // namespace
 
+class Graph::GraphImpl final {
+ public:
+  explicit GraphImpl(const std::string& model_path, const Device& device);
+  explicit GraphImpl(const std::string& model_path);
+  std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
+  void set_batch_size(int batch_size) { batch_size_ = batch_size; }
+  void enable_openvino() { xrt_kind_ = XrtKind::kTensorRT; }
+  void enable_tensorrt() { xrt_kind_ = XrtKind::kOpenVINO; }
+
+ private:
+  oneflow::Maybe<void> Compile(const std::vector<Tensor>& inputs);
+  oneflow::Maybe<std::vector<Tensor>> Run(const std::vector<Tensor>& inputs) const;
+  oneflow::Maybe<void> AddOp(oneflow::OperatorConf op_conf);
+  oneflow::Maybe<void> BuildGraph(const std::vector<Tensor>& inputs);
+  oneflow::Maybe<void> LoadCheckpoint();
+  oneflow::Maybe<void> RegisterTensors();
+
+  std::shared_ptr<oneflow::NNGraph> graph_ = nullptr;
+  const std::string model_path_;
+  bool is_compiled_ = false;
+  int batch_size_ = 0;
+  XrtKind xrt_kind_ = XrtKind::kNone;
+  Device device_;
+  oneflow::Job job_;
+
+  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> input_name_to_tensor_;
+  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> output_name_to_tensor_;
+  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> variable_op_name_to_tensor_;
+  std::shared_ptr<oneflow::one::TensorTuple> output_tensor_tuple_;
+  std::shared_ptr<oneflow::one::TensorTuple> parameter_tensor_tuple_;
+};
+
+std::vector<Tensor> Graph::Forward(const std::vector<Tensor>& inputs) {
+  return graph_->Forward(inputs);
+}
+
 Graph::Graph(const std::string& model_path, const Device& device)
+    : graph_(std::make_shared<GraphImpl>(model_path, device)) {}
+
+Graph::Graph(const std::string& model_path) : graph_(std::make_shared<GraphImpl>(model_path)) {}
+
+Graph::Graph(const std::shared_ptr<GraphImpl>& graph) : graph_(graph) {}
+
+void Graph::set_batch_size(int batch_size) { graph_->set_batch_size(batch_size); }
+
+void Graph::enable_openvino() { graph_->enable_openvino(); }
+
+void Graph::enable_tensorrt() { graph_->enable_tensorrt(); }
+
+Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
     : model_path_(model_path), device_(device) {
   // TODO(zzk0): model_path is a directory, need to concatenate filename
   // we need a mlir model name.
@@ -135,9 +186,9 @@ Graph::Graph(const std::string& model_path, const Device& device)
   of::Global<of::MultiClientSessionContext>::Get()->AddCGraph(graph_).GetOrThrow();
 }
 
-Graph::Graph(const std::string& model_path) : Graph(model_path, Device("cpu")) {}
+Graph::GraphImpl::GraphImpl(const std::string& model_path) : GraphImpl(model_path, Device("cpu")) {}
 
-std::vector<Tensor> Graph::Forward(const std::vector<Tensor>& inputs) {
+std::vector<Tensor> Graph::GraphImpl::Forward(const std::vector<Tensor>& inputs) {
   if (!is_compiled_) {
     Compile(inputs).GetOrThrow();
     is_compiled_ = true;
@@ -145,7 +196,7 @@ std::vector<Tensor> Graph::Forward(const std::vector<Tensor>& inputs) {
   return Run(inputs).GetOrThrow();
 }
 
-of::Maybe<void> Graph::Compile(const std::vector<Tensor>& inputs) {
+of::Maybe<void> Graph::GraphImpl::Compile(const std::vector<Tensor>& inputs) {
   JUST(BuildGraph(inputs));
   JUST(LoadCheckpoint());
   JUST(RegisterTensors());
@@ -153,7 +204,7 @@ of::Maybe<void> Graph::Compile(const std::vector<Tensor>& inputs) {
   return of::Maybe<void>::Ok();
 }
 
-of::Maybe<std::vector<Tensor>> Graph::Run(const std::vector<Tensor>& inputs) const {
+of::Maybe<std::vector<Tensor>> Graph::GraphImpl::Run(const std::vector<Tensor>& inputs) const {
   const auto input_tensor_tuple = std::make_shared<of::one::TensorTuple>();
   for (const auto& tensor : inputs) { input_tensor_tuple->emplace_back(tensor.tensor_); }
 
@@ -166,7 +217,7 @@ of::Maybe<std::vector<Tensor>> Graph::Run(const std::vector<Tensor>& inputs) con
   return outputs;
 }
 
-of::Maybe<void> Graph::AddOp(of::OperatorConf op_conf) {
+of::Maybe<void> Graph::GraphImpl::AddOp(of::OperatorConf op_conf) {
   {
     const std::shared_ptr<of::Scope> scope = JUST(of::GetCurrentScope());
     op_conf.set_scope_symbol_id(scope->symbol_id().value_or(0));
@@ -183,7 +234,7 @@ of::Maybe<void> Graph::AddOp(of::OperatorConf op_conf) {
   return of::Maybe<void>::Ok();
 }
 
-of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
+of::Maybe<void> Graph::GraphImpl::BuildGraph(const std::vector<Tensor>& inputs) {
   CompileScope build_graph_scope(job_.job_conf(), *device_.device_->shared_from_symbol(),
                                  xrt_kind_);
   {
@@ -197,14 +248,12 @@ of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
         // TODO(zzk0): input tensor order
         input_name_to_tensor_[op_conf.name()] = inputs.at(input_tensor_order++).tensor_;
       } else if (op_conf.has_variable_conf()) {
-        // TODO(zzk0): load from local path, this branch maybe removed
         const of::LazyMode::Guard lazy_mode_disabled_guard{false};
         const of::VariableOpConf variable_conf = op_conf.variable_conf();
-        variable_op_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Rand(
+        variable_op_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Empty(
             of::Shape(variable_conf.shape()),
             JUST(of::DType::Get(static_cast<of::DataType>(variable_conf.data_type()))),
-            *device_.device_, nullptr, false));
-        PrintTensor(Tensor(variable_op_name_to_tensor_[op_conf.name()]));
+            *device_.device_));
       }
       return of::Maybe<void>::Ok();
     }));
@@ -223,8 +272,6 @@ of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
             of::Shape(blob_conf.shape()),
             JUST(of::DType::Get(static_cast<of::DataType>(blob_conf.data_type()))),
             *device_.device_));
-        std::cout << "Print output conf" << std::endl;
-        PrintTensor(Tensor(output_name_to_tensor_[op_conf.name()]));
       }
       return of::Maybe<void>::Ok();
     }));
@@ -232,7 +279,7 @@ of::Maybe<void> Graph::BuildGraph(const std::vector<Tensor>& inputs) {
   return of::Maybe<void>::Ok();
 }
 
-of::Maybe<void> Graph::LoadCheckpoint() {
+of::Maybe<void> Graph::GraphImpl::LoadCheckpoint() {
   for (const auto& variable_op_name_and_tensor : variable_op_name_to_tensor_) {
     const auto& variable_op_name = variable_op_name_and_tensor.first;
     const auto& variable_tensor = variable_op_name_and_tensor.second;
@@ -257,7 +304,7 @@ of::Maybe<void> Graph::LoadCheckpoint() {
   return of::Maybe<void>::Ok();
 }
 
-of::Maybe<void> Graph::RegisterTensors() {
+of::Maybe<void> Graph::GraphImpl::RegisterTensors() {
   {
     const auto pair = Unzip(input_name_to_tensor_);
     const std::vector<std::string>& input_op_names = pair.first;
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 6a588bfedd4..3e59a29ad4c 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -17,20 +17,9 @@ limitations under the License.
 #ifndef ONEFLOW_API_CPP_GRAPH_H_
 #define ONEFLOW_API_CPP_GRAPH_H_
 
-#include <functional>
-#include <memory>
-#include <string>
-#include <unordered_map>
-#include <vector>
 #include "oneflow/api/cpp/framework/device.h"
-#include "oneflow/api/cpp/framework/shape.h"
 #include "oneflow/api/cpp/framework/tensor.h"
-#include "oneflow/core/common/hash_container.h"
-#include "oneflow/core/framework/tensor.h"
-#include "oneflow/core/framework/tensor_tuple.h"
-#include "oneflow/core/job/job.pb.h"
-#include "oneflow/core/job/job_conf.cfg.h"
-#include "oneflow/core/operator/op_conf.pb.h"
+#include "iostream"
 
 namespace oneflow {
 
@@ -40,41 +29,21 @@ class NNGraph;
 
 namespace oneflow_api {
 
-enum class XrtKind : int { kNone = 0, kTensorRT = 1, kOpenVINO = 2 };
+class Graph {
+ private:
+  class GraphImpl;
 
-class Graph final {
  public:
   explicit Graph(const std::string& model_path, const Device& device);
   explicit Graph(const std::string& model_path);
+  explicit Graph(const std::shared_ptr<GraphImpl>& graph);
   std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
-  void set_batch_size(int batch_size) { batch_size_ = batch_size; }
-  void enable_openvino() { xrt_kind_ = XrtKind::kTensorRT; }
-  void enable_tensorrt() { xrt_kind_ = XrtKind::kOpenVINO; }
-
-  // not must, better if provided
-  // void To(const Device& device);
+  void set_batch_size(int batch_size);
+  void enable_openvino();
+  void enable_tensorrt();
 
  private:
-  oneflow::Maybe<void> Compile(const std::vector<Tensor>& inputs);
-  oneflow::Maybe<std::vector<Tensor>> Run(const std::vector<Tensor>& inputs) const;
-  oneflow::Maybe<void> AddOp(oneflow::OperatorConf op_conf);
-  oneflow::Maybe<void> BuildGraph(const std::vector<Tensor>& inputs);
-  oneflow::Maybe<void> LoadCheckpoint();
-  oneflow::Maybe<void> RegisterTensors();
-
-  std::shared_ptr<oneflow::NNGraph> graph_ = nullptr;
-  const std::string model_path_;
-  bool is_compiled_ = false;
-  int batch_size_ = 0;
-  XrtKind xrt_kind_ = XrtKind::kNone;
-  Device device_;
-  oneflow::Job job_;
-
-  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> input_name_to_tensor_;
-  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> output_name_to_tensor_;
-  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> variable_op_name_to_tensor_;
-  std::shared_ptr<oneflow::one::TensorTuple> output_tensor_tuple_;
-  std::shared_ptr<oneflow::one::TensorTuple> parameter_tensor_tuple_;
+  std::shared_ptr<GraphImpl> graph_;
 };
 
 Graph Load(const std::string& model_path, const Device& device);
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 740e239902d..9589189f3f2 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -56,7 +56,7 @@ TEST(Api, graph_test) {
 namespace {
 
 inline Graph LoadGraph(const Device& device) {
-  Graph graph = Load("/home/zhouzekai/models/job_linear/saved_model.pb", device);
+  Graph graph = Load("/home/zhouzekai/models/large_linear", device);
   return graph;
 }
 

From 63d5f72b66867a99f10afe7556a34b663f1e893a Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Tue, 14 Dec 2021 10:10:10 +0800
Subject: [PATCH 11/51] batching

---
 oneflow/api/cpp/framework.h          |  1 +
 oneflow/api/cpp/framework/graph.cpp  | 18 +++++++--
 oneflow/api/cpp/framework/graph.h    |  4 +-
 oneflow/api/cpp/framework/tensor.cpp |  4 +-
 oneflow/api/cpp/framework/tensor.h   |  2 +-
 oneflow/api/cpp/tests/graph_test.cpp | 59 +++++++++++++++++++++++++++-
 6 files changed, 78 insertions(+), 10 deletions(-)

diff --git a/oneflow/api/cpp/framework.h b/oneflow/api/cpp/framework.h
index efe1ac38b9a..5abd6be7953 100644
--- a/oneflow/api/cpp/framework.h
+++ b/oneflow/api/cpp/framework.h
@@ -21,5 +21,6 @@ limitations under the License.
 #include "framework/shape.h"
 #include "framework/dtype.h"
 #include "framework/tensor.h"
+#include "framework/graph.h"
 
 #endif  // !ONEFLOW_API_CPP_FRAMEWORK_H_
diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index e270a398820..6a48589299f 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -57,6 +57,7 @@ limitations under the License.
 #include "oneflow/core/job/session.h"
 #include "oneflow/core/operator/interface_blob_conf.pb.h"
 #include "oneflow/core/operator/op_conf.pb.h"
+#include "oneflow/core/register/logical_blob_id.pb.h"
 
 namespace oneflow_api {
 
@@ -181,6 +182,10 @@ Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
     std::ifstream input(model_path + "/model.pb");
     CHECK(input.is_open());
     CHECK(job_.ParseFromIstream(&input));
+    static int graph_index = 0;
+    job_.mutable_job_conf()->set_job_name(job_.mutable_job_conf()->job_name()
+                                          + std::to_string(graph_index));
+    graph_index += 1;
   }
   graph_ = std::make_shared<of::NNGraph>(job_.job_conf().job_name());
   of::Global<of::MultiClientSessionContext>::Get()->AddCGraph(graph_).GetOrThrow();
@@ -190,6 +195,8 @@ Graph::GraphImpl::GraphImpl(const std::string& model_path) : GraphImpl(model_pat
 
 std::vector<Tensor> Graph::GraphImpl::Forward(const std::vector<Tensor>& inputs) {
   if (!is_compiled_) {
+    static std::mutex mtx;
+    std::lock_guard<std::mutex> lock(mtx);
     Compile(inputs).GetOrThrow();
     is_compiled_ = true;
   }
@@ -226,8 +233,6 @@ of::Maybe<void> Graph::GraphImpl::AddOp(of::OperatorConf op_conf) {
   if (batch_size_ > 0 && op_conf.has_input_conf()) {
     op_conf.mutable_input_conf()->mutable_blob_conf()->mutable_shape()->mutable_dim()->Set(
         0, batch_size_);
-    std::cout << "Print input conf" << std::endl;
-    std::cout << op_conf.ShortDebugString() << std::endl;
   }
   auto* ctx = JUST(of::GetCurInferCtx());
   JUST(ctx->AddAndInferConsistentOp(op_conf));
@@ -259,7 +264,6 @@ of::Maybe<void> Graph::GraphImpl::BuildGraph(const std::vector<Tensor>& inputs)
     }));
   }
   JUST(of::CurJobBuildAndInferCtx_Complete());
-  JUST(of::CurJobBuildAndInferCtx_Rebuild());
   {
     const std::shared_ptr<of::Job> complete_job = JUST(of::GetCurrentJob());
     const of::OpGraph complete_graph(*complete_job);
@@ -267,7 +271,13 @@ of::Maybe<void> Graph::GraphImpl::BuildGraph(const std::vector<Tensor>& inputs)
       const of::LazyMode::Guard lazy_mode_disabled_guard{false};
       const of::OperatorConf& op_conf = node.op().op_conf();
       if (op_conf.has_output_conf()) {
-        const of::InterfaceBlobConf blob_conf = op_conf.output_conf().blob_conf();
+        of::InterfaceBlobConf blob_conf = op_conf.output_conf().blob_conf();
+        if (batch_size_ > 0) {
+          const std::string input_lbi_str = op_conf.output_conf().in();
+          const of::LogicalBlobId input_lbi = of::GenLogicalBlobId(input_lbi_str);
+          int64_t batch_size = node.LogicalBlobDesc4Lbi(input_lbi).shape().At(0);
+          blob_conf.mutable_shape()->set_dim(0, batch_size);
+        }
         output_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Empty(
             of::Shape(blob_conf.shape()),
             JUST(of::DType::Get(static_cast<of::DataType>(blob_conf.data_type()))),
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 3e59a29ad4c..aa475ff36f1 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -17,8 +17,8 @@ limitations under the License.
 #ifndef ONEFLOW_API_CPP_GRAPH_H_
 #define ONEFLOW_API_CPP_GRAPH_H_
 
-#include "oneflow/api/cpp/framework/device.h"
-#include "oneflow/api/cpp/framework/tensor.h"
+#include "device.h"
+#include "tensor.h"
 #include "iostream"
 
 namespace oneflow {
diff --git a/oneflow/api/cpp/framework/tensor.cpp b/oneflow/api/cpp/framework/tensor.cpp
index fe62b2dfc61..a6b41953c07 100644
--- a/oneflow/api/cpp/framework/tensor.cpp
+++ b/oneflow/api/cpp/framework/tensor.cpp
@@ -87,7 +87,7 @@ Tensor Tensor::from_buffer(const void* buffer, const Shape& shape, const Device&
 }
 
 template<typename T>
-void Tensor::copy_to(T* buffer) {
+void Tensor::copy_to(T* buffer) const {
   std::shared_ptr<of::one::MirroredTensor> local_tensor =
       tensor_->AsMirroredTensor().GetPtrOrThrow();
   const auto shape = this->shape();
@@ -117,7 +117,7 @@ void Tensor::copy_to(T* buffer) {
 const std::shared_ptr<oneflow::one::Tensor>& Tensor::__internal_tensor() const { return tensor_; }
 
 #define REGISTER_TENSOR_COPY_TO(cpp_dtype) \
-  template void Tensor::copy_to<cpp_dtype>(cpp_dtype * buffer);
+  template void Tensor::copy_to<cpp_dtype>(cpp_dtype * buffer) const;
 
 REGISTER_TENSOR_COPY_TO(float)
 REGISTER_TENSOR_COPY_TO(double)
diff --git a/oneflow/api/cpp/framework/tensor.h b/oneflow/api/cpp/framework/tensor.h
index aab38d8db7a..46e0c37fbaa 100644
--- a/oneflow/api/cpp/framework/tensor.h
+++ b/oneflow/api/cpp/framework/tensor.h
@@ -49,7 +49,7 @@ class Tensor final {
   [[nodiscard]] const std::shared_ptr<oneflow::one::Tensor>& __internal_tensor() const;
 
   template<typename T>
-  void copy_to(T* buffer);
+  void copy_to(T* buffer) const;
 
   [[nodiscard]] static Tensor from_buffer(const void* buffer, const Shape& shape,
                                           const Device& device, const DType& dtype);
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 9589189f3f2..5268b9fc4e3 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -15,7 +15,11 @@ limitations under the License.
 */
 
 #include <gtest/gtest.h>
+#include <array>
+#include <chrono>
 #include <cstdint>
+#include <functional>
+#include <thread>
 #include "oneflow/api/cpp/framework/device.h"
 #include "oneflow/api/cpp/framework/dtype.h"
 #include "oneflow/api/cpp/framework/graph.h"
@@ -53,6 +57,40 @@ TEST(Api, graph_test) {
   ASSERT_EQ(buf[3], 1);
 }
 
+TEST(Api, thread_test) {
+  EnvScope scope;
+  const Graph graphs[]{Load("/home/zhouzekai/models/resnet50"),
+                       Load("/home/zhouzekai/models/resnet50"),
+                       Load("/home/zhouzekai/models/resnet50")};
+  auto graph_forward = [](Graph& graph) {
+    std::vector<Tensor> inputs;
+    inputs.emplace_back(Shape{1, 3, 224, 224});
+    inputs[0].zeros_();
+    for (int i = 0; i < 100; i++) {
+      auto now = std::chrono::high_resolution_clock::now();
+      Tensor output = graph.Forward(inputs).at(0);
+      Shape shape = output.shape();
+      ASSERT_EQ(shape.At(0), 1);
+      ASSERT_EQ(shape.At(1), 1000);
+      std::array<float, 1000> data{};
+      output.copy_to(data.data());
+      std::cout << std::this_thread::get_id() << " " << i << " "
+                << std::chrono::duration_cast<std::chrono::milliseconds>(
+                       std::chrono::high_resolution_clock::now() - now)
+                       .count()
+                << std::endl;
+      float expected_data[]{-1.07454,  -0.319766, -0.497719, -1.15014,  -0.677915,
+                            -0.326854, -0.906118, 0.276201,  0.0704126, -0.519408};
+      for (int i = 0; i < 10; i++) { ASSERT_NEAR(data[i], expected_data[i], 0.00001); }
+    }
+  };
+
+  std::thread threads[]{std::thread(std::bind(graph_forward, graphs[0])),
+                        std::thread(std::bind(graph_forward, graphs[1])),
+                        std::thread(std::bind(graph_forward, graphs[2]))};
+  for (auto& thread : threads) { thread.join(); }
+}
+
 namespace {
 
 inline Graph LoadGraph(const Device& device) {
@@ -63,7 +101,7 @@ inline Graph LoadGraph(const Device& device) {
 inline void Forward(Graph& graph, const Device& device, int expected_batch_dim = 1) {
   std::vector<Tensor> inputs{Tensor(
       oneflow::one::functional::Rand(
-          oneflow::Shape({1, 5000}), oneflow::DType::Float(),
+          oneflow::Shape({expected_batch_dim, 5000}), oneflow::DType::Float(),
           oneflow::Device::New(device.type(), device.device_id()).GetOrThrow(), nullptr, false)
           .GetPtrOrThrow())};
   std::vector<Tensor> outputs = graph.Forward(inputs);
@@ -109,6 +147,7 @@ TEST(Api, graph_cpu_batching_test) {
   EnvScope scope;
   Device device("cpu");
   Graph graph = LoadGraph(device);
+  graph.set_batch_size(10);
   Forward(graph, device, 10);
 }
 
@@ -116,7 +155,25 @@ TEST(Api, graph_gpu_batching_test) {
   EnvScope scope;
   Device device("cuda", 0);
   Graph graph = LoadGraph(device);
+  graph.set_batch_size(10);
   Forward(graph, device, 10);
 }
 
+TEST(Api, tensor_copy_test) {
+  EnvScope scope;
+  std::array<float, 4> data;
+
+  Tensor tensor(Shape{2, 2}, Device("cuda", 0));
+  tensor.copy_to(data.data());
+  // Copy from each device's own tensor so that devices 1-3 are actually exercised.
+  Tensor tensor1(Shape{2, 2}, Device("cuda", 1));
+  tensor1.copy_to(data.data());
+
+  Tensor tensor2(Shape{2, 2}, Device("cuda", 2));
+  tensor2.copy_to(data.data());
+
+  Tensor tensor3(Shape{2, 2}, Device("cuda", 3));
+  tensor3.copy_to(data.data());
+}
+
 }  // namespace oneflow_api

From c6d27d51ef1635566bd6bd6f2c2d1378566bbb08 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Tue, 14 Dec 2021 10:49:03 +0800
Subject: [PATCH 12/51] share lib directory in test container

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/test.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e228da784bb..5ea01f91a2c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -498,6 +498,7 @@ jobs:
         working-directory: ${{ env.ONEFLOW_SRC }}
         env:
           ONEFLOW_BIN_PATH: ${{ steps.download-digest.outputs.entry-dir }}/bin
+          ONEFLOW_CPP_API_LIB_PATH: ${{ steps.download-digest.outputs.entry-dir }}/liboneflow_cpp/lib
         run: |
           docker run -d --rm --privileged --shm-size=8g \
             --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
@@ -505,6 +506,7 @@ jobs:
             -v /dataset:/dataset:ro -v /model_zoo:/model_zoo:ro \
             -v ${ONEFLOW_WHEEL_PATH}:${ONEFLOW_WHEEL_PATH}:ro \
             -v ${ONEFLOW_BIN_PATH}:${ONEFLOW_BIN_PATH}:ro \
+            -v ${ONEFLOW_CPP_API_LIB_PATH}:${ONEFLOW_CPP_API_LIB_PATH}:ro \
             -v $HOME/test-container-cache/dot-local:/root/.local \
             -v $HOME/test-container-cache/dot-cache:/root/.cache \
             -e ONEFLOW_WHEEL_PATH=${ONEFLOW_WHEEL_PATH} \

From 89e89526d0614582632b1834826e985b3d453d79 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Tue, 14 Dec 2021 14:56:49 +0800
Subject: [PATCH 13/51] fix swapped XrtKind values in enable_openvino/enable_tensorrt; add resnet test

---
 oneflow/api/cpp/framework/graph.cpp  |  6 ++++--
 oneflow/api/cpp/tests/graph_test.cpp | 21 ++++++++++++++++++++-
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 6a48589299f..e1886d3692f 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -131,8 +131,8 @@ class Graph::GraphImpl final {
   explicit GraphImpl(const std::string& model_path);
   std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
   void set_batch_size(int batch_size) { batch_size_ = batch_size; }
-  void enable_openvino() { xrt_kind_ = XrtKind::kTensorRT; }
-  void enable_tensorrt() { xrt_kind_ = XrtKind::kOpenVINO; }
+  void enable_openvino() { xrt_kind_ = XrtKind::kOpenVINO; }
+  void enable_tensorrt() { xrt_kind_ = XrtKind::kTensorRT; }
 
  private:
   oneflow::Maybe<void> Compile(const std::vector<Tensor>& inputs);
@@ -182,6 +182,8 @@ Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
     std::ifstream input(model_path + "/model.pb");
     CHECK(input.is_open());
     CHECK(job_.ParseFromIstream(&input));
+
+    // prevent model name conflict when launch multiple model instances
     static int graph_index = 0;
     job_.mutable_job_conf()->set_job_name(job_.mutable_job_conf()->job_name()
                                           + std::to_string(graph_index));
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 5268b9fc4e3..897d3504243 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -57,6 +57,25 @@ TEST(Api, graph_test) {
   ASSERT_EQ(buf[3], 1);
 }
 
+TEST(Api, resnet_test) {
+  EnvScope scope;
+
+  Graph graph = Load("/home/zhouzekai/models/resnet50");
+  std::vector<Tensor> inputs;
+  inputs.emplace_back(Shape{1, 3, 224, 224});
+  inputs[0].zeros_();
+
+  Tensor output = graph.Forward(inputs).at(0);
+  Shape shape = output.shape();
+  ASSERT_EQ(shape.At(0), 1);
+  ASSERT_EQ(shape.At(1), 1000);
+  std::array<float, 1000> data{};
+  output.copy_to(data.data());
+  float expected_data[]{-1.07454,  -0.319766, -0.497719, -1.15014,  -0.677915,
+                            -0.326854, -0.906118, 0.276201,  0.0704126, -0.519408};
+  for (int i = 0; i < 10; i++) { ASSERT_NEAR(data[i], expected_data[i], 0.00001); }
+}
+
 TEST(Api, thread_test) {
   EnvScope scope;
   const Graph graphs[]{Load("/home/zhouzekai/models/resnet50"),
@@ -161,7 +180,7 @@ TEST(Api, graph_gpu_batching_test) {
 
 TEST(Api, tensor_copy_test) {
   EnvScope scope;
-  std::array<float, 4> data;
+  std::array<float, 4> data{};
 
   Tensor tensor(Shape{2, 2}, Device("cuda", 0));
   tensor.copy_to(data.data());

From 7d9aef684a479285c690f38d25525c9b97865e45 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Wed, 15 Dec 2021 10:33:10 +0800
Subject: [PATCH 14/51] add github actions debug

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/simple.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/simple.yml b/.github/workflows/simple.yml
index 43633c4877e..43ca463dea4 100644
--- a/.github/workflows/simple.yml
+++ b/.github/workflows/simple.yml
@@ -256,6 +256,9 @@ jobs:
           oneflow-build-env: conda
           conda-env-file: conda-env/dev/gcc7/environment-v2.yml
           conda-env-name: oneflow-dev-gcc7-v2
+      - name: Setup upterm session
+        uses: lhotari/action-upterm@v1
+        if: always() && matrix.build-type == 'gcc7'
       - uses: Oneflow-Inc/get-oneflow@support-clang-12
         name: Build with clang10
         if: ${{ matrix.build-type == 'clang10'}}

From 5c118a8b495843dbbd327ce994a804396f1a3955 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Wed, 15 Dec 2021 15:06:51 +0800
Subject: [PATCH 15/51] Revert "add github actions debug"

This reverts commit 7d9aef684a479285c690f38d25525c9b97865e45.
---
 .github/workflows/simple.yml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/.github/workflows/simple.yml b/.github/workflows/simple.yml
index 43ca463dea4..43633c4877e 100644
--- a/.github/workflows/simple.yml
+++ b/.github/workflows/simple.yml
@@ -256,9 +256,6 @@ jobs:
           oneflow-build-env: conda
           conda-env-file: conda-env/dev/gcc7/environment-v2.yml
           conda-env-name: oneflow-dev-gcc7-v2
-      - name: Setup upterm session
-        uses: lhotari/action-upterm@v1
-        if: always() && matrix.build-type == 'gcc7'
       - uses: Oneflow-Inc/get-oneflow@support-clang-12
         name: Build with clang10
         if: ${{ matrix.build-type == 'clang10'}}

From a0227da1737778817a32099270635280c9a1da0c Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Wed, 15 Dec 2021 15:14:07 +0800
Subject: [PATCH 16/51] add upterm debug after exe test

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/test.yml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5ea01f91a2c..8e5421a5121 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -542,6 +542,9 @@ jobs:
           docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe
           chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
           docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
+      - name: Setup upterm session
+        if: ${{ always() && !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }}
+        uses: lhotari/action-upterm@v1
       - name: Build documentation
         timeout-minutes: 10
         if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' && matrix.device == 'cpu' }}

From 375fe9748124f0f857b5b8c0d34749b5cb5448a1 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Wed, 15 Dec 2021 16:06:31 +0800
Subject: [PATCH 17/51] sleep after fail

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/test.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 8e5421a5121..eb9db901963 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -542,9 +542,11 @@ jobs:
           docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe
           chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
           docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
-      - name: Setup upterm session
+      - name: Sleep
         if: ${{ always() && !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }}
-        uses: lhotari/action-upterm@v1
+        run: |
+          hostname
+          sleep 99999
       - name: Build documentation
         timeout-minutes: 10
         if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' && matrix.device == 'cpu' }}

From 31629d747adae3e4c1118762cada3f1b5de142f8 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Wed, 15 Dec 2021 17:16:23 +0800
Subject: [PATCH 18/51] set LD_LIBRARY_PATH in yml for cpp api test exe

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/test.yml | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index eb9db901963..3ac6b147f72 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -541,12 +541,7 @@ jobs:
           chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe
           docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe
           chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
-          docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
-      - name: Sleep
-        if: ${{ always() && !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }}
-        run: |
-          hostname
-          sleep 99999
+          docker exec -e LD_LIBRARY_PATH=${{ steps.download-digest.outputs.entry-dir }}/liboneflow_cpp/lib ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
       - name: Build documentation
         timeout-minutes: 10
         if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' && matrix.device == 'cpu' }}

From f8e05daed6b11a957e8db8538d9275594524d39d Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Wed, 15 Dec 2021 10:50:36 +0000
Subject: [PATCH 19/51] refine: close session before env teardown, use unique id in job name

---
 oneflow/api/cpp/env.cpp             |  5 ++---
 oneflow/api/cpp/framework/graph.cpp | 20 +++++++++-----------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/oneflow/api/cpp/env.cpp b/oneflow/api/cpp/env.cpp
index 15b4a65891f..a2bb622823a 100644
--- a/oneflow/api/cpp/env.cpp
+++ b/oneflow/api/cpp/env.cpp
@@ -138,6 +138,8 @@ void release() {
   if (IsEnvInited()) {
     // sync multi_client
     of::vm::ClusterSync().GetOrThrow();
+    of::Global<of::MultiClientSessionContext>::Get()->TryClose().GetOrThrow();
+    of::Global<of::MultiClientSessionContext>::Delete();
     // destory env
     if (of::IsMultiClient().GetOrThrow()) {
       OF_ENV_BARRIER();
@@ -145,9 +147,6 @@ void release() {
       of::ClusterInstruction::MasterSendHalt();
     }
     of::Global<of::EnvGlobalObjectsScope>::Delete();
-    // TODO(zzk0): segmentation fault
-    // of::Global<of::MultiClientSessionContext>::Get()->TryClose().GetOrThrow();
-    of::Global<of::MultiClientSessionContext>::Delete();
   }
   // TODO close session
   of::SetShuttingDown();
diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index e1886d3692f..4b0b3208471 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -35,6 +35,7 @@ limitations under the License.
 #include "oneflow/core/common/just.h"
 #include "oneflow/core/common/shape.h"
 #include "oneflow/core/common/symbol.h"
+#include "oneflow/core/common/util.h"
 #include "oneflow/core/framework/device.h"
 #include "oneflow/core/framework/dtype.h"
 #include "oneflow/core/framework/multi_client_session_context.h"
@@ -182,13 +183,10 @@ Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
     std::ifstream input(model_path + "/model.pb");
     CHECK(input.is_open());
     CHECK(job_.ParseFromIstream(&input));
-
-    // prevent model name conflict when launch multiple model instances
-    static int graph_index = 0;
-    job_.mutable_job_conf()->set_job_name(job_.mutable_job_conf()->job_name()
-                                          + std::to_string(graph_index));
-    graph_index += 1;
   }
+  // prevent model name conflict when launch multiple model instances
+  job_.mutable_job_conf()->set_job_name(job_.mutable_job_conf()->job_name()
+                                        + of::NewUniqueId());
   graph_ = std::make_shared<of::NNGraph>(job_.job_conf().job_name());
   of::Global<of::MultiClientSessionContext>::Get()->AddCGraph(graph_).GetOrThrow();
 }
@@ -256,7 +254,7 @@ of::Maybe<void> Graph::GraphImpl::BuildGraph(const std::vector<Tensor>& inputs)
         input_name_to_tensor_[op_conf.name()] = inputs.at(input_tensor_order++).tensor_;
       } else if (op_conf.has_variable_conf()) {
         const of::LazyMode::Guard lazy_mode_disabled_guard{false};
-        const of::VariableOpConf variable_conf = op_conf.variable_conf();
+        const of::VariableOpConf& variable_conf = op_conf.variable_conf();
         variable_op_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Empty(
             of::Shape(variable_conf.shape()),
             JUST(of::DType::Get(static_cast<of::DataType>(variable_conf.data_type()))),
@@ -296,7 +294,7 @@ of::Maybe<void> Graph::GraphImpl::LoadCheckpoint() {
     const auto& variable_op_name = variable_op_name_and_tensor.first;
     const auto& variable_tensor = variable_op_name_and_tensor.second;
     const std::string variable_filename = model_path_ + "/" + variable_op_name + "/out";
-    const std::string buffer = [&variable_filename]() {
+    const std::string buffer = [&]() {
       std::ifstream variable_file(variable_filename, std::ios::binary);
       CHECK(variable_file.is_open());
       std::stringstream ss;
@@ -318,20 +316,20 @@ of::Maybe<void> Graph::GraphImpl::LoadCheckpoint() {
 
 of::Maybe<void> Graph::GraphImpl::RegisterTensors() {
   {
-    const auto pair = Unzip(input_name_to_tensor_);
+    const auto& pair = Unzip(input_name_to_tensor_);
     const std::vector<std::string>& input_op_names = pair.first;
     const std::vector<std::shared_ptr<of::one::Tensor>>& input_tensors = pair.second;
     JUST(graph_->RegisterInputOpNamesAndTensors(input_op_names, input_tensors));
   }
   {
-    const auto pair = Unzip(output_name_to_tensor_);
+    const auto& pair = Unzip(output_name_to_tensor_);
     const std::vector<std::string>& output_op_names = pair.first;
     const std::vector<std::shared_ptr<of::one::Tensor>>& output_tensors = pair.second;
     JUST(graph_->RegisterOutputOpNamesAndTensors(output_op_names, output_tensors));
     output_tensor_tuple_ = ConvertToTensorTuple(output_tensors);
   }
   {
-    const auto pair = Unzip(variable_op_name_to_tensor_);
+    const auto& pair = Unzip(variable_op_name_to_tensor_);
     const std::vector<std::string>& variable_op_names = pair.first;
     const std::vector<std::shared_ptr<of::one::Tensor>>& variable_tensors = pair.second;
     JUST(graph_->RegisterVariableOpNamesAndTensors(variable_op_names, variable_tensors));

From 187e47f08c4513676582dac82cfd7883295447f8 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Wed, 15 Dec 2021 14:34:54 +0000
Subject: [PATCH 20/51] add test model files and register graph inputs in order

---
 oneflow/api/cpp/framework/graph.cpp           |  25 +--
 oneflow/api/cpp/framework/graph.h             |  10 +-
 oneflow/api/cpp/tests/graph_test.cpp          | 165 +++++++-----------
 .../affine_no_parameter/model.pb              | Bin 0 -> 997 bytes
 .../model.a}/meta                             |   4 +-
 .../affine_with_parameter/model.a/out         | Bin 0 -> 48 bytes
 .../affine_with_parameter/model.b/meta        |   4 +
 .../model.b}/out                              | Bin
 .../affine_with_parameter/model.pb            | Bin 0 -> 902 bytes
 .../api/cpp/tests/graph_test_model/model.pb   | Bin 576 -> 0 bytes
 10 files changed, 88 insertions(+), 120 deletions(-)
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.pb
 rename oneflow/api/cpp/tests/graph_test_model/{model.weight => affine_with_parameter/model.a}/meta (60%)
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/out
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/meta
 rename oneflow/api/cpp/tests/graph_test_model/{model.weight => affine_with_parameter/model.b}/out (100%)
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.pb
 delete mode 100644 oneflow/api/cpp/tests/graph_test_model/model.pb

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 4b0b3208471..1d060dbe89b 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -141,7 +141,7 @@ class Graph::GraphImpl final {
   oneflow::Maybe<void> AddOp(oneflow::OperatorConf op_conf);
   oneflow::Maybe<void> BuildGraph(const std::vector<Tensor>& inputs);
   oneflow::Maybe<void> LoadCheckpoint();
-  oneflow::Maybe<void> RegisterTensors();
+  oneflow::Maybe<void> RegisterTensors(const std::vector<Tensor>& inputs);
 
   std::shared_ptr<oneflow::NNGraph> graph_ = nullptr;
   const std::string model_path_;
@@ -151,7 +151,7 @@ class Graph::GraphImpl final {
   Device device_;
   oneflow::Job job_;
 
-  oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> input_name_to_tensor_;
+  oneflow::HashMap<std::string, int> input_name_to_order_;
   oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> output_name_to_tensor_;
   oneflow::HashMap<std::string, std::shared_ptr<oneflow::one::Tensor>> variable_op_name_to_tensor_;
   std::shared_ptr<oneflow::one::TensorTuple> output_tensor_tuple_;
@@ -185,8 +185,7 @@ Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
     CHECK(job_.ParseFromIstream(&input));
   }
   // prevent model name conflict when launch multiple model instances
-  job_.mutable_job_conf()->set_job_name(job_.mutable_job_conf()->job_name()
-                                        + of::NewUniqueId());
+  job_.mutable_job_conf()->set_job_name(job_.mutable_job_conf()->job_name() + of::NewUniqueId());
   graph_ = std::make_shared<of::NNGraph>(job_.job_conf().job_name());
   of::Global<of::MultiClientSessionContext>::Get()->AddCGraph(graph_).GetOrThrow();
 }
@@ -206,7 +205,7 @@ std::vector<Tensor> Graph::GraphImpl::Forward(const std::vector<Tensor>& inputs)
 of::Maybe<void> Graph::GraphImpl::Compile(const std::vector<Tensor>& inputs) {
   JUST(BuildGraph(inputs));
   JUST(LoadCheckpoint());
-  JUST(RegisterTensors());
+  JUST(RegisterTensors(inputs));
   JUST(graph_->CompileAndInitRuntime());
   return of::Maybe<void>::Ok();
 }
@@ -243,15 +242,14 @@ of::Maybe<void> Graph::GraphImpl::BuildGraph(const std::vector<Tensor>& inputs)
   CompileScope build_graph_scope(job_.job_conf(), *device_.device_->shared_from_symbol(),
                                  xrt_kind_);
   {
-    // TODO(zzk0): remove this; used for input tensor order
     int input_tensor_order = 0;
     const of::OpGraph op_graph(job_);
     JUST(op_graph.ForEachOpNode([&](const of::OpNode& node) -> of::Maybe<void> {
       const of::OperatorConf& op_conf = node.op().op_conf();
       JUST(AddOp(op_conf));
       if (op_conf.has_input_conf()) {
-        // TODO(zzk0): input tensor order
-        input_name_to_tensor_[op_conf.name()] = inputs.at(input_tensor_order++).tensor_;
+        input_name_to_order_[op_conf.name()] = input_tensor_order;
+        input_tensor_order += 1;
       } else if (op_conf.has_variable_conf()) {
         const of::LazyMode::Guard lazy_mode_disabled_guard{false};
         const of::VariableOpConf& variable_conf = op_conf.variable_conf();
@@ -314,11 +312,14 @@ of::Maybe<void> Graph::GraphImpl::LoadCheckpoint() {
   return of::Maybe<void>::Ok();
 }
 
-of::Maybe<void> Graph::GraphImpl::RegisterTensors() {
+of::Maybe<void> Graph::GraphImpl::RegisterTensors(const std::vector<Tensor>& inputs) {
   {
-    const auto& pair = Unzip(input_name_to_tensor_);
-    const std::vector<std::string>& input_op_names = pair.first;
-    const std::vector<std::shared_ptr<of::one::Tensor>>& input_tensors = pair.second;
+    std::vector<std::string> input_op_names(inputs.size());
+    std::vector<std::shared_ptr<of::one::Tensor>> input_tensors(inputs.size());
+    for (const auto& name_order : input_name_to_order_) {
+      input_op_names[name_order.second] = name_order.first;
+      input_tensors[name_order.second] = inputs.at(name_order.second).tensor_;
+    }
     JUST(graph_->RegisterInputOpNamesAndTensors(input_op_names, input_tensors));
   }
   {
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index aa475ff36f1..8923a96e487 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -56,11 +56,11 @@ inline void PrintTensor(const Tensor& tensor) {
             << tensor.device().device_id() << " ";
   for (int i = 0; i < tensor.shape().NumAxes(); ++i) { std::cout << tensor.shape().At(i) << " "; }
   std::cout << std::endl;
-  // float* data = new float[tensor.shape().elem_cnt() * 4];
-  // tensor.copy_to(data);
-  // for (int i = 0; i < tensor.shape().elem_cnt(); ++i) { std::cout << data[i] << " "; }
-  // std::cout << std::endl;
-  // delete[] data;
+  float* data = new float[tensor.shape().elem_cnt() * 4];
+  tensor.copy_to(data);
+  for (int i = 0; i < tensor.shape().elem_cnt(); ++i) { std::cout << data[i] << " "; }
+  std::cout << std::endl;
+  delete[] data;
 }
 
 }  // namespace oneflow_api
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 897d3504243..5f5a4721c19 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -15,119 +15,45 @@ limitations under the License.
 */
 
 #include <gtest/gtest.h>
+#include <algorithm>
 #include <array>
 #include <chrono>
 #include <cstdint>
 #include <functional>
+#include <iostream>
 #include <thread>
+#include <vector>
 #include "oneflow/api/cpp/framework/device.h"
 #include "oneflow/api/cpp/framework/dtype.h"
 #include "oneflow/api/cpp/framework/graph.h"
 #include "oneflow/api/cpp/framework/shape.h"
 #include "oneflow/api/cpp/framework/tensor.h"
-#include "oneflow/api/cpp/nn/functional/activation.h"
 #include "oneflow/api/cpp/tests/api_test.h"
-#include "oneflow/core/common/shape.h"
-#include "oneflow/core/framework/device.h"
-#include "oneflow/core/framework/dtype.h"
-#include "oneflow/core/functional/functional_api.yaml.h"
 
 namespace oneflow_api {
 
-TEST(Api, graph_test) {
-  EnvScope scope;
+namespace {
 
+inline Graph LoadGraph(const Device& device) {
   const std::string file_name = __FILE__;
   const std::string directory = file_name.substr(0, file_name.rfind('/'));
-
-  Graph graph = Load(directory + "/graph_test_model");
-  std::vector<Tensor> inputs;
-  inputs.emplace_back(Shape{2, 2});
-  inputs[0].zeros_();
-
-  Tensor output = graph.Forward(inputs).at(0);
-  Shape shape = output.shape();
-  ASSERT_EQ(shape.At(0), 2);
-  ASSERT_EQ(shape.At(1), 2);
-  std::array<float, 4> buf{};
-  output.copy_to(buf.data());
-  ASSERT_EQ(buf[0], 1);
-  ASSERT_EQ(buf[1], 1);
-  ASSERT_EQ(buf[2], 1);
-  ASSERT_EQ(buf[3], 1);
+  Graph graph = Load(directory + "/graph_test_model/affine_with_parameter");
+  return graph;
 }
 
-TEST(Api, resnet_test) {
-  EnvScope scope;
-
-  Graph graph = Load("/home/zhouzekai/models/resnet50");
+inline void Forward(Graph& graph, const Device& device, int expected_batch_dim = 1) {
+  std::vector<float> data(expected_batch_dim * 3);
+  std::fill(data.begin(), data.end(), 1);
   std::vector<Tensor> inputs;
-  inputs.emplace_back(Shape{1, 3, 224, 224});
-  inputs[0].zeros_();
-
+  inputs.emplace_back(
+      Tensor::from_buffer(data.data(), Shape({expected_batch_dim, 3}), device, DType::kFloat));
   Tensor output = graph.Forward(inputs).at(0);
   Shape shape = output.shape();
-  ASSERT_EQ(shape.At(0), 1);
-  ASSERT_EQ(shape.At(1), 1000);
-  std::array<float, 1000> data{};
-  output.copy_to(data.data());
-  float expected_data[]{-1.07454,  -0.319766, -0.497719, -1.15014,  -0.677915,
-                            -0.326854, -0.906118, 0.276201,  0.0704126, -0.519408};
-  for (int i = 0; i < 10; i++) { ASSERT_NEAR(data[i], expected_data[i], 0.00001); }
-}
-
-TEST(Api, thread_test) {
-  EnvScope scope;
-  const Graph graphs[]{Load("/home/zhouzekai/models/resnet50"),
-                       Load("/home/zhouzekai/models/resnet50"),
-                       Load("/home/zhouzekai/models/resnet50")};
-  auto graph_forward = [](Graph& graph) {
-    std::vector<Tensor> inputs;
-    inputs.emplace_back(Shape{1, 3, 224, 224});
-    inputs[0].zeros_();
-    for (int i = 0; i < 100; i++) {
-      auto now = std::chrono::high_resolution_clock::now();
-      Tensor output = graph.Forward(inputs).at(0);
-      Shape shape = output.shape();
-      ASSERT_EQ(shape.At(0), 1);
-      ASSERT_EQ(shape.At(1), 1000);
-      std::array<float, 1000> data{};
-      output.copy_to(data.data());
-      std::cout << std::this_thread::get_id() << " " << i << " "
-                << std::chrono::duration_cast<std::chrono::milliseconds>(
-                       std::chrono::high_resolution_clock::now() - now)
-                       .count()
-                << std::endl;
-      float expected_data[]{-1.07454,  -0.319766, -0.497719, -1.15014,  -0.677915,
-                            -0.326854, -0.906118, 0.276201,  0.0704126, -0.519408};
-      for (int i = 0; i < 10; i++) { ASSERT_NEAR(data[i], expected_data[i], 0.00001); }
-    }
-  };
-
-  std::thread threads[]{std::thread(std::bind(graph_forward, graphs[0])),
-                        std::thread(std::bind(graph_forward, graphs[1])),
-                        std::thread(std::bind(graph_forward, graphs[2]))};
-  for (auto& thread : threads) { thread.join(); }
-}
-
-namespace {
-
-inline Graph LoadGraph(const Device& device) {
-  Graph graph = Load("/home/zhouzekai/models/large_linear", device);
-  return graph;
-}
-
-inline void Forward(Graph& graph, const Device& device, int expected_batch_dim = 1) {
-  std::vector<Tensor> inputs{Tensor(
-      oneflow::one::functional::Rand(
-          oneflow::Shape({expected_batch_dim, 5000}), oneflow::DType::Float(),
-          oneflow::Device::New(device.type(), device.device_id()).GetOrThrow(), nullptr, false)
-          .GetPtrOrThrow())};
-  std::vector<Tensor> outputs = graph.Forward(inputs);
-  Shape shape = outputs.at(0).shape();
-  ASSERT_EQ(outputs.size(), 1);
   ASSERT_EQ(shape.At(0), expected_batch_dim);
-  ASSERT_EQ(shape.At(1), 100000);
+  ASSERT_EQ(shape.At(1), 4);
+  std::vector<float> buf(expected_batch_dim * 4);
+  output.copy_to(buf.data());
+  for (const float& element : buf) { ASSERT_EQ(element, 4); }
 }
 
 }  // namespace
@@ -136,7 +62,7 @@ TEST(Api, graph_cpu_test) {
   EnvScope scope;
   Device device("cpu");
   Graph graph = LoadGraph(device);
-  Forward(graph, device);
+  Forward(graph, device, 1);
 }
 
 TEST(Api, graph_gpu_test) {
@@ -146,6 +72,17 @@ TEST(Api, graph_gpu_test) {
   Forward(graph, device);
 }
 
+TEST(Api, graph_multi_gpu_test) {
+  EnvScope scope;
+  Device device("cuda", 0);
+  Graph graph = LoadGraph(device);
+  Forward(graph, device);
+
+  Device device1("cuda", 1);
+  Graph graph1 = LoadGraph(device1);
+  Forward(graph1, device1);
+}
+
 TEST(Api, graph_openvino_test) {
   EnvScope scope;
   Device device("cpu");
@@ -178,21 +115,47 @@ TEST(Api, graph_gpu_batching_test) {
   Forward(graph, device, 10);
 }
 
-TEST(Api, tensor_copy_test) {
+TEST(Api, graph_thread_test) {
   EnvScope scope;
-  std::array<float, 4> data{};
 
-  Tensor tensor(Shape{2, 2}, Device("cuda", 0));
-  tensor.copy_to(data.data());
+  Device device("cpu");
+  std::vector<Graph> graphs;
+  for (int i = 0; i < 10; i++) { graphs.emplace_back(LoadGraph(device)); }
+
+  std::vector<std::thread> threads;
+  for (Graph& graph : graphs) {
+    threads.emplace_back(std::thread(std::bind(Forward, graph, device, 1)));
+  }
+  for (auto& thread : threads) { thread.join(); }
+}
 
-  Tensor tensor1(Shape{2, 2}, Device("cuda", 1));
-  tensor.copy_to(data.data());
+TEST(Api, graph_input_order_test) {
+  EnvScope scope;
 
-  Tensor tensor2(Shape{2, 2}, Device("cuda", 2));
-  tensor.copy_to(data.data());
+  const std::string file_name = __FILE__;
+  const std::string directory = file_name.substr(0, file_name.rfind('/'));
+  Device device("cpu");
+  Graph graph = Load(directory + "/graph_test_model/affine_no_parameter", device);
 
-  Tensor tensor3(Shape{2, 2}, Device("cuda", 3));
-  tensor.copy_to(data.data());
+  std::vector<Tensor> inputs;
+  std::vector<float> x(3);
+  std::fill(x.begin(), x.end(), 1);
+  inputs.emplace_back(Tensor::from_buffer(x.data(), Shape({1, 3}), device, DType::kFloat));
+  std::vector<float> a(3 * 2);
+  std::fill(a.begin(), a.end(), 1);
+  inputs.emplace_back(Tensor::from_buffer(a.data(), Shape({3, 2}), device, DType::kFloat));
+  std::vector<float> b(2);
+  std::fill(b.begin(), b.end(), 1);
+  inputs.emplace_back(Tensor::from_buffer(b.data(), Shape({2}), device, DType::kFloat));
+
+  Tensor output = graph.Forward(inputs).at(0);
+  Shape shape = output.shape();
+  ASSERT_EQ(shape.At(0), 1);
+  ASSERT_EQ(shape.At(1), 2);
+  std::array<float, 2> buf{};
+  output.copy_to(buf.data());
+  ASSERT_EQ(buf[0], 4);
+  ASSERT_EQ(buf[1], 4);
 }
 
 }  // namespace oneflow_api
diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.pb b/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.pb
new file mode 100644
index 0000000000000000000000000000000000000000..874b554ad24fb1b171f2d14a384f433dd79b8d44
GIT binary patch
literal 997
zcmaJ=Jx{_=6zyxll&c_K<*Q0!y4i%#cGMWzocsY!%_}9r1ZV@T0VYQhe}ucci;J6s
ziHpC)e}H|hVhgXl@t$|@J?FghV3CJQK<j39J*HmYGFo>dFYqm0mAYQgS-veE*y()8
zHI&@ISIUr+i7b`Hx}fGjM1k9GY-^WfG0}d(*=}yMSK{p@5L3;c1WH5K=?z*#>JNj#
ziudxD_v_v>ft?t{CJ>5ssBWrlhte2qb76zQN)wm?ScRkXMh1p^5%P54^(hw4gy?)<
zsRyv@kLhUQxs#qnk$ILX*{q}ysu9~{!{LcQC7y1NUFvk{#J8y9Sbu*0%eseR>IxnM
znP!n=(q>#~W+d$t$UGd-;I3}ZEnOp1jm%6khg&#I*cWo<rR7UDw+u@LDUk$x7vYh>
zl#>UYQ!_X<b3F|iZgMr?KIA(3xsELPiYOC&A<*D#32(5s;z33>WX4c!og(I={J=>D
ZSCm5kLE%%561mP>DMDfOpH)E>egJs?CEox5

literal 0
HcmV?d00001

diff --git a/oneflow/api/cpp/tests/graph_test_model/model.weight/meta b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/meta
similarity index 60%
rename from oneflow/api/cpp/tests/graph_test_model/model.weight/meta
rename to oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/meta
index 873a1836360..421341fc956 100644
--- a/oneflow/api/cpp/tests/graph_test_model/model.weight/meta
+++ b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/meta
@@ -1,5 +1,5 @@
 shape {
-  dim: 2
-  dim: 2
+  dim: 3
+  dim: 4
 }
 data_type: kFloat
diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/out b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/out
new file mode 100644
index 0000000000000000000000000000000000000000..be22e342567fcc4be86263602fe799ac97b1e8e1
GIT binary patch
literal 48
OcmZQzXs~A>0RsTk90>IQ

literal 0
HcmV?d00001

diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/meta b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/meta
new file mode 100644
index 00000000000..166375025be
--- /dev/null
+++ b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/meta
@@ -0,0 +1,4 @@
+shape {
+  dim: 4
+}
+data_type: kFloat
diff --git a/oneflow/api/cpp/tests/graph_test_model/model.weight/out b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/out
similarity index 100%
rename from oneflow/api/cpp/tests/graph_test_model/model.weight/out
rename to oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/out
diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.pb b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.pb
new file mode 100644
index 0000000000000000000000000000000000000000..1ca388b362c730d16404ba92759772923a438a17
GIT binary patch
literal 902
zcmaKry-vbV6vunpBIO{8N93bQLb}-msf;E@;^O23I5qcD5{!H_&?dm-pgw_%lkeeT
z;^@fW>MQsL*xL`jEZ(g>=iLAQcls}U$#4#6UXQN^)NY$b^|oibj%jF8({>xP*~2|Q
zjZdY3Qr~sdEXWE`q^wvJG#Nw`pa#iq-)ePEXgKrz7|cwQxnM?CBt@=>y2u1ZF5!c~
zjz3ZDQm5;77GmMx+x9Dgt)PfyAQX#`|7)@4`9TE2Fazn(0)MBLfm_8q<W^;P(G%N)
zv@@W+q1_+0Oop5UAwfHKn_}u%@Xyc1Y8&NkI-xpYb$&TiBWe~>zduFP7*LDL%+CZ4
z0yAvTr&f~=9g|v?c^d6`j(!K3l@>e%GTOw#@y82%=Hpj@OuVU5tX^$m8af&2WLzf`
zxPkqpy?R`6-+rpp|G8{A7V;t%_#%P}EG}Bn=IDA1q5|8nt5~zR3v$CaGmr;X6%~SB
ycuH})zEjwVRA}&ASa*R5KPHPt7Bn7haD|e$Qc&Z~`9WNFsAeEF|KT)26Mg|;QwM1P

literal 0
HcmV?d00001

diff --git a/oneflow/api/cpp/tests/graph_test_model/model.pb b/oneflow/api/cpp/tests/graph_test_model/model.pb
deleted file mode 100644
index 540ead82c37e43e7044863fc48022943c595eb98..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 576
zcmd<O&ctQQB^2*l>0XprkP&a7o0(ToS`u%d#GG7E>hS-5Qv(P%oZ=7{V$LrukrLoy
z;b7um5@3>GP-5X?5@G;q=gG}aNzKtKPt8ovD1mC+-we@siCq?=5v)ytNsB3oA&8xe
zRg?h;k{F%18X38G!TNO*Q&Qsd5H>>%*u|5;#R`@ZGT>s$%oEb$62o+^KG1Dk0?6(F
ziAc$CfgCL)$|a0q1xPO5KnrM^5lEgZm`fPRNT3L|Ah^V#gVh)u;UaX2kt>qR4O`fv
z1{j+6u{uSFpNqx8z{)@f6f06p9E?JxT$0#~20MmJ7}W-dpctA(Lag9W;F7{_BRJsD
Qt(D^C;)Ev^B?cu101>#R0ssI2


From ebd7bca275a1c43744a27cb7f660b04cc65a0c38 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Thu, 16 Dec 2021 09:16:08 +0800
Subject: [PATCH 21/51] re-add sleep step after cpp api test exe

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/test.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 3ac6b147f72..2eb292acbba 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -542,6 +542,11 @@ jobs:
           docker exec ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe
           chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
           docker exec -e LD_LIBRARY_PATH=${{ steps.download-digest.outputs.entry-dir }}/liboneflow_cpp/lib ${{ env.TEST_CONTAINER_NAME }} ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_cpp_api_testexe
+      - name: Sleep
+        if: ${{ always() && !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }}
+        run: |
+          hostname
+          sleep 99999
       - name: Build documentation
         timeout-minutes: 10
         if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' && matrix.device == 'cpu' }}

From 504241ac4c0b077725d7186b71cb495ebd400e90 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Thu, 16 Dec 2021 09:46:17 +0800
Subject: [PATCH 22/51] upload liboneflow_cpp.so

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/test.yml | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2eb292acbba..083a76a799b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -315,6 +315,17 @@ jobs:
           ssh-tank-path: ${{ env.SSH_TANK_PATH }}
           src-dir: ${{ env.MANYLINUX_CACHE_DIR }}/build/bin
           dst-dir: bin
+      - name: Upload liboneflow_cpp library
+        if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && steps.build-cuda.outcome == 'success' }}
+        uses: Oneflow-Inc/get-oneflow/digest/upload@support-clang-12
+        timeout-minutes: 10
+        with:
+          digest: ${{ steps.save-cache.outputs.build-digest }}
+          entry: ${{ matrix.entry }}
+          ssh-tank-host: ${{ env.SSH_TANK_HOST }}
+          ssh-tank-path: ${{ env.SSH_TANK_PATH }}
+          src-dir: ${{ env.MANYLINUX_CACHE_DIR }}/build/liboneflow_cpp/lib
+          dst-dir: liboneflow_cpp/lib
       - name: Upload whl
         if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (steps.build-cpu.outcome == 'success' || steps.build-cuda.outcome == 'success' || steps.build-xla.outcome == 'success') }}
         uses: Oneflow-Inc/get-oneflow/digest/upload@support-clang-12
@@ -444,7 +455,7 @@ jobs:
         run: |
           echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
           exit 1
-      - name: Download wheel and binary
+      - name: Download wheel, binary and liboneflow_cpp lib (if any)
         if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }}
         uses: Oneflow-Inc/get-oneflow/digest/download@support-clang-12
         id: download-digest

From 7883995017f564d9fa79f3beacae0264b5e352eb Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Thu, 16 Dec 2021 09:57:20 +0800
Subject: [PATCH 23/51] modify cmake to trigger compilation

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 .github/workflows/test.yml | 2 +-
 CMakeLists.txt             | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 083a76a799b..a16941ee150 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -455,7 +455,7 @@ jobs:
         run: |
           echo "::error file=test.yml,line=204,col=10::steps.save-cache.outputs.cache-hit != matrix.cache-hit"
           exit 1
-      - name: Download wheel, binary and liboneflow_cpp lib (if any)
+      - name: Download wheel, binary and liboneflow_cpp lib
         if: ${{ !fromJson(matrix.cache-hit) && contains(matrix.runs-on, 'self-hosted') && (!fromJson(matrix.is-xla) || (fromJson(matrix.is-xla) && needs.changed_files.outputs.should_run_single_client_tests == '1')) }}
         uses: Oneflow-Inc/get-oneflow/digest/download@support-clang-12
         id: download-digest
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4150d3d210f..b1c72da0740 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -216,8 +216,7 @@ if(BUILD_CPP_API)
 
   if(BUILD_SHARED_LIBS)
     if(BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO)
-      message(WARNING "BUILD_SHARED_LIBS will be overrided to OFF because BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO is ON")
-      set(BUILD_SHARED_LIBS OFF)
+      message(FATAL_ERROR "BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO is incompatible with BUILD_SHARED_LIBS. Please set either of them to OFF.")
     else()
       set(LIBRARY_OUTPUT_PATH ${LIBONEFLOW_LIBRARY_DIR})
     endif(BUILD_MONOLITHIC_LIBONEFLOW_CPP_SO)

From 525fbb26196d16ceb59f5a88ea968fc3b7718250 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Thu, 16 Dec 2021 16:59:24 +0800
Subject: [PATCH 24/51] load job from ir && clean && add mlir model

---
 oneflow/api/cpp/framework/graph.cpp           |  25 ++++++---------
 oneflow/api/cpp/framework/graph.h             |  13 --------
 oneflow/api/cpp/ir_pass.cpp                   |  30 ++++++++++++++++++
 oneflow/api/cpp/tests/graph_test.cpp          |   2 +-
 .../affine_no_parameter/model.mlir            |  11 +++++++
 .../affine_no_parameter/model.pb              | Bin 997 -> 0 bytes
 .../affine_with_parameter/model.mlir          |  11 +++++++
 .../affine_with_parameter/model.pb            | Bin 902 -> 0 bytes
 8 files changed, 63 insertions(+), 29 deletions(-)
 create mode 100644 oneflow/api/cpp/ir_pass.cpp
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.mlir
 delete mode 100644 oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.pb
 create mode 100644 oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.mlir
 delete mode 100644 oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.pb

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 1d060dbe89b..3a6e75cec71 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -51,6 +51,7 @@ limitations under the License.
 #include "oneflow/core/job/job_build_and_infer_ctx_mgr.h"
 #include "oneflow/core/job/job_conf.cfg.h"
 #include "oneflow/core/job/job_conf.pb.h"
+#include "oneflow/core/job/job_ir.h"
 #include "oneflow/core/job/job_set.pb.h"
 #include "oneflow/core/job/lazy_mode.h"
 #include "oneflow/core/job/parallel_desc.h"
@@ -177,14 +178,8 @@ void Graph::enable_tensorrt() { graph_->enable_tensorrt(); }
 
 Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
     : model_path_(model_path), device_(device) {
-  // TODO(zzk0): model_path is a directory, need to concatenate filename
-  // we need a mlir model name.
-  {
-    std::ifstream input(model_path + "/model.pb");
-    CHECK(input.is_open());
-    CHECK(job_.ParseFromIstream(&input));
-  }
-  // prevent model name conflict when launch multiple model instances
+  CHECK_JUST(of::LoadJobFromIR(&job_, model_path + "/model.mlir"));
+  job_.mutable_job_conf()->mutable_predict_conf();
   job_.mutable_job_conf()->set_job_name(job_.mutable_job_conf()->job_name() + of::NewUniqueId());
   graph_ = std::make_shared<of::NNGraph>(job_.job_conf().job_name());
   of::Global<of::MultiClientSessionContext>::Get()->AddCGraph(graph_).GetOrThrow();
@@ -244,8 +239,8 @@ of::Maybe<void> Graph::GraphImpl::BuildGraph(const std::vector<Tensor>& inputs)
   {
     int input_tensor_order = 0;
     const of::OpGraph op_graph(job_);
-    JUST(op_graph.ForEachOpNode([&](const of::OpNode& node) -> of::Maybe<void> {
-      const of::OperatorConf& op_conf = node.op().op_conf();
+    op_graph.TopoForEachNode([&](const of::OpNode* node) -> of::Maybe<void> {
+      const of::OperatorConf& op_conf = node->op().op_conf();
       JUST(AddOp(op_conf));
       if (op_conf.has_input_conf()) {
         input_name_to_order_[op_conf.name()] = input_tensor_order;
@@ -259,21 +254,21 @@ of::Maybe<void> Graph::GraphImpl::BuildGraph(const std::vector<Tensor>& inputs)
             *device_.device_));
       }
       return of::Maybe<void>::Ok();
-    }));
+    });
   }
   JUST(of::CurJobBuildAndInferCtx_Complete());
   {
     const std::shared_ptr<of::Job> complete_job = JUST(of::GetCurrentJob());
     const of::OpGraph complete_graph(*complete_job);
-    JUST(complete_graph.ForEachOpNode([&](const of::OpNode& node) -> of::Maybe<void> {
+    complete_graph.TopoForEachNode([&](const of::OpNode* node) -> of::Maybe<void> {
       const of::LazyMode::Guard lazy_mode_disabled_guard{false};
-      const of::OperatorConf& op_conf = node.op().op_conf();
+      const of::OperatorConf& op_conf = node->op().op_conf();
       if (op_conf.has_output_conf()) {
         of::InterfaceBlobConf blob_conf = op_conf.output_conf().blob_conf();
         if (batch_size_ > 0) {
           const std::string input_lbi_str = op_conf.output_conf().in();
           const of::LogicalBlobId input_lbi = of::GenLogicalBlobId(input_lbi_str);
-          int64_t batch_size = node.LogicalBlobDesc4Lbi(input_lbi).shape().At(0);
+          int64_t batch_size = node->LogicalBlobDesc4Lbi(input_lbi).shape().At(0);
           blob_conf.mutable_shape()->set_dim(0, batch_size);
         }
         output_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Empty(
@@ -282,7 +277,7 @@ of::Maybe<void> Graph::GraphImpl::BuildGraph(const std::vector<Tensor>& inputs)
             *device_.device_));
       }
       return of::Maybe<void>::Ok();
-    }));
+    });
   }
   return of::Maybe<void>::Ok();
 }
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 8923a96e487..906bffc0a6a 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -50,19 +50,6 @@ Graph Load(const std::string& model_path, const Device& device);
 
 Graph Load(const std::string& model_path);
 
-// TODO(zzk0): only for debug, remove this
-inline void PrintTensor(const Tensor& tensor) {
-  std::cout << tensor.shape().elem_cnt() << " " << tensor.device().type() << " "
-            << tensor.device().device_id() << " ";
-  for (int i = 0; i < tensor.shape().NumAxes(); ++i) { std::cout << tensor.shape().At(i) << " "; }
-  std::cout << std::endl;
-  float* data = new float[tensor.shape().elem_cnt() * 4];
-  tensor.copy_to(data);
-  for (int i = 0; i < tensor.shape().elem_cnt(); ++i) { std::cout << data[i] << " "; }
-  std::cout << std::endl;
-  delete[] data;
-}
-
 }  // namespace oneflow_api
 
 #endif  // ONEFLOW_API_CPP_GRAPH_H_
diff --git a/oneflow/api/cpp/ir_pass.cpp b/oneflow/api/cpp/ir_pass.cpp
new file mode 100644
index 00000000000..ca4111a9da0
--- /dev/null
+++ b/oneflow/api/cpp/ir_pass.cpp
@@ -0,0 +1,30 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+#ifdef WITH_MLIR
+
+#include "oneflow/ir/include/OneFlow/Extension.h"
+#include "oneflow/ir/oneflow-extension/include/OneFlow/OneFlowRoundTrip.h"
+#include <glog/logging.h>
+
+namespace oneflow {
+
+REGISTER_JOB_PASS("IRRoundTripBeforeAD", IRRoundTrip<kBeforeAD>);
+REGISTER_JOB_PASS("IRRoundTrip", IRRoundTrip<kAfterAD>);
+
+}  // namespace oneflow
+
+#endif  // WITH_MLIR
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 5f5a4721c19..e2ad0f32e57 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -37,7 +37,7 @@ namespace {
 inline Graph LoadGraph(const Device& device) {
   const std::string file_name = __FILE__;
   const std::string directory = file_name.substr(0, file_name.rfind('/'));
-  Graph graph = Load(directory + "/graph_test_model/affine_with_parameter");
+  Graph graph = Load(directory + "/graph_test_model/affine_with_parameter", device);
   return graph;
 }
 
diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.mlir b/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.mlir
new file mode 100644
index 00000000000..30c09f7c841
--- /dev/null
+++ b/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.mlir
@@ -0,0 +1,11 @@
+module  {
+  oneflow.job @MyGraph_1(%arg0: tensor<1x3xf32>, %arg1: tensor<3x2xf32>, %arg2: tensor<2xf32>) -> tensor<1x2xf32> {
+    %output = "oneflow.input"(%arg0) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_1-input_0", output_lbns = ["_MyGraph_1-input_0/out"], scope_symbol_id = 4611686018427527167 : i64, shape = [1 : si64, 3 : si64]} : (tensor<1x3xf32>) -> tensor<1x3xf32>
+    %output_0 = "oneflow.input"(%arg1) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_1-input_1", output_lbns = ["_MyGraph_1-input_1/out"], scope_symbol_id = 4611686018427527167 : i64, shape = [3 : si64, 2 : si64]} : (tensor<3x2xf32>) -> tensor<3x2xf32>
+    %output_1 = "oneflow.input"(%arg2) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_1-input_2", output_lbns = ["_MyGraph_1-input_2/out"], scope_symbol_id = 4611686018427527167 : i64, shape = [2 : si64]} : (tensor<2xf32>) -> tensor<2xf32>
+    %0 = "oneflow.matmul"(%output, %output_0) {alpha = 1.000000e+00 : f64, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-matmul_0", output_lbns = ["model-matmul_0/out_0"], scope_symbol_id = 4611686018427535359 : i64, transpose_a = false, transpose_b = false} : (tensor<1x3xf32>, tensor<3x2xf32>) -> tensor<1x2xf32>
+    %1 = "oneflow.broadcast_add"(%0, %output_1) {device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-broadcast_add_1", output_lbns = ["model-broadcast_add_1/z_0"], scope_symbol_id = 4611686018427535359 : i64} : (tensor<1x2xf32>, tensor<2xf32>) -> tensor<1x2xf32>
+    %output_2 = "oneflow.output"(%1) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_1-output_0", output_lbns = ["_MyGraph_1-output_0/out"], scope_symbol_id = 4611686018427527167 : i64, shape = [1 : si64, 2 : si64]} : (tensor<1x2xf32>) -> tensor<1x2xf32>
+    oneflow.return %output_2 : tensor<1x2xf32>
+  }
+}
diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.pb b/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.pb
deleted file mode 100644
index 874b554ad24fb1b171f2d14a384f433dd79b8d44..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 997
zcmaJ=Jx{_=6zyxll&c_K<*Q0!y4i%#cGMWzocsY!%_}9r1ZV@T0VYQhe}ucci;J6s
ziHpC)e}H|hVhgXl@t$|@J?FghV3CJQK<j39J*HmYGFo>dFYqm0mAYQgS-veE*y()8
zHI&@ISIUr+i7b`Hx}fGjM1k9GY-^WfG0}d(*=}yMSK{p@5L3;c1WH5K=?z*#>JNj#
ziudxD_v_v>ft?t{CJ>5ssBWrlhte2qb76zQN)wm?ScRkXMh1p^5%P54^(hw4gy?)<
zsRyv@kLhUQxs#qnk$ILX*{q}ysu9~{!{LcQC7y1NUFvk{#J8y9Sbu*0%eseR>IxnM
znP!n=(q>#~W+d$t$UGd-;I3}ZEnOp1jm%6khg&#I*cWo<rR7UDw+u@LDUk$x7vYh>
zl#>UYQ!_X<b3F|iZgMr?KIA(3xsELPiYOC&A<*D#32(5s;z33>WX4c!og(I={J=>D
ZSCm5kLE%%561mP>DMDfOpH)E>egJs?CEox5

diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.mlir b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.mlir
new file mode 100644
index 00000000000..15a53af1f48
--- /dev/null
+++ b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.mlir
@@ -0,0 +1,11 @@
+module  {
+  oneflow.job @MyGraph_0(%arg0: tensor<1x3xf32>) -> tensor<1x4xf32> {
+    %output = "oneflow.input"(%arg0) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_0-input_0", output_lbns = ["_MyGraph_0-input_0/out"], scope_symbol_id = 4611686018427469823 : i64, shape = [1 : si64, 3 : si64]} : (tensor<1x3xf32>) -> tensor<1x3xf32>
+    %output_0 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], nd_sbp = ["B"], op_name = "model.a", output_lbns = ["model.a/out"], scope_symbol_id = 4611686018427482111 : i64, shape = [3 : si64, 4 : si64]} : () -> tensor<3x4xf32>
+    %output_1 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], nd_sbp = ["B"], op_name = "model.b", output_lbns = ["model.b/out"], scope_symbol_id = 4611686018427494399 : i64, shape = [4 : si64]} : () -> tensor<4xf32>
+    %0 = "oneflow.matmul"(%output, %output_0) {alpha = 1.000000e+00 : f64, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-matmul_0", output_lbns = ["model-matmul_0/out_0"], scope_symbol_id = 4611686018427486207 : i64, transpose_a = false, transpose_b = false} : (tensor<1x3xf32>, tensor<3x4xf32>) -> tensor<1x4xf32>
+    %1 = "oneflow.broadcast_add"(%0, %output_1) {device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-broadcast_add_1", output_lbns = ["model-broadcast_add_1/z_0"], scope_symbol_id = 4611686018427486207 : i64} : (tensor<1x4xf32>, tensor<4xf32>) -> tensor<1x4xf32>
+    %output_2 = "oneflow.output"(%1) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_0-output_0", output_lbns = ["_MyGraph_0-output_0/out"], scope_symbol_id = 4611686018427469823 : i64, shape = [1 : si64, 4 : si64]} : (tensor<1x4xf32>) -> tensor<1x4xf32>
+    oneflow.return %output_2 : tensor<1x4xf32>
+  }
+}
diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.pb b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.pb
deleted file mode 100644
index 1ca388b362c730d16404ba92759772923a438a17..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 902
zcmaKry-vbV6vunpBIO{8N93bQLb}-msf;E@;^O23I5qcD5{!H_&?dm-pgw_%lkeeT
z;^@fW>MQsL*xL`jEZ(g>=iLAQcls}U$#4#6UXQN^)NY$b^|oibj%jF8({>xP*~2|Q
zjZdY3Qr~sdEXWE`q^wvJG#Nw`pa#iq-)ePEXgKrz7|cwQxnM?CBt@=>y2u1ZF5!c~
zjz3ZDQm5;77GmMx+x9Dgt)PfyAQX#`|7)@4`9TE2Fazn(0)MBLfm_8q<W^;P(G%N)
zv@@W+q1_+0Oop5UAwfHKn_}u%@Xyc1Y8&NkI-xpYb$&TiBWe~>zduFP7*LDL%+CZ4
z0yAvTr&f~=9g|v?c^d6`j(!K3l@>e%GTOw#@y82%=Hpj@OuVU5tX^$m8af&2WLzf`
zxPkqpy?R`6-+rpp|G8{A7V;t%_#%P}EG}Bn=IDA1q5|8nt5~zR3v$CaGmr;X6%~SB
ycuH})zEjwVRA}&ASa*R5KPHPt7Bn7haD|e$Qc&Z~`9WNFsAeEF|KT)26Mg|;QwM1P


From 2d91e010122a4cd911a7970f6025128b383fc004 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Thu, 16 Dec 2021 17:02:46 +0800
Subject: [PATCH 25/51] remove useless python code that saves model.pb

---
 python/oneflow/framework/check_point_v2.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/oneflow/framework/check_point_v2.py b/python/oneflow/framework/check_point_v2.py
index 5d9bf13d9bb..e03bb4bf6ab 100644
--- a/python/oneflow/framework/check_point_v2.py
+++ b/python/oneflow/framework/check_point_v2.py
@@ -313,8 +313,6 @@ def save(
 
         path.mkdir(exist_ok=True)
 
-        model_pb_path = path / 'model.pb'
-        model_pb_path.write_bytes(graph._graph_proto.SerializeToString())
         serialized_job = str(text_format.MessageToString(graph._forward_job_proto))
         oneflow._oneflow_internal.nn.graph.SaveJobToIR(serialized_job, str(path))
 

From 68dc409456bd84366ff5073a210de5f09d112824 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Thu, 16 Dec 2021 20:07:54 +0800
Subject: [PATCH 26/51] add target of_common_obj to remove duplicate
 REGISTER_JOB_PASS && run of_format

---
 cmake/oneflow.cmake                        | 18 ++++++---
 oneflow/api/{cpp => common}/ir_pass.cpp    |  0
 oneflow/api/cpp/tests/graph_test.cpp       | 43 ++++++++++++++++++++++
 oneflow/api/python/ir.cpp                  |  3 --
 oneflow/core/framework/tensor_util.cpp     | 15 ++++++++
 oneflow/core/framework/tensor_util.h       | 19 +++++++++-
 python/oneflow/framework/check_point_v2.py |  2 +-
 7 files changed, 89 insertions(+), 11 deletions(-)
 rename oneflow/api/{cpp => common}/ir_pass.cpp (100%)

diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index 3670c56e5ed..a90b91353e1 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -104,9 +104,9 @@ foreach(oneflow_single_file ${oneflow_all_src})
   endif()
 
   if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/common/.*\\.(h|cpp)$")
-      list(APPEND of_all_obj_cc ${oneflow_single_file})
-      set(group_this ON)
-    endif()
+    list(APPEND of_common_obj_cc ${oneflow_single_file})
+    set(group_this ON)
+  endif()
 
   if(BUILD_PYTHON)
 
@@ -284,6 +284,14 @@ if (BUILD_CUDA)
   target_link_libraries(oneflow-gen-ods CUDA::cudart_static)
 endif()
 
+# oneflow api common
+oneflow_add_library(of_common_obj ${of_common_obj_cc})
+set(of_common_libs -Wl,--whole-archive of_common_obj -Wl,--no-whole-archive)
+target_link_libraries(of_common_obj oneflow)
+if (WITH_MLIR)
+  target_link_libraries(of_common_obj ${ONEFLOW_MLIR_LIBS})
+endif()
+
 if(BUILD_PYTHON)
 
   # py ext lib
@@ -304,7 +312,7 @@ if(BUILD_PYTHON)
   target_link_libraries(oneflow_internal PRIVATE
                         ${of_libs}
                         of_functional_tensor_obj
-                        ${ONEFLOW_MLIR_LIBS}
+                        ${of_common_libs}
                         ${oneflow_third_party_libs}
                         of_pyext_obj
                         ${oneflow_exe_third_party_libs})
@@ -346,7 +354,7 @@ if (BUILD_CPP_API)
     oneflow_add_library(oneflow_cpp ${of_cpp_api_files})
   endif()
   set_target_properties(oneflow_cpp PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}" LIBRARY_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}")
-  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${ONEFLOW_MLIR_LIBS} ${oneflow_third_party_libs})
+  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${of_common_libs} ${oneflow_third_party_libs})
 endif()
 
 file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR})
diff --git a/oneflow/api/cpp/ir_pass.cpp b/oneflow/api/common/ir_pass.cpp
similarity index 100%
rename from oneflow/api/cpp/ir_pass.cpp
rename to oneflow/api/common/ir_pass.cpp
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index e2ad0f32e57..1c782177d42 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -115,6 +115,49 @@ TEST(Api, graph_gpu_batching_test) {
   Forward(graph, device, 10);
 }
 
+TEST(Api, graph_multi_device_test) {
+  EnvScope scope;
+  Device device("cuda", 0);
+  Graph graph = LoadGraph(device);
+  Forward(graph, device, 1);
+
+  Device device1("cuda", 1);
+  Graph graph1 = LoadGraph(device1);
+  Forward(graph1, device1, 1);
+
+  Device device2("cpu");
+  Graph graph2 = LoadGraph(device2);
+  Forward(graph2, device2, 1);
+}
+
+TEST(Api, graph_unload_test) {
+  {
+    EnvScope scope;
+
+    Device device("cuda", 0);
+    Graph graph = LoadGraph(device);
+    Forward(graph, device, 1);
+
+    {
+      Device device1("cuda", 1);
+      Graph graph1 = LoadGraph(device1);
+      Forward(graph1, device1, 1);
+    }
+
+    Device device2("cpu");
+    Graph graph2 = LoadGraph(device2);
+    Forward(graph2, device2, 1);
+  }
+
+  {
+    EnvScope scope;
+
+    Device device("cpu");
+    Graph graph = LoadGraph(device);
+    Forward(graph, device, 1);
+  }
+}
+
 TEST(Api, graph_thread_test) {
   EnvScope scope;
 
diff --git a/oneflow/api/python/ir.cpp b/oneflow/api/python/ir.cpp
index 5840cb9d716..422242d37c4 100644
--- a/oneflow/api/python/ir.cpp
+++ b/oneflow/api/python/ir.cpp
@@ -28,9 +28,6 @@ ONEFLOW_API_PYBIND11_MODULE("ir", m) {
         [](const std::string& lib_path) { MutSharedLibPaths()->insert(lib_path); });
 }
 
-REGISTER_JOB_PASS("IRRoundTripBeforeAD", IRRoundTrip<kBeforeAD>);
-REGISTER_JOB_PASS("IRRoundTrip", IRRoundTrip<kAfterAD>);
-
 }  // namespace oneflow
 
 #endif  // WITH_MLIR
diff --git a/oneflow/core/framework/tensor_util.cpp b/oneflow/core/framework/tensor_util.cpp
index 2bff9134a16..6a615e25173 100644
--- a/oneflow/core/framework/tensor_util.cpp
+++ b/oneflow/core/framework/tensor_util.cpp
@@ -1,3 +1,18 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
 #include "oneflow/core/framework/tensor_util.h"
 
 #include "oneflow/core/common/spin_counter.h"
diff --git a/oneflow/core/framework/tensor_util.h b/oneflow/core/framework/tensor_util.h
index 92915843cbf..ec9502230aa 100644
--- a/oneflow/core/framework/tensor_util.h
+++ b/oneflow/core/framework/tensor_util.h
@@ -1,3 +1,18 @@
+/*
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
 #include <string>
 
 #include "oneflow/core/common/maybe.h"
@@ -10,5 +25,5 @@ class Tensor;
 Maybe<void> SyncAccessTensorWithTimeOut(
     const std::shared_ptr<Tensor>& tensor,
     const std::shared_ptr<std::function<void(uint64_t)>>& callback, const std::string& modifier);
-}
-}
+}  // namespace one
+}  // namespace oneflow
diff --git a/python/oneflow/framework/check_point_v2.py b/python/oneflow/framework/check_point_v2.py
index e03bb4bf6ab..1bb32572b4e 100644
--- a/python/oneflow/framework/check_point_v2.py
+++ b/python/oneflow/framework/check_point_v2.py
@@ -317,7 +317,7 @@ def save(
         oneflow._oneflow_internal.nn.graph.SaveJobToIR(serialized_job, str(path))
 
         for x in graph._state():
-            _save_tensor_to_disk(x.origin, path / f'{x.name_prefix}{x.name}')
+            _save_tensor_to_disk(x.origin, path / f"{x.name_prefix}{x.name}")
 
         return
 

From 84d980a6daeb9ad4035587452efd1160367d8a8c Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 10:40:18 +0800
Subject: [PATCH 27/51] remove openvino

---
 cmake/third_party/absl.cmake        |  8 ++++----
 oneflow/api/cpp/framework/graph.cpp | 10 +---------
 oneflow/api/cpp/framework/graph.h   |  1 -
 3 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/cmake/third_party/absl.cmake b/cmake/third_party/absl.cmake
index a30f673df60..2a85ca963d5 100644
--- a/cmake/third_party/absl.cmake
+++ b/cmake/third_party/absl.cmake
@@ -12,14 +12,14 @@ SET(ABSL_LIBRARY_DIR ${THIRD_PARTY_DIR}/absl/${CMAKE_INSTALL_LIBDIR} CACHE PATH
 
 if(WIN32)
   set(ABSL_BUILD_LIBRARY_DIR ${ABSL_INSTALL}/${CMAKE_INSTALL_LIBDIR})
-  set(ABSL_LIBRARY_NAMES absl_base.lib absl_spinlock_wait.lib absl_dynamic_annotations.lib
+  set(ABSL_LIBRARY_NAMES absl_spinlock_wait.lib absl_dynamic_annotations.lib
     absl_malloc_internal.lib absl_throw_delegate.lib absl_int128.lib absl_strings.lib absl_str_format_internal.lib
-    absl_time.lib absl_bad_optional_access.lib)
+    absl_time.lib absl_bad_optional_access.lib absl_base.lib)
 else()
   set(ABSL_BUILD_LIBRARY_DIR ${ABSL_INSTALL}/${CMAKE_INSTALL_LIBDIR})
-  set(ABSL_LIBRARY_NAMES libabsl_base.a libabsl_spinlock_wait.a libabsl_dynamic_annotations.a
+  set(ABSL_LIBRARY_NAMES libabsl_spinlock_wait.a libabsl_dynamic_annotations.a
     libabsl_malloc_internal.a libabsl_throw_delegate.a libabsl_int128.a libabsl_strings.a libabsl_str_format_internal.a
-    libabsl_time.a libabsl_bad_optional_access.a)
+    libabsl_time.a libabsl_bad_optional_access.a libabsl_base.a)
 endif()
 
 foreach(LIBRARY_NAME ${ABSL_LIBRARY_NAMES})
diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 3a6e75cec71..687235e5298 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -65,7 +65,7 @@ namespace oneflow_api {
 
 namespace of = oneflow;
 
-enum class XrtKind : int { kNone = 0, kTensorRT = 1, kOpenVINO = 2 };
+enum class XrtKind : int { kNone = 0, kTensorRT = 1 };
 
 namespace {
 
@@ -76,11 +76,6 @@ class CompileScope {
     CHECK_JUST(of::ThreadLocalScopeStackPush(scope));
 
     of::cfg::JobConfigProto job_config_cfg(job_config);
-#ifdef WITH_OPENVINO
-    if (kind == XrtKind::kOpenVINO) {
-      *(job_config_cfg.mutable_xrt_config()->mutable_use_openvino()) = true;
-    }
-#endif
 #ifdef WITH_TENSORRT
     if (kind == XrtKind::kTensorRT) {
       *(job_config_cfg.mutable_xrt_config()->mutable_use_tensorrt()) = true;
@@ -133,7 +128,6 @@ class Graph::GraphImpl final {
   explicit GraphImpl(const std::string& model_path);
   std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
   void set_batch_size(int batch_size) { batch_size_ = batch_size; }
-  void enable_openvino() { xrt_kind_ = XrtKind::kOpenVINO; }
   void enable_tensorrt() { xrt_kind_ = XrtKind::kTensorRT; }
 
  private:
@@ -172,8 +166,6 @@ Graph::Graph(const std::shared_ptr<GraphImpl>& graph) : graph_(graph) {}
 
 void Graph::set_batch_size(int batch_size) { graph_->set_batch_size(batch_size); }
 
-void Graph::enable_openvino() { graph_->enable_openvino(); }
-
 void Graph::enable_tensorrt() { graph_->enable_tensorrt(); }
 
 Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 906bffc0a6a..715796a6472 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -39,7 +39,6 @@ class Graph {
   explicit Graph(const std::shared_ptr<GraphImpl>& graph);
   std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
   void set_batch_size(int batch_size);
-  void enable_openvino();
   void enable_tensorrt();
 
  private:

From e297aeb0b987988cb28aae9f709cf2d6df7ff301 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 11:10:30 +0800
Subject: [PATCH 28/51] remove openvino test

---
 oneflow/api/cpp/tests/graph_test.cpp | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 1c782177d42..faa5e66f47c 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -83,14 +83,6 @@ TEST(Api, graph_multi_gpu_test) {
   Forward(graph1, device1);
 }
 
-TEST(Api, graph_openvino_test) {
-  EnvScope scope;
-  Device device("cpu");
-  Graph graph = LoadGraph(device);
-  graph.enable_openvino();
-  Forward(graph, device);
-}
-
 TEST(Api, graph_trt_test) {
   EnvScope scope;
   Device device("cuda:0");

From 0895d7fd6cfaf149e4ff1098dc7de4c7396b65ad Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 16:39:48 +0800
Subject: [PATCH 29/51] refine

---
 oneflow/api/cpp/env.cpp              |  1 -
 oneflow/api/cpp/framework/graph.cpp  | 81 +++++++++++++++++++---------
 oneflow/api/cpp/framework/graph.h    | 18 ++++---
 oneflow/api/cpp/tests/graph_test.cpp | 12 ++---
 4 files changed, 73 insertions(+), 39 deletions(-)

diff --git a/oneflow/api/cpp/env.cpp b/oneflow/api/cpp/env.cpp
index a2bb622823a..ea31af15a69 100644
--- a/oneflow/api/cpp/env.cpp
+++ b/oneflow/api/cpp/env.cpp
@@ -148,7 +148,6 @@ void release() {
     }
     of::Global<of::EnvGlobalObjectsScope>::Delete();
   }
-  // TODO close session
   of::SetShuttingDown();
   of::ResetThisThreadUniqueConsistentId().GetOrThrow();
 }
diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 687235e5298..91778f60eac 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -76,11 +76,7 @@ class CompileScope {
     CHECK_JUST(of::ThreadLocalScopeStackPush(scope));
 
     of::cfg::JobConfigProto job_config_cfg(job_config);
-#ifdef WITH_TENSORRT
-    if (kind == XrtKind::kTensorRT) {
-      *(job_config_cfg.mutable_xrt_config()->mutable_use_tensorrt()) = true;
-    }
-#endif
+    ConfigXrt(job_config_cfg, kind);
     CHECK_JUST(of::JobBuildAndInferCtx_Open(job_config.job_name()));
     CHECK_JUST(of::CurJobBuildAndInferCtx_SetJobConf(job_config_cfg));
   }
@@ -92,6 +88,16 @@ class CompileScope {
 
  private:
   of::LazyMode::Guard lazy_mode_enabled_guard{true};
+
+  void ConfigXrt(of::cfg::JobConfigProto& job_config_cfg, XrtKind kind) {
+#ifdef WITH_TENSORRT
+    if (kind == XrtKind::kTensorRT) {
+      *(job_config_cfg.mutable_xrt_config()->mutable_use_tensorrt()) = true;
+    }
+#else
+    LOG(WARNING) << "XRT TensorRT is unavailable while tensorrt is enabled";
+#endif
+  }
 };
 
 std::shared_ptr<of::one::TensorTuple> ConvertToTensorTuple(
@@ -124,8 +130,16 @@ const std::pair<std::vector<T1>, std::vector<T2>> Unzip(const of::HashMap<T1, T2
 
 class Graph::GraphImpl final {
  public:
-  explicit GraphImpl(const std::string& model_path, const Device& device);
-  explicit GraphImpl(const std::string& model_path);
+  explicit GraphImpl(const std::string& model_path, const Device& device = Device("cpu"));
+
+  GraphImpl(const GraphImpl& graph) = delete;
+  GraphImpl(GraphImpl&& graph) noexcept;
+
+  ~GraphImpl() = default;
+
+  GraphImpl& operator=(const GraphImpl& graph) = delete;
+  GraphImpl& operator=(GraphImpl&& graph) noexcept;
+
   std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
   void set_batch_size(int batch_size) { batch_size_ = batch_size; }
   void enable_tensorrt() { xrt_kind_ = XrtKind::kTensorRT; }
@@ -153,21 +167,32 @@ class Graph::GraphImpl final {
   std::shared_ptr<oneflow::one::TensorTuple> parameter_tensor_tuple_;
 };
 
-std::vector<Tensor> Graph::Forward(const std::vector<Tensor>& inputs) {
-  return graph_->Forward(inputs);
-}
-
 Graph::Graph(const std::string& model_path, const Device& device)
     : graph_(std::make_shared<GraphImpl>(model_path, device)) {}
 
-Graph::Graph(const std::string& model_path) : graph_(std::make_shared<GraphImpl>(model_path)) {}
-
 Graph::Graph(const std::shared_ptr<GraphImpl>& graph) : graph_(graph) {}
 
+Graph::Graph(Graph&& graph) noexcept : graph_(std::move(graph.graph_)) {}
+
+Graph& Graph::operator=(Graph&& graph) noexcept {
+  if (&graph == this) { return *this; }
+  graph_ = std::move(graph.graph_);
+  return *this;
+}
+
+std::vector<Tensor> Graph::Forward(const std::vector<Tensor>& inputs) {
+  return graph_->Forward(inputs);
+}
+
 void Graph::set_batch_size(int batch_size) { graph_->set_batch_size(batch_size); }
 
 void Graph::enable_tensorrt() { graph_->enable_tensorrt(); }
 
+Graph Graph::Load(const std::string& model_path, const Device& device) {
+  Graph graph(model_path, device);
+  return graph;
+}
+
 Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
     : model_path_(model_path), device_(device) {
   CHECK_JUST(of::LoadJobFromIR(&job_, model_path + "/model.mlir"));
@@ -177,7 +202,25 @@ Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
   of::Global<of::MultiClientSessionContext>::Get()->AddCGraph(graph_).GetOrThrow();
 }
 
-Graph::GraphImpl::GraphImpl(const std::string& model_path) : GraphImpl(model_path, Device("cpu")) {}
+Graph::GraphImpl::GraphImpl(GraphImpl&& graph) noexcept
+    : graph_(std::move(graph.graph_)),
+      model_path_(graph.model_path_),
+      is_compiled_(graph.is_compiled_),
+      batch_size_(graph.batch_size_),
+      xrt_kind_(graph.xrt_kind_),
+      device_(std::move(graph.device_)),
+      job_(std::move(graph.job_)),
+      input_name_to_order_(std::move(graph.input_name_to_order_)),
+      output_name_to_tensor_(std::move(graph.output_name_to_tensor_)),
+      variable_op_name_to_tensor_(std::move(graph.variable_op_name_to_tensor_)),
+      output_tensor_tuple_(std::move(graph.output_tensor_tuple_)),
+      parameter_tensor_tuple_(std::move(graph.parameter_tensor_tuple_)) {}
+
+Graph::GraphImpl& Graph::GraphImpl::operator=(Graph::GraphImpl&& graph) noexcept {
+  if (&graph == this) { return *this; }
+  graph_ = std::move(graph.graph_);
+  return *this;
+}
 
 std::vector<Tensor> Graph::GraphImpl::Forward(const std::vector<Tensor>& inputs) {
   if (!is_compiled_) {
@@ -326,14 +369,4 @@ of::Maybe<void> Graph::GraphImpl::RegisterTensors(const std::vector<Tensor>& inp
   return of::Maybe<void>::Ok();
 }
 
-Graph Load(const std::string& model_path, const Device& device) {
-  Graph graph(model_path, device);
-  return graph;
-}
-
-Graph Load(const std::string& model_path) {
-  const Device device = Device("cpu");
-  return Load(model_path, device);
-}
-
 }  // namespace oneflow_api
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 715796a6472..970e219b280 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -34,21 +34,27 @@ class Graph {
   class GraphImpl;
 
  public:
-  explicit Graph(const std::string& model_path, const Device& device);
-  explicit Graph(const std::string& model_path);
+  explicit Graph(const std::string& model_path, const Device& device = Device("cpu"));
   explicit Graph(const std::shared_ptr<GraphImpl>& graph);
+
+  Graph(const Graph& graph) = delete;
+  Graph(Graph&& graph) noexcept;
+
+  ~Graph() = default;
+
+  Graph& operator=(const Graph& graph) = delete;
+  Graph& operator=(Graph&& graph) noexcept;
+
   std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
   void set_batch_size(int batch_size);
   void enable_tensorrt();
 
+  static Graph Load(const std::string& model_path, const Device& device = Device("cpu"));
+
  private:
   std::shared_ptr<GraphImpl> graph_;
 };
 
-Graph Load(const std::string& model_path, const Device& device);
-
-Graph Load(const std::string& model_path);
-
 }  // namespace oneflow_api
 
 #endif  // ONEFLOW_API_CPP_GRAPH_H_
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index faa5e66f47c..e18f1659ded 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -23,11 +23,7 @@ limitations under the License.
 #include <iostream>
 #include <thread>
 #include <vector>
-#include "oneflow/api/cpp/framework/device.h"
-#include "oneflow/api/cpp/framework/dtype.h"
-#include "oneflow/api/cpp/framework/graph.h"
-#include "oneflow/api/cpp/framework/shape.h"
-#include "oneflow/api/cpp/framework/tensor.h"
+#include "oneflow/api/cpp/framework.h"
 #include "oneflow/api/cpp/tests/api_test.h"
 
 namespace oneflow_api {
@@ -37,7 +33,7 @@ namespace {
 inline Graph LoadGraph(const Device& device) {
   const std::string file_name = __FILE__;
   const std::string directory = file_name.substr(0, file_name.rfind('/'));
-  Graph graph = Load(directory + "/graph_test_model/affine_with_parameter", device);
+  Graph graph = Graph::Load(directory + "/graph_test_model/affine_with_parameter", device);
   return graph;
 }
 
@@ -159,7 +155,7 @@ TEST(Api, graph_thread_test) {
 
   std::vector<std::thread> threads;
   for (Graph& graph : graphs) {
-    threads.emplace_back(std::thread(std::bind(Forward, graph, device, 1)));
+    threads.emplace_back(std::thread(std::bind(Forward, std::move(graph), device, 1)));
   }
   for (auto& thread : threads) { thread.join(); }
 }
@@ -170,7 +166,7 @@ TEST(Api, graph_input_order_test) {
   const std::string file_name = __FILE__;
   const std::string directory = file_name.substr(0, file_name.rfind('/'));
   Device device("cpu");
-  Graph graph = Load(directory + "/graph_test_model/affine_no_parameter", device);
+  Graph graph = Graph::Load(directory + "/graph_test_model/affine_no_parameter", device);
 
   std::vector<Tensor> inputs;
   std::vector<float> x(3);

From 032b672002f4590b6f6aa41d1065f9b60de8d3f8 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 18:11:10 +0800
Subject: [PATCH 30/51] make Graph::Forward take and return IValue

---
 oneflow/api/cpp/framework/graph.cpp  | 29 +++++++++++++++++++++++-----
 oneflow/api/cpp/framework/graph.h    |  3 ++-
 oneflow/api/cpp/tests/graph_test.cpp |  8 ++++++--
 3 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 91778f60eac..e16fcbd5dfb 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -18,6 +18,7 @@ limitations under the License.
 #include "oneflow/api/common/scope.h"
 #include "oneflow/api/cpp/framework/device.h"
 #include "oneflow/api/cpp/framework/graph.h"
+#include "oneflow/api/cpp/framework/ivalue.h"
 #include "oneflow/api/cpp/framework/shape.h"
 #include "oneflow/api/cpp/framework/tensor.h"
 #include "oneflow/api/common/job_build_and_infer_ctx.h"
@@ -90,13 +91,13 @@ class CompileScope {
   of::LazyMode::Guard lazy_mode_enabled_guard{true};
 
   void ConfigXrt(of::cfg::JobConfigProto& job_config_cfg, XrtKind kind) {
-#ifdef WITH_TENSORRT
     if (kind == XrtKind::kTensorRT) {
+#ifdef WITH_TENSORRT
       *(job_config_cfg.mutable_xrt_config()->mutable_use_tensorrt()) = true;
-    }
 #else
-    LOG(WARNING) << "XRT TensorRT is unavailable while tensorrt is enabled";
+      LOG(WARNING) << "XRT TensorRT is unavailable while tensorrt is enabled";
 #endif
+    }
   }
 };
 
@@ -180,8 +181,26 @@ Graph& Graph::operator=(Graph&& graph) noexcept {
   return *this;
 }
 
-std::vector<Tensor> Graph::Forward(const std::vector<Tensor>& inputs) {
-  return graph_->Forward(inputs);
+IValue Graph::Forward(const IValue& inputs) {
+  std::vector<Tensor> input_tensors;
+  if (inputs.IsNone()) {
+    // do nothing
+  } else if (inputs.IsTensor()) {
+    input_tensors.emplace_back(inputs.ToTensor());
+  } else if (inputs.IsTensorVector()) {
+    input_tensors = inputs.ToTensorVector();
+  } else {
+    LOG(WARNING) << "Graph currently only support types: Tensor/vector(Tensor)/None";
+  }
+
+  std::vector<Tensor> output_tensors = graph_->Forward(input_tensors);
+  if (output_tensors.empty()) {
+    return IValue{};
+  } else if (output_tensors.size() == 1) {
+    return IValue(output_tensors.at(0));
+  } else {
+    return IValue(output_tensors);
+  }
 }
 
 void Graph::set_batch_size(int batch_size) { graph_->set_batch_size(batch_size); }
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 970e219b280..59d1fd04a4e 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -18,6 +18,7 @@ limitations under the License.
 #define ONEFLOW_API_CPP_GRAPH_H_
 
 #include "device.h"
+#include "oneflow/api/cpp/framework/ivalue.h"
 #include "tensor.h"
 #include "iostream"
 
@@ -45,7 +46,7 @@ class Graph {
   Graph& operator=(const Graph& graph) = delete;
   Graph& operator=(Graph&& graph) noexcept;
 
-  std::vector<Tensor> Forward(const std::vector<Tensor>& inputs);
+  IValue Forward(const IValue& inputs);
   void set_batch_size(int batch_size);
   void enable_tensorrt();
 
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index e18f1659ded..72fb64e0536 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -43,7 +43,9 @@ inline void Forward(Graph& graph, const Device& device, int expected_batch_dim =
   std::vector<Tensor> inputs;
   inputs.emplace_back(
       Tensor::from_buffer(data.data(), Shape({expected_batch_dim, 3}), device, DType::kFloat));
-  Tensor output = graph.Forward(inputs).at(0);
+  const auto& value = graph.Forward(inputs);
+  ASSERT_TRUE(value.IsTensor());
+  Tensor output = value.ToTensor();
   Shape shape = output.shape();
   ASSERT_EQ(shape.At(0), expected_batch_dim);
   ASSERT_EQ(shape.At(1), 4);
@@ -179,7 +181,9 @@ TEST(Api, graph_input_order_test) {
   std::fill(b.begin(), b.end(), 1);
   inputs.emplace_back(Tensor::from_buffer(b.data(), Shape({2}), device, DType::kFloat));
 
-  Tensor output = graph.Forward(inputs).at(0);
+  const auto& value = graph.Forward(inputs);
+  ASSERT_TRUE(value.IsTensor());
+  Tensor output = value.ToTensor();
   Shape shape = output.shape();
   ASSERT_EQ(shape.At(0), 1);
   ASSERT_EQ(shape.At(1), 2);

From b086f7c1f9f8233daf9308582031d57760a1b270 Mon Sep 17 00:00:00 2001
From: ZeKai Zhou <30856589+zzk0@users.noreply.github.com>
Date: Fri, 17 Dec 2021 21:35:14 +0800
Subject: [PATCH 31/51] Update oneflow/api/cpp/framework/graph.h

Co-authored-by: daquexian <daquexian566@gmail.com>
---
 oneflow/api/cpp/framework/graph.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 59d1fd04a4e..a847584dcc1 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -18,7 +18,7 @@ limitations under the License.
 #define ONEFLOW_API_CPP_GRAPH_H_
 
 #include "device.h"
-#include "oneflow/api/cpp/framework/ivalue.h"
+#include "ivalue.h"
 #include "tensor.h"
 #include "iostream"
 

From e3df599eb68a444e8d01dcb07d45943aa8a10e90 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 21:50:25 +0800
Subject: [PATCH 32/51] hold GraphImpl in unique_ptr and move all members in GraphImpl move assignment

---
 oneflow/api/cpp/framework/graph.cpp | 19 ++++++++++++++-----
 oneflow/api/cpp/framework/graph.h   |  3 +--
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index e16fcbd5dfb..000258c3883 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -154,7 +154,7 @@ class Graph::GraphImpl final {
   oneflow::Maybe<void> RegisterTensors(const std::vector<Tensor>& inputs);
 
   std::shared_ptr<oneflow::NNGraph> graph_ = nullptr;
-  const std::string model_path_;
+  std::string model_path_;
   bool is_compiled_ = false;
   int batch_size_ = 0;
   XrtKind xrt_kind_ = XrtKind::kNone;
@@ -169,9 +169,7 @@ class Graph::GraphImpl final {
 };
 
 Graph::Graph(const std::string& model_path, const Device& device)
-    : graph_(std::make_shared<GraphImpl>(model_path, device)) {}
-
-Graph::Graph(const std::shared_ptr<GraphImpl>& graph) : graph_(graph) {}
+    : graph_(std::make_unique<GraphImpl>(model_path, device)) {}
 
 Graph::Graph(Graph&& graph) noexcept : graph_(std::move(graph.graph_)) {}
 
@@ -223,7 +221,7 @@ Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device)
 
 Graph::GraphImpl::GraphImpl(GraphImpl&& graph) noexcept
     : graph_(std::move(graph.graph_)),
-      model_path_(graph.model_path_),
+      model_path_(std::move(graph.model_path_)),
       is_compiled_(graph.is_compiled_),
       batch_size_(graph.batch_size_),
       xrt_kind_(graph.xrt_kind_),
@@ -238,6 +236,17 @@ Graph::GraphImpl::GraphImpl(GraphImpl&& graph) noexcept
 Graph::GraphImpl& Graph::GraphImpl::operator=(Graph::GraphImpl&& graph) noexcept {
   if (&graph == this) { return *this; }
   graph_ = std::move(graph.graph_);
+  model_path_ = std::move(graph.model_path_);
+  is_compiled_ = graph.is_compiled_;
+  batch_size_ = graph.batch_size_;
+  xrt_kind_ = graph.xrt_kind_;
+  device_ = std::move(graph.device_);
+  job_ = std::move(graph.job_);
+  input_name_to_order_ = std::move(graph.input_name_to_order_);
+  output_name_to_tensor_ = std::move(graph.output_name_to_tensor_);
+  variable_op_name_to_tensor_ = std::move(graph.variable_op_name_to_tensor_);
+  output_tensor_tuple_ = std::move(graph.output_tensor_tuple_);
+  parameter_tensor_tuple_ = std::move(graph.parameter_tensor_tuple_);
   return *this;
 }
 
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index 59d1fd04a4e..45f70cd1aae 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -36,7 +36,6 @@ class Graph {
 
  public:
   explicit Graph(const std::string& model_path, const Device& device = Device("cpu"));
-  explicit Graph(const std::shared_ptr<GraphImpl>& graph);
 
   Graph(const Graph& graph) = delete;
   Graph(Graph&& graph) noexcept;
@@ -53,7 +52,7 @@ class Graph {
   static Graph Load(const std::string& model_path, const Device& device = Device("cpu"));
 
  private:
-  std::shared_ptr<GraphImpl> graph_;
+  std::unique_ptr<GraphImpl> graph_;
 };
 
 }  // namespace oneflow_api

From 03c98dfcef61e90276bd85bae66929b8878d6b9b Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 22:13:26 +0800
Subject: [PATCH 33/51] define Graph destructor in graph.cpp and drop iostream include from graph.h

---
 oneflow/api/cpp/framework/graph.cpp | 2 ++
 oneflow/api/cpp/framework/graph.h   | 8 ++------
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 000258c3883..603be913858 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -171,6 +171,8 @@ class Graph::GraphImpl final {
 Graph::Graph(const std::string& model_path, const Device& device)
     : graph_(std::make_unique<GraphImpl>(model_path, device)) {}
 
+Graph::~Graph() = default;
+
 Graph::Graph(Graph&& graph) noexcept : graph_(std::move(graph.graph_)) {}
 
 Graph& Graph::operator=(Graph&& graph) noexcept {
diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h
index b70ad78f304..c2f690b642d 100644
--- a/oneflow/api/cpp/framework/graph.h
+++ b/oneflow/api/cpp/framework/graph.h
@@ -20,7 +20,6 @@ limitations under the License.
 #include "device.h"
 #include "ivalue.h"
 #include "tensor.h"
-#include "iostream"
 
 namespace oneflow {
 
@@ -31,17 +30,13 @@ class NNGraph;
 namespace oneflow_api {
 
 class Graph {
- private:
-  class GraphImpl;
-
  public:
   explicit Graph(const std::string& model_path, const Device& device = Device("cpu"));
+  ~Graph();
 
   Graph(const Graph& graph) = delete;
   Graph(Graph&& graph) noexcept;
 
-  ~Graph() = default;
-
   Graph& operator=(const Graph& graph) = delete;
   Graph& operator=(Graph&& graph) noexcept;
 
@@ -52,6 +47,7 @@ class Graph {
   static Graph Load(const std::string& model_path, const Device& device = Device("cpu"));
 
  private:
+  class GraphImpl;
   std::unique_ptr<GraphImpl> graph_;
 };
 

From a789d1c3c590d3b3fdeb72cba7561a8cd10c613b Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 22:24:49 +0800
Subject: [PATCH 34/51] remove newline at end of tensor_util.cpp

---
 oneflow/core/framework/tensor_util.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oneflow/core/framework/tensor_util.cpp b/oneflow/core/framework/tensor_util.cpp
index 6a615e25173..0d16f0d468e 100644
--- a/oneflow/core/framework/tensor_util.cpp
+++ b/oneflow/core/framework/tensor_util.cpp
@@ -33,4 +33,4 @@ Maybe<void> SyncAccessTensorWithTimeOut(
 }
 
 }  // namespace one
-}  // namespace oneflow
+}  // namespace oneflow
\ No newline at end of file

From 60193e0e28e9e6d79bf7ffb8dc3ce57d487a7326 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 22:25:19 +0800
Subject: [PATCH 35/51] restore newline at end of tensor_util.cpp, remove it from tensor_util.h

---
 oneflow/core/framework/tensor_util.cpp | 2 +-
 oneflow/core/framework/tensor_util.h   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/oneflow/core/framework/tensor_util.cpp b/oneflow/core/framework/tensor_util.cpp
index 0d16f0d468e..6a615e25173 100644
--- a/oneflow/core/framework/tensor_util.cpp
+++ b/oneflow/core/framework/tensor_util.cpp
@@ -33,4 +33,4 @@ Maybe<void> SyncAccessTensorWithTimeOut(
 }
 
 }  // namespace one
-}  // namespace oneflow
\ No newline at end of file
+}  // namespace oneflow
diff --git a/oneflow/core/framework/tensor_util.h b/oneflow/core/framework/tensor_util.h
index ec9502230aa..028ddf05e2e 100644
--- a/oneflow/core/framework/tensor_util.h
+++ b/oneflow/core/framework/tensor_util.h
@@ -26,4 +26,4 @@ Maybe<void> SyncAccessTensorWithTimeOut(
     const std::shared_ptr<Tensor>& tensor,
     const std::shared_ptr<std::function<void(uint64_t)>>& callback, const std::string& modifier);
 }  // namespace one
-}  // namespace oneflow
+}  // namespace oneflow
\ No newline at end of file

From 8f835925d6729452e2527a6bf518bc47e43543ea Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 22:25:26 +0800
Subject: [PATCH 36/51] restore newline at end of tensor_util.h

---
 oneflow/core/framework/tensor_util.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/oneflow/core/framework/tensor_util.h b/oneflow/core/framework/tensor_util.h
index 028ddf05e2e..ec9502230aa 100644
--- a/oneflow/core/framework/tensor_util.h
+++ b/oneflow/core/framework/tensor_util.h
@@ -26,4 +26,4 @@ Maybe<void> SyncAccessTensorWithTimeOut(
     const std::shared_ptr<Tensor>& tensor,
     const std::shared_ptr<std::function<void(uint64_t)>>& callback, const std::string& modifier);
 }  // namespace one
-}  // namespace oneflow
\ No newline at end of file
+}  // namespace oneflow

From a61c52125cc7401e0e9eccc6faaad5698612bcfe Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Fri, 17 Dec 2021 22:30:11 +0800
Subject: [PATCH 37/51] remove trailing blank lines in initializer_util.py

---
 python/oneflow/ops/initializer_util.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/python/oneflow/ops/initializer_util.py b/python/oneflow/ops/initializer_util.py
index 72889c0e31d..2a21173eb95 100644
--- a/python/oneflow/ops/initializer_util.py
+++ b/python/oneflow/ops/initializer_util.py
@@ -1217,5 +1217,3 @@ def generate_values_by_initializer(initializer, shape, dtype):
     np_dtype = np.dtype(dtype_util.convert_oneflow_dtype_to_numpy_dtype(dtype))
     length = _elem_cnt(shape)
     return np.array(initializer(length)).astype(np_dtype).reshape(shape)
-
-

From 25d7f18e5253a60c0d527b989785d5889485cc37 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Sun, 19 Dec 2021 07:51:40 +0800
Subject: [PATCH 38/51] rename in oneflow.cmake

---
 cmake/oneflow.cmake | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index a90b91353e1..d013ffbe8b8 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -104,7 +104,7 @@ foreach(oneflow_single_file ${oneflow_all_src})
   endif()
 
   if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/common/.*\\.(h|cpp)$")
-    list(APPEND of_common_obj_cc ${oneflow_single_file})
+    list(APPEND of_api_common_cc ${oneflow_single_file})
     set(group_this ON)
   endif()
 
@@ -285,11 +285,11 @@ if (BUILD_CUDA)
 endif()
 
 # oneflow api common
-oneflow_add_library(of_common_obj ${of_common_obj_cc})
-set(of_common_libs -Wl,--whole-archive of_common_obj -Wl,--no-whole-archive)
-target_link_libraries(of_common_obj oneflow)
+oneflow_add_library(of_api_common_obj ${of_api_common_cc})
+set(of_api_common_lib -Wl,--whole-archive of_api_common_obj -Wl,--no-whole-archive)
+target_link_libraries(of_api_common_obj oneflow)
 if (WITH_MLIR)
-  target_link_libraries(of_common_obj ${ONEFLOW_MLIR_LIBS})
+  target_link_libraries(of_api_common_obj ${ONEFLOW_MLIR_LIBS})
 endif()
 
 if(BUILD_PYTHON)
@@ -312,7 +312,7 @@ if(BUILD_PYTHON)
   target_link_libraries(oneflow_internal PRIVATE
                         ${of_libs}
                         of_functional_tensor_obj
-                        ${of_common_libs}
+                        ${of_api_common_lib}
                         ${oneflow_third_party_libs}
                         of_pyext_obj
                         ${oneflow_exe_third_party_libs})
@@ -354,7 +354,7 @@ if (BUILD_CPP_API)
     oneflow_add_library(oneflow_cpp ${of_cpp_api_files})
   endif()
   set_target_properties(oneflow_cpp PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}" LIBRARY_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}")
-  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${of_common_libs} ${oneflow_third_party_libs})
+  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${of_api_common_lib} ${oneflow_third_party_libs})
 endif()
 
 file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR})

From 0a65215d19e88ef49cc288fef4b5885b9f591686 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Mon, 20 Dec 2021 11:07:31 +0800
Subject: [PATCH 39/51] build of_api_common only when BUILD_PYTHON or BUILD_CPP_API is on

---
 cmake/oneflow.cmake | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index d013ffbe8b8..9e87cb0c2d8 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -103,11 +103,6 @@ foreach(oneflow_single_file ${oneflow_all_src})
     set(group_this ON)
   endif()
 
-  if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/common/.*\\.(h|cpp)$")
-    list(APPEND of_api_common_cc ${oneflow_single_file})
-    set(group_this ON)
-  endif()
-
   if(BUILD_PYTHON)
 
     if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/python/.*\\.(h|cpp)$")
@@ -285,11 +280,16 @@ if (BUILD_CUDA)
 endif()
 
 # oneflow api common
-oneflow_add_library(of_api_common_obj ${of_api_common_cc})
-set(of_api_common_lib -Wl,--whole-archive of_api_common_obj -Wl,--no-whole-archive)
-target_link_libraries(of_api_common_obj oneflow)
-if (WITH_MLIR)
-  target_link_libraries(of_api_common_obj ${ONEFLOW_MLIR_LIBS})
+if (BUILD_PYTHON OR BUILD_CPP_API)
+  file(GLOB_RECURSE of_api_common_files
+    ${PROJECT_SOURCE_DIR}/oneflow/api/common/*.h
+    ${PROJECT_SOURCE_DIR}/oneflow/api/common/*.cpp)
+  oneflow_add_library(of_api_common ${of_api_common_files})
+  set(of_api_common -Wl,--whole-archive of_api_common -Wl,--no-whole-archive)
+  target_link_libraries(of_api_common oneflow)
+  if (WITH_MLIR)
+    target_link_libraries(of_api_common ${ONEFLOW_MLIR_LIBS})
+  endif()
 endif()
 
 if(BUILD_PYTHON)
@@ -312,7 +312,7 @@ if(BUILD_PYTHON)
   target_link_libraries(oneflow_internal PRIVATE
                         ${of_libs}
                         of_functional_tensor_obj
-                        ${of_api_common_lib}
+                        ${of_api_common}
                         ${oneflow_third_party_libs}
                         of_pyext_obj
                         ${oneflow_exe_third_party_libs})
@@ -354,7 +354,7 @@ if (BUILD_CPP_API)
     oneflow_add_library(oneflow_cpp ${of_cpp_api_files})
   endif()
   set_target_properties(oneflow_cpp PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}" LIBRARY_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}")
-  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${of_api_common_lib} ${oneflow_third_party_libs})
+  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${of_api_common} ${oneflow_third_party_libs})
 endif()
 
 file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR})

From 04adb1ce5b06b2c1bb4720610abd213b12f1ae00 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Mon, 20 Dec 2021 14:52:05 +0800
Subject: [PATCH 40/51] make of_api_common object library

---
 cmake/oneflow.cmake | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index 9e87cb0c2d8..842123026ed 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -284,8 +284,7 @@ if (BUILD_PYTHON OR BUILD_CPP_API)
   file(GLOB_RECURSE of_api_common_files
     ${PROJECT_SOURCE_DIR}/oneflow/api/common/*.h
     ${PROJECT_SOURCE_DIR}/oneflow/api/common/*.cpp)
-  oneflow_add_library(of_api_common ${of_api_common_files})
-  set(of_api_common -Wl,--whole-archive of_api_common -Wl,--no-whole-archive)
+  oneflow_add_library(of_api_common OBJECT ${of_api_common_files})
   target_link_libraries(of_api_common oneflow)
   if (WITH_MLIR)
     target_link_libraries(of_api_common ${ONEFLOW_MLIR_LIBS})
@@ -312,7 +311,7 @@ if(BUILD_PYTHON)
   target_link_libraries(oneflow_internal PRIVATE
                         ${of_libs}
                         of_functional_tensor_obj
-                        ${of_api_common}
+                        of_api_common
                         ${oneflow_third_party_libs}
                         of_pyext_obj
                         ${oneflow_exe_third_party_libs})
@@ -354,7 +353,7 @@ if (BUILD_CPP_API)
     oneflow_add_library(oneflow_cpp ${of_cpp_api_files})
   endif()
   set_target_properties(oneflow_cpp PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}" LIBRARY_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}")
-  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${of_api_common} ${oneflow_third_party_libs})
+  target_link_libraries(oneflow_cpp PRIVATE ${of_libs} of_api_common ${oneflow_third_party_libs})
 endif()
 
 file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR})

From b50aa7c4e57158b1ebcbefe2b1928f3a0c694687 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Mon, 20 Dec 2021 16:12:16 +0800
Subject: [PATCH 41/51] move device util function in api to core

---
 oneflow/api/common/device.cpp                | 52 --------------------
 oneflow/api/common/device.h                  | 30 -----------
 oneflow/api/cpp/framework/device.cpp         |  8 +--
 oneflow/api/python/framework/device.cpp      |  5 +-
 oneflow/api/python/functional/python_arg.cpp |  4 +-
 oneflow/core/framework/device.cpp            | 23 +++++++++
 oneflow/core/framework/device.h              |  1 +
 oneflow/core/functional/impl/eye_functor.cpp |  4 +-
 8 files changed, 33 insertions(+), 94 deletions(-)
 delete mode 100644 oneflow/api/common/device.cpp
 delete mode 100644 oneflow/api/common/device.h

diff --git a/oneflow/api/common/device.cpp b/oneflow/api/common/device.cpp
deleted file mode 100644
index 566f9231066..00000000000
--- a/oneflow/api/common/device.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#include "oneflow/api/common/device.h"
-
-namespace oneflow {
-
-namespace {
-
-void CheckDeviceType(const std::string& type) {
-  if (Device::type_supported.find(type) == Device::type_supported.end()) {
-    std::string error_msg =
-        "Expected one of cpu, cuda device type at start of device string " + type;
-    throw std::runtime_error(error_msg);
-  }
-}
-
-}  // namespace
-
-/* static */ Maybe<Symbol<Device>> DeviceExportUtil::ParseAndNew(
-    const std::string& type_or_type_with_device_id) {
-  std::string type;
-  int device_id = -1;
-  ParsingDeviceTag(type_or_type_with_device_id, &type, &device_id).GetOrThrow();
-  CheckDeviceType(type);
-  if (device_id == -1) {
-    return Device::New(type);
-  } else {
-    return Device::New(type, device_id);
-  }
-}
-
-/* static */ Maybe<Symbol<Device>> DeviceExportUtil::New(const std::string& type,
-                                                         int64_t device_id) {
-  CheckDeviceType(type);
-  return Device::New(type, device_id);
-}
-
-}  // namespace oneflow
diff --git a/oneflow/api/common/device.h b/oneflow/api/common/device.h
deleted file mode 100644
index 8703666af7f..00000000000
--- a/oneflow/api/common/device.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
-Copyright 2020 The OneFlow Authors. All rights reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-#ifndef ONEFLOW_API_COMMON_DEVICE_H_
-#define ONEFLOW_API_COMMON_DEVICE_H_
-
-#include "oneflow/core/framework/device.h"
-
-namespace oneflow {
-struct DeviceExportUtil final {
-  static Maybe<Symbol<Device>> ParseAndNew(const std::string& type_or_type_with_device_id);
-
-  static Maybe<Symbol<Device>> New(const std::string& type, int64_t device_id);
-};
-}  // namespace oneflow
-
-#endif  // !ONEFLOW_API_COMMON_DEVICE_H_
diff --git a/oneflow/api/cpp/framework/device.cpp b/oneflow/api/cpp/framework/device.cpp
index dd291de0353..3be4b3f5a46 100644
--- a/oneflow/api/cpp/framework/device.cpp
+++ b/oneflow/api/cpp/framework/device.cpp
@@ -15,9 +15,9 @@ limitations under the License.
 */
 
 #include "oneflow/api/cpp/framework/device.h"
-#include "oneflow/api/common/device.h"
 #include "oneflow/core/common/maybe.h"
 #include "oneflow/core/common/symbol.h"
+#include "oneflow/core/framework/device.h"
 
 namespace oneflow_api {
 
@@ -25,11 +25,11 @@ namespace of = oneflow;
 
 Device::Device(const std::string& type_or_type_with_device_id)
     : device_(std::make_shared<of::Symbol<of::Device>>(
-        of::DeviceExportUtil::ParseAndNew(type_or_type_with_device_id).GetOrThrow())) {}
+        of::Device::ParseAndNew(type_or_type_with_device_id).GetOrThrow())) {}
 
 Device::Device(const std::string& type, int64_t device_id)
-    : device_(std::make_shared<of::Symbol<of::Device>>(
-        of::DeviceExportUtil::New(type, device_id).GetOrThrow())) {}
+    : device_(
+        std::make_shared<of::Symbol<of::Device>>(of::Device::New(type, device_id).GetOrThrow())) {}
 
 const std::string& Device::type() const { return (*device_)->type(); }
 
diff --git a/oneflow/api/python/framework/device.cpp b/oneflow/api/python/framework/device.cpp
index f843d8ccb74..06d5a733bfb 100644
--- a/oneflow/api/python/framework/device.cpp
+++ b/oneflow/api/python/framework/device.cpp
@@ -16,7 +16,6 @@ limitations under the License.
 #include <pybind11/pybind11.h>
 #include <pybind11/operators.h>
 #include "oneflow/core/control/global_process_ctx.h"
-#include "oneflow/api/common/device.h"
 #include "oneflow/api/python/of_api_registry.h"
 #include "oneflow/core/framework/device.h"
 #include "oneflow/core/common/str_util.h"
@@ -29,10 +28,10 @@ namespace oneflow {
 ONEFLOW_API_PYBIND11_MODULE("", m) {
   py::class_<Symbol<Device>, std::shared_ptr<Symbol<Device>>>(m, "device")
       .def(py::init([](const std::string& type_or_type_with_device_id) {
-        return DeviceExportUtil::ParseAndNew(type_or_type_with_device_id).GetOrThrow();
+        return Device::ParseAndNew(type_or_type_with_device_id).GetOrThrow();
       }))
       .def(py::init([](const std::string& type, int64_t device_id) {
-        return DeviceExportUtil::New(type, device_id).GetOrThrow();
+        return Device::New(type, device_id).GetOrThrow();
       }))
       .def_property_readonly("type", [](const Symbol<Device>& d) { return d->type(); })
       .def_property_readonly("index", [](const Symbol<Device>& d) { return d->device_id(); })
diff --git a/oneflow/api/python/functional/python_arg.cpp b/oneflow/api/python/functional/python_arg.cpp
index 15215102162..9eb31f214f0 100644
--- a/oneflow/api/python/functional/python_arg.cpp
+++ b/oneflow/api/python/functional/python_arg.cpp
@@ -15,8 +15,6 @@ limitations under the License.
 */
 
 #include "oneflow/api/python/functional/python_arg.h"
-
-#include "oneflow/api/common/device.h"
 #include "oneflow/api/python/functional/common.h"
 #include "oneflow/api/python/functional/indexing.h"
 #include "oneflow/core/common/scalar.h"
@@ -126,7 +124,7 @@ template<>
 Maybe<Symbol<Device>> PythonArg::ObjectAs<Symbol<Device>>() const {
   if (PyStringCheck(object_)) {
     const char* device_str = JUST(PyStringAsString(object_));
-    return DeviceExportUtil::ParseAndNew(device_str);
+    return Device::ParseAndNew(device_str);
   }
   return PyUnpackDevice(object_);
 }
diff --git a/oneflow/core/framework/device.cpp b/oneflow/core/framework/device.cpp
index 52bb9a8e5d2..2d42b268364 100644
--- a/oneflow/core/framework/device.cpp
+++ b/oneflow/core/framework/device.cpp
@@ -36,6 +36,14 @@ inline size_t HashDevice(const std::string& type, int64_t device_id) {
   return std::hash<std::string>()(type) ^ std::hash<int64_t>()(device_id);
 }
 
+void CheckDeviceType(const std::string& type) {
+  if (Device::type_supported.find(type) == Device::type_supported.end()) {
+    std::string error_msg =
+        "Expected one of cpu, cuda device type at start of device string " + type;
+    throw std::runtime_error(error_msg);
+  }
+}
+
 }  // namespace
 
 Device::Device(const std::string& type, int64_t device_id)
@@ -67,6 +75,7 @@ Maybe<void> Device::Init() {
 
 /* static */ Maybe<Symbol<Device>> Device::ThreadLocalGetOrNew(const std::string& type,
                                                                int64_t device_id) {
+  CheckDeviceType(type);
   CHECK_GE_OR_RETURN(device_id, 0);
   static thread_local HashMap<std::string, HashMap<int64_t, Symbol<Device>>> map;
   auto* device_id2symbol = &map[type];
@@ -80,9 +89,23 @@ Maybe<void> Device::Init() {
 }
 
 /* static */ Maybe<Symbol<Device>> Device::New(const std::string& type) {
+  CheckDeviceType(type);
   return New(type, GlobalProcessCtx::LocalRank());
 }
 
+/* static */ Maybe<Symbol<Device>> Device::ParseAndNew(
+    const std::string& type_or_type_with_device_id) {
+  std::string type;
+  int device_id = -1;  // sentinel: if parsing leaves it at -1, Device::New(type) picks the id
+  JUST(ParsingDeviceTag(type_or_type_with_device_id, &type, &device_id));
+  CheckDeviceType(type);
+  if (device_id == -1) {
+    return Device::New(type);  // this overload uses GlobalProcessCtx::LocalRank() as the id
+  } else {
+    return Device::New(type, device_id);
+  }
+}
+
 Maybe<const std::string&> Device::of_type() const {
   static const HashMap<std::string, std::string> type2device_tag{
       {"cpu", "cpu"},
diff --git a/oneflow/core/framework/device.h b/oneflow/core/framework/device.h
index 6ff3c50250f..c23883977ed 100644
--- a/oneflow/core/framework/device.h
+++ b/oneflow/core/framework/device.h
@@ -57,6 +57,7 @@ class Device final {
   static Maybe<Symbol<Device>> ThreadLocalGetOrNew(const std::string& type, int64_t device_id);
   static Maybe<Symbol<Device>> New(const std::string& type, int64_t device_id);
   static Maybe<Symbol<Device>> New(const std::string& type);
+  static Maybe<Symbol<Device>> ParseAndNew(const std::string& type_or_type_with_device_id);
 
   static Maybe<Symbol<Device>> MakeDeviceByParallelDesc(const ParallelDesc& parallel_desc);
   static const std::unordered_set<std::string> type_supported;
diff --git a/oneflow/core/functional/impl/eye_functor.cpp b/oneflow/core/functional/impl/eye_functor.cpp
index 7e47e77a2fd..4de53d1be8f 100644
--- a/oneflow/core/functional/impl/eye_functor.cpp
+++ b/oneflow/core/functional/impl/eye_functor.cpp
@@ -19,6 +19,7 @@ limitations under the License.
 #include "oneflow/core/common/scalar.h"
 #include "oneflow/core/common/throw.h"
 #include "oneflow/core/common/util.h"
+#include "oneflow/core/framework/device.h"
 #include "oneflow/core/framework/attr_map.h"
 #include "oneflow/core/framework/nd_sbp.h"
 #include "oneflow/core/framework/op_builder.h"
@@ -32,7 +33,6 @@ limitations under the License.
 #include "oneflow/core/functional/impl/common.h"
 #include "oneflow/core/job/lazy_mode.h"
 #include "oneflow/core/job/sbp_parallel.h"
-#include "oneflow/api/common/device.h"
 
 namespace oneflow {
 namespace one {
@@ -66,7 +66,7 @@ class EyeDeviceStrFunctor {
   Maybe<Tensor> operator()(const Scalar& rows, const Optional<Scalar>& cols,
                            const Symbol<DType>& dtype, const std::string& device,
                            const bool& requires_grad) const {
-    const Symbol<Device>& dev = JUST(DeviceExportUtil::ParseAndNew(device));
+    const Symbol<Device>& dev = JUST(Device::ParseAndNew(device));
     return JUST(functional::Eye(rows, cols, dtype, dev, requires_grad));
   }
 };

From 9ba25f46464209e24b1b7501284de29e9a7f0501 Mon Sep 17 00:00:00 2001
From: zzk0 <zzk9460@gmail.com>
Date: Mon, 20 Dec 2021 18:02:43 +0800
Subject: [PATCH 42/51] remove device check in New and ThreadLocalGetOrNew

---
 oneflow/core/framework/device.cpp | 2 --
 1 file changed, 2 deletions(-)

diff --git a/oneflow/core/framework/device.cpp b/oneflow/core/framework/device.cpp
index 2d42b268364..2dd191ec638 100644
--- a/oneflow/core/framework/device.cpp
+++ b/oneflow/core/framework/device.cpp
@@ -75,7 +75,6 @@ Maybe<void> Device::Init() {
 
 /* static */ Maybe<Symbol<Device>> Device::ThreadLocalGetOrNew(const std::string& type,
                                                                int64_t device_id) {
-  CheckDeviceType(type);
   CHECK_GE_OR_RETURN(device_id, 0);
   static thread_local HashMap<std::string, HashMap<int64_t, Symbol<Device>>> map;
   auto* device_id2symbol = &map[type];
@@ -89,7 +88,6 @@ Maybe<void> Device::Init() {
 }
 
 /* static */ Maybe<Symbol<Device>> Device::New(const std::string& type) {
-  CheckDeviceType(type);
   return New(type, GlobalProcessCtx::LocalRank());
 }
 

From 809e578a74adf1dabef428d831cc03a0b91999f0 Mon Sep 17 00:00:00 2001
From: mosout <mosout@qq.com>
Date: Thu, 23 Dec 2021 22:49:47 +0800
Subject: [PATCH 43/51] refine

---
 cmake/oneflow.cmake | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index e830afd3f7d..18fa8ca3e00 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -425,10 +425,12 @@ endif(BUILD_PYTHON)
 if (BUILD_CPP_API)
   add_dependencies(of_include_copy oneflow_cpp)
 
-  set(OF_API_DIRS)
   file(GLOB_RECURSE api_h_files "${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.h")
-  list(APPEND OF_API_DIRS ${api_h_files})
-
-  copy_files("${OF_API_DIRS}" "${PROJECT_SOURCE_DIR}/oneflow/api/cpp" "${LIBONEFLOW_INCLUDE_DIR}" of_include_copy)
+  copy_files("${api_h_files}" "${PROJECT_SOURCE_DIR}/oneflow/api/cpp" "${LIBONEFLOW_INCLUDE_DIR}" of_include_copy)
   copy_files("${PROJECT_SOURCE_DIR}/cmake/oneflow-config.cmake" "${PROJECT_SOURCE_DIR}/cmake" "${LIBONEFLOW_SHARE_DIR}" of_include_copy)
+
+  if(WITH_MLIR)
+    file(GLOB mlir_shared_libs "${PROJECT_BINARY_DIR}/oneflow/ir/llvm_monorepo-build/lib/*.14git")
+    copy_files("${mlir_shared_libs}" "${PROJECT_BINARY_DIR}/oneflow/ir/llvm_monorepo-build/lib" "${LIBONEFLOW_LIBRARY_DIR}" of_include_copy)
+  endif(WITH_MLIR)
 endif(BUILD_CPP_API)

From 770b1de1e64a7a4a9c8d48aebc15ff8eb664a385 Mon Sep 17 00:00:00 2001
From: mosout <mosout@qq.com>
Date: Thu, 23 Dec 2021 22:55:00 +0800
Subject: [PATCH 44/51] fix device test

---
 oneflow/api/cpp/tests/tensor_test.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/oneflow/api/cpp/tests/tensor_test.cpp b/oneflow/api/cpp/tests/tensor_test.cpp
index 3241a220263..5960f961675 100644
--- a/oneflow/api/cpp/tests/tensor_test.cpp
+++ b/oneflow/api/cpp/tests/tensor_test.cpp
@@ -26,13 +26,13 @@ TEST(Api, device) {
   ASSERT_EQ(device.type(), "cpu");
 
 #ifdef WITH_CUDA
-  device = Device("cuda", 1);
+  device = Device("cuda:0");
   ASSERT_EQ(device.type(), "cuda");
-  ASSERT_EQ(device.device_id(), 1);
+  ASSERT_EQ(device.device_id(), 0);
 
-  device = Device("cuda:2");
+  device = Device("cuda", 1);
   ASSERT_EQ(device.type(), "cuda");
-  ASSERT_EQ(device.device_id(), 2);
+  ASSERT_EQ(device.device_id(), 1);
 #endif
 }
 

From 059a7c0d154df6dc5227a26464f647e871e33e9b Mon Sep 17 00:00:00 2001
From: mosout <mosout@qq.com>
Date: Fri, 24 Dec 2021 13:56:26 +0800
Subject: [PATCH 45/51] refine graph test

---
 cmake/oneflow.cmake                  |  4 ++++
 oneflow/api/cpp/framework.h          |  2 +-
 oneflow/api/cpp/framework/graph.cpp  |  7 -------
 oneflow/api/cpp/tests/api_test.cpp   | 18 ++++++++++++++++++
 oneflow/api/cpp/tests/api_test.h     |  2 ++
 oneflow/api/cpp/tests/graph_test.cpp |  8 ++------
 6 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index 18fa8ca3e00..5b786b08355 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -433,4 +433,8 @@ if (BUILD_CPP_API)
     file(GLOB mlir_shared_libs "${PROJECT_BINARY_DIR}/oneflow/ir/llvm_monorepo-build/lib/*.14git")
     copy_files("${mlir_shared_libs}" "${PROJECT_BINARY_DIR}/oneflow/ir/llvm_monorepo-build/lib" "${LIBONEFLOW_LIBRARY_DIR}" of_include_copy)
   endif(WITH_MLIR)
+  
+  if(BUILD_TESTING)
+    copy_all_files_in_dir("${PROJECT_SOURCE_DIR}/oneflow/api/cpp/tests/graph_test_model" "${PROJECT_BINARY_DIR}/bin/graph_test_model" of_include_copy)
+  endif(BUILD_TESTING)
 endif(BUILD_CPP_API)
diff --git a/oneflow/api/cpp/framework.h b/oneflow/api/cpp/framework.h
index e7c00196afb..5d05fb65442 100644
--- a/oneflow/api/cpp/framework.h
+++ b/oneflow/api/cpp/framework.h
@@ -21,7 +21,7 @@ limitations under the License.
 #include "framework/shape.h"
 #include "framework/dtype.h"
 #include "framework/tensor.h"
-#include "framework/graph.h"
 #include "framework/ivalue.h"
+#include "framework/graph.h"
 
 #endif  // ONEFLOW_API_CPP_FRAMEWORK_H_
diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp
index 603be913858..1a3f14d55d6 100644
--- a/oneflow/api/cpp/framework/graph.cpp
+++ b/oneflow/api/cpp/framework/graph.cpp
@@ -22,13 +22,6 @@ limitations under the License.
 #include "oneflow/api/cpp/framework/shape.h"
 #include "oneflow/api/cpp/framework/tensor.h"
 #include "oneflow/api/common/job_build_and_infer_ctx.h"
-#include <cstdio>
-#include <fstream>
-#include <istream>
-#include <memory>
-#include <string>
-#include <utility>
-#include <vector>
 #include "oneflow/api/python/job_build/job_build_and_infer.h"
 #include "oneflow/core/common/data_type.pb.h"
 #include "oneflow/core/common/global.h"
diff --git a/oneflow/api/cpp/tests/api_test.cpp b/oneflow/api/cpp/tests/api_test.cpp
index 88619f3617b..6485ffadbee 100644
--- a/oneflow/api/cpp/tests/api_test.cpp
+++ b/oneflow/api/cpp/tests/api_test.cpp
@@ -15,7 +15,14 @@ limitations under the License.
 */
 
 #include "oneflow/api/cpp/tests/api_test.h"
+#include <cstddef>
 #include <random>
+#ifdef __linux__
+
+#include <unistd.h>        // readlink
+#include <linux/limits.h>  // PATH_MAX
+
+#endif
 
 namespace oneflow_api {
 
@@ -47,4 +54,15 @@ REGISTER_RANDOM_DATA(int8_t)
 REGISTER_RANDOM_DATA(int32_t)
 REGISTER_RANDOM_DATA(int64_t)
 
+std::string GetExeDir() {
+#ifdef __linux__
+  char result[PATH_MAX];
+  ssize_t count = readlink("/proc/self/exe", result, PATH_MAX);
+  std::string exe_path(result, (count > 0) ? count : 0);
+  size_t pos = exe_path.rfind('/');
+  if (pos != std::string::npos) { return exe_path.substr(0, pos); }
+#endif
+  return ".";
+}
+
 }  // namespace oneflow_api
diff --git a/oneflow/api/cpp/tests/api_test.h b/oneflow/api/cpp/tests/api_test.h
index cec50969e69..c196bc90662 100644
--- a/oneflow/api/cpp/tests/api_test.h
+++ b/oneflow/api/cpp/tests/api_test.h
@@ -32,6 +32,8 @@ Shape RandomShape();
 template<typename T>
 std::vector<T> RandomData(size_t size);
 
+std::string GetExeDir();
+
 }  // namespace oneflow_api
 
 #endif  // !ONEFLOW_API_CPP_TESTS_API_TEST_H_
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 72fb64e0536..0aa2a7223cb 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -31,9 +31,7 @@ namespace oneflow_api {
 namespace {
 
 inline Graph LoadGraph(const Device& device) {
-  const std::string file_name = __FILE__;
-  const std::string directory = file_name.substr(0, file_name.rfind('/'));
-  Graph graph = Graph::Load(directory + "/graph_test_model/affine_with_parameter", device);
+  Graph graph = Graph::Load(GetExeDir() + "/graph_test_model/affine_with_parameter", device);
   return graph;
 }
 
@@ -165,10 +163,8 @@ TEST(Api, graph_thread_test) {
 TEST(Api, graph_input_order_test) {
   EnvScope scope;
 
-  const std::string file_name = __FILE__;
-  const std::string directory = file_name.substr(0, file_name.rfind('/'));
   Device device("cpu");
-  Graph graph = Graph::Load(directory + "/graph_test_model/affine_no_parameter", device);
+  Graph graph = Graph::Load(GetExeDir() + "/graph_test_model/affine_no_parameter", device);
 
   std::vector<Tensor> inputs;
   std::vector<float> x(3);

From 85d9608771ab65dcf633182941f34cb083cf21e1 Mon Sep 17 00:00:00 2001
From: mosout <mosout@qq.com>
Date: Fri, 24 Dec 2021 16:11:13 +0800
Subject: [PATCH 46/51] refine GetExeDir()

---
 oneflow/api/cpp/tests/api_test.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/oneflow/api/cpp/tests/api_test.cpp b/oneflow/api/cpp/tests/api_test.cpp
index 6485ffadbee..bf7724e0a01 100644
--- a/oneflow/api/cpp/tests/api_test.cpp
+++ b/oneflow/api/cpp/tests/api_test.cpp
@@ -59,10 +59,12 @@ std::string GetExeDir() {
   char result[PATH_MAX];
   ssize_t count = readlink("/proc/self/exe", result, PATH_MAX);
   std::string exe_path(result, (count > 0) ? count : 0);
-  size_t pos = exe_path.rfind('/');
-  if (pos != std::string::npos) { return exe_path.substr(0, pos); }
+
+  // string(path).rfind('/') will never be string::npos on linux.
+  return exe_path.substr(0, exe_path.rfind('/'));
+#else
+#error oneflow_api::GetExeDir() has not been supported on windows.
 #endif
-  return ".";
 }
 
 }  // namespace oneflow_api

From c1bae17aadc04d1932420fa02dfe2b126105a62d Mon Sep 17 00:00:00 2001
From: mosout <mosout@qq.com>
Date: Fri, 24 Dec 2021 16:41:52 +0800
Subject: [PATCH 47/51] refine GetExeDir() again

---
 oneflow/api/cpp/tests/api_test.cpp | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/oneflow/api/cpp/tests/api_test.cpp b/oneflow/api/cpp/tests/api_test.cpp
index bf7724e0a01..1fbc790bcc2 100644
--- a/oneflow/api/cpp/tests/api_test.cpp
+++ b/oneflow/api/cpp/tests/api_test.cpp
@@ -17,10 +17,14 @@ limitations under the License.
 #include "oneflow/api/cpp/tests/api_test.h"
 #include <cstddef>
 #include <random>
+#include <string>
 #ifdef __linux__
 
-#include <unistd.h>        // readlink
-#include <linux/limits.h>  // PATH_MAX
+#include <unistd.h>  // readlink
+
+#elif defined(__APPLE__)
+
+#include <mach-o/dyld.h>  //  _NSGetExecutablePath
 
 #endif
 
@@ -55,13 +59,23 @@ REGISTER_RANDOM_DATA(int32_t)
 REGISTER_RANDOM_DATA(int64_t)
 
 std::string GetExeDir() {
-#ifdef __linux__
-  char result[PATH_MAX];
-  ssize_t count = readlink("/proc/self/exe", result, PATH_MAX);
-  std::string exe_path(result, (count > 0) ? count : 0);
+  const size_t path_max_size = 4096;  // PATH_MAX = 4096 on linux
+  char result[path_max_size];
 
-  // string(path).rfind('/') will never be string::npos on linux.
-  return exe_path.substr(0, exe_path.rfind('/'));
+  // `count` is signed: a failed readlink() returns -1, which must map to an empty path.
+  const auto get_dir_from_path = [](char result[], std::ptrdiff_t count) -> std::string {
+    std::string exe_path(result, (count > 0) ? static_cast<size_t>(count) : 0);
+    // An absolute path always contains '/'; an empty path (lookup failed) yields "".
+    return exe_path.substr(0, exe_path.rfind('/'));
+  };
+
+#ifdef __linux__
+  ssize_t count = readlink("/proc/self/exe", result, path_max_size);
+  return get_dir_from_path(result, count);
+#elif defined(__APPLE__)
+  uint32_t count = path_max_size;  // in: buffer size; left unchanged when the call succeeds
+  CHECK_EQ(_NSGetExecutablePath(result, &count), 0) << "Fail to get executable file path.";
+  return get_dir_from_path(result, std::char_traits<char>::length(result));
 #else
 #error oneflow_api::GetExeDir() has not been supported on windows.
 #endif

From 7e691108b1be11aab873cd52276bb429e9a08360 Mon Sep 17 00:00:00 2001
From: mosout <mosout@qq.com>
Date: Fri, 24 Dec 2021 21:43:02 +0800
Subject: [PATCH 48/51] fix

---
 cmake/oneflow.cmake                  | 4 ----
 oneflow/api/cpp/tests/graph_test.cpp | 5 +++--
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index 41e4e71d802..ffc13a675c7 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -435,8 +435,4 @@ if (BUILD_CPP_API)
     file(GLOB mlir_shared_libs "${PROJECT_BINARY_DIR}/oneflow/ir/llvm_monorepo-build/lib/*.14git")
     copy_files("${mlir_shared_libs}" "${PROJECT_BINARY_DIR}/oneflow/ir/llvm_monorepo-build/lib" "${LIBONEFLOW_LIBRARY_DIR}" of_include_copy)
   endif(WITH_MLIR)
-  
-  if(BUILD_TESTING)
-    copy_all_files_in_dir("${PROJECT_SOURCE_DIR}/oneflow/api/cpp/tests/graph_test_model" "${PROJECT_BINARY_DIR}/bin/graph_test_model" of_include_copy)
-  endif(BUILD_TESTING)
 endif(BUILD_CPP_API)
diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index 0aa2a7223cb..d455ee4cf16 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -31,7 +31,8 @@ namespace oneflow_api {
 namespace {
 
 inline Graph LoadGraph(const Device& device) {
-  Graph graph = Graph::Load(GetExeDir() + "/graph_test_model/affine_with_parameter", device);
+  Graph graph =
+      Graph::Load("./oneflow/api/cpp/tests/graph_test_model/affine_with_parameter", device);
   return graph;
 }
 
@@ -164,7 +165,7 @@ TEST(Api, graph_input_order_test) {
   EnvScope scope;
 
   Device device("cpu");
-  Graph graph = Graph::Load(GetExeDir() + "/graph_test_model/affine_no_parameter", device);
+  Graph graph = Graph::Load("./oneflow/api/cpp/tests/graph_test_model/affine_no_parameter", device);
 
   std::vector<Tensor> inputs;
   std::vector<float> x(3);

From ca56a17064db9a95dd8a4df5e33856d506d60f30 Mon Sep 17 00:00:00 2001
From: mosout <mosout@qq.com>
Date: Fri, 24 Dec 2021 22:38:18 +0800
Subject: [PATCH 49/51] refine

---
 cmake/oneflow.cmake | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake
index ffc13a675c7..17b5267f67d 100644
--- a/cmake/oneflow.cmake
+++ b/cmake/oneflow.cmake
@@ -359,13 +359,13 @@ endif()
 file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR})
 
 function(oneflow_add_test target_name)
-  cmake_parse_arguments(arg "" "TEST_NAME" "SRCS" ${ARGN})
+  cmake_parse_arguments(arg "" "TEST_NAME;WORKING_DIRECTORY" "SRCS" ${ARGN})
   oneflow_add_executable(${target_name} ${arg_SRCS})
   if (BUILD_CUDA)
     target_link_libraries(${target_name} CUDA::cudart_static)
   endif()
   set_target_properties(${target_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin")
-  add_test(NAME ${arg_TEST_NAME} COMMAND ${target_name})
+  add_test(NAME ${arg_TEST_NAME} COMMAND ${target_name} WORKING_DIRECTORY ${arg_WORKING_DIRECTORY})
   set_tests_properties(
     ${arg_TEST_NAME}
   PROPERTIES
@@ -382,11 +382,12 @@ if(BUILD_TESTING)
 
   if (BUILD_CPP_API)
     file(GLOB_RECURSE cpp_api_test_files ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/tests/*.cpp)
-    oneflow_add_test(oneflow_cpp_api_testexe SRCS ${cpp_api_test_files} TEST_NAME oneflow_cpp_api_test)
+    oneflow_add_test(oneflow_cpp_api_testexe SRCS ${cpp_api_test_files} TEST_NAME oneflow_cpp_api_test WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
     target_link_libraries(oneflow_cpp_api_testexe oneflow_cpp ${oneflow_test_libs})
   endif()
 endif()
 
+
 # build include
 add_custom_target(of_include_copy ALL)
 

From ae2f7ac45183c08bea00754baad1c69edcf7d012 Mon Sep 17 00:00:00 2001
From: mosout <mosout@qq.com>
Date: Sat, 25 Dec 2021 22:53:37 +0800
Subject: [PATCH 50/51] fix

---
 .github/workflows/test.yml | 2 +-
 cmake/caches/ci/cpu.cmake  | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 499cde368df..b0085586049 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -546,7 +546,7 @@ jobs:
         run: |
           docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor
       - name: Exe test
-        if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }}
+        if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' && matrix.device == 'cpu' }}
         timeout-minutes: 10
         run: |
           chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe
diff --git a/cmake/caches/ci/cpu.cmake b/cmake/caches/ci/cpu.cmake
index ac23ae140b3..fc416e58016 100644
--- a/cmake/caches/ci/cpu.cmake
+++ b/cmake/caches/ci/cpu.cmake
@@ -9,3 +9,4 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CMAKE_GENERATOR Ninja CACHE STRING "")
 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF CACHE BOOL "")
 set(BUILD_CPP_API ON CACHE BOOL "")
+set(WITH_MLIR ON CACHE BOOL "")

From cbf506cc7498bee368686ae3363e7f406f0155d9 Mon Sep 17 00:00:00 2001
From: daquexian <daquexian566@gmail.com>
Date: Sun, 26 Dec 2021 13:37:29 +0800
Subject: [PATCH 51/51] add WITH_CUDA guard

Signed-off-by: daquexian <daquexian566@gmail.com>
---
 oneflow/api/cpp/tests/graph_test.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp
index d455ee4cf16..497da6b1bbb 100644
--- a/oneflow/api/cpp/tests/graph_test.cpp
+++ b/oneflow/api/cpp/tests/graph_test.cpp
@@ -62,6 +62,7 @@ TEST(Api, graph_cpu_test) {
   Forward(graph, device, 1);
 }
 
+#ifdef WITH_CUDA
 TEST(Api, graph_gpu_test) {
   EnvScope scope;
   Device device("cuda", 0);
@@ -87,6 +88,7 @@ TEST(Api, graph_trt_test) {
   graph.enable_tensorrt();
   Forward(graph, device);
 }
+#endif
 
 TEST(Api, graph_cpu_batching_test) {
   EnvScope scope;
@@ -96,6 +98,7 @@ TEST(Api, graph_cpu_batching_test) {
   Forward(graph, device, 10);
 }
 
+#ifdef WITH_CUDA
 TEST(Api, graph_gpu_batching_test) {
   EnvScope scope;
   Device device("cuda", 0);
@@ -146,6 +149,7 @@ TEST(Api, graph_unload_test) {
     Forward(graph, device, 1);
   }
 }
+#endif
 
 TEST(Api, graph_thread_test) {
   EnvScope scope;