diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 499cde368df..b0085586049 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -546,7 +546,7 @@ jobs: run: | docker exec ${{ env.TEST_CONTAINER_NAME }} python3 -m oneflow --doctor - name: Exe test - if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' }} + if: ${{ !fromJson(matrix.cache-hit) && matrix.test-type == 'misc' && matrix.device == 'cpu' }} timeout-minutes: 10 run: | chmod +x ${{ steps.download-digest.outputs.entry-dir }}/bin/oneflow_testexe diff --git a/cmake/caches/ci/cpu.cmake b/cmake/caches/ci/cpu.cmake index ac23ae140b3..fc416e58016 100644 --- a/cmake/caches/ci/cpu.cmake +++ b/cmake/caches/ci/cpu.cmake @@ -9,3 +9,4 @@ set(CMAKE_BUILD_TYPE Release CACHE STRING "") set(CMAKE_GENERATOR Ninja CACHE STRING "") set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF CACHE BOOL "") set(BUILD_CPP_API ON CACHE BOOL "") +set(WITH_MLIR ON CACHE BOOL "") diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake index 662dcc15e9f..17b5267f67d 100644 --- a/cmake/oneflow.cmake +++ b/cmake/oneflow.cmake @@ -103,11 +103,6 @@ foreach(oneflow_single_file ${oneflow_all_src}) set(group_this ON) endif() - if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/common/.*\\.(h|cpp)$") - list(APPEND of_all_obj_cc ${oneflow_single_file}) - set(group_this ON) - endif() - if(BUILD_PYTHON) if("${oneflow_single_file}" MATCHES "^${PROJECT_SOURCE_DIR}/oneflow/api/python/.*\\.(h|cpp)$") @@ -284,6 +279,18 @@ elseif(WIN32) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /WHOLEARCHIVE:oneflow") endif() +# oneflow api common +if (BUILD_PYTHON OR BUILD_CPP_API) + file(GLOB_RECURSE of_api_common_files + ${PROJECT_SOURCE_DIR}/oneflow/api/common/*.h + ${PROJECT_SOURCE_DIR}/oneflow/api/common/*.cpp) + oneflow_add_library(of_api_common OBJECT ${of_api_common_files}) + target_link_libraries(of_api_common oneflow) + if (WITH_MLIR) + target_link_libraries(of_api_common ${ONEFLOW_MLIR_LIBS}) + endif() +endif() + if(BUILD_PYTHON) # py ext lib @@ -304,7 +311,7 @@ if(BUILD_PYTHON) target_link_libraries(oneflow_internal PRIVATE ${of_libs} of_functional_tensor_obj - ${ONEFLOW_MLIR_LIBS} + of_api_common ${oneflow_third_party_libs} of_pyext_obj ${oneflow_exe_third_party_libs}) @@ -346,19 +353,19 @@ if (BUILD_CPP_API) oneflow_add_library(oneflow_cpp ${of_cpp_api_files}) endif() set_target_properties(oneflow_cpp PROPERTIES ARCHIVE_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}" LIBRARY_OUTPUT_DIRECTORY "${LIBONEFLOW_LIBRARY_DIR}") - target_link_libraries(oneflow_cpp PRIVATE ${of_libs} ${ONEFLOW_MLIR_LIBS} ${oneflow_third_party_libs}) + target_link_libraries(oneflow_cpp PRIVATE ${of_libs} of_api_common ${oneflow_third_party_libs}) endif() file(RELATIVE_PATH PROJECT_BINARY_DIR_RELATIVE ${PROJECT_SOURCE_DIR} ${PROJECT_BINARY_DIR}) function(oneflow_add_test target_name) - cmake_parse_arguments(arg "" "TEST_NAME" "SRCS" ${ARGN}) + cmake_parse_arguments(arg "" "TEST_NAME;WORKING_DIRECTORY" "SRCS" ${ARGN}) oneflow_add_executable(${target_name} ${arg_SRCS}) if (BUILD_CUDA) target_link_libraries(${target_name} CUDA::cudart_static) endif() set_target_properties(${target_name} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/bin") - add_test(NAME ${arg_TEST_NAME} COMMAND ${target_name}) + add_test(NAME ${arg_TEST_NAME} COMMAND ${target_name} WORKING_DIRECTORY ${arg_WORKING_DIRECTORY}) set_tests_properties( ${arg_TEST_NAME} PROPERTIES @@ -375,11 +382,12 @@ if(BUILD_TESTING) if (BUILD_CPP_API) file(GLOB_RECURSE 
cpp_api_test_files ${PROJECT_SOURCE_DIR}/oneflow/api/cpp/tests/*.cpp) - oneflow_add_test(oneflow_cpp_api_testexe SRCS ${cpp_api_test_files} TEST_NAME oneflow_cpp_api_test) + oneflow_add_test(oneflow_cpp_api_testexe SRCS ${cpp_api_test_files} TEST_NAME oneflow_cpp_api_test WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) target_link_libraries(oneflow_cpp_api_testexe oneflow_cpp ${oneflow_test_libs}) endif() endif() + # build include add_custom_target(of_include_copy ALL) @@ -420,10 +428,12 @@ endif(BUILD_PYTHON) if (BUILD_CPP_API) add_dependencies(of_include_copy oneflow_cpp) - set(OF_API_DIRS) file(GLOB_RECURSE api_h_files "${PROJECT_SOURCE_DIR}/oneflow/api/cpp/*.h") - list(APPEND OF_API_DIRS ${api_h_files}) - - copy_files("${OF_API_DIRS}" "${PROJECT_SOURCE_DIR}/oneflow/api/cpp" "${LIBONEFLOW_INCLUDE_DIR}" of_include_copy) + copy_files("${api_h_files}" "${PROJECT_SOURCE_DIR}/oneflow/api/cpp" "${LIBONEFLOW_INCLUDE_DIR}" of_include_copy) copy_files("${PROJECT_SOURCE_DIR}/cmake/oneflow-config.cmake" "${PROJECT_SOURCE_DIR}/cmake" "${LIBONEFLOW_SHARE_DIR}" of_include_copy) + + if(WITH_MLIR) + file(GLOB mlir_shared_libs "${PROJECT_BINARY_DIR}/oneflow/ir/llvm_monorepo-build/lib/*.14git") + copy_files("${mlir_shared_libs}" "${PROJECT_BINARY_DIR}/oneflow/ir/llvm_monorepo-build/lib" "${LIBONEFLOW_LIBRARY_DIR}" of_include_copy) + endif(WITH_MLIR) endif(BUILD_CPP_API) diff --git a/cmake/third_party/absl.cmake b/cmake/third_party/absl.cmake index a30f673df60..2a85ca963d5 100644 --- a/cmake/third_party/absl.cmake +++ b/cmake/third_party/absl.cmake @@ -12,14 +12,14 @@ SET(ABSL_LIBRARY_DIR ${THIRD_PARTY_DIR}/absl/${CMAKE_INSTALL_LIBDIR} CACHE PATH if(WIN32) set(ABSL_BUILD_LIBRARY_DIR ${ABSL_INSTALL}/${CMAKE_INSTALL_LIBDIR}) - set(ABSL_LIBRARY_NAMES absl_base.lib absl_spinlock_wait.lib absl_dynamic_annotations.lib + set(ABSL_LIBRARY_NAMES absl_spinlock_wait.lib absl_dynamic_annotations.lib absl_malloc_internal.lib absl_throw_delegate.lib absl_int128.lib absl_strings.lib absl_str_format_internal.lib - absl_time.lib absl_bad_optional_access.lib) + absl_time.lib absl_bad_optional_access.lib absl_base.lib) else() set(ABSL_BUILD_LIBRARY_DIR ${ABSL_INSTALL}/${CMAKE_INSTALL_LIBDIR}) - set(ABSL_LIBRARY_NAMES libabsl_base.a libabsl_spinlock_wait.a libabsl_dynamic_annotations.a + set(ABSL_LIBRARY_NAMES libabsl_spinlock_wait.a libabsl_dynamic_annotations.a libabsl_malloc_internal.a libabsl_throw_delegate.a libabsl_int128.a libabsl_strings.a libabsl_str_format_internal.a - libabsl_time.a libabsl_bad_optional_access.a) + libabsl_time.a libabsl_bad_optional_access.a libabsl_base.a) endif() foreach(LIBRARY_NAME ${ABSL_LIBRARY_NAMES}) diff --git a/oneflow/api/common/device.cpp b/oneflow/api/common/device.cpp deleted file mode 100644 index 566f9231066..00000000000 --- a/oneflow/api/common/device.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-*/ - -#include "oneflow/api/common/device.h" - -namespace oneflow { - -namespace { - -void CheckDeviceType(const std::string& type) { - if (Device::type_supported.find(type) == Device::type_supported.end()) { - std::string error_msg = - "Expected one of cpu, cuda device type at start of device string " + type; - throw std::runtime_error(error_msg); - } -} - -} // namespace - -/* static */ Maybe> DeviceExportUtil::ParseAndNew( - const std::string& type_or_type_with_device_id) { - std::string type; - int device_id = -1; - ParsingDeviceTag(type_or_type_with_device_id, &type, &device_id).GetOrThrow(); - CheckDeviceType(type); - if (device_id == -1) { - return Device::New(type); - } else { - return Device::New(type, device_id); - } -} - -/* static */ Maybe> DeviceExportUtil::New(const std::string& type, - int64_t device_id) { - CheckDeviceType(type); - return Device::New(type, device_id); -} - -} // namespace oneflow diff --git a/oneflow/api/common/device.h b/oneflow/api/common/ir_pass.cpp similarity index 63% rename from oneflow/api/common/device.h rename to oneflow/api/common/ir_pass.cpp index 8703666af7f..ca4111a9da0 100644 --- a/oneflow/api/common/device.h +++ b/oneflow/api/common/ir_pass.cpp @@ -14,17 +14,17 @@ See the License for the specific language governing permissions and limitations under the License. */ -#ifndef ONEFLOW_API_COMMON_DEVICE_H_ -#define ONEFLOW_API_COMMON_DEVICE_H_ +#ifdef WITH_MLIR -#include "oneflow/core/framework/device.h" +#include "oneflow/ir/include/OneFlow/Extension.h" +#include "oneflow/ir/oneflow-extension/include/OneFlow/OneFlowRoundTrip.h" +#include namespace oneflow { -struct DeviceExportUtil final { - static Maybe> ParseAndNew(const std::string& type_or_type_with_device_id); - static Maybe> New(const std::string& type, int64_t device_id); -}; +REGISTER_JOB_PASS("IRRoundTripBeforeAD", IRRoundTrip); +REGISTER_JOB_PASS("IRRoundTrip", IRRoundTrip); + } // namespace oneflow -#endif // !ONEFLOW_API_COMMON_DEVICE_H_ +#endif // WITH_MLIR diff --git a/oneflow/api/common/job_build_and_infer_ctx.h b/oneflow/api/common/job_build_and_infer_ctx.h new file mode 100644 index 00000000000..8b475f8a2db --- /dev/null +++ b/oneflow/api/common/job_build_and_infer_ctx.h @@ -0,0 +1,36 @@ +/* +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#ifndef ONEFLOW_API_COMMON_JOB_BUILD_AND_INFER_CTX_H_ +#define ONEFLOW_API_COMMON_JOB_BUILD_AND_INFER_CTX_H_ + +#include "oneflow/core/job/job.pb.h" +#include "oneflow/core/job/job_build_and_infer_ctx_mgr.h" + +namespace oneflow { + +inline Maybe GetCurrentJob() { + auto* job_ctx_mgr = Global::Get(); + CHECK_NOTNULL_OR_RETURN(job_ctx_mgr); + auto* job_ctx = + JUST(job_ctx_mgr->FindJobBuildAndInferCtx(*JUST(job_ctx_mgr->GetCurrentJobName()))); + CHECK_NOTNULL_OR_RETURN(job_ctx); + return job_ctx->job(); +} + +} // namespace oneflow + +#endif // ONEFLOW_API_COMMON_JOB_BUILD_AND_INFER_CTX_H_ diff --git a/oneflow/api/common/scope.h b/oneflow/api/common/scope.h new file mode 100644 index 00000000000..f0626e3ada1 --- /dev/null +++ b/oneflow/api/common/scope.h @@ -0,0 +1,54 @@ +/* +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef ONEFLOW_API_COMMON_SCOPE_H_ +#define ONEFLOW_API_COMMON_SCOPE_H_ + +#include +#include +#include "oneflow/core/common/just.h" +#include "oneflow/core/framework/device.h" +#include "oneflow/core/framework/instructions_builder.h" +#include "oneflow/core/framework/session_util.h" +#include "oneflow/core/job/job_conf.cfg.h" +#include "oneflow/core/job/job_conf.pb.h" +#include "oneflow/core/job/scope.h" + +namespace oneflow { + +inline Maybe MakeScope(const JobConfigProto& config_proto, const Device& device) { + std::shared_ptr scope; + std::shared_ptr cfg_config_proto = + std::make_shared(config_proto); + JUST(LogicalRun([&](InstructionsBuilder* builder) -> Maybe { + int64_t session_id = 0; + std::string device_tag = "cpu"; + std::string machine_ids = "0"; + std::string device_ids = "0"; + if (device.type() == "cuda") { + device_tag = "gpu"; + device_ids = std::to_string(device.device_id()); + } + scope = JUST(builder->BuildInitialScope(session_id, cfg_config_proto, device_tag, + {machine_ids + ":" + device_ids}, nullptr, false)); + return Maybe::Ok(); + })); + return scope; +} + +} // namespace oneflow + +#endif // ONEFLOW_API_COMMON_SCOPE_H_ diff --git a/oneflow/api/cpp/env.cpp b/oneflow/api/cpp/env.cpp index 6919948af02..ea31af15a69 100644 --- a/oneflow/api/cpp/env.cpp +++ b/oneflow/api/cpp/env.cpp @@ -24,14 +24,17 @@ limitations under the License. 
#include #include #include "oneflow/api/cpp/env.h" +#include "oneflow/core/common/global.h" #include "oneflow/core/common/just.h" #include "oneflow/core/common/multi_client.h" #include "oneflow/core/common/optional.h" +#include "oneflow/core/framework/multi_client_session_context.h" #include "oneflow/core/framework/shut_down_util.h" #include "oneflow/core/job/cluster_instruction.h" #include "oneflow/core/job/env.pb.h" #include "oneflow/core/job/env_global_objects_scope.h" #include "oneflow/core/control/ctrl_bootstrap.h" +#include "oneflow/core/job/session.h" #include "oneflow/core/rpc/include/base.h" #include "oneflow/core/vm/vm_util.h" #include "oneflow/core/thread/thread_consistent_id.h" @@ -114,6 +117,12 @@ of::Maybe initEnv() { CompleteEnvProto(env_proto); of::Global::SetAllocated(new of::EnvGlobalObjectsScope()); JUST(of::Global::Get()->Init(env_proto)); + + of::ConfigProto config_proto; + config_proto.mutable_resource()->set_cpu_device_num(1); // useless, will be set in TryInit + config_proto.set_session_id(of::NewSessionId()); + of::Global::New(); + of::Global::Get()->TryInit(config_proto).GetOrThrow(); return of::Maybe::Ok(); } @@ -129,6 +138,8 @@ void release() { if (IsEnvInited()) { // sync multi_client of::vm::ClusterSync().GetOrThrow(); + of::Global::Get()->TryClose().GetOrThrow(); + of::Global::Delete(); // destory env if (of::IsMultiClient().GetOrThrow()) { OF_ENV_BARRIER(); @@ -137,7 +148,6 @@ void release() { } of::Global::Delete(); } - // TODO close session of::SetShuttingDown(); of::ResetThisThreadUniqueConsistentId().GetOrThrow(); } diff --git a/oneflow/api/cpp/framework.h b/oneflow/api/cpp/framework.h index 214ac3d2f5e..5d05fb65442 100644 --- a/oneflow/api/cpp/framework.h +++ b/oneflow/api/cpp/framework.h @@ -22,5 +22,6 @@ limitations under the License. #include "framework/dtype.h" #include "framework/tensor.h" #include "framework/ivalue.h" +#include "framework/graph.h" #endif // ONEFLOW_API_CPP_FRAMEWORK_H_ diff --git a/oneflow/api/cpp/framework/device.cpp b/oneflow/api/cpp/framework/device.cpp index dd291de0353..3be4b3f5a46 100644 --- a/oneflow/api/cpp/framework/device.cpp +++ b/oneflow/api/cpp/framework/device.cpp @@ -15,9 +15,9 @@ limitations under the License. 
*/ #include "oneflow/api/cpp/framework/device.h" -#include "oneflow/api/common/device.h" #include "oneflow/core/common/maybe.h" #include "oneflow/core/common/symbol.h" +#include "oneflow/core/framework/device.h" namespace oneflow_api { @@ -25,11 +25,11 @@ namespace of = oneflow; Device::Device(const std::string& type_or_type_with_device_id) : device_(std::make_shared>( - of::DeviceExportUtil::ParseAndNew(type_or_type_with_device_id).GetOrThrow())) {} + of::Device::ParseAndNew(type_or_type_with_device_id).GetOrThrow())) {} Device::Device(const std::string& type, int64_t device_id) - : device_(std::make_shared>( - of::DeviceExportUtil::New(type, device_id).GetOrThrow())) {} + : device_( + std::make_shared>(of::Device::New(type, device_id).GetOrThrow())) {} const std::string& Device::type() const { return (*device_)->type(); } diff --git a/oneflow/api/cpp/framework/device.h b/oneflow/api/cpp/framework/device.h index e45c7efb0f7..2a7e79b2a23 100644 --- a/oneflow/api/cpp/framework/device.h +++ b/oneflow/api/cpp/framework/device.h @@ -32,6 +32,7 @@ namespace oneflow_api { class Device final { friend class Tensor; + friend class Graph; public: explicit Device(const std::string& type_or_type_with_device_id); diff --git a/oneflow/api/cpp/framework/graph.cpp b/oneflow/api/cpp/framework/graph.cpp new file mode 100644 index 00000000000..1a3f14d55d6 --- /dev/null +++ b/oneflow/api/cpp/framework/graph.cpp @@ -0,0 +1,395 @@ +/* +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include "oneflow/api/common/ofblob.h" +#include "oneflow/api/common/scope.h" +#include "oneflow/api/cpp/framework/device.h" +#include "oneflow/api/cpp/framework/graph.h" +#include "oneflow/api/cpp/framework/ivalue.h" +#include "oneflow/api/cpp/framework/shape.h" +#include "oneflow/api/cpp/framework/tensor.h" +#include "oneflow/api/common/job_build_and_infer_ctx.h" +#include "oneflow/api/python/job_build/job_build_and_infer.h" +#include "oneflow/core/common/data_type.pb.h" +#include "oneflow/core/common/global.h" +#include "oneflow/core/common/hash_container.h" +#include "oneflow/core/common/just.h" +#include "oneflow/core/common/shape.h" +#include "oneflow/core/common/symbol.h" +#include "oneflow/core/common/util.h" +#include "oneflow/core/framework/device.h" +#include "oneflow/core/framework/dtype.h" +#include "oneflow/core/framework/multi_client_session_context.h" +#include "oneflow/core/framework/nn_graph.h" +#include "oneflow/core/framework/scope_util.h" +#include "oneflow/core/framework/tensor.h" +#include "oneflow/core/framework/tensor_tuple.h" +#include "oneflow/core/framework/tensor_util.h" +#include "oneflow/core/functional/functional_api.yaml.h" +#include "oneflow/core/graph/op_graph.h" +#include "oneflow/core/job/job.pb.h" +#include "oneflow/core/job/job_build_and_infer_ctx.h" +#include "oneflow/core/job/job_build_and_infer_ctx_mgr.h" +#include "oneflow/core/job/job_conf.cfg.h" +#include "oneflow/core/job/job_conf.pb.h" +#include "oneflow/core/job/job_ir.h" +#include "oneflow/core/job/job_set.pb.h" +#include "oneflow/core/job/lazy_mode.h" +#include "oneflow/core/job/parallel_desc.h" +#include "oneflow/core/job/scope.h" +#include "oneflow/core/job/session.h" +#include "oneflow/core/operator/interface_blob_conf.pb.h" +#include "oneflow/core/operator/op_conf.pb.h" +#include "oneflow/core/register/logical_blob_id.pb.h" + +namespace oneflow_api { + +namespace of = oneflow; + +enum class XrtKind : int { kNone = 0, kTensorRT = 1 }; + +namespace { + +class CompileScope { + public: + CompileScope(const of::JobConfigProto& job_config, const of::Device& device, XrtKind kind) { + const std::shared_ptr scope = CHECK_JUST(of::MakeScope(job_config, device)); + CHECK_JUST(of::ThreadLocalScopeStackPush(scope)); + + of::cfg::JobConfigProto job_config_cfg(job_config); + ConfigXrt(job_config_cfg, kind); + CHECK_JUST(of::JobBuildAndInferCtx_Open(job_config.job_name())); + CHECK_JUST(of::CurJobBuildAndInferCtx_SetJobConf(job_config_cfg)); + } + + ~CompileScope() { + CHECK_JUST(of::JobBuildAndInferCtx_Close()); + CHECK_JUST(of::ThreadLocalScopeStackPop()); + } + + private: + of::LazyMode::Guard lazy_mode_enabled_guard{true}; + + void ConfigXrt(of::cfg::JobConfigProto& job_config_cfg, XrtKind kind) { + if (kind == XrtKind::kTensorRT) { +#ifdef WITH_TENSORRT + *(job_config_cfg.mutable_xrt_config()->mutable_use_tensorrt()) = true; +#else + LOG(WARNING) << "XRT TensorRT is unavailable while tensorrt is enabled"; +#endif + } + } +}; + +std::shared_ptr ConvertToTensorTuple( + const std::vector>& tensors) { + auto tensor_tuple = std::make_shared(); + for (const auto& tensor : tensors) { tensor_tuple->emplace_back(tensor); } + return tensor_tuple; +} + +std::string GetDeviceTag(const Device& device) { + if (device.type() == "cuda") { + return "gpu"; + } else { + return "cpu"; + } +} + +template +const std::pair, std::vector> Unzip(const of::HashMap& hash_map) { + std::vector vec1; + std::vector vec2; + for (const auto& entry : hash_map) { + vec1.emplace_back(entry.first); + 
vec2.emplace_back(entry.second); + } + return std::make_pair(vec1, vec2); +} + +} // namespace + +class Graph::GraphImpl final { + public: + explicit GraphImpl(const std::string& model_path, const Device& device = Device("cpu")); + + GraphImpl(const GraphImpl& graph) = delete; + GraphImpl(GraphImpl&& graph) noexcept; + + ~GraphImpl() = default; + + GraphImpl& operator=(const GraphImpl& graph) = delete; + GraphImpl& operator=(GraphImpl&& graph) noexcept; + + std::vector Forward(const std::vector& inputs); + void set_batch_size(int batch_size) { batch_size_ = batch_size; } + void enable_tensorrt() { xrt_kind_ = XrtKind::kTensorRT; } + + private: + oneflow::Maybe Compile(const std::vector& inputs); + oneflow::Maybe> Run(const std::vector& inputs) const; + oneflow::Maybe AddOp(oneflow::OperatorConf op_conf); + oneflow::Maybe BuildGraph(const std::vector& inputs); + oneflow::Maybe LoadCheckpoint(); + oneflow::Maybe RegisterTensors(const std::vector& inputs); + + std::shared_ptr graph_ = nullptr; + std::string model_path_; + bool is_compiled_ = false; + int batch_size_ = 0; + XrtKind xrt_kind_ = XrtKind::kNone; + Device device_; + oneflow::Job job_; + + oneflow::HashMap input_name_to_order_; + oneflow::HashMap> output_name_to_tensor_; + oneflow::HashMap> variable_op_name_to_tensor_; + std::shared_ptr output_tensor_tuple_; + std::shared_ptr parameter_tensor_tuple_; +}; + +Graph::Graph(const std::string& model_path, const Device& device) + : graph_(std::make_unique(model_path, device)) {} + +Graph::~Graph() = default; + +Graph::Graph(Graph&& graph) noexcept : graph_(std::move(graph.graph_)) {} + +Graph& Graph::operator=(Graph&& graph) noexcept { + if (&graph == this) { return *this; } + graph_ = std::move(graph.graph_); + return *this; +} + +IValue Graph::Forward(const IValue& inputs) { + std::vector input_tensors; + if (inputs.IsNone()) { + // do nothing + } else if (inputs.IsTensor()) { + input_tensors.emplace_back(inputs.ToTensor()); + } else if (inputs.IsTensorVector()) { + input_tensors = inputs.ToTensorVector(); + } else { + LOG(WARNING) << "Graph currently only support types: Tensor/vector(Tensor)/None"; + } + + std::vector output_tensors = graph_->Forward(input_tensors); + if (output_tensors.empty()) { + return IValue{}; + } else if (output_tensors.size() == 1) { + return IValue(output_tensors.at(0)); + } else { + return IValue(output_tensors); + } +} + +void Graph::set_batch_size(int batch_size) { graph_->set_batch_size(batch_size); } + +void Graph::enable_tensorrt() { graph_->enable_tensorrt(); } + +Graph Graph::Load(const std::string& model_path, const Device& device) { + Graph graph(model_path, device); + return graph; +} + +Graph::GraphImpl::GraphImpl(const std::string& model_path, const Device& device) + : model_path_(model_path), device_(device) { + CHECK_JUST(of::LoadJobFromIR(&job_, model_path + "/model.mlir")); + job_.mutable_job_conf()->mutable_predict_conf(); + job_.mutable_job_conf()->set_job_name(job_.mutable_job_conf()->job_name() + of::NewUniqueId()); + graph_ = std::make_shared(job_.job_conf().job_name()); + of::Global::Get()->AddCGraph(graph_).GetOrThrow(); +} + +Graph::GraphImpl::GraphImpl(GraphImpl&& graph) noexcept + : graph_(std::move(graph.graph_)), + model_path_(std::move(graph.model_path_)), + is_compiled_(graph.is_compiled_), + batch_size_(graph.batch_size_), + xrt_kind_(graph.xrt_kind_), + device_(std::move(graph.device_)), + job_(std::move(graph.job_)), + input_name_to_order_(std::move(graph.input_name_to_order_)), + 
output_name_to_tensor_(std::move(graph.output_name_to_tensor_)), + variable_op_name_to_tensor_(std::move(graph.variable_op_name_to_tensor_)), + output_tensor_tuple_(std::move(graph.output_tensor_tuple_)), + parameter_tensor_tuple_(std::move(graph.parameter_tensor_tuple_)) {} + +Graph::GraphImpl& Graph::GraphImpl::operator=(Graph::GraphImpl&& graph) noexcept { + if (&graph == this) { return *this; } + graph_ = std::move(graph.graph_); + model_path_ = std::move(graph.model_path_); + is_compiled_ = graph.is_compiled_; + batch_size_ = graph.batch_size_; + xrt_kind_ = graph.xrt_kind_; + device_ = std::move(graph.device_); + job_ = std::move(graph.job_); + input_name_to_order_ = std::move(graph.input_name_to_order_); + output_name_to_tensor_ = std::move(graph.output_name_to_tensor_); + variable_op_name_to_tensor_ = std::move(graph.variable_op_name_to_tensor_); + output_tensor_tuple_ = std::move(graph.output_tensor_tuple_); + parameter_tensor_tuple_ = std::move(graph.parameter_tensor_tuple_); + return *this; +} + +std::vector Graph::GraphImpl::Forward(const std::vector& inputs) { + if (!is_compiled_) { + static std::mutex mtx; + std::lock_guard lock(mtx); + Compile(inputs).GetOrThrow(); + is_compiled_ = true; + } + return Run(inputs).GetOrThrow(); +} + +of::Maybe Graph::GraphImpl::Compile(const std::vector& inputs) { + JUST(BuildGraph(inputs)); + JUST(LoadCheckpoint()); + JUST(RegisterTensors(inputs)); + JUST(graph_->CompileAndInitRuntime()); + return of::Maybe::Ok(); +} + +of::Maybe> Graph::GraphImpl::Run(const std::vector& inputs) const { + const auto input_tensor_tuple = std::make_shared(); + for (const auto& tensor : inputs) { input_tensor_tuple->emplace_back(tensor.tensor_); } + + JUST(of::RunLazyNNGraph(*input_tensor_tuple, *output_tensor_tuple_, *parameter_tensor_tuple_, + graph_)); + JUST(of::SoftSyncNNGraphBuffers(*output_tensor_tuple_, graph_)); + + std::vector outputs; + for (const auto& tensor : *output_tensor_tuple_) { outputs.emplace_back(Tensor(tensor)); } + return outputs; +} + +of::Maybe Graph::GraphImpl::AddOp(of::OperatorConf op_conf) { + { + const std::shared_ptr scope = JUST(of::GetCurrentScope()); + op_conf.set_scope_symbol_id(scope->symbol_id().value_or(0)); + } + op_conf.set_device_tag(GetDeviceTag(device_)); + if (batch_size_ > 0 && op_conf.has_input_conf()) { + op_conf.mutable_input_conf()->mutable_blob_conf()->mutable_shape()->mutable_dim()->Set( + 0, batch_size_); + } + auto* ctx = JUST(of::GetCurInferCtx()); + JUST(ctx->AddAndInferConsistentOp(op_conf)); + return of::Maybe::Ok(); +} + +of::Maybe Graph::GraphImpl::BuildGraph(const std::vector& inputs) { + CompileScope build_graph_scope(job_.job_conf(), *device_.device_->shared_from_symbol(), + xrt_kind_); + { + int input_tensor_order = 0; + const of::OpGraph op_graph(job_); + op_graph.TopoForEachNode([&](const of::OpNode* node) -> of::Maybe { + const of::OperatorConf& op_conf = node->op().op_conf(); + JUST(AddOp(op_conf)); + if (op_conf.has_input_conf()) { + input_name_to_order_[op_conf.name()] = input_tensor_order; + input_tensor_order += 1; + } else if (op_conf.has_variable_conf()) { + const of::LazyMode::Guard lazy_mode_disabled_guard{false}; + const of::VariableOpConf& variable_conf = op_conf.variable_conf(); + variable_op_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Empty( + of::Shape(variable_conf.shape()), + JUST(of::DType::Get(static_cast(variable_conf.data_type()))), + *device_.device_)); + } + return of::Maybe::Ok(); + }); + } + JUST(of::CurJobBuildAndInferCtx_Complete()); + { + const 
std::shared_ptr complete_job = JUST(of::GetCurrentJob()); + const of::OpGraph complete_graph(*complete_job); + complete_graph.TopoForEachNode([&](const of::OpNode* node) -> of::Maybe { + const of::LazyMode::Guard lazy_mode_disabled_guard{false}; + const of::OperatorConf& op_conf = node->op().op_conf(); + if (op_conf.has_output_conf()) { + of::InterfaceBlobConf blob_conf = op_conf.output_conf().blob_conf(); + if (batch_size_ > 0) { + const std::string input_lbi_str = op_conf.output_conf().in(); + const of::LogicalBlobId input_lbi = of::GenLogicalBlobId(input_lbi_str); + int64_t batch_size = node->LogicalBlobDesc4Lbi(input_lbi).shape().At(0); + blob_conf.mutable_shape()->set_dim(0, batch_size); + } + output_name_to_tensor_[op_conf.name()] = JUST(of::one::functional::Empty( + of::Shape(blob_conf.shape()), + JUST(of::DType::Get(static_cast(blob_conf.data_type()))), + *device_.device_)); + } + return of::Maybe::Ok(); + }); + } + return of::Maybe::Ok(); +} + +of::Maybe Graph::GraphImpl::LoadCheckpoint() { + for (const auto& variable_op_name_and_tensor : variable_op_name_to_tensor_) { + const auto& variable_op_name = variable_op_name_and_tensor.first; + const auto& variable_tensor = variable_op_name_and_tensor.second; + const std::string variable_filename = model_path_ + "/" + variable_op_name + "/out"; + const std::string buffer = [&]() { + std::ifstream variable_file(variable_filename, std::ios::binary); + CHECK(variable_file.is_open()); + std::stringstream ss; + ss << variable_file.rdbuf(); + return ss.str(); + }(); + const auto& callback = + std::make_shared>([&](uint64_t of_blob_ptr) { + CHECK_JUST(of::BlobBufferCopyUtil::From( + of_blob_ptr, buffer.data(), + variable_tensor->shape()->elem_cnt() + * of::GetSizeOfDataType(variable_tensor->dtype()->data_type()))); + }); + JUST(of::one::SyncAccessTensorWithTimeOut(variable_tensor, callback, "mut")); + } + + return of::Maybe::Ok(); +} + +of::Maybe Graph::GraphImpl::RegisterTensors(const std::vector& inputs) { + { + std::vector input_op_names(inputs.size()); + std::vector> input_tensors(inputs.size()); + for (const auto& name_order : input_name_to_order_) { + input_op_names[name_order.second] = name_order.first; + input_tensors[name_order.second] = inputs.at(name_order.second).tensor_; + } + JUST(graph_->RegisterInputOpNamesAndTensors(input_op_names, input_tensors)); + } + { + const auto& pair = Unzip(output_name_to_tensor_); + const std::vector& output_op_names = pair.first; + const std::vector>& output_tensors = pair.second; + JUST(graph_->RegisterOutputOpNamesAndTensors(output_op_names, output_tensors)); + output_tensor_tuple_ = ConvertToTensorTuple(output_tensors); + } + { + const auto& pair = Unzip(variable_op_name_to_tensor_); + const std::vector& variable_op_names = pair.first; + const std::vector>& variable_tensors = pair.second; + JUST(graph_->RegisterVariableOpNamesAndTensors(variable_op_names, variable_tensors)); + parameter_tensor_tuple_ = ConvertToTensorTuple(variable_tensors); + } + return of::Maybe::Ok(); +} + +} // namespace oneflow_api diff --git a/oneflow/api/cpp/framework/graph.h b/oneflow/api/cpp/framework/graph.h new file mode 100644 index 00000000000..c2f690b642d --- /dev/null +++ b/oneflow/api/cpp/framework/graph.h @@ -0,0 +1,56 @@ +/* +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +#ifndef ONEFLOW_API_CPP_GRAPH_H_ +#define ONEFLOW_API_CPP_GRAPH_H_ + +#include "device.h" +#include "ivalue.h" +#include "tensor.h" + +namespace oneflow { + +class NNGraph; + +} // namespace oneflow + +namespace oneflow_api { + +class Graph { + public: + explicit Graph(const std::string& model_path, const Device& device = Device("cpu")); + ~Graph(); + + Graph(const Graph& graph) = delete; + Graph(Graph&& graph) noexcept; + + Graph& operator=(const Graph& graph) = delete; + Graph& operator=(Graph&& graph) noexcept; + + IValue Forward(const IValue& inputs); + void set_batch_size(int batch_size); + void enable_tensorrt(); + + static Graph Load(const std::string& model_path, const Device& device = Device("cpu")); + + private: + class GraphImpl; + std::unique_ptr graph_; +}; + +} // namespace oneflow_api + +#endif // ONEFLOW_API_CPP_GRAPH_H_ diff --git a/oneflow/api/cpp/framework/tensor.cpp b/oneflow/api/cpp/framework/tensor.cpp index ab01924b280..27b95f3ddbd 100644 --- a/oneflow/api/cpp/framework/tensor.cpp +++ b/oneflow/api/cpp/framework/tensor.cpp @@ -100,7 +100,7 @@ Tensor Tensor::from_buffer(const void* buffer, const Shape& shape, const Device& } template -void Tensor::copy_to(T* buffer) { +void Tensor::copy_to(T* buffer) const { std::shared_ptr local_tensor = tensor_->AsMirroredTensor().GetPtrOrThrow(); const auto shape = this->shape(); @@ -130,7 +130,7 @@ void Tensor::copy_to(T* buffer) { const std::shared_ptr& Tensor::__internal_tensor() const { return tensor_; } #define REGISTER_TENSOR_COPY_TO(cpp_dtype) \ - template void Tensor::copy_to(cpp_dtype * buffer); + template void Tensor::copy_to(cpp_dtype * buffer) const; REGISTER_TENSOR_COPY_TO(float) REGISTER_TENSOR_COPY_TO(double) diff --git a/oneflow/api/cpp/framework/tensor.h b/oneflow/api/cpp/framework/tensor.h index 2a99bca8c0f..08ac8daf488 100644 --- a/oneflow/api/cpp/framework/tensor.h +++ b/oneflow/api/cpp/framework/tensor.h @@ -33,6 +33,8 @@ class Tensor; namespace oneflow_api { class Tensor final { + friend class Graph; + public: explicit Tensor(const Shape& shape = Shape(), const Device& device = Device("cpu"), const DType& dtype = DType::kFloat); @@ -56,7 +58,7 @@ class Tensor final { [[nodiscard]] const std::shared_ptr& __internal_tensor() const; template - void copy_to(T* buffer); + void copy_to(T* buffer) const; [[nodiscard]] static Tensor from_buffer(const void* buffer, const Shape& shape, const Device& device, const DType& dtype); diff --git a/oneflow/api/cpp/tests/api_test.cpp b/oneflow/api/cpp/tests/api_test.cpp index 88619f3617b..1fbc790bcc2 100644 --- a/oneflow/api/cpp/tests/api_test.cpp +++ b/oneflow/api/cpp/tests/api_test.cpp @@ -15,7 +15,18 @@ limitations under the License. 
*/ #include "oneflow/api/cpp/tests/api_test.h" +#include #include +#include +#ifdef __linux__ + +#include // readlink + +#elif defined(__APPLE__) + +#include // _NSGetExecutablePath + +#endif namespace oneflow_api { @@ -47,4 +58,27 @@ REGISTER_RANDOM_DATA(int8_t) REGISTER_RANDOM_DATA(int32_t) REGISTER_RANDOM_DATA(int64_t) +std::string GetExeDir() { + const size_t path_max_size = 4096; // PATH_MAX = 4096 on linux + char result[path_max_size]; + + const auto get_dir_from_path = [](char result[], size_t count) -> std::string { + std::string exe_path(result, (count > 0) ? count : 0); + + // string(path).rfind('/') will never be string::npos on linux or macos. + return exe_path.substr(0, exe_path.rfind('/')); + }; + +#ifdef __linux__ + ssize_t count = readlink("/proc/self/exe", result, path_max_size); + return get_dir_from_path(result, count); +#elif defined(__APPLE__) + uint32_t count = path_max_size; + CHECK_EQ(_NSGetExecutablePath(result, &count), 0) << "Fail to get executable file path."; + return get_dir_from_path(result, count); +#else +#error oneflow_api::GetExeDir() has not been supported on windows. +#endif +} + } // namespace oneflow_api diff --git a/oneflow/api/cpp/tests/api_test.h b/oneflow/api/cpp/tests/api_test.h index cec50969e69..c196bc90662 100644 --- a/oneflow/api/cpp/tests/api_test.h +++ b/oneflow/api/cpp/tests/api_test.h @@ -32,6 +32,8 @@ Shape RandomShape(); template std::vector RandomData(size_t size); +std::string GetExeDir(); + } // namespace oneflow_api #endif // !ONEFLOW_API_CPP_TESTS_API_TEST_H_ diff --git a/oneflow/api/cpp/tests/graph_test.cpp b/oneflow/api/cpp/tests/graph_test.cpp new file mode 100644 index 00000000000..497da6b1bbb --- /dev/null +++ b/oneflow/api/cpp/tests/graph_test.cpp @@ -0,0 +1,197 @@ +/* +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "oneflow/api/cpp/framework.h" +#include "oneflow/api/cpp/tests/api_test.h" + +namespace oneflow_api { + +namespace { + +inline Graph LoadGraph(const Device& device) { + Graph graph = + Graph::Load("./oneflow/api/cpp/tests/graph_test_model/affine_with_parameter", device); + return graph; +} + +inline void Forward(Graph& graph, const Device& device, int expected_batch_dim = 1) { + std::vector data(expected_batch_dim * 3); + std::fill(data.begin(), data.end(), 1); + std::vector inputs; + inputs.emplace_back( + Tensor::from_buffer(data.data(), Shape({expected_batch_dim, 3}), device, DType::kFloat)); + const auto& value = graph.Forward(inputs); + ASSERT_TRUE(value.IsTensor()); + Tensor output = value.ToTensor(); + Shape shape = output.shape(); + ASSERT_EQ(shape.At(0), expected_batch_dim); + ASSERT_EQ(shape.At(1), 4); + std::vector buf(expected_batch_dim * 4); + output.copy_to(buf.data()); + for (const float& element : buf) { ASSERT_EQ(element, 4); } +} + +} // namespace + +TEST(Api, graph_cpu_test) { + EnvScope scope; + Device device("cpu"); + Graph graph = LoadGraph(device); + Forward(graph, device, 1); +} + +#ifdef WITH_CUDA +TEST(Api, graph_gpu_test) { + EnvScope scope; + Device device("cuda", 0); + Graph graph = LoadGraph(device); + Forward(graph, device); +} + +TEST(Api, graph_multi_gpu_test) { + EnvScope scope; + Device device("cuda", 0); + Graph graph = LoadGraph(device); + Forward(graph, device); + + Device device1("cuda", 1); + Graph graph1 = LoadGraph(device1); + Forward(graph1, device1); +} + +TEST(Api, graph_trt_test) { + EnvScope scope; + Device device("cuda:0"); + Graph graph = LoadGraph(device); + graph.enable_tensorrt(); + Forward(graph, device); +} +#endif + +TEST(Api, graph_cpu_batching_test) { + EnvScope scope; + Device device("cpu"); + Graph graph = LoadGraph(device); + graph.set_batch_size(10); + Forward(graph, device, 10); +} + +#ifdef WITH_CUDA +TEST(Api, graph_gpu_batching_test) { + EnvScope scope; + Device device("cuda", 0); + Graph graph = LoadGraph(device); + graph.set_batch_size(10); + Forward(graph, device, 10); +} + +TEST(Api, graph_multi_device_test) { + EnvScope scope; + Device device("cuda", 0); + Graph graph = LoadGraph(device); + Forward(graph, device, 1); + + Device device1("cuda", 1); + Graph graph1 = LoadGraph(device1); + Forward(graph1, device1, 1); + + Device device2("cpu"); + Graph graph2 = LoadGraph(device2); + Forward(graph2, device2, 1); +} + +TEST(Api, graph_unload_test) { + { + EnvScope scope; + + Device device("cuda", 0); + Graph graph = LoadGraph(device); + Forward(graph, device, 1); + + { + Device device1("cuda", 1); + Graph graph1 = LoadGraph(device1); + Forward(graph1, device1, 1); + } + + Device device2("cpu"); + Graph graph2 = LoadGraph(device2); + Forward(graph2, device2, 1); + } + + { + EnvScope scope; + + Device device("cpu"); + Graph graph = LoadGraph(device); + Forward(graph, device, 1); + } +} +#endif + +TEST(Api, graph_thread_test) { + EnvScope scope; + + Device device("cpu"); + std::vector graphs; + for (int i = 0; i < 10; i++) { graphs.emplace_back(LoadGraph(device)); } + + std::vector threads; + for (Graph& graph : graphs) { + threads.emplace_back(std::thread(std::bind(Forward, std::move(graph), device, 1))); + } + for (auto& thread : threads) { thread.join(); } +} + +TEST(Api, graph_input_order_test) { + EnvScope scope; + + Device device("cpu"); + Graph graph = 
Graph::Load("./oneflow/api/cpp/tests/graph_test_model/affine_no_parameter", device); + + std::vector inputs; + std::vector x(3); + std::fill(x.begin(), x.end(), 1); + inputs.emplace_back(Tensor::from_buffer(x.data(), Shape({1, 3}), device, DType::kFloat)); + std::vector a(3 * 2); + std::fill(a.begin(), a.end(), 1); + inputs.emplace_back(Tensor::from_buffer(a.data(), Shape({3, 2}), device, DType::kFloat)); + std::vector b(2); + std::fill(b.begin(), b.end(), 1); + inputs.emplace_back(Tensor::from_buffer(b.data(), Shape({2}), device, DType::kFloat)); + + const auto& value = graph.Forward(inputs); + ASSERT_TRUE(value.IsTensor()); + Tensor output = value.ToTensor(); + Shape shape = output.shape(); + ASSERT_EQ(shape.At(0), 1); + ASSERT_EQ(shape.At(1), 2); + std::array buf{}; + output.copy_to(buf.data()); + ASSERT_EQ(buf[0], 4); + ASSERT_EQ(buf[1], 4); +} + +} // namespace oneflow_api diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.mlir b/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.mlir new file mode 100644 index 00000000000..30c09f7c841 --- /dev/null +++ b/oneflow/api/cpp/tests/graph_test_model/affine_no_parameter/model.mlir @@ -0,0 +1,11 @@ +module { + oneflow.job @MyGraph_1(%arg0: tensor<1x3xf32>, %arg1: tensor<3x2xf32>, %arg2: tensor<2xf32>) -> tensor<1x2xf32> { + %output = "oneflow.input"(%arg0) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_1-input_0", output_lbns = ["_MyGraph_1-input_0/out"], scope_symbol_id = 4611686018427527167 : i64, shape = [1 : si64, 3 : si64]} : (tensor<1x3xf32>) -> tensor<1x3xf32> + %output_0 = "oneflow.input"(%arg1) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_1-input_1", output_lbns = ["_MyGraph_1-input_1/out"], scope_symbol_id = 4611686018427527167 : i64, shape = [3 : si64, 2 : si64]} : (tensor<3x2xf32>) -> tensor<3x2xf32> + %output_1 = "oneflow.input"(%arg2) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_1-input_2", output_lbns = ["_MyGraph_1-input_2/out"], scope_symbol_id = 4611686018427527167 : i64, shape = [2 : si64]} : (tensor<2xf32>) -> tensor<2xf32> + %0 = "oneflow.matmul"(%output, %output_0) {alpha = 1.000000e+00 : f64, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-matmul_0", output_lbns = ["model-matmul_0/out_0"], scope_symbol_id = 4611686018427535359 : i64, transpose_a = false, transpose_b = false} : (tensor<1x3xf32>, tensor<3x2xf32>) -> tensor<1x2xf32> + %1 = "oneflow.broadcast_add"(%0, %output_1) {device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-broadcast_add_1", output_lbns = ["model-broadcast_add_1/z_0"], scope_symbol_id = 4611686018427535359 : i64} : (tensor<1x2xf32>, tensor<2xf32>) -> tensor<1x2xf32> + %output_2 = "oneflow.output"(%1) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_1-output_0", output_lbns = ["_MyGraph_1-output_0/out"], scope_symbol_id = 4611686018427527167 : i64, shape = [1 : si64, 2 : si64]} : (tensor<1x2xf32>) -> tensor<1x2xf32> + oneflow.return %output_2 : tensor<1x2xf32> + } +} diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/meta b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/meta new file mode 
100644 index 00000000000..421341fc956 --- /dev/null +++ b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/meta @@ -0,0 +1,5 @@ +shape { + dim: 3 + dim: 4 +} +data_type: kFloat diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/out b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/out new file mode 100644 index 00000000000..be22e342567 Binary files /dev/null and b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.a/out differ diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/meta b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/meta new file mode 100644 index 00000000000..166375025be --- /dev/null +++ b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/meta @@ -0,0 +1,4 @@ +shape { + dim: 4 +} +data_type: kFloat diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/out b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/out new file mode 100644 index 00000000000..dcce8bfb97e Binary files /dev/null and b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.b/out differ diff --git a/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.mlir b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.mlir new file mode 100644 index 00000000000..15a53af1f48 --- /dev/null +++ b/oneflow/api/cpp/tests/graph_test_model/affine_with_parameter/model.mlir @@ -0,0 +1,11 @@ +module { + oneflow.job @MyGraph_0(%arg0: tensor<1x3xf32>) -> tensor<1x4xf32> { + %output = "oneflow.input"(%arg0) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_0-input_0", output_lbns = ["_MyGraph_0-input_0/out"], scope_symbol_id = 4611686018427469823 : i64, shape = [1 : si64, 3 : si64]} : (tensor<1x3xf32>) -> tensor<1x3xf32> + %output_0 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], nd_sbp = ["B"], op_name = "model.a", output_lbns = ["model.a/out"], scope_symbol_id = 4611686018427482111 : i64, shape = [3 : si64, 4 : si64]} : () -> tensor<3x4xf32> + %output_1 = "oneflow.variable"() {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], nd_sbp = ["B"], op_name = "model.b", output_lbns = ["model.b/out"], scope_symbol_id = 4611686018427494399 : i64, shape = [4 : si64]} : () -> tensor<4xf32> + %0 = "oneflow.matmul"(%output, %output_0) {alpha = 1.000000e+00 : f64, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-matmul_0", output_lbns = ["model-matmul_0/out_0"], scope_symbol_id = 4611686018427486207 : i64, transpose_a = false, transpose_b = false} : (tensor<1x3xf32>, tensor<3x4xf32>) -> tensor<1x4xf32> + %1 = "oneflow.broadcast_add"(%0, %output_1) {device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], op_name = "model-broadcast_add_1", output_lbns = ["model-broadcast_add_1/z_0"], scope_symbol_id = 4611686018427486207 : i64} : (tensor<1x4xf32>, tensor<4xf32>) -> tensor<1x4xf32> + %output_2 = "oneflow.output"(%1) {data_type = 2 : i32, device_name = ["@0:0"], device_tag = "cpu", hierarchy = [1], is_dynamic = false, nd_sbp = ["B"], op_name = "_MyGraph_0-output_0", output_lbns = ["_MyGraph_0-output_0/out"], scope_symbol_id = 4611686018427469823 : i64, shape = [1 : si64, 4 : si64]} : (tensor<1x4xf32>) -> tensor<1x4xf32> + oneflow.return %output_2 : tensor<1x4xf32> + } +} diff --git 
a/oneflow/api/cpp/tests/tensor_test.cpp b/oneflow/api/cpp/tests/tensor_test.cpp index 3241a220263..5960f961675 100644 --- a/oneflow/api/cpp/tests/tensor_test.cpp +++ b/oneflow/api/cpp/tests/tensor_test.cpp @@ -26,13 +26,13 @@ TEST(Api, device) { ASSERT_EQ(device.type(), "cpu"); #ifdef WITH_CUDA - device = Device("cuda", 1); + device = Device("cuda:0"); ASSERT_EQ(device.type(), "cuda"); - ASSERT_EQ(device.device_id(), 1); + ASSERT_EQ(device.device_id(), 0); - device = Device("cuda:2"); + device = Device("cuda", 1); ASSERT_EQ(device.type(), "cuda"); - ASSERT_EQ(device.device_id(), 2); + ASSERT_EQ(device.device_id(), 1); #endif } diff --git a/oneflow/api/python/framework/device.cpp b/oneflow/api/python/framework/device.cpp index f843d8ccb74..06d5a733bfb 100644 --- a/oneflow/api/python/framework/device.cpp +++ b/oneflow/api/python/framework/device.cpp @@ -16,7 +16,6 @@ limitations under the License. #include #include #include "oneflow/core/control/global_process_ctx.h" -#include "oneflow/api/common/device.h" #include "oneflow/api/python/of_api_registry.h" #include "oneflow/core/framework/device.h" #include "oneflow/core/common/str_util.h" @@ -29,10 +28,10 @@ namespace oneflow { ONEFLOW_API_PYBIND11_MODULE("", m) { py::class_, std::shared_ptr>>(m, "device") .def(py::init([](const std::string& type_or_type_with_device_id) { - return DeviceExportUtil::ParseAndNew(type_or_type_with_device_id).GetOrThrow(); + return Device::ParseAndNew(type_or_type_with_device_id).GetOrThrow(); })) .def(py::init([](const std::string& type, int64_t device_id) { - return DeviceExportUtil::New(type, device_id).GetOrThrow(); + return Device::New(type, device_id).GetOrThrow(); })) .def_property_readonly("type", [](const Symbol& d) { return d->type(); }) .def_property_readonly("index", [](const Symbol& d) { return d->device_id(); }) diff --git a/oneflow/api/python/functional/python_arg.cpp b/oneflow/api/python/functional/python_arg.cpp index 15215102162..9eb31f214f0 100644 --- a/oneflow/api/python/functional/python_arg.cpp +++ b/oneflow/api/python/functional/python_arg.cpp @@ -15,8 +15,6 @@ limitations under the License. 
*/ #include "oneflow/api/python/functional/python_arg.h" - -#include "oneflow/api/common/device.h" #include "oneflow/api/python/functional/common.h" #include "oneflow/api/python/functional/indexing.h" #include "oneflow/core/common/scalar.h" @@ -126,7 +124,7 @@ template<> Maybe> PythonArg::ObjectAs>() const { if (PyStringCheck(object_)) { const char* device_str = JUST(PyStringAsString(object_)); - return DeviceExportUtil::ParseAndNew(device_str); + return Device::ParseAndNew(device_str); } return PyUnpackDevice(object_); } diff --git a/oneflow/api/python/ir.cpp b/oneflow/api/python/ir.cpp index 5840cb9d716..422242d37c4 100644 --- a/oneflow/api/python/ir.cpp +++ b/oneflow/api/python/ir.cpp @@ -28,9 +28,6 @@ ONEFLOW_API_PYBIND11_MODULE("ir", m) { [](const std::string& lib_path) { MutSharedLibPaths()->insert(lib_path); }); } -REGISTER_JOB_PASS("IRRoundTripBeforeAD", IRRoundTrip); -REGISTER_JOB_PASS("IRRoundTrip", IRRoundTrip); - } // namespace oneflow #endif // WITH_MLIR diff --git a/oneflow/core/framework/device.cpp b/oneflow/core/framework/device.cpp index 52bb9a8e5d2..2dd191ec638 100644 --- a/oneflow/core/framework/device.cpp +++ b/oneflow/core/framework/device.cpp @@ -36,6 +36,14 @@ inline size_t HashDevice(const std::string& type, int64_t device_id) { return std::hash()(type) ^ std::hash()(device_id); } +void CheckDeviceType(const std::string& type) { + if (Device::type_supported.find(type) == Device::type_supported.end()) { + std::string error_msg = + "Expected one of cpu, cuda device type at start of device string " + type; + throw std::runtime_error(error_msg); + } +} + } // namespace Device::Device(const std::string& type, int64_t device_id) @@ -83,6 +91,19 @@ Maybe Device::Init() { return New(type, GlobalProcessCtx::LocalRank()); } +/* static */ Maybe> Device::ParseAndNew( + const std::string& type_or_type_with_device_id) { + std::string type; + int device_id = -1; + JUST(ParsingDeviceTag(type_or_type_with_device_id, &type, &device_id)); + CheckDeviceType(type); + if (device_id == -1) { + return Device::New(type); + } else { + return Device::New(type, device_id); + } +} + Maybe Device::of_type() const { static const HashMap type2device_tag{ {"cpu", "cpu"}, diff --git a/oneflow/core/framework/device.h b/oneflow/core/framework/device.h index f667c6c476f..399dcb6bd3b 100644 --- a/oneflow/core/framework/device.h +++ b/oneflow/core/framework/device.h @@ -57,6 +57,7 @@ class Device final { static Maybe> ThreadLocalGetOrNew(const std::string& type, int64_t device_id); static Maybe> New(const std::string& type, int64_t device_id); static Maybe> New(const std::string& type); + static Maybe> ParseAndNew(const std::string& type_or_type_with_device_id); static Maybe> MakeDeviceByParallelDesc(const ParallelDesc& parallel_desc); static const std::unordered_set type_supported; diff --git a/oneflow/core/framework/random_generator_impl.cpp b/oneflow/core/framework/random_generator_impl.cpp index a99b93bf686..888cccaf5d6 100644 --- a/oneflow/core/framework/random_generator_impl.cpp +++ b/oneflow/core/framework/random_generator_impl.cpp @@ -18,6 +18,7 @@ limitations under the License. 
#include "oneflow/core/common/util.h" #include "oneflow/core/framework/device.h" #include "oneflow/core/framework/instructions_builder.h" +#include "oneflow/core/framework/tensor_util.h" #include "oneflow/core/functional/functional.h" #include "oneflow/core/job/env_global_objects_scope.h" #include "oneflow/core/register/ofblob.h" @@ -33,17 +34,6 @@ namespace one { namespace { -Maybe SyncAccessTensorWithTimeOut( - const std::shared_ptr& tensor, - const std::shared_ptr>& callback, const std::string& modifier) { - return SpinCounter::SpinWait(1, [&](const std::shared_ptr& sc) -> Maybe { - return PhysicalRun([&](InstructionsBuilder* builder) -> Maybe { - return builder->SyncAccessBlobByCallback(JUST(tensor->AsMirroredTensor()), sc, callback, - modifier); - }); - }); -} - Maybe CPUSynchronize() { if (Global::Get() != nullptr) { return vm::CurrentRankSync(); } return Maybe::Ok(); diff --git a/oneflow/core/framework/tensor_util.cpp b/oneflow/core/framework/tensor_util.cpp new file mode 100644 index 00000000000..6a615e25173 --- /dev/null +++ b/oneflow/core/framework/tensor_util.cpp @@ -0,0 +1,36 @@ +/* +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ +#include "oneflow/core/framework/tensor_util.h" + +#include "oneflow/core/common/spin_counter.h" +#include "oneflow/core/framework/instructions_builder.h" + +namespace oneflow { +namespace one { + +Maybe SyncAccessTensorWithTimeOut( + const std::shared_ptr& tensor, + const std::shared_ptr>& callback, const std::string& modifier) { + return SpinCounter::SpinWait(1, [&](const std::shared_ptr& sc) -> Maybe { + return PhysicalRun([&](InstructionsBuilder* builder) -> Maybe { + return builder->SyncAccessBlobByCallback(JUST(tensor->AsMirroredTensor()), sc, callback, + modifier); + }); + }); +} + +} // namespace one +} // namespace oneflow diff --git a/oneflow/core/framework/tensor_util.h b/oneflow/core/framework/tensor_util.h new file mode 100644 index 00000000000..ec9502230aa --- /dev/null +++ b/oneflow/core/framework/tensor_util.h @@ -0,0 +1,29 @@ +/* +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ +#include + +#include "oneflow/core/common/maybe.h" + +namespace oneflow { +namespace one { + +class Tensor; + +Maybe SyncAccessTensorWithTimeOut( + const std::shared_ptr& tensor, + const std::shared_ptr>& callback, const std::string& modifier); +} // namespace one +} // namespace oneflow diff --git a/oneflow/core/functional/impl/eye_functor.cpp b/oneflow/core/functional/impl/eye_functor.cpp index 7e47e77a2fd..4de53d1be8f 100644 --- a/oneflow/core/functional/impl/eye_functor.cpp +++ b/oneflow/core/functional/impl/eye_functor.cpp @@ -19,6 +19,7 @@ limitations under the License. #include "oneflow/core/common/scalar.h" #include "oneflow/core/common/throw.h" #include "oneflow/core/common/util.h" +#include "oneflow/core/framework/device.h" #include "oneflow/core/framework/attr_map.h" #include "oneflow/core/framework/nd_sbp.h" #include "oneflow/core/framework/op_builder.h" @@ -32,7 +33,6 @@ limitations under the License. #include "oneflow/core/functional/impl/common.h" #include "oneflow/core/job/lazy_mode.h" #include "oneflow/core/job/sbp_parallel.h" -#include "oneflow/api/common/device.h" namespace oneflow { namespace one { @@ -66,7 +66,7 @@ class EyeDeviceStrFunctor { Maybe operator()(const Scalar& rows, const Optional& cols, const Symbol& dtype, const std::string& device, const bool& requires_grad) const { - const Symbol& dev = JUST(DeviceExportUtil::ParseAndNew(device)); + const Symbol& dev = JUST(Device::ParseAndNew(device)); return JUST(functional::Eye(rows, cols, dtype, dev, requires_grad)); } }; diff --git a/oneflow/core/functional/impl/nn_functor.cpp b/oneflow/core/functional/impl/nn_functor.cpp index 7e200b3b6f8..a8d63068c46 100644 --- a/oneflow/core/functional/impl/nn_functor.cpp +++ b/oneflow/core/functional/impl/nn_functor.cpp @@ -22,6 +22,7 @@ limitations under the License. #include "oneflow/core/framework/op_interpreter/op_interpreter_util.h" #include "oneflow/core/framework/tensor.h" #include "oneflow/core/framework/tensor_tuple.h" +#include "oneflow/core/framework/tensor_util.h" #include "oneflow/core/framework/op_interpreter.h" #include "oneflow/core/framework/random_generator.h" #include "oneflow/core/functional/functional.h" @@ -1597,17 +1598,6 @@ class FoldFunctor { std::shared_ptr fold_op_; }; -Maybe SyncAccessTensorWithTimeOut( - const std::shared_ptr& tensor, - const std::shared_ptr>& callback, const std::string& modifier) { - return SpinCounter::SpinWait(1, [&](const std::shared_ptr& sc) -> Maybe { - return PhysicalRun([&](InstructionsBuilder* builder) -> Maybe { - return builder->SyncAccessBlobByCallback(JUST(tensor->AsMirroredTensor()), sc, callback, - modifier); - }); - }); -} - class OneHotFunctor { public: OneHotFunctor() { diff --git a/oneflow/core/functional/tensor_index.cpp b/oneflow/core/functional/tensor_index.cpp index c846ab992bf..dbc4d159998 100644 --- a/oneflow/core/functional/tensor_index.cpp +++ b/oneflow/core/functional/tensor_index.cpp @@ -19,6 +19,7 @@ limitations under the License. 
#include "oneflow/core/framework/device.h" #include "oneflow/core/framework/instructions_builder.h" #include "oneflow/core/framework/tensor_tuple.h" +#include "oneflow/core/framework/tensor_util.h" #include "oneflow/core/framework/nd_sbp.h" #include "oneflow/core/functional/functional.h" #include "oneflow/core/job/sbp_parallel.h" @@ -30,17 +31,6 @@ namespace functional { namespace { -Maybe SyncAccessTensorWithTimeOut( - const std::shared_ptr& tensor, - const std::shared_ptr>& callback, const std::string& modifier) { - return SpinCounter::SpinWait(1, [&](const std::shared_ptr& sc) -> Maybe { - return PhysicalRun([&](InstructionsBuilder* builder) -> Maybe { - return builder->SyncAccessBlobByCallback(JUST(tensor->AsMirroredTensor()), sc, callback, - modifier); - }); - }); -} - int64_t CountSpecifiedDims(const TensorIndex& index) { int64_t specified_ndims = 0; for (int i = 0; i < index.size(); ++i) { diff --git a/python/oneflow/framework/check_point_v2.py b/python/oneflow/framework/check_point_v2.py index 5966ac5ba1a..1bb32572b4e 100644 --- a/python/oneflow/framework/check_point_v2.py +++ b/python/oneflow/framework/check_point_v2.py @@ -29,6 +29,8 @@ import oneflow.core.framework.variable_meta_info_pb2 as variable_meta_info_pb import oneflow.framework.dtype as dtype_util import oneflow.framework.id_util as id_util +from oneflow.framework.tensor import Tensor +import oneflow.nn.graph.graph as graph_util import pickle SNAPSHOT_DONE_FILENAME = "snapshot_done" @@ -120,10 +122,6 @@ def _save_tensor_to_disk(tensor: "oneflow.Tensor", dir_name: Union[str, Path]) - ValueContainer = Union[FileBackendVariableBlob, np.ndarray, "oneflow.Tensor"] -def _ElemCnt(shape): - return np.prod(shape).astype(int).item() - - def _LoadSingleVariable( path: Optional[str], consistent_src_rank: Optional[int] = None ) -> "flow.Tensor": @@ -205,6 +203,11 @@ def tensor_setstate(self, pickle_dict): ) +def RegisterMethods(): + Tensor.__setstate__ = tensor_setstate + Tensor.__getstate__ = tensor_getstate + + def legacy_load( path: Union[str, Path], consistent_src_rank: Optional[int] = None, ) -> Dict[str, "flow.Tensor"]: @@ -303,12 +306,12 @@ def save( """ path: Path = Path(path) - if isinstance(obj, oneflow.nn.Graph): - graph: oneflow.nn.Graph = obj + if isinstance(obj, graph_util.Graph): + graph: graph_util.Graph = obj if not graph._is_compiled: raise RuntimeError("graph must be compiled first.") - os.makedirs(path, exist_ok=True) + path.mkdir(exist_ok=True) serialized_job = str(text_format.MessageToString(graph._forward_job_proto)) oneflow._oneflow_internal.nn.graph.SaveJobToIR(serialized_job, str(path)) @@ -328,11 +331,5 @@ def save( pickle_path.write_bytes(pickled_bytes) -def generate_values_by_initializer(initializer, shape, dtype): - np_dtype = np.dtype(dtype_util.convert_oneflow_dtype_to_numpy_dtype(dtype)) - length = _ElemCnt(shape) - return np.array(initializer(length)).astype(np_dtype).reshape(shape) - - save_load_path = None consistent_src_dsk_rank = None diff --git a/python/oneflow/framework/register_class_method_util.py b/python/oneflow/framework/register_class_method_util.py index fce74ed0ca5..3bc4f78d00e 100644 --- a/python/oneflow/framework/register_class_method_util.py +++ b/python/oneflow/framework/register_class_method_util.py @@ -14,6 +14,7 @@ limitations under the License. 
""" import oneflow._oneflow_internal +import oneflow.framework.check_point_v2 as check_point_v2 import oneflow.framework.generator as generator import oneflow.framework.op_expr_util as op_expr_util import oneflow.framework.tensor as tensor_util @@ -21,4 +22,5 @@ def RegisterMethod4Class(): tensor_util.RegisterMethods() + check_point_v2.RegisterMethods() op_expr_util.RegisterMethod4UserOpExpr() diff --git a/python/oneflow/framework/tensor.py b/python/oneflow/framework/tensor.py index 8fd569ac385..75afe06267e 100644 --- a/python/oneflow/framework/tensor.py +++ b/python/oneflow/framework/tensor.py @@ -15,7 +15,6 @@ """ import oneflow as flow from oneflow._oneflow_internal.exception import IndexException -import oneflow.framework.check_point_v2 as check_point_v2 import oneflow.framework.tensor_str as tensor_str_util import oneflow.ops.initializer_util as initializer_util import oneflow._oneflow_internal.lazy_mode as lazy_mode @@ -694,7 +693,7 @@ def _init_by_initializer_conf(tensor, initializer_conf, random_seed=None): shape = tuple(tensor.shape) initializer = initializer_util.GetInitializer(initializer_conf, random_seed, shape) - np_arr = check_point_v2.generate_values_by_initializer( + np_arr = initializer_util.generate_values_by_initializer( initializer, shape, tensor.dtype ) if tensor.is_consistent: @@ -762,8 +761,6 @@ def RegisterMethods(): Tensor.backward = _backward Tensor.__getitem__ = _getitem Tensor.__setitem__ = _setitem - Tensor.__setstate__ = check_point_v2.tensor_setstate - Tensor.__getstate__ = check_point_v2.tensor_getstate Tensor.__str__ = _str Tensor.__repr__ = _repr Tensor.__eq__ = _eq diff --git a/python/oneflow/ops/initializer_util.py b/python/oneflow/ops/initializer_util.py index 36aaf7f4afc..2a21173eb95 100644 --- a/python/oneflow/ops/initializer_util.py +++ b/python/oneflow/ops/initializer_util.py @@ -22,6 +22,7 @@ import oneflow as flow import oneflow.core.job.initializer_conf_pb2 as initializer_conf_util import oneflow.core.operator.op_conf_pb2 as op_conf_util +import oneflow.framework.dtype as dtype_util def constant_initializer( @@ -1206,3 +1207,13 @@ def EmptyInitializerImpl( var_blob_shape: Sequence[int], ): return None + + +def _elem_cnt(shape): + return np.prod(shape).astype(int).item() + + +def generate_values_by_initializer(initializer, shape, dtype): + np_dtype = np.dtype(dtype_util.convert_oneflow_dtype_to_numpy_dtype(dtype)) + length = _elem_cnt(shape) + return np.array(initializer(length)).astype(np_dtype).reshape(shape)