From 20b44d0441ebcb4c3c67413e367f8ce5f70f14fd Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Wed, 8 Nov 2017 21:50:44 +0800 Subject: [PATCH 01/28] WIP send recv op --- .clang-format | 5 +- paddle/framework/lod_tensor.cc | 4 + paddle/framework/lod_tensor.h | 2 + paddle/operators/detail/simple_block_queue.h | 52 ++++++++ paddle/operators/recv_op.cc | 122 +++++++++++++++++++ paddle/operators/send_op.cc | 0 paddle/operators/send_recv.proto | 42 +++++++ paddle/operators/send_recv_test.cc | 63 ++++++++++ 8 files changed, 289 insertions(+), 1 deletion(-) create mode 100644 paddle/operators/detail/simple_block_queue.h create mode 100644 paddle/operators/recv_op.cc create mode 100644 paddle/operators/send_op.cc create mode 100644 paddle/operators/send_recv.proto create mode 100644 paddle/operators/send_recv_test.cc diff --git a/.clang-format b/.clang-format index 9ba433b1736242..d661ad8f2e61c7 100644 --- a/.clang-format +++ b/.clang-format @@ -24,5 +24,8 @@ Standard: Cpp11 AllowAllParametersOfDeclarationOnNextLine: true BinPackParameters: false BinPackArguments: false +--- +Language: Proto +# Don't format .proto files. +DisableFormat: true ... - diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index 2bcfffb134f464..a42bf36798db92 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -173,5 +173,9 @@ void AppendLoD(LoD* lod, const std::vector>& lod_length) { } } +void SerializeToStream(std::ostream& os) { + // TODO(typhoonzero): serialize to ostream +} + } // namespace framework } // namespace paddle diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 1437da399a2828..8a424d2e15804d 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -187,5 +187,7 @@ void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx, void AppendLoD(LoD* lod, const std::vector>& lod_length); +void SerializeToStream(std::ostream& os); + } // namespace framework } // namespace paddle diff --git a/paddle/operators/detail/simple_block_queue.h b/paddle/operators/detail/simple_block_queue.h new file mode 100644 index 00000000000000..44899217579532 --- /dev/null +++ b/paddle/operators/detail/simple_block_queue.h @@ -0,0 +1,52 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
*/

+#pragma once
+
+#include <condition_variable>
+#include <deque>
+#include <mutex>
+
+namespace paddle {
+namespace operators {
+namespace detail {
+
+template <typename T>
+class SimpleBlockQueue {
+ private:
+  std::mutex mutex_;
+  std::condition_variable condition_;
+  std::deque<T> queue_;
+
+ public:
+  void Push(T const& value) {
+    {
+      std::unique_lock<std::mutex> lock(this->mutex_);
+      queue_.push_front(value);
+    }
+    this->condition_.notify_one();
+  }
+
+  T Pop() {
+    std::unique_lock<std::mutex> lock(this->mutex_);
+    this->condition_.wait(lock, [=] { return !this->queue_.empty(); });
+    T rc(std::move(this->queue_.back()));
+    this->queue_.pop_back();
+    return rc;
+  }
+};
+
+} // namespace detail
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc
new file mode 100644
index 00000000000000..e0c8b219cdaa5f
--- /dev/null
+++ b/paddle/operators/recv_op.cc
@@ -0,0 +1,122 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#include
+#include
+#include
+#include
+
+#include "paddle/framework/data_type.h"
+#include "paddle/framework/framework.pb.h"
+#include "paddle/framework/lod_tensor.h"
+#include "paddle/framework/op_registry.h"
+#include "paddle/operators/detail/simple_block_queue.h"
+
+#include <grpc/grpc.h>
+#include <grpc++/security/server_credentials.h>
+#include <grpc++/server.h>
+#include <grpc++/server_builder.h>
+#include <grpc++/server_context.h>
+#include "sendrecv.grpc.pb.h"
+
+using grpc::Server;
+using grpc::ServerBuilder;
+using grpc::ServerContext;
+using grpc::ServerReader;
+using grpc::ServerReaderWriter;
+using grpc::ServerWriter;
+using grpc::Status;
+using sendrecv::SendRecvOp;
+using sendrecv::TensorMessage;
+
+class SendRecvServerImpl final : public SendRecvOp::Service {
+ public:
+  explicit RouteGuideImpl() {}
+
+  Status SendTensor(ServerContext *context, const TensorMessage *in_tensor,
+                    TensorMessage *out_tensor) override {
+    framework::LodTensor t;
+    // load t from in_tensor messge.
+  }
+
+  Status SendTensorStream(
+      ServerContext *context,
+      ServerReaderWriter<TensorMessage, TensorMessage> *stream) override {
+    std::vector<TensorMessage> received_tensor_chunk;
+    TensorMessage tensormsg;
+    while (stream->Read(&tensormsg)) {
+      // TODO(typhoonzero): implement stream methods.
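+      // A possible body for this loop, sketched here only as comments; it
+      // assumes the sender fills tensor_data with bytes in the
+      // SerializeToStream format and relies on the DeserializeFromStream
+      // helper that a later commit in this series adds. Each decoded tensor
+      // is handed to the consumer thread through the SimpleBlockQueue
+      // defined above:
+      //
+      //   framework::LoDTensor t;
+      //   std::istringstream iss(tensormsg.tensor_data());
+      //   framework::DeserializeFromStream(iss, &t);
+      //   lodtensor_queue_.Push(std::move(t));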
+ // stream->Write(tensormsg); + // framework::LodTensor t; + // lodtensor_queue_.Push(); + } + return Status::OK; + } + + private: + SimpleBlockQueue lodtensor_queue_; +}; + +void RunServer(const std::string &endpoint) { + std::string server_address(endpoint); + RouteGuideImpl service; + + ServerBuilder builder; + builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); + builder.RegisterService(&service); + std::unique_ptr server(builder.BuildAndStart()); + LOG(INFO) << "Server listening on " << server_address << std::endl; + server->Wait(); +} + +namespace paddle { +namespace operators { + +class RecvOp : public framework::OperatorBase { + public: + RecvOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + // TODO(typhoonzero): start RPC server here + // TODO(typhoonzero): how to trigger server side net graph non-blocking? + } +}; + +class RecvOpMaker : public framework::OpProtoAndCheckerMaker { + public: + RecvOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(Tensor) Input tensor to be saved"); + AddComment(R"DOC( +Recv operator + +This operator will recv tensor from send_op +)DOC"); + AddAttr("endpoint", + "(string, default 127.0.0.1:6164)" + "IP address to listen on.") + .SetDefault("127.0.0.1:6164") + .AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(save, ops::SaveOp, ops::SaveOpProtoMaker); diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/paddle/operators/send_recv.proto b/paddle/operators/send_recv.proto new file mode 100644 index 00000000000000..10417715e549c1 --- /dev/null +++ b/paddle/operators/send_recv.proto @@ -0,0 +1,42 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +syntax = "proto3"; + +package sendrecv; + +import "../framework/framework.proto" // SendRecv RPC definations. service SendRecvOp { // For parameter server round-robin like hashing, do not split tensors. // Send and recv only one tensor rpc SendTensor(TensorMessage) returns (TensorMessage) {} // Bidirectional streaming RPC that client send and recv streams of tensors. rpc SendTensorStream(stream TensorMessage) returns (stream TensorMessage) {} + +// For large tensor hashing +rpc SendTensorChunk(TensorChunk) returns (TensorChunk) {} +rpc SendTensorChunkStream(stream TensorChunk) returns (stream TensorChunk) {} +} + +// TensorMessage a single paddle LoDTensor information. 
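// For example, a float32 tensor of shape [10, 10] with one LoD level would be
// carried as (illustrative values only, binary fields abbreviated):
//   TensorMessage{ version: 0, tensor_desc: {FP32, dims: [10, 10]},
//                  tensor_data_size: 400, tensor_data: <400 raw bytes>,
//                  lod_data_size: ..., lod_data: <offsets as raw bytes> }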
+message TensorMessage { + uint32 version = 1; + paddle.framework.TensorDesc tensor_desc = 2; + uint64 tensor_data_size = 3; + bytes tensor_data = 4; + uint64 lod_data_size = 5; + bytes lod_data = 6; +} + +message TensorChunk { + uint32 version = 1; + paddle.framework.TensorDesc tensor_desc = 2; + uint64 chunk_offset = 3; // offset of tensor data. + uint64 chunk_size = 4; + bytes chunk_data = 5; +} diff --git a/paddle/operators/send_recv_test.cc b/paddle/operators/send_recv_test.cc new file mode 100644 index 00000000000000..aa4342007b72db --- /dev/null +++ b/paddle/operators/send_recv_test.cc @@ -0,0 +1,63 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "gtest/gtest.h" +#include "paddle/framework/op_registry.h" + +USE_NO_KERNEL_OP(send); +USE_NO_KERNEL_OP(recv); + +TEST(SendRecvOp, CPU) { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + paddle::platform::CPUDeviceContext ctx(place); + auto var = scope.Var("test_var"); + auto tensor = var->GetMutable(); + tensor->Resize({10, 10}); + paddle::framework::LoD expect_lod; + expect_lod.resize(1); + expect_lod[0].push_back(0); + expect_lod[0].push_back(1); + expect_lod[0].push_back(2); + expect_lod[0].push_back(3); + + tensor->set_lod(expect_lod); + int* expect = tensor->mutable_data(place); + for (int64_t i = 0; i < tensor->numel(); ++i) { + expect[i] = static_cast(i); + } + paddle::framework::AttributeMap attrs; + attrs.insert({"file_path", std::string("tensor.save")}); + + auto save_op = paddle::framework::OpRegistry::CreateOp( + "save", {{"X", {"test_var"}}}, {}, attrs); + save_op->Run(scope, ctx); + + auto load_var = scope.Var("out_var"); + auto target = load_var->GetMutable(); + auto load_op = paddle::framework::OpRegistry::CreateOp( + "load", {}, {{"Out", {"out_var"}}}, attrs); + load_op->Run(scope, ctx); + int* actual = target->data(); + for (int64_t i = 0; i < tensor->numel(); ++i) { + EXPECT_EQ(expect[i], actual[i]); + } + auto& actual_lod = target->lod(); + EXPECT_EQ(expect_lod.size(), actual_lod.size()); + for (size_t i = 0; i < expect_lod.size(); ++i) { + for (size_t j = 0; j < expect_lod[i].size(); ++j) { + EXPECT_EQ(expect_lod[i][j], actual_lod[i][j]); + } + } +} From 9adb4476e4599922ced2b5556db22ef214e483d8 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Thu, 9 Nov 2017 21:30:38 +0800 Subject: [PATCH 02/28] WIP send recv --- CMakeLists.txt | 1 + cmake/external/grpc.cmake | 61 ++++++++++++ paddle/framework/lod_tensor.cc | 158 +++++++++++++++++++++++++++---- paddle/framework/lod_tensor.h | 8 +- paddle/operators/CMakeLists.txt | 41 +++++++- paddle/operators/load_op.cc | 70 +------------- paddle/operators/recv_op.cc | 91 +++++++++++++----- paddle/operators/save_op.cc | 68 +------------ paddle/operators/send_op.cc | 122 ++++++++++++++++++++++++ paddle/operators/send_recv.proto | 35 ++++--- 10 files changed, 460 insertions(+), 195 deletions(-) create mode 100644 cmake/external/grpc.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index 
fd3582a1bca199..60cb5674af749a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,6 +128,7 @@ include(external/any) # download libn::any include(external/eigen) # download eigen3 include(external/pybind11) # download pybind11 include(external/nccl) +include(external/grpc) include(cudnn) # set cudnn libraries, must before configure include(configure) # add paddle env configuration diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake new file mode 100644 index 00000000000000..bd44af1a3269d9 --- /dev/null +++ b/cmake/external/grpc.cmake @@ -0,0 +1,61 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +include (ExternalProject) + +SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc) +SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc) +SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include" CACHE PATH "grpc include directory." FORCE) + +ExternalProject_Add(grpc + DEPENDS protobuf zlib + GIT_REPOSITORY "https://github.com/grpc/grpc.git" + GIT_TAG "v1.7.x" + PREFIX ${GRPC_SOURCES_DIR} + UPDATE_COMMAND "" + # TODO(jhseu): Remove this PATCH_COMMAND once grpc removes the dependency + # on "grpc" from the "grpc++_unsecure" rule. + #PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_SOURCES_DIR} + BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release --target grpc++_unsecure + COMMAND ${CMAKE_COMMAND} --build . --config Release --target grpc_cpp_plugin + INSTALL_COMMAND "" + CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} + -DCMAKE_INSTALL_PREFIX=${GRPC_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=${GRPC_INSTALL_DIR}/lib + -DCMAKE_POSITION_INDEPENDENT_CODE=ON + -DWITH_GFLAGS=ON + -Dgflags_DIR=${GRPC_INSTALL_DIR}/lib/cmake/gflags + -DBUILD_TESTING=OFF + -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} + ${EXTERNAL_OPTIONAL_ARGS} + CMAKE_CACHE_ARGS + -DCMAKE_BUILD_TYPE:STRING=Release + -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF + -DPROTOBUF_INCLUDE_DIRS:STRING=${PROTOBUF_INCLUDE_DIRS} + -DPROTOBUF_LIBRARIES:STRING=${protobuf_STATIC_LIBRARIES} + -DZLIB_ROOT:STRING=${ZLIB_INSTALL} + -DgRPC_SSL_PROVIDER:STRING=NONE +) + +# grpc/src/core/ext/census/tracing.c depends on the existence of openssl/rand.h. +ExternalProject_Add_Step(grpc copy_rand + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_SOURCE_DIR}/patches/grpc/rand.h ${GRPC_INCLUDE_DIR}/openssl/rand.h + DEPENDEES patch + DEPENDERS build +) \ No newline at end of file diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index a42bf36798db92..73a2fa9837c4d6 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -13,6 +13,8 @@ limitations under the License. 
*/ #include "paddle/framework/lod_tensor.h" +#include "paddle/framework/data_type.h" +#include "paddle/framework/framework.pb.h" #include "paddle/memory/memcpy.h" #include "paddle/memory/memory.h" @@ -27,7 +29,7 @@ namespace paddle { namespace framework { -LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { +LoD SliceLevels(const LoD &in, size_t level_begin, size_t level_end) { LoD new_lod; new_lod.reserve(level_end - level_begin); for (size_t i = level_begin; i < level_end; i++) { @@ -39,7 +41,7 @@ LoD SliceLevels(const LoD& in, size_t level_begin, size_t level_end) { return new_lod; } -LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, +LoD SliceInLevel(const LoD &in, size_t level, size_t elem_begin, size_t elem_end) { PADDLE_ENFORCE_LT(level, in.size()); PADDLE_ENFORCE_LT(elem_end, in[level].size()); @@ -50,9 +52,9 @@ LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, res[0].assign(in[level].begin() + elem_begin, in[level].begin() + elem_end + 1); for (size_t lvl = 1; lvl < res.size(); lvl++) { - const auto& in_level = in[level + lvl]; - const auto& above_level = res[lvl - 1]; - auto& out_level = res[lvl]; + const auto &in_level = in[level + lvl]; + const auto &above_level = res[lvl - 1]; + auto &out_level = res[lvl]; out_level.assign(in_level.begin() + above_level.front(), in_level.begin() + above_level.back() + 1); } @@ -60,33 +62,33 @@ LoD SliceInLevel(const LoD& in, size_t level, size_t elem_begin, // to make the first offset equals 0, all the elements minus the first // element size_t front = res[lvl].front(); - for (auto& ele : res[lvl]) { + for (auto &ele : res[lvl]) { ele -= front; } } return res; } -LoD ToAbsOffset(const LoD& in) { +LoD ToAbsOffset(const LoD &in) { // the lowest level stores relative offsets if (in.empty() || in.size() == 1) return in; LoD result = in; for (int level = result.size() - 2; level >= 0; level--) { - for (auto& ele : result[level]) { + for (auto &ele : result[level]) { ele = result[level + 1][ele]; } } return result; } -bool operator==(const LoD& a, const LoD& b) { +bool operator==(const LoD &a, const LoD &b) { if (a.size() != b.size()) { return false; } for (size_t i = 0; i < a.size(); i++) { - const auto& a_level = a[i]; - const auto& b_level = b[i]; + const auto &a_level = a[i]; + const auto &b_level = b[i]; if (a_level.size() != b_level.size()) { return false; } @@ -136,9 +138,9 @@ void LoDTensor::ShrinkInLevel(size_t level, size_t elem_begin, ShareDataWith(Slice(begin, end)); } -void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx, - std::vector>* lod_length, - size_t* start_offset) { +void GetFineGrainedLoDLength(const LoD &lod, size_t start_idx, size_t end_idx, + std::vector> *lod_length, + size_t *start_offset) { lod_length->clear(); PADDLE_ENFORCE(start_idx < lod.size() - 1, "start_idx should be >= 0 and < lod.size() - 1."); @@ -158,12 +160,12 @@ void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx, *start_offset = start_idx; } -void AppendLoD(LoD* lod, const std::vector>& lod_length) { +void AppendLoD(LoD *lod, const std::vector> &lod_length) { PADDLE_ENFORCE_EQ( lod->size(), lod_length.size(), "The lod_length should has the same size with the appended lod."); for (size_t i = 0; i < lod->size(); ++i) { - auto& level = (*lod)[i]; + auto &level = (*lod)[i]; if (level.empty()) { level.push_back(0); } @@ -173,8 +175,130 @@ void AppendLoD(LoD* lod, const std::vector>& lod_length) { } } -void SerializeToStream(std::ostream& os) { +void 
SerializeToStream(std::ostream &os, const LoDTensor &tensor) {
+  // TODO(typhoonzero): serialize to ostream
+  {  // the 1st field, uint32_t version
+    constexpr uint32_t version = 0;
+    os.write(reinterpret_cast<const char *>(&version), sizeof(version));
+  }
+  {  // the 2nd field, tensor description
+     // int32_t size
+     // void* protobuf message
+    framework::TensorDesc desc;
+    desc.set_data_type(framework::ToDataType(tensor.type()));
+    auto dims = framework::vectorize(tensor.dims());
+    auto *pb_dims = desc.mutable_dims();
+    pb_dims->Resize(static_cast<int>(dims.size()), 0);
+    std::copy(dims.begin(), dims.end(), pb_dims->begin());
+    int32_t size = desc.ByteSize();
+    os.write(reinterpret_cast<const char *>(&size), sizeof(size));
+    auto out = desc.SerializeAsString();
+    os.write(out.data(), size);
+  }
+  {  // the 3rd field, tensor data
+    uint64_t size = tensor.memory_size();
+    auto *data_ptr = tensor.data<void>();
+    PADDLE_ENFORCE(size < std::numeric_limits<std::streamsize>::max(),
+                   "Index overflow when writing tensor");
+    if (platform::is_gpu_place(tensor.place())) {
+#ifdef PADDLE_WITH_CUDA
+      constexpr size_t kBufSize = 1024 * 1024 * 64;  // 64MB
+      std::unique_ptr<char[]> buf(new char[kBufSize]);
+      auto &gpu_dev_ctx =
+          static_cast<const platform::CUDADeviceContext &>(dev_ctx);
+      platform::CPUPlace cpu;
+      uintptr_t data = reinterpret_cast<uintptr_t>(data_ptr);
+      while (size != 0) {
+        size_t size_to_write = std::min(kBufSize, static_cast<size_t>(size));
+        memory::Copy(cpu, buf.get(),
+                     boost::get<platform::GPUPlace>(tensor.place()),
+                     reinterpret_cast<const void *>(data), size_to_write,
+                     gpu_dev_ctx.stream());
+        gpu_dev_ctx.Wait();
+        os.write(buf.get(), size_to_write);
+        data += size_to_write;
+        size -= size_to_write;
+      }
+#else
+      PADDLE_THROW("Unexpected branch");
+#endif
+    } else {
+      os.write(static_cast<const char *>(data_ptr),
+               static_cast<std::streamsize>(size));
+    }
+  }
+  {  // the 4th field, lod information
+     // uint64_t lod_level
+     // uint64_t lod_level_1 size in byte.
+     // int* lod_level_1 data
+     // ...
+    auto lod = tensor.lod();
+    uint64_t size = lod.size();
+    os.write(reinterpret_cast<const char *>(&size), sizeof(size));
+
+    for (auto &each : lod) {
+      size = each.size() * sizeof(framework::LoD::value_type::value_type);
+      os.write(reinterpret_cast<const char *>(&size), sizeof(size));
+      os.write(reinterpret_cast<const char *>(each.data()),
+               static_cast<std::streamsize>(size));
+    }
+  }
+}
+
+void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
+  uint32_t version;
+  is.read(reinterpret_cast<char *>(&version), sizeof(version));
+  PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
+  framework::TensorDesc desc;
+  {  // int32_t size
+     // proto buffer
+    int32_t size;
+    is.read(reinterpret_cast<char *>(&size), sizeof(size));
+    std::unique_ptr<char[]> buf(new char[size]);
+    is.read(reinterpret_cast<char *>(buf.get()), size);
+    PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
+                   "Cannot parse tensor desc");
+  }
+  {  // read tensor
+    std::vector<int64_t> dims;
+    dims.reserve(static_cast<size_t>(desc.dims().size()));
+    std::copy(desc.dims().begin(), desc.dims().end(), std::back_inserter(dims));
+    tensor->Resize(framework::make_ddim(dims));
+
+    void *buf;
+    platform::Place cpu = platform::CPUPlace();
+    switch (desc.data_type()) {
+      case framework::FP32:
+        buf = tensor->mutable_data<float>(cpu);
+        break;
+      case framework::FP64:
+        buf = tensor->mutable_data<double>(cpu);
+        break;
+      case framework::INT32:
+        buf = tensor->mutable_data<int>(cpu);
+        break;
+      case framework::INT64:
+        buf = tensor->mutable_data<int64_t>(cpu);
+        break;
+      default:
+        PADDLE_THROW("DataType %d not supported", desc.data_type());
+    }
+    is.read(static_cast<char *>(buf), tensor->memory_size());
+  }
+  {  // read lod
+    uint64_t lod_level;
+    is.read(reinterpret_cast<char *>(&lod_level), sizeof(lod_level));
+    auto &lod = *tensor->mutable_lod();
+    lod.resize(lod_level);
+    for (uint64_t i = 0; i < lod_level; ++i) {
+      uint64_t size;
+      is.read(reinterpret_cast<char *>(&size), sizeof(size));
+      std::vector<size_t> tmp(size / sizeof(size_t));
+      is.read(reinterpret_cast<char *>(tmp.data()),
+              static_cast<std::streamsize>(size));
+      lod[i] = tmp;
+    }
+  }
+}
+
 } // namespace framework
diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h
index 8a424d2e15804d..c5dbe50d1297e9 100644
--- a/paddle/framework/lod_tensor.h
+++ b/paddle/framework/lod_tensor.h
@@ -187,7 +187,13 @@ void GetFineGrainedLoDLength(const LoD& lod, size_t start_idx, size_t end_idx,
 void AppendLoD(LoD* lod, const std::vector<std::vector<size_t>>& lod_length);

-void SerializeToStream(std::ostream& os);
+/*
+ * Serialize/Deserialize a LoDTensor to/from std::ostream.
+ * You can pass an ofstream or ostringstream to serialize to a file
+ * or to an in-memory string.
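+ *
+ * Byte layout produced by SerializeToStream (summarizing the implementation
+ * in lod_tensor.cc above):
+ *   uint32_t version            currently always 0
+ *   int32_t  desc_size          byte length of the TensorDesc protobuf
+ *   bytes    tensor_desc        serialized TensorDesc (data type and dims)
+ *   bytes    tensor_data        memory_size() raw bytes (GPU data is staged
+ *                               through a CPU buffer first)
+ *   uint64_t lod_level          number of LoD levels; for each level, a
+ *                               uint64_t byte size followed by the offsets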
+ */ +void SerializeToStream(std::ostream& os, const LoDTensor& tensor); +void DeserializeFromStream(std::istream& is, LoDTensor* tensor); } // namespace framework } // namespace paddle diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f22f86468db7be..a24badd8d0f137 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -146,6 +146,37 @@ function(op_library TARGET) endif() endfunction() + +function(grpc_library TARGET_NAME) +set(oneValueArgs "") +set(multiValueArgs SRCS DEPS) +cmake_parse_arguments(proto_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + +set(_all_grpc_gen_srcs) + +foreach(FIL ${ARGN}) + + get_filename_component(ABS_FIL ${FIL} ABSOLUTE) + get_filename_component(FIL_WE ${FIL} NAME_WE) + get_filename_component(hw_proto_path "${hw_proto}" PATH) + + protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${grpc_library_SRCS}") + set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc") + set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h") + LIST(APPEND ${_all_grpc_gen_srcs} ${grpc_grpc_srcs}) + add_custom_command( + OUTPUT "${grpc_grpc_srcs}" "${grpc_grpc_hdrs}" + COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" + --plugin=protoc-gen-grpc="${gRPC_CPP_PLUGIN_EXECUTABLE}" + "${ABS_FIL}" + DEPENDS "${ABS_FIL}") + +endforeach() +cc_library("${TARGET_NAME}" SRC "${_all_grpc_gen_srcs}" DEPS protobuf gRPC) +endfunction() + + add_subdirectory(math) add_subdirectory(nccl) @@ -167,7 +198,13 @@ set(DEPS_OPS lod_rank_table_op lstm_op tensor_array_read_write_op - gru_op) + gru_op + send_op + recv_op) + +grpc_library(sendrecvop_proto SRCS send_recv.proto) +op_library(send_op SRCS send_op.cc DEPS sendrecvop_proto grpc) +op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_proto grpc) op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cross_entropy_op DEPS cross_entropy) @@ -197,6 +234,8 @@ endforeach() set(GLOB_OP_LIB ${OP_LIBRARY} CACHE INTERNAL "Global OP library") + + cc_test(gather_test SRCS gather_test.cc DEPS tensor) cc_test(net_op_test SRCS net_op_test.cc DEPS net_op) cc_test(scatter_test SRCS scatter_test.cc DEPS tensor) diff --git a/paddle/operators/load_op.cc b/paddle/operators/load_op.cc index b71a33a6b1ce80..b2afd4713f48be 100644 --- a/paddle/operators/load_op.cc +++ b/paddle/operators/load_op.cc @@ -38,75 +38,7 @@ class LoadOp : public framework::OperatorBase { out_var_name); auto *tensor = out_var->GetMutable(); - - uint32_t version; - fin.read(reinterpret_cast(&version), sizeof(version)); - PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported"); - framework::TensorDesc desc; - { // int32_t size - // proto buffer - int32_t size; - fin.read(reinterpret_cast(&size), sizeof(size)); - std::unique_ptr buf(new char[size]); - fin.read(reinterpret_cast(buf.get()), size); - PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size), - "Cannot parse tensor desc"); - } - { // read tensor - std::vector dims; - dims.reserve(static_cast(desc.dims().size())); - std::copy(desc.dims().begin(), desc.dims().end(), - std::back_inserter(dims)); - tensor->Resize(framework::make_ddim(dims)); - - void *buf; - platform::Place cpu = platform::CPUPlace(); - switch (desc.data_type()) { - case framework::FP32: - buf = tensor->mutable_data(cpu); - break; - case framework::FP64: - buf = tensor->mutable_data(cpu); - break; - case framework::INT32: - buf = tensor->mutable_data(cpu); - break; - case framework::INT64: - buf = 
tensor->mutable_data(cpu); - break; - default: - PADDLE_THROW("DataType %d not supported", desc.data_type()); - } - fin.read(static_cast(buf), tensor->memory_size()); - } - { // read lod - uint64_t lod_level; - fin.read(reinterpret_cast(&lod_level), sizeof(lod_level)); - auto &lod = *tensor->mutable_lod(); - lod.resize(lod_level); - for (uint64_t i = 0; i < lod_level; ++i) { - uint64_t size; - fin.read(reinterpret_cast(&size), sizeof(size)); - std::vector tmp(size / sizeof(size_t)); - fin.read(reinterpret_cast(tmp.data()), - static_cast(size)); - lod[i] = tmp; - } - } - - auto place = dev_ctx.GetPlace(); - if (platform::is_gpu_place(place)) { - // copy CPU to GPU - framework::LoDTensor cpu_tensor; - cpu_tensor.ShareDataWith(*tensor); - cpu_tensor.set_lod(tensor->lod()); - - // reset tensor - out_var->Clear(); - tensor = out_var->GetMutable(); - tensor->set_lod(cpu_tensor.lod()); - tensor->CopyFrom(cpu_tensor, place, dev_ctx); - } + framework::DeserializeFromStream(fin, tensor); } }; diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index e0c8b219cdaa5f..4eb3c1d48766dd 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -14,8 +14,8 @@ #include #include -#include -#include +#include +#include #include "paddle/framework/data_type.h" #include "paddle/framework/framework.pb.h" @@ -28,7 +28,7 @@ #include #include #include -#include "sendrecv.grpc.pb.h" +#include "paddle/operators/send_recv.grpc.pb.h" using grpc::Server; using grpc::ServerBuilder; @@ -42,36 +42,58 @@ using sendrecv::TensorMessage; class SendRecvServerImpl final : public SendRecvOp::Service { public: - explicit RouteGuideImpl() {} + explicit SendRecvServerImpl() {} - Status SendTensor(ServerContext *context, const TensorMessage *in_tensor, - TensorMessage *out_tensor) override { + Status SendTensor(ServerContext *context, const std::string *in_tensor, + std::string *out_tensor) override { framework::LodTensor t; - // load t from in_tensor messge. + // TODO(typhoonzero): desirealize in_tensor and run pserver network. + std::istringstream iss(*in_tensor); + framework::Tensor t; + framework::DesirializeFromStream(iss, &t); + lodtensor_queue_.Push(std::move(t)); + // Block util the sub graph is done. + auto t = lodtensor_return_queue_.Pop(); + std::ostringstream oss; + framework::SerializeToStream(oss, &t); + *out_tensor = oss.str(); } Status SendTensorStream( ServerContext *context, ServerReaderWriter *stream) override { - std::vector received_tensor_chunk; - TensorMessage tensormsg; - while (stream->Read(&tensormsg)) { - // TODO(typhoonzero): implement stream methods. - // stream->Write(tensormsg); - // framework::LodTensor t; - // lodtensor_queue_.Push(); - } + // TODO(typhoonzero): implement stream methods. 
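+    // A possible shape for this handler, sketched as comments; it mirrors
+    // the unary SendTensor path above and assumes the same string-serialized
+    // messages: read each request, queue it for the optimize sub-graph, then
+    // stream the optimized result back to the client:
+    //
+    //   std::string msg;
+    //   while (stream->Read(&msg)) {
+    //     framework::LoDTensor t;
+    //     std::istringstream iss(msg);
+    //     framework::DeserializeFromStream(iss, &t);
+    //     lodtensor_queue_.Push(std::move(t));
+    //     std::ostringstream oss;
+    //     framework::SerializeToStream(oss, lodtensor_return_queue_.Pop());
+    //     msg = oss.str();
+    //     stream->Write(msg);
+    //   }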
+ return Status::OK; + } + + Status SendSelectedRows(ServerContext *context, const std::string *in_sr, + std::string *out_sr) { + // TODO(typhoonzero): implement SendSelectedRows return Status::OK; } + Status SendSelectedRowsStream( + ServerContext *context, + ServerReaderWriter *stream) override { + // TODO(typhoonzero): implement SendSelectedRowsStream + return Status::OK; + } + + const framework::LodTensor &Get() const { return lodtensor_queue_.Pop(); } + + void Push(framework::LodTensor &tensor) { + lodtensor_return_queue_.Push(tensor); + } + private: SimpleBlockQueue lodtensor_queue_; + SimpleBlockQueue lodtensor_return_queue_; + SimpleBlockQueue selected_rows_queue_; + SimpleBlockQueue selected_rows_return_queue_; }; -void RunServer(const std::string &endpoint) { - std::string server_address(endpoint); - RouteGuideImpl service; - +void RunServer(const SendRecvServerImpl &service, + const std::string &server_address) { ServerBuilder builder; builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); builder.RegisterService(&service); @@ -91,8 +113,33 @@ class RecvOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) {} void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { - // TODO(typhoonzero): start RPC server here - // TODO(typhoonzero): how to trigger server side net graph non-blocking? + constexpr RecvOpName = "RecvOp@SendRecvServerImpl"; + auto *var = scope.FindVar(RecvOpName); + if (var == nullptr) { + // create RPC server object if it is not inited. + std::string endpoint = Attr("endpoint"); + var = scope.Var(RecvOpName); + SendRecvServerImpl *service = var->GetMutable(); + + // start server in a thread in background + std::thread server_thread(RunServer(*service, endpoit)); + } + SendRecvServerImpl *service = var->Get(); + framework::LoDTensor &t = service->Get(); + // set graph input var + auto *var = scope.Var(Input("X")); + auto *tensor = var->GetMutable(); + // FIXME(typhoonzero): do not copy + tensor->CopyFrom(t, dev_ctx.GetPlace(), dev_ctx); + + auto *block = Attr("OptimizeBlock"); + auto *program = block->Program(); + framework::Executor executor(dev_ctx); + // Run sub graph to get optimized tensor + executor.Run(*program, &scope, block->ID(), false /*create_local_scope*/); + + auto *out_var = scope.FindVar("Out"); + service->Push(out_var->Get()); } }; @@ -119,4 +166,4 @@ This operator will recv tensor from send_op namespace ops = paddle::operators; -REGISTER_OPERATOR(save, ops::SaveOp, ops::SaveOpProtoMaker); +REGISTER_OPERATOR(recv, ops::RecvOp, ops::RecvOpMaker); diff --git a/paddle/operators/save_op.cc b/paddle/operators/save_op.cc index 56909fb65f44ad..403d604b77db59 100644 --- a/paddle/operators/save_op.cc +++ b/paddle/operators/save_op.cc @@ -88,73 +88,7 @@ class SaveOp : public framework::OperatorBase { "SaveOp only support LoDTensor, %s has wrong type", iname); auto &tensor = var->Get(); - - { // the 1st field, uint32_t version - constexpr uint32_t version = 0; - fout.write(reinterpret_cast(&version), sizeof(version)); - } - { // the 2nd field, tensor description - // int32_t size - // void* protobuf message - framework::TensorDesc desc; - desc.set_data_type(framework::ToDataType(tensor.type())); - auto dims = framework::vectorize(tensor.dims()); - auto *pb_dims = desc.mutable_dims(); - pb_dims->Resize(static_cast(dims.size()), 0); - std::copy(dims.begin(), dims.end(), pb_dims->begin()); - int32_t size = desc.ByteSize(); - fout.write(reinterpret_cast(&size), 
sizeof(size)); - auto out = desc.SerializeAsString(); - fout.write(out.data(), size); - } - { // the 3rd field, tensor data - uint64_t size = tensor.memory_size(); - auto *data_ptr = tensor.data(); - PADDLE_ENFORCE(size < std::numeric_limits::max(), - "Index overflow when writing tensor"); - if (platform::is_gpu_place(tensor.place())) { -#ifdef PADDLE_WITH_CUDA - constexpr size_t kBufSize = 1024 * 1024 * 64; // 64MB - std::unique_ptr buf(new char[kBufSize]); - auto &gpu_dev_ctx = - static_cast(dev_ctx); - platform::CPUPlace cpu; - uintptr_t data = reinterpret_cast(data_ptr); - while (size != 0) { - size_t size_to_write = std::min(kBufSize, static_cast(size)); - memory::Copy(cpu, buf.get(), - boost::get(tensor.place()), - reinterpret_cast(data), size_to_write, - gpu_dev_ctx.stream()); - gpu_dev_ctx.Wait(); - fout.write(buf.get(), size_to_write); - data += size_to_write; - size -= size_to_write; - } -#else - PADDLE_THROW("Unexpected branch"); -#endif - } else { - fout.write(static_cast(data_ptr), - static_cast(size)); - } - } - { // the 4th field, lod information - // uint64_t lod_level - // uint64_t lod_level_1 size in byte. - // int* lod_level_1 data - // ... - auto lod = tensor.lod(); - uint64_t size = lod.size(); - fout.write(reinterpret_cast(&size), sizeof(size)); - - for (auto &each : lod) { - size = each.size() * sizeof(framework::LoD::value_type::value_type); - fout.write(reinterpret_cast(&size), sizeof(size)); - fout.write(reinterpret_cast(each.data()), - static_cast(size)); - } - } + framework::SerializeToStream(fout, tensor); } }; diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index e69de29bb2d1d6..dd4bc3271e6d4f 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -0,0 +1,122 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include +#include +#include +#include + +#include "paddle/framework/data_type.h" +#include "paddle/framework/framework.pb.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/framework/op_registry.h" +#include "paddle/operators/detail/simple_block_queue.h" + +#include +#include +#include +#include +#include +#include "paddle/operators/send_recv.grpc.pb.h" + +using grpc::Channel; +using grpc::ClientContext; +using grpc::ClientReader; +using grpc::ClientReaderWriter; +using grpc::ClientWriter; +using grpc::Status; +using sendrecv::SendRecvOp; +using sendrecv::SendTensor; + +// RPCClient is a class to send tensors to pserver sub-network +// using different hashing methods. +class RPCClient { + public: + RPCClient(std::shared_ptr channel) + : stub_(SendRecvOp::NewStub(channel)) {} + + bool SendTensor(const framework::LoDTensor &tensor) { + ClientContext context; + Status status = stub_->SendTensor(&context, tensor); + if (!status.ok()) { + std::cout << "GetFeature rpc failed." 
<< std::endl; + return false; + } + return true; + } + + std::unique_ptr stub_; +}; + +namespace paddle { +namespace operators { + +// TODO(typhoonzero): this is a simple implementation which only send +// one tensor +class SendOp : public framework::OperatorBase { + public: + SendOp(const std::string &type, const framework::VariableNameMap &inputs, + const framework::VariableNameMap &outputs, + const framework::AttributeMap &attrs) + : OperatorBase(type, inputs, outputs, attrs) {} + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + constexpr SendOpName = "SendOp@RPCClient"; + auto *var = scope.FindVar(SendOpName); + if (var == nullptr) { + // create RPC server object if it is not inited. + std::string endpoint = Attr("endpoint"); + var = scope.Var(SendOpName); + RPCClient *client = var->GetMutable(); + } + RPCClient *client = var->Get(); + + auto iname = Input("X"); + auto oname = Output("Out"); + auto *var = scope.FindVar(iname); + auto *tensor = var->Get(); + // call sync send + auto *optimized_tensor = client->SendTensor(*tensor); + // FIXME(typhoonzero): do not copy + auto *out_var = scope.FindVar(oname); + out_var->GetMutable(); + out_var->CopyFrom(*optimized_tensor, dev_ctx.GetPlace(), dev_ctx); + } +}; + +class SendOpMaker : public framework::OpProtoAndCheckerMaker { + public: + SendOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) + : OpProtoAndCheckerMaker(proto, op_checker) { + AddInput("X", "(Tensor) Input tensor to be saved"); + AddOutput("Out", "(Tensor) Output fetched from server"); + AddComment(R"DOC( +Recv operator + +This operator will recv tensor from send_op +)DOC"); + AddAttr("endpoint", + "(string, default 127.0.0.1:6164)" + "IP address to listen on.") + .SetDefault("127.0.0.1:6164") + .AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); + } +}; + +} // namespace operators +} // namespace paddle + +namespace ops = paddle::operators; + +REGISTER_OPERATOR(send, ops::SendOp, ops::SendOpMaker); diff --git a/paddle/operators/send_recv.proto b/paddle/operators/send_recv.proto index 10417715e549c1..962abff8049b5e 100644 --- a/paddle/operators/send_recv.proto +++ b/paddle/operators/send_recv.proto @@ -16,27 +16,26 @@ syntax = "proto3"; package sendrecv; -import "../framework/framework.proto" // SendRecv RPC definations. service SendRecvOp { // For parameter server round-robin like hashing, do not split tensors. // Send and recv only one tensor rpc SendTensor(TensorMessage) returns (TensorMessage) {} // Bidirectional streaming RPC that client send and recv streams of tensors. rpc SendTensorStream(stream TensorMessage) returns (stream TensorMessage) {} - -// For large tensor hashing -rpc SendTensorChunk(TensorChunk) returns (TensorChunk) {} -rpc SendTensorChunkStream(stream TensorChunk) returns (stream TensorChunk) {} +import "../framework/framework.proto" // SendRecv RPC definations. + +service SendRecvOp { + // For parameter server round-robin like hashing, do not split tensors. + // Send and recv only one tensor + rpc SendTensor(TensorMessage) returns (TensorMessage) {} + // Bidirectional streaming RPC that client send and recv streams of tensors. + rpc SendTensorStream(stream TensorMessage) returns (stream TensorMessage) {} + + // For large tensor hashing + rpc SendSelectedRows(SelectedRows) returns (SelectedRows) {} + rpc SendSelectedRowsStream(stream SelectedRows) returns (stream SelectedRows) {} } -// TensorMessage a single paddle LoDTensor information. 
+// TensorMessage is serialized binary tensor data. message TensorMessage { - uint32 version = 1; - paddle.framework.TensorDesc tensor_desc = 2; - uint64 tensor_data_size = 3; - bytes tensor_data = 4; - uint64 lod_data_size = 5; - bytes lod_data = 6; + string serialized = 1; } -message TensorChunk { - uint32 version = 1; - paddle.framework.TensorDesc tensor_desc = 2; - uint64 chunk_offset = 3; // offset of tensor data. - uint64 chunk_size = 4; - bytes chunk_data = 5; +// SelectedRows +message SelectedRows { + string serialized = 1; } From a487601363d2a34bc9af5b1d990c6f1ed0b90f2e Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Sun, 12 Nov 2017 17:45:23 +0800 Subject: [PATCH 03/28] put grpc impl in details --- cmake/generic.cmake | 26 +- .../v2/framework/tests/test_send_recv.py | 273 ++++++++++++++++++ 2 files changed, 286 insertions(+), 13 deletions(-) create mode 100644 python/paddle/v2/framework/tests/test_send_recv.py diff --git a/cmake/generic.cmake b/cmake/generic.cmake index ec0c6f01c39a5d..fa59ec59e4317f 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -470,27 +470,27 @@ endfunction() function(grpc_library TARGET_NAME) - set(oneValueArgs SRCS) - set(multiValueArgs "") + set(oneValueArgs PROTO) + set(multiValueArgs SRCS) set(options "") cmake_parse_arguments(grpc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - message("processing ${grpc_library_SRCS}") + message("processing ${grpc_library_PROTO}") - get_filename_component(ABS_FIL ${grpc_library_SRCS} ABSOLUTE) - get_filename_component(FIL_WE ${grpc_library_SRCS} NAME_WE) - get_filename_component(FIL_PATH ${ABS_FIL} PATH) + get_filename_component(ABS_PROTO ${grpc_library_PROTO} ABSOLUTE) + get_filename_component(PROTO_WE ${grpc_library_PROTO} NAME_WE) + get_filename_component(PROTO_PATH ${ABS_FIL} PATH) - protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${grpc_library_SRCS}") - set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.cc") - set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${FIL_WE}.grpc.pb.h") + protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${ABS_PROTO}") + set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.cc") + set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.h") add_custom_command( OUTPUT "${grpc_grpc_srcs}" "${grpc_grpc_hdrs}" COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} - ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${FIL_PATH}" - --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_FIL}" - DEPENDS "${ABS_FIL}") + ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" + --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" + DEPENDS "${ABS_PROTO}") - cc_library("${TARGET_NAME}" SRCS "${grpc_grpc_srcs}" "${grpc_proto_srcs}" DEPS protobuf grpc) + cc_library("${TARGET_NAME}" SRCS "${SRCS}" "${grpc_grpc_srcs}" "${grpc_proto_srcs}" DEPS protobuf grpc) endfunction() diff --git a/python/paddle/v2/framework/tests/test_send_recv.py b/python/paddle/v2/framework/tests/test_send_recv.py new file mode 100644 index 00000000000000..c670ab0a2477d3 --- /dev/null +++ b/python/paddle/v2/framework/tests/test_send_recv.py @@ -0,0 +1,273 @@ +import unittest +import numpy as np +import random +import itertools +import paddle.v2.framework.core as core +import collections +from paddle.v2.framework.backward import append_backward_ops +from paddle.v2.framework.op import Operator +from paddle.v2.framework.executor import Executor +from paddle.v2.framework.framework import Program, OpProtoHolder + + +def create_op(scope, op_type, 
inputs, outputs, attrs): + kwargs = dict() + + def __create_var__(name, var_name): + scope.var(var_name).get_tensor() + kwargs[name].append(var_name) + + for in_name, in_dup in Operator.get_op_inputs(op_type): + if in_name in inputs: + kwargs[in_name] = [] + if in_dup: + sub_in = inputs[in_name] + for sub_in_name, _ in sub_in: + __create_var__(in_name, sub_in_name) + else: + __create_var__(in_name, in_name) + + for out_name, out_dup in Operator.get_op_outputs(op_type): + if out_name in outputs: + kwargs[out_name] = [] + if out_dup: + sub_out = outputs[out_name] + for sub_out_name, _ in sub_out: + __create_var__(out_name, sub_out_name) + else: + __create_var__(out_name, out_name) + + for attr_name in Operator.get_op_attr_names(op_type): + if attr_name in attrs: + kwargs[attr_name] = attrs[attr_name] + + return Operator(op_type, **kwargs) + + +def set_input(scope, op, inputs, place): + def __set_input__(var_name, var): + if isinstance(var, tuple) or isinstance(var, np.ndarray): + tensor = scope.find_var(var_name).get_tensor() + if isinstance(var, tuple): + tensor.set_lod(var[1]) + var = var[0] + tensor.set_dims(var.shape) + tensor.set(var, place) + elif isinstance(var, float): + scope.find_var(var_name).set_float(var) + elif isinstance(var, int): + scope.find_var(var_name).set_int(var) + + for in_name, in_dup in Operator.get_op_inputs(op.type()): + if in_name in inputs: + if in_dup: + sub_in = inputs[in_name] + for sub_in_name, sub_in_val in sub_in: + __set_input__(sub_in_name, sub_in_val) + else: + __set_input__(in_name, inputs[in_name]) + + +def append_input_output(block, op_proto, np_list, is_input): + '''Insert VarDesc and generate Python variable instance''' + proto_list = op_proto.inputs if is_input else op_proto.outputs + + def create_var(block, name, np_list, var_proto): + if name not in np_list: + assert var_proto.intermediate, "{} not found".format(name) + shape = None + lod_level = None + else: + np_value = np_list[name] + if isinstance(np_value, tuple): + shape = list(np_value[0].shape) + lod_level = len(np_value[1]) + else: + shape = list(np_value.shape) + lod_level = 0 + return block.create_var( + dtype="float32", shape=shape, lod_level=lod_level, name=name) + + var_dict = {} + for var_proto in proto_list: + var_name = str(var_proto.name) + if is_input: + if (var_name not in np_list) and var_proto.dispensable: + continue + assert (var_name in np_list) or (var_proto.dispensable), \ + "Missing {} as input".format(var_name) + if var_proto.duplicable: + assert isinstance(np_list[var_name], list), \ + "Duplicable {} should be set as list".format(var_name) + var_list = [] + for (name, np_value) in np_list[var_name]: + var_list.append( + create_var(block, name, {name: np_value}, var_proto)) + var_dict[var_name] = var_list + else: + var_dict[var_name] = create_var(block, var_name, np_list, var_proto) + + return var_dict + + +class SendRecvTest(unittest.TestCase): + def feed_var(self, input_vars, place): + feed_map = {} + for var_name in input_vars: + if isinstance(input_vars[var_name], list): + for name, np_value in self.inputs[var_name]: + tensor = core.LoDTensor() + tensor.set(np_value, place) + feed_map[name] = tensor + else: + tensor = core.LoDTensor() + if isinstance(self.inputs[var_name], tuple): + tensor.set(self.inputs[var_name][0], place) + tensor.set_lod(self.inputs[var_name][1]) + else: + tensor.set(self.inputs[var_name], place) + feed_map[var_name] = tensor + + return feed_map + + def check_output_with_place(self, place, atol): + op_proto = 
OpProtoHolder.instance().get_op_proto(self.op_type) + + program = Program() + block = program.global_block() + + inputs = append_input_output(block, op_proto, self.inputs, True) + outputs = append_input_output(block, op_proto, self.outputs, False) + + op = block.append_op( + type=self.op_type, + inputs=inputs, + outputs=outputs, + attrs=self.attrs if hasattr(self, "attrs") else dict()) + # infer variable type and infer shape in compile-time + op.desc.infer_var_type(block.desc) + op.desc.infer_shape(block.desc) + + fetch_list = [] + for var_name, var in outputs.iteritems(): + if var_name in self.outputs: + if isinstance(var, list): + for v in var: + fetch_list.append(v) + else: + fetch_list.append(var) + + feed_map = self.feed_var(inputs, place) + + exe = Executor(place) + outs = exe.run(program, feed=feed_map, fetch_list=fetch_list) + + for out_name, out_dup in Operator.get_op_outputs(self.op_type): + if out_name not in self.outputs: + continue + + def find_actual(target_name, fetch_list): + found = [ + i for i, var in enumerate(fetch_list) + if var.name == target_name + ] + self.assertTrue( + len(found) == 1, "Found {} {}".format( + len(found), target_name)) + return found[0] + + if out_dup: + sub_out = self.outputs[out_name] + if not isinstance(sub_out, list): + raise AssertionError("sub_out type %s is not list", + type(sub_out)) + for sub_out_name, expect in sub_out: + idx = find_actual(sub_out_name, fetch_list) + actual = outs[idx] + actual_t = np.array(actual) + expect_t = expect[0] \ + if isinstance(expect, tuple) else expect + self.assertTrue( + np.allclose( + actual_t, expect_t, atol=atol), + "Output (" + sub_out_name + ") has diff at " + + str(place)) + if isinstance(expect, tuple): + self.assertListEqual( + actual.lod(), expect[1], "Output (" + sub_out_name + + ") has different lod at " + str(place)) + else: + idx = find_actual(out_name, fetch_list) + actual = outs[idx] + actual_t = np.array(actual) + expect = self.outputs[out_name] + expect_t = expect[0] if isinstance(expect, tuple) else expect + self.assertTrue( + np.allclose( + actual_t, expect_t, atol=atol), + "Output (" + out_name + ") has diff at " + str(place)) + if isinstance(expect, tuple): + self.assertListEqual(actual.lod(), expect[1], + "Output (" + out_name + + ") has different lod at " + str(place)) + + def check_output(self, atol=1e-5): + places = [core.CPUPlace()] + if core.is_compile_gpu() and core.op_support_gpu(self.op_type): + places.append(core.GPUPlace(0)) + for place in places: + self.check_output_with_place(place, atol) + + def __assert_is_close(self, numeric_grads, analytic_grads, names, + max_relative_error, msg_prefix): + + for a, b, name in itertools.izip(numeric_grads, analytic_grads, names): + abs_a = np.abs(a) + abs_a[abs_a < 1e-3] = 1 + + diff_mat = np.abs(a - b) / abs_a + max_diff = np.max(diff_mat) + + def err_msg(): + offset = np.argmax(diff_mat > max_relative_error) + return ("%s Variable %s max gradient diff %f over limit %f, " + "the first error element is %d, %f, %f") % ( + msg_prefix, name, max_diff, max_relative_error, + offset, a.flatten()[offset], b.flatten()[offset]) + + self.assertLessEqual(max_diff, max_relative_error, err_msg()) + + @staticmethod + def _create_var_descs_(block, var_dict): + # FIXME: Try unify with `append_input_output` + for param_name in var_dict: + var = var_dict[param_name] + if not isinstance(var, list) and not isinstance(var, tuple): + var = [(param_name, var, None)] + if not isinstance(var[0], list) and not isinstance(var[0], tuple): + var = [(param_name, 
var[0], var[1])] + + for i, item in enumerate(var): + if not isinstance(item[0], basestring): + item = [[param_name] + list(item)] + if len(item) == 2: + # only set var name and value, set lod to None + var[i] = list(item) + [None] + + var_descs = [(block.create_var( + name=name, shape=each.shape, dtype=each.dtype), each, lod) + for name, each, lod in var] + + yield param_name, var_descs + + @staticmethod + def _merge_list(iterable): + return reduce(lambda a, b: list(a) + list(b), iterable, []) + + @staticmethod + def _numpy_to_lod_tensor(np_value, lod, place): + tensor = core.LoDTensor() + tensor.set(np_value, place) + if lod is not None: + tensor.set_lod(lod) + return tensor From a637f38b39d88a7309fc39a71aa151b0e34e71e0 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Sun, 12 Nov 2017 18:17:25 +0800 Subject: [PATCH 04/28] put grpc impl in details --- cmake/generic.cmake | 2 +- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/recv_op.cc | 70 +-------------------------------- paddle/operators/send_op.cc | 36 ----------------- 4 files changed, 3 insertions(+), 107 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index fa59ec59e4317f..4e8b6ee6597538 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -479,7 +479,7 @@ function(grpc_library TARGET_NAME) get_filename_component(ABS_PROTO ${grpc_library_PROTO} ABSOLUTE) get_filename_component(PROTO_WE ${grpc_library_PROTO} NAME_WE) - get_filename_component(PROTO_PATH ${ABS_FIL} PATH) + get_filename_component(PROTO_PATH ${ABS_PROTO} PATH) protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${ABS_PROTO}") set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.cc") diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 50afde4c14409c..0fa163e83df685 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -178,7 +178,7 @@ set(DEPS_OPS send_op recv_op) -grpc_library(sendrecvop_grpc SRCS send_recv.proto) +add_subdirectory(detail) op_library(send_op SRCS send_op.cc DEPS sendrecvop_grpc) op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_grpc) diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index 4f1422ccef771a..9f57657675c8f0 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -21,77 +21,9 @@ #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/detail/send_recv_impl.h" #include "paddle/operators/detail/simple_block_queue.h" -#include -#include -#include -#include -#include -#include "paddle/operators/send_recv.grpc.pb.h" - -using grpc::Server; -using grpc::ServerBuilder; -using grpc::ServerContext; -using grpc::ServerReader; -using grpc::ServerReaderWriter; -using grpc::ServerWriter; -using grpc::Status; -using sendrecv::SendRecvOp; -using sendrecv::VariableMessage; - -class SendRecvServerImpl final : public SendRecvOp::Service { - public: - explicit SendRecvServerImpl() {} - - void SetScope(framework::Scope *scope) { scope_ = scope; } - - Status InitVariables(ServerContext *context, - ServerReader *in_var_reader) override { - // set up all variables to run server side block - PADDLE_ENFORCE(scope_); - VariableMessage in_buf; - while (in_var_reader->Read(&in_buf)) { - // create var if not exist - auto *var = scope_->Var(in_buf.varname); - auto *tensor = var->GetMutable(); - std::istringstream iss(in_buf.serialized); - framework::DeserializeFromStream(iss, *tensor); - } - return Status::OK; - } - 
- Status SendTensor(ServerContext *context, const std::string *in_tensor, - std::string *out_tensor) override { - framework::LodTensor t; - // TODO(typhoonzero): desirealize in_tensor and run pserver network. - std::istringstream iss(*in_tensor); - framework::Tensor t; - framework::DesirializeFromStream(iss, &t); - lodtensor_queue_.Push(std::move(t)); - // Block util the sub graph is done. - auto t = lodtensor_return_queue_.Pop(); - std::ostringstream oss; - framework::SerializeToStream(oss, &t); - *out_tensor = oss.str(); - - return Status::OK; - } - - const framework::LodTensor &Get() const { return lodtensor_queue_.Pop(); } - - void Push(framework::LodTensor &tensor) { - lodtensor_return_queue_.Push(tensor); - } - - private: - framework::Scope *scope_; - SimpleBlockQueue lodtensor_queue_; - SimpleBlockQueue lodtensor_return_queue_; - SimpleBlockQueue selected_rows_queue_; - SimpleBlockQueue selected_rows_return_queue_; -}; - void RunServer(const SendRecvServerImpl &service, const std::string &server_address) { ServerBuilder builder; diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index dd4bc3271e6d4f..b32d5267a94d83 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -23,42 +23,6 @@ #include "paddle/framework/op_registry.h" #include "paddle/operators/detail/simple_block_queue.h" -#include -#include -#include -#include -#include -#include "paddle/operators/send_recv.grpc.pb.h" - -using grpc::Channel; -using grpc::ClientContext; -using grpc::ClientReader; -using grpc::ClientReaderWriter; -using grpc::ClientWriter; -using grpc::Status; -using sendrecv::SendRecvOp; -using sendrecv::SendTensor; - -// RPCClient is a class to send tensors to pserver sub-network -// using different hashing methods. -class RPCClient { - public: - RPCClient(std::shared_ptr channel) - : stub_(SendRecvOp::NewStub(channel)) {} - - bool SendTensor(const framework::LoDTensor &tensor) { - ClientContext context; - Status status = stub_->SendTensor(&context, tensor); - if (!status.ok()) { - std::cout << "GetFeature rpc failed." << std::endl; - return false; - } - return true; - } - - std::unique_ptr stub_; -}; - namespace paddle { namespace operators { From fc5739d0247d5091d3a39312249c75bf7bf0cc2f Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Sun, 12 Nov 2017 19:36:51 +0800 Subject: [PATCH 05/28] update wip --- paddle/operators/detail/CMakeLists.txt | 1 + paddle/operators/detail/recv_impl.cc | 57 ++++++++++++++++ paddle/operators/detail/send_impl.cc | 33 ++++++++++ paddle/operators/detail/send_recv_impl.h | 82 ++++++++++++++++++++++++ 4 files changed, 173 insertions(+) create mode 100644 paddle/operators/detail/CMakeLists.txt create mode 100644 paddle/operators/detail/recv_impl.cc create mode 100644 paddle/operators/detail/send_impl.cc create mode 100644 paddle/operators/detail/send_recv_impl.h diff --git a/paddle/operators/detail/CMakeLists.txt b/paddle/operators/detail/CMakeLists.txt new file mode 100644 index 00000000000000..cbc104454915ac --- /dev/null +++ b/paddle/operators/detail/CMakeLists.txt @@ -0,0 +1 @@ +grpc_library(sendrecvop_grpc SRCS recv_impl.cc send_impl.cc PROTO send_recv.proto) diff --git a/paddle/operators/detail/recv_impl.cc b/paddle/operators/detail/recv_impl.cc new file mode 100644 index 00000000000000..befcaecec61568 --- /dev/null +++ b/paddle/operators/detail/recv_impl.cc @@ -0,0 +1,57 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. 
+ + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "send_recv_impl.h" + +namespace paddle { +namespace operators { +namespace detail { + +Status SendRecvServerImpl::InitVariables( + ServerContext *context, + ServerReader *in_var_reader) override { + // set up all variables to run server side block + PADDLE_ENFORCE(scope_); + VariableMessage in_buf; + while (in_var_reader->Read(&in_buf)) { + // create var if not exist + auto *var = scope_->Var(in_buf.varname); + auto *tensor = var->GetMutable(); + std::istringstream iss(in_buf.serialized); + framework::DeserializeFromStream(iss, *tensor); + } + return Status::OK; +} + +Status SendRecvServerImpl::SendTensor(ServerContext *context, + const std::string *in_tensor, + std::string *out_tensor) override { + framework::LodTensor t; + // TODO(typhoonzero): desirealize in_tensor and run pserver network. + std::istringstream iss(*in_tensor); + framework::Tensor t; + framework::DesirializeFromStream(iss, &t); + lodtensor_queue_.Push(std::move(t)); + // Block util the sub graph is done. + auto t = lodtensor_return_queue_.Pop(); + std::ostringstream oss; + framework::SerializeToStream(oss, &t); + *out_tensor = oss.str(); + + return Status::OK; +} + +} // namespace detail +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc new file mode 100644 index 00000000000000..2e512d5de1cab0 --- /dev/null +++ b/paddle/operators/detail/send_impl.cc @@ -0,0 +1,33 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#include "send_recv_impl.h" + +namespace paddle { +namespace operators { +namespace detail { + +bool RPCClient::SendTensor(const framework::LoDTensor &tensor) { + ClientContext context; + Status status = stub_->SendTensor(&context, tensor); + if (!status.ok()) { + std::cout << "GetFeature rpc failed." << std::endl; + return false; + } + return true; +} + +} // namespace detail +} // namespace operators +} // namespace paddle diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h new file mode 100644 index 00000000000000..9cc0d3b8d8f93c --- /dev/null +++ b/paddle/operators/detail/send_recv_impl.h @@ -0,0 +1,82 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +#pragma once + +#include "paddle/framework/data_type.h" +#include "paddle/framework/lod_tensor.h" +#include "paddle/operators/detail/simple_block_queue.h" + +#include +#include +#include +#include +#include +#include "paddle/operators/send_recv.grpc.pb.h" + +using grpc::Channel; +using grpc::ClientContext; +using grpc::ClientReader; +using grpc::ClientReaderWriter; +using grpc::ClientWriter; +using grpc::Status; +using sendrecv::SendRecvOp; +using sendrecv::SendTensor; + +namespace paddle { +namespace operators { +namespace detail { + +class SendRecvServerImpl final : public SendRecvOp::Service { + public: + explicit SendRecvServerImpl() {} + + void SetScope(framework::Scope *scope) { scope_ = scope; } + + Status InitVariables(ServerContext *context, + ServerReader *in_var_reader) override; + + Status SendTensor(ServerContext *context, const std::string *in_tensor, + std::string *out_tensor) override; + + const framework::LodTensor &Get() const { return lodtensor_queue_.Pop(); } + + void Push(framework::LodTensor &tensor) { + lodtensor_return_queue_.Push(tensor); + } + + private: + framework::Scope *scope_; + SimpleBlockQueue lodtensor_queue_; + SimpleBlockQueue lodtensor_return_queue_; + SimpleBlockQueue selected_rows_queue_; + SimpleBlockQueue selected_rows_return_queue_; +}; + +// RPCClient is a class to send tensors to pserver sub-network +// using different hashing methods. +class RPCClient { + public: + RPCClient(std::shared_ptr channel) + : stub_(SendRecvOp::NewStub(channel)) {} + + bool SendTensor(const framework::LoDTensor &tensor); + + private: + std::unique_ptr stub_; +}; + +} // namespace detail +} // namespace operators +} // namespace paddle From e6a2f536bf40ad21f91edee5d8e1c2755b594fa4 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 13 Nov 2017 16:53:18 +0800 Subject: [PATCH 06/28] update proto --- cmake/external/grpc.cmake | 2 +- cmake/generic.cmake | 12 +++++- paddle/framework/lod_tensor.cc | 3 +- paddle/framework/lod_tensor.h | 5 ++- paddle/operators/detail/CMakeLists.txt | 2 +- paddle/operators/detail/recv_impl.cc | 34 +++++++++-------- paddle/operators/detail/send_impl.cc | 34 ++++++++++++++++- paddle/operators/detail/send_recv_impl.h | 48 ++++++++++++++---------- paddle/operators/send_recv.proto | 40 -------------------- 9 files changed, 97 insertions(+), 83 deletions(-) delete mode 100644 paddle/operators/send_recv.proto diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index 9a1a484aa5b74e..5fcafb4ba78a14 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -30,7 +30,7 @@ ExternalProject_Add( UPDATE_COMMAND "" # TODO(jhseu): Remove this PATCH_COMMAND once grpc removes the dependency # on "grpc" from the "grpc++_unsecure" rule. - PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_SOURCES_DIR}/src/extern_grpc/ + # PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_SOURCES_DIR}/src/extern_grpc/ # BUILD_COMMAND ${CMAKE_COMMAND} --build . 
--config Release --target grpc_cpp_plugin # COMMAND ${CMAKE_COMMAND} --build . --config Release --target grpc++_unsecure # INSTALL_COMMAND "" diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 4e8b6ee6597538..e02aca02a57c74 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -471,7 +471,7 @@ endfunction() function(grpc_library TARGET_NAME) set(oneValueArgs PROTO) - set(multiValueArgs SRCS) + set(multiValueArgs SRCS DEPS) set(options "") cmake_parse_arguments(grpc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -484,6 +484,7 @@ function(grpc_library TARGET_NAME) protobuf_generate_cpp(grpc_proto_srcs grpc_proto_hdrs "${ABS_PROTO}") set(grpc_grpc_srcs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.cc") set(grpc_grpc_hdrs "${CMAKE_CURRENT_BINARY_DIR}/${PROTO_WE}.grpc.pb.h") + cc_library("${TARGET_NAME}_proto" SRCS "${grpc_proto_srcs}") add_custom_command( OUTPUT "${grpc_grpc_srcs}" "${grpc_grpc_hdrs}" @@ -492,5 +493,12 @@ function(grpc_library TARGET_NAME) --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" DEPENDS "${ABS_PROTO}") - cc_library("${TARGET_NAME}" SRCS "${SRCS}" "${grpc_grpc_srcs}" "${grpc_proto_srcs}" DEPS protobuf grpc) + SET(default_cxx_flags ${CMAKE_CXX_FLAGS}) + STRING(REPLACE "-Werror=non-virtual-dtor" "" no_warn_flags ${CMAKE_CXX_FLAGS}) + SET(CMAKE_CXX_FLAGS "${no_warn_flags}" CACHE STRING "Compiler flags") + cc_library("${TARGET_NAME}_grpc" SRCS "${grpc_grpc_srcs}" DEPS grpc) + message("grpc flags: ${no_warn_flags}") + + cc_library("${TARGET_NAME}" SRCS "${grpc_library_SRCS}" DEPS "${TARGET_NAME}_grpc" "${TARGET_NAME}_proto" "${grpc_library_DEPS}") + SET(CMAKE_CXX_FLAGS "${default_cxx_flags}" CACHE STRING "Compiler flags") endfunction() diff --git a/paddle/framework/lod_tensor.cc b/paddle/framework/lod_tensor.cc index cc90f1436a0ac2..fdf6de4babff3b 100644 --- a/paddle/framework/lod_tensor.cc +++ b/paddle/framework/lod_tensor.cc @@ -187,7 +187,8 @@ void AppendLoD(LoD *lod, const LoD &lod_length) { } } -void SerializeToStream(std::ostream &os, const LoDTensor &tensor) { +void SerializeToStream(std::ostream &os, const LoDTensor &tensor, + const platform::DeviceContext &dev_ctx) { // TODO(typhoonzero): serialize to ostream { // the 1st field, uint32_t version constexpr uint32_t version = 0; diff --git a/paddle/framework/lod_tensor.h b/paddle/framework/lod_tensor.h index 9bee72efff0ac9..9a796fa229c66f 100644 --- a/paddle/framework/lod_tensor.h +++ b/paddle/framework/lod_tensor.h @@ -191,9 +191,10 @@ void AppendLoD(LoD* lod, const LoD& lod_length); /* * Serialize/Desiralize LoDTensor to std::ostream * You can pass ofstream or ostringstream to serilize to file - * or to a in memory string. + * or to a in memory string. GPU tensor will be copied to CPU. 
*/ -void SerializeToStream(std::ostream& os, const LoDTensor& tensor); +void SerializeToStream(std::ostream& os, const LoDTensor& tensor, + const platform::DeviceContext& dev_ctx); void DeserializeFromStream(std::istream& is, LoDTensor* tensor); } // namespace framework diff --git a/paddle/operators/detail/CMakeLists.txt b/paddle/operators/detail/CMakeLists.txt index cbc104454915ac..f6bdc63cc2cfae 100644 --- a/paddle/operators/detail/CMakeLists.txt +++ b/paddle/operators/detail/CMakeLists.txt @@ -1 +1 @@ -grpc_library(sendrecvop_grpc SRCS recv_impl.cc send_impl.cc PROTO send_recv.proto) +grpc_library(sendrecvop_grpc SRCS recv_impl.cc send_impl.cc PROTO send_recv.proto DEPS lod_tensor selected_rows) diff --git a/paddle/operators/detail/recv_impl.cc b/paddle/operators/detail/recv_impl.cc index befcaecec61568..6e1616a0b8e7cd 100644 --- a/paddle/operators/detail/recv_impl.cc +++ b/paddle/operators/detail/recv_impl.cc @@ -19,35 +19,39 @@ namespace operators { namespace detail { Status SendRecvServerImpl::InitVariables( - ServerContext *context, - ServerReader *in_var_reader) override { + ServerContext *context, ServerReader *in_var_reader, + VoidMessage *void_ret) { // set up all variables to run server side block PADDLE_ENFORCE(scope_); VariableMessage in_buf; while (in_var_reader->Read(&in_buf)) { // create var if not exist - auto *var = scope_->Var(in_buf.varname); + auto *var = scope_->Var(in_buf.varname()); auto *tensor = var->GetMutable(); - std::istringstream iss(in_buf.serialized); - framework::DeserializeFromStream(iss, *tensor); + std::istringstream iss(in_buf.serialized()); + framework::DeserializeFromStream(iss, tensor); } + *void_ret = VoidMessage(); return Status::OK; } -Status SendRecvServerImpl::SendTensor(ServerContext *context, - const std::string *in_tensor, - std::string *out_tensor) override { - framework::LodTensor t; +Status SendRecvServerImpl::SendVariable(ServerContext *context, + const VariableMessage *in_var, + VariableMessage *out_var) { + framework::LoDTensor t; // TODO(typhoonzero): desirealize in_tensor and run pserver network. - std::istringstream iss(*in_tensor); - framework::Tensor t; - framework::DesirializeFromStream(iss, &t); + std::istringstream iss(in_var->serialized()); + framework::DeserializeFromStream(iss, &t); lodtensor_queue_.Push(std::move(t)); // Block util the sub graph is done. - auto t = lodtensor_return_queue_.Pop(); + t = lodtensor_return_queue_.Pop(); std::ostringstream oss; - framework::SerializeToStream(oss, &t); - *out_tensor = oss.str(); + // FIXME(typhoonzero): get context from op. + framework::SerializeToStream(oss, t, platform::CPUDeviceContext()); + std::string *varname = out_var->mutable_varname(); + *varname = in_var->varname(); + std::string *serialized = out_var->mutable_serialized(); + *serialized = oss.str(); return Status::OK; } diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc index 2e512d5de1cab0..00a42163a7bf3b 100644 --- a/paddle/operators/detail/send_impl.cc +++ b/paddle/operators/detail/send_impl.cc @@ -18,16 +18,46 @@ namespace paddle { namespace operators { namespace detail { -bool RPCClient::SendTensor(const framework::LoDTensor &tensor) { +bool RPCClient::SendTensor(const framework::LoDTensor &tensor, ) { ClientContext context; Status status = stub_->SendTensor(&context, tensor); if (!status.ok()) { - std::cout << "GetFeature rpc failed." << std::endl; + std::cout << "SendTensor rpc failed." 
<< std::endl; return false; } return true; } +Status RPCClient::InitVariables() { + // write streams of Variable to server + ClientContext context; + VoidMessage void_ret; + std::unique_ptr> writer( + stub_->InitVariables(&context, &void_ret)); + // send vars in scope to server using this stream. + std::vector names = scope_.GetAllNames(); + for (auto n = names.begin(); n != names.end(); n++) { + auto *var = scope_.FindVar(*n); + // TODO(typhoonzero): serialize by type. + auto *tensor = var->Get(); + VariableMessage msg; + msg.varname = *n; + std::ostringstream oss; + framework::SerializeToStream(oss, *tensor); + // FIXME(typhoonzero): no copy + msg.serialized = oss.str(); + writer->Write(msg); + } + return Status::OK; +} + +Status SendVariable(const framework::Variable *var, + framework::Variable *out_var) { + // ClientContext context; + // stub_->SendVariable(&context, ) + return Status::OK; +} + } // namespace detail } // namespace operators } // namespace paddle diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h index 9cc0d3b8d8f93c..51be28a854111e 100644 --- a/paddle/operators/detail/send_recv_impl.h +++ b/paddle/operators/detail/send_recv_impl.h @@ -16,50 +16,60 @@ #include "paddle/framework/data_type.h" #include "paddle/framework/lod_tensor.h" +#include "paddle/framework/scope.h" +#include "paddle/framework/selected_rows.h" #include "paddle/operators/detail/simple_block_queue.h" -#include -#include -#include -#include -#include -#include "paddle/operators/send_recv.grpc.pb.h" +// #include +// #include +// #include +// #include +#include "paddle/operators/detail/send_recv.grpc.pb.h" +#include "paddle/operators/detail/send_recv.pb.h" + +#include using grpc::Channel; +using grpc::ServerContext; +using grpc::ServerReader; + using grpc::ClientContext; using grpc::ClientReader; using grpc::ClientReaderWriter; using grpc::ClientWriter; using grpc::Status; -using sendrecv::SendRecvOp; -using sendrecv::SendTensor; +using sendrecv::SendRecvService; +using sendrecv::VariableMessage; +using sendrecv::VoidMessage; namespace paddle { namespace operators { namespace detail { -class SendRecvServerImpl final : public SendRecvOp::Service { +class SendRecvServerImpl final : public SendRecvService::Service { public: explicit SendRecvServerImpl() {} void SetScope(framework::Scope *scope) { scope_ = scope; } Status InitVariables(ServerContext *context, - ServerReader *in_var_reader) override; + ServerReader *in_var_reader, + VoidMessage *void_ret) override; - Status SendTensor(ServerContext *context, const std::string *in_tensor, - std::string *out_tensor) override; + Status SendVariable(ServerContext *context, const VariableMessage *in_var, + VariableMessage *out_var) override; - const framework::LodTensor &Get() const { return lodtensor_queue_.Pop(); } + const framework::LoDTensor Get() { return this->lodtensor_queue_.Pop(); } - void Push(framework::LodTensor &tensor) { - lodtensor_return_queue_.Push(tensor); + void Push(const framework::LoDTensor &tensor) { + this->lodtensor_return_queue_.Push(tensor); } private: + // Scope for send recv to run. 
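+  // Borrowed pointer: the recv op installs it via SetScope() and keeps
+  // ownership; the service must not delete it.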
framework::Scope *scope_; - SimpleBlockQueue lodtensor_queue_; - SimpleBlockQueue lodtensor_return_queue_; + SimpleBlockQueue lodtensor_queue_; + SimpleBlockQueue lodtensor_return_queue_; SimpleBlockQueue selected_rows_queue_; SimpleBlockQueue selected_rows_return_queue_; }; @@ -69,12 +79,12 @@ class SendRecvServerImpl final : public SendRecvOp::Service { class RPCClient { public: RPCClient(std::shared_ptr channel) - : stub_(SendRecvOp::NewStub(channel)) {} + : stub_(SendRecvService::NewStub(channel)) {} bool SendTensor(const framework::LoDTensor &tensor); private: - std::unique_ptr stub_; + std::unique_ptr stub_; }; } // namespace detail diff --git a/paddle/operators/send_recv.proto b/paddle/operators/send_recv.proto deleted file mode 100644 index 72a9232dc34fb0..00000000000000 --- a/paddle/operators/send_recv.proto +++ /dev/null @@ -1,40 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. */ - -syntax = "proto3"; -option optimize_for = LITE_RUNTIME; - -package sendrecv; - -service SendRecvOp { - // Init Variables before running server side block. - rpc InitVariables(stream VariableMessage) returns (VoidMessage); - // For parameter server round-robin like hashing, do not split tensors. - // Send and recv only one tensor - rpc SendVariable(VariableMessage) returns (VariableMessage); -} - -// VariableMessage is serialized paddle variable message. 
-// It can be: -// Tensor -// LoDTensor -// SelectedRows -message VariableMessage { - string varname = 1; - string serialized = 2; -} - -message VoidMessage { - -} \ No newline at end of file From f009f973d00a24c2bd769750838414175e27baca Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 13 Nov 2017 18:24:20 +0800 Subject: [PATCH 07/28] update proto --- cmake/generic.cmake | 14 ++++++++----- paddle/operators/detail/send_impl.cc | 26 +++++++----------------- paddle/operators/detail/send_recv_impl.h | 6 +++++- 3 files changed, 21 insertions(+), 25 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index e02aca02a57c74..cd1cf34340b521 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -493,12 +493,16 @@ function(grpc_library TARGET_NAME) --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" DEPENDS "${ABS_PROTO}") - SET(default_cxx_flags ${CMAKE_CXX_FLAGS}) - STRING(REPLACE "-Werror=non-virtual-dtor" "" no_warn_flags ${CMAKE_CXX_FLAGS}) - SET(CMAKE_CXX_FLAGS "${no_warn_flags}" CACHE STRING "Compiler flags") + # NOTE: grpc generated code do not generate virtual-dtor + set_source_files_properties( + ${grpc_grpc_srcs} + PROPERTIES + COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") cc_library("${TARGET_NAME}_grpc" SRCS "${grpc_grpc_srcs}" DEPS grpc) - message("grpc flags: ${no_warn_flags}") + set_source_files_properties( + ${grpc_library_SRCS} + PROPERTIES + COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") cc_library("${TARGET_NAME}" SRCS "${grpc_library_SRCS}" DEPS "${TARGET_NAME}_grpc" "${TARGET_NAME}_proto" "${grpc_library_DEPS}") - SET(CMAKE_CXX_FLAGS "${default_cxx_flags}" CACHE STRING "Compiler flags") endfunction() diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc index 00a42163a7bf3b..39240b77dfefe8 100644 --- a/paddle/operators/detail/send_impl.cc +++ b/paddle/operators/detail/send_impl.cc @@ -18,28 +18,17 @@ namespace paddle { namespace operators { namespace detail { -bool RPCClient::SendTensor(const framework::LoDTensor &tensor, ) { - ClientContext context; - Status status = stub_->SendTensor(&context, tensor); - if (!status.ok()) { - std::cout << "SendTensor rpc failed." << std::endl; - return false; - } - return true; -} - -Status RPCClient::InitVariables() { +bool InitVariables(const std::vector& var_list) { // write streams of Variable to server ClientContext context; VoidMessage void_ret; std::unique_ptr> writer( stub_->InitVariables(&context, &void_ret)); // send vars in scope to server using this stream. - std::vector names = scope_.GetAllNames(); - for (auto n = names.begin(); n != names.end(); n++) { - auto *var = scope_.FindVar(*n); + for (auto n = var_list.begin(); n != var_list.end(); n++) { + auto* var = scope_.FindVar(*n); // TODO(typhoonzero): serialize by type. 
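+    // A possible shape for that dispatch (sketch only; SelectedRows
+    // serialization is not implemented yet):
+    //   if (var->IsType<framework::SelectedRows>()) { /* rows path */ }
+    //   else { /* LoDTensor path below */ }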
- auto *tensor = var->Get(); + auto* tensor = var->Get(); VariableMessage msg; msg.varname = *n; std::ostringstream oss; @@ -48,14 +37,13 @@ Status RPCClient::InitVariables() { msg.serialized = oss.str(); writer->Write(msg); } - return Status::OK; + return true; } -Status SendVariable(const framework::Variable *var, - framework::Variable *out_var) { +bool SendVariable(const framework::Variable* var) { // ClientContext context; // stub_->SendVariable(&context, ) - return Status::OK; + return true; } } // namespace detail diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h index 51be28a854111e..e57683bbf58d3f 100644 --- a/paddle/operators/detail/send_recv_impl.h +++ b/paddle/operators/detail/send_recv_impl.h @@ -81,10 +81,14 @@ class RPCClient { RPCClient(std::shared_ptr channel) : stub_(SendRecvService::NewStub(channel)) {} - bool SendTensor(const framework::LoDTensor &tensor); + void SetScope(framework::Scope *scope) { scope_ = scope; } + bool InitVariables(const std::vector &var_list); + bool SendVariable(const framework::Variable *var); private: std::unique_ptr stub_; + // FIXME(typhoonzero): borrow scope pointer, this is not thread-safe! + framework::Scope *scope_; }; } // namespace detail From 7d30ad816c7dce5f41b94aeb692443110b487fe6 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 13 Nov 2017 19:28:26 +0800 Subject: [PATCH 08/28] update proto --- paddle/operators/detail/send_impl.cc | 47 +++++++++++++++++++----- paddle/operators/detail/send_recv_impl.h | 2 +- 2 files changed, 38 insertions(+), 11 deletions(-) diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc index 39240b77dfefe8..749b9a121a39c4 100644 --- a/paddle/operators/detail/send_impl.cc +++ b/paddle/operators/detail/send_impl.cc @@ -18,7 +18,8 @@ namespace paddle { namespace operators { namespace detail { -bool InitVariables(const std::vector& var_list) { +bool RPCClient::InitVariables(const std::vector& var_list) { + PADDLE_ENFORCE(scope_); // write streams of Variable to server ClientContext context; VoidMessage void_ret; @@ -26,23 +27,49 @@ bool InitVariables(const std::vector& var_list) { stub_->InitVariables(&context, &void_ret)); // send vars in scope to server using this stream. for (auto n = var_list.begin(); n != var_list.end(); n++) { - auto* var = scope_.FindVar(*n); - // TODO(typhoonzero): serialize by type. - auto* tensor = var->Get(); + auto* var = scope_->FindVar(*n); + // TODO(typhoonzero): support SelectedRows + PADDLE_ENFORCE(var->IsType(), + "Only support LoDTensor, %s has wrong type", *n); + auto& tensor = var->Get(); VariableMessage msg; - msg.varname = *n; + msg.set_varname(*n); std::ostringstream oss; - framework::SerializeToStream(oss, *tensor); + framework::SerializeToStream(oss, tensor, platform::CPUDeviceContext()); // FIXME(typhoonzero): no copy - msg.serialized = oss.str(); + msg.set_serialized(oss.str()); writer->Write(msg); } return true; } -bool SendVariable(const framework::Variable* var) { - // ClientContext context; - // stub_->SendVariable(&context, ) +bool RPCClient::SendVariable(const std::string& inname, + const std::string& outname) { + ClientContext context; + VariableMessage msg, out_msg; + // FIXME(typhoonzero): pass device context to here. 
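+  // Round trip: serialize the local tensor into the request message, block
+  // on the SendVariable RPC, then deserialize the optimized reply below.
+  // One way to resolve the FIXME (a sketch, not the final design) is to
+  // thread the context through the call:
+  //   bool SendVariable(const std::string &inname, const std::string &outname,
+  //                     const platform::DeviceContext &dev_ctx);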
+ auto ctx = platform::CPUDeviceContext(); + auto* var = scope_->FindVar(inname); + PADDLE_ENFORCE(var); + // TODO(typhoonzero): support SelectedRows + PADDLE_ENFORCE(var->IsType(), + "Only support LoDTensor, %s has wrong type", inname); + const framework::LoDTensor& tensor = var->Get(); + std::ostringstream oss; + framework::SerializeToStream(oss, tensor, ctx); + msg.set_varname(inname); + msg.set_serialized(oss.str()); + Status status = stub_->SendVariable(&context, msg, &out_msg); + if (status.ok()) { + return false; + } + std::istringstream iss(out_msg.serialized()); + framework::LoDTensor ret_tensor; + framework::DeserializeFromStream(iss, &ret_tensor); + auto* outvar = scope_->FindVar(outname); + framework::LoDTensor* out_tensor = outvar->GetMutable(); + // FIXME(typhoonzero): do not copy. + out_tensor->CopyFrom(ret_tensor, ctx.GetPlace(), ctx); return true; } diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h index e57683bbf58d3f..81527668c0a449 100644 --- a/paddle/operators/detail/send_recv_impl.h +++ b/paddle/operators/detail/send_recv_impl.h @@ -83,7 +83,7 @@ class RPCClient { void SetScope(framework::Scope *scope) { scope_ = scope; } bool InitVariables(const std::vector &var_list); - bool SendVariable(const framework::Variable *var); + bool SendVariable(const std::string &inname, const std::string &outname); private: std::unique_ptr stub_; From 12f86d9de8b15e5576a73b68dae5a5f6591b9734 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 13 Nov 2017 19:30:23 +0800 Subject: [PATCH 09/28] clean cmake --- cmake/external/grpc.cmake | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index 5fcafb4ba78a14..dbca4b2e0b2f46 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -28,13 +28,8 @@ ExternalProject_Add( GIT_TAG "v1.7.x" PREFIX ${GRPC_SOURCES_DIR} UPDATE_COMMAND "" - # TODO(jhseu): Remove this PATCH_COMMAND once grpc removes the dependency - # on "grpc" from the "grpc++_unsecure" rule. - # PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_CURRENT_SOURCE_DIR}/patches/grpc/CMakeLists.txt ${GRPC_SOURCES_DIR}/src/extern_grpc/ - # BUILD_COMMAND ${CMAKE_COMMAND} --build . --config Release --target grpc_cpp_plugin - # COMMAND ${CMAKE_COMMAND} --build . --config Release --target grpc++_unsecure - # INSTALL_COMMAND "" BUILD_COMMAND make grpc_cpp_plugin grpc++_unsecure + # TODO(typhoonzero): install into third_party/install INSTALL_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} @@ -60,10 +55,3 @@ SET_PROPERTY(TARGET grpc PROPERTY IMPORTED_LOCATION ${GRPC_LIBRARIES}) include_directories(${GRPC_INCLUDE_DIR}) ADD_DEPENDENCIES(grpc extern_grpc) -# grpc/src/core/ext/census/tracing.c depends on the existence of openssl/rand.h. 
-# ExternalProject_Add_Step(grpc copy_rand -# COMMAND ${CMAKE_COMMAND} -E copy -# ${CMAKE_SOURCE_DIR}/patches/grpc/rand.h ${GRPC_INCLUDE_DIR}/openssl/rand.h -# DEPENDEES patch -# DEPENDERS build -# ) From e0ae95be25f0c1d00239df8364c6d28c6b5449ca Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 13 Nov 2017 21:09:22 +0800 Subject: [PATCH 10/28] wip on op implementations --- paddle/operators/CMakeLists.txt | 9 ++++++ paddle/operators/detail/send_impl.cc | 13 ++++---- paddle/operators/detail/send_recv.proto | 39 ++++++++++++++++++++++++ paddle/operators/detail/send_recv_impl.h | 9 +++--- paddle/operators/send_op.cc | 39 ++++++++++++------------ 5 files changed, 79 insertions(+), 30 deletions(-) create mode 100644 paddle/operators/detail/send_recv.proto diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 0fa163e83df685..f4ca3c993d6756 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -180,7 +180,16 @@ set(DEPS_OPS add_subdirectory(detail) op_library(send_op SRCS send_op.cc DEPS sendrecvop_grpc) +set_source_files_properties( + send_op.cc + PROPERTIES + COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") + op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_grpc) +set_source_files_properties( + recv_op.cc + PROPERTIES + COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") op_library(cond_op SRCS cond_op.cc DEPS framework_proto tensor operator net_op) op_library(cross_entropy_op DEPS cross_entropy) diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc index 749b9a121a39c4..1621b2ebe4aed7 100644 --- a/paddle/operators/detail/send_impl.cc +++ b/paddle/operators/detail/send_impl.cc @@ -18,8 +18,8 @@ namespace paddle { namespace operators { namespace detail { -bool RPCClient::InitVariables(const std::vector& var_list) { - PADDLE_ENFORCE(scope_); +bool RPCClient::InitVariables(const framework::Scope& scope, + const std::vector& var_list) { // write streams of Variable to server ClientContext context; VoidMessage void_ret; @@ -27,7 +27,7 @@ bool RPCClient::InitVariables(const std::vector& var_list) { stub_->InitVariables(&context, &void_ret)); // send vars in scope to server using this stream. for (auto n = var_list.begin(); n != var_list.end(); n++) { - auto* var = scope_->FindVar(*n); + auto* var = scope.FindVar(*n); // TODO(typhoonzero): support SelectedRows PADDLE_ENFORCE(var->IsType(), "Only support LoDTensor, %s has wrong type", *n); @@ -43,13 +43,14 @@ bool RPCClient::InitVariables(const std::vector& var_list) { return true; } -bool RPCClient::SendVariable(const std::string& inname, +bool RPCClient::SendVariable(const framework::Scope& scope, + const std::string& inname, const std::string& outname) { ClientContext context; VariableMessage msg, out_msg; // FIXME(typhoonzero): pass device context to here. auto ctx = platform::CPUDeviceContext(); - auto* var = scope_->FindVar(inname); + auto* var = scope.FindVar(inname); PADDLE_ENFORCE(var); // TODO(typhoonzero): support SelectedRows PADDLE_ENFORCE(var->IsType(), @@ -66,7 +67,7 @@ bool RPCClient::SendVariable(const std::string& inname, std::istringstream iss(out_msg.serialized()); framework::LoDTensor ret_tensor; framework::DeserializeFromStream(iss, &ret_tensor); - auto* outvar = scope_->FindVar(outname); + auto* outvar = scope.FindVar(outname); framework::LoDTensor* out_tensor = outvar->GetMutable(); // FIXME(typhoonzero): do not copy. 
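+  // DeserializeFromStream materializes the reply into a temporary CPU
+  // tensor; CopyFrom then copies it into the output variable, which is the
+  // extra copy flagged above.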
out_tensor->CopyFrom(ret_tensor, ctx.GetPlace(), ctx); diff --git a/paddle/operators/detail/send_recv.proto b/paddle/operators/detail/send_recv.proto new file mode 100644 index 00000000000000..1ee714c4f30705 --- /dev/null +++ b/paddle/operators/detail/send_recv.proto @@ -0,0 +1,39 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +syntax = "proto3"; + +package sendrecv; + +service SendRecvService { + // Init Variables before running server side block. + rpc InitVariables(stream VariableMessage) returns (VoidMessage) {} + // For parameter server round-robin like hashing, do not split tensors. + // Send and recv only one tensor + rpc SendVariable(VariableMessage) returns (VariableMessage) {} +} + +// VariableMessage is serialized paddle variable message. +// It can be: +// Tensor +// LoDTensor +// SelectedRows +message VariableMessage { + string varname = 1; + string serialized = 2; +} + +message VoidMessage { + +} \ No newline at end of file diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h index 81527668c0a449..5ca1a13322b2ba 100644 --- a/paddle/operators/detail/send_recv_impl.h +++ b/paddle/operators/detail/send_recv_impl.h @@ -81,14 +81,13 @@ class RPCClient { RPCClient(std::shared_ptr channel) : stub_(SendRecvService::NewStub(channel)) {} - void SetScope(framework::Scope *scope) { scope_ = scope; } - bool InitVariables(const std::vector &var_list); - bool SendVariable(const std::string &inname, const std::string &outname); + bool InitVariables(const framework::Scope &scope, + const std::vector &var_list); + bool SendVariable(const framework::Scope &scope, const std::string &inname, + const std::string &outname); private: std::unique_ptr stub_; - // FIXME(typhoonzero): borrow scope pointer, this is not thread-safe! - framework::Scope *scope_; }; } // namespace detail diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index b32d5267a94d83..4283bcf0b654c7 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -21,6 +21,7 @@ #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" +#include "paddle/operators/detail/send_recv_impl.h" #include "paddle/operators/detail/simple_block_queue.h" namespace paddle { @@ -33,30 +34,30 @@ class SendOp : public framework::OperatorBase { SendOp(const std::string &type, const framework::VariableNameMap &inputs, const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { - constexpr SendOpName = "SendOp@RPCClient"; - auto *var = scope.FindVar(SendOpName); - if (var == nullptr) { - // create RPC server object if it is not inited. + : OperatorBase(type, inputs, outputs, attrs) { + // init client when the operator is created at runtime. 
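+    // The channel and stub are built once per operator instance and reused
+    // by every Run() call; the endpoint comes from the op attribute.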
+ if (!client_) { std::string endpoint = Attr("endpoint"); - var = scope.Var(SendOpName); - RPCClient *client = var->GetMutable(); + client_.reset(new detail::RPCClient( + grpc::CreateChannel(endpoint, grpc::InsecureChannelCredentials()))); + // TODO(typhoonzero): how to call InitVariables } - RPCClient *client = var->Get(); - + } + SendOp(const SendOp &) = delete; + SendOp &operator=(const SendOp &) = delete; + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { auto iname = Input("X"); auto oname = Output("Out"); - auto *var = scope.FindVar(iname); - auto *tensor = var->Get(); - // call sync send - auto *optimized_tensor = client->SendTensor(*tensor); - // FIXME(typhoonzero): do not copy - auto *out_var = scope.FindVar(oname); - out_var->GetMutable(); - out_var->CopyFrom(*optimized_tensor, dev_ctx.GetPlace(), dev_ctx); + // TODO(typhoonzero): block until server has initalized. + bool ret = client_->SendVariable(scope, iname, oname); + if (!ret) { + LOG(ERROR) << "send variable error"; + } } + + private: + std::unique_ptr client_{nullptr}; }; class SendOpMaker : public framework::OpProtoAndCheckerMaker { From 4ef4c291c4cbe3fe70d7ac958c2d2d9a6a687695 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 13 Nov 2017 21:15:16 +0800 Subject: [PATCH 11/28] wip on op implementations --- paddle/operators/send_op.cc | 5 ----- 1 file changed, 5 deletions(-) diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index 4283bcf0b654c7..fc842c8fcfa478 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -12,10 +12,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -#include -#include #include -#include #include "paddle/framework/data_type.h" #include "paddle/framework/framework.pb.h" @@ -43,8 +40,6 @@ class SendOp : public framework::OperatorBase { // TODO(typhoonzero): how to call InitVariables } } - SendOp(const SendOp &) = delete; - SendOp &operator=(const SendOp &) = delete; void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { auto iname = Input("X"); From 22b414b9242cb7ccfbd50cb98dbc87139dcdeca1 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Tue, 14 Nov 2017 11:38:33 +0800 Subject: [PATCH 12/28] compile ok adding ut --- paddle/operators/detail/recv_impl.cc | 9 +++--- paddle/operators/detail/send_recv_impl.h | 6 ++-- paddle/operators/recv_op.cc | 41 ++++++++++++------------ paddle/operators/send_op.cc | 5 +-- 4 files changed, 29 insertions(+), 32 deletions(-) diff --git a/paddle/operators/detail/recv_impl.cc b/paddle/operators/detail/recv_impl.cc index 6e1616a0b8e7cd..94bc674a5a5e3e 100644 --- a/paddle/operators/detail/recv_impl.cc +++ b/paddle/operators/detail/recv_impl.cc @@ -22,14 +22,13 @@ Status SendRecvServerImpl::InitVariables( ServerContext *context, ServerReader *in_var_reader, VoidMessage *void_ret) { // set up all variables to run server side block - PADDLE_ENFORCE(scope_); VariableMessage in_buf; while (in_var_reader->Read(&in_buf)) { // create var if not exist - auto *var = scope_->Var(in_buf.varname()); - auto *tensor = var->GetMutable(); - std::istringstream iss(in_buf.serialized()); - framework::DeserializeFromStream(iss, tensor); + // auto *var = scope_->Var(in_buf.varname()); + // auto *tensor = var->GetMutable(); + // std::istringstream iss(in_buf.serialized()); + // framework::DeserializeFromStream(iss, tensor); } *void_ret = VoidMessage(); return Status::OK; diff --git 
a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h index 5ca1a13322b2ba..55063fe0df7a71 100644 --- a/paddle/operators/detail/send_recv_impl.h +++ b/paddle/operators/detail/send_recv_impl.h @@ -30,8 +30,10 @@ #include using grpc::Channel; +using grpc::Server; using grpc::ServerContext; using grpc::ServerReader; +using grpc::ServerBuilder; using grpc::ClientContext; using grpc::ClientReader; @@ -50,8 +52,6 @@ class SendRecvServerImpl final : public SendRecvService::Service { public: explicit SendRecvServerImpl() {} - void SetScope(framework::Scope *scope) { scope_ = scope; } - Status InitVariables(ServerContext *context, ServerReader *in_var_reader, VoidMessage *void_ret) override; @@ -66,8 +66,6 @@ class SendRecvServerImpl final : public SendRecvService::Service { } private: - // Scope for send recv to run. - framework::Scope *scope_; SimpleBlockQueue lodtensor_queue_; SimpleBlockQueue lodtensor_return_queue_; SimpleBlockQueue selected_rows_queue_; diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index 9f57657675c8f0..ddfe41587206cd 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -18,48 +18,43 @@ #include #include "paddle/framework/data_type.h" +#include "paddle/framework/executer.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" #include "paddle/operators/detail/send_recv_impl.h" #include "paddle/operators/detail/simple_block_queue.h" -void RunServer(const SendRecvServerImpl &service, +namespace paddle { +namespace operators { + +void RunServer(std::shared_ptr service, const std::string &server_address) { ServerBuilder builder; builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); - builder.RegisterService(&service); + builder.RegisterService(service.get()); std::unique_ptr server(builder.BuildAndStart()); LOG(INFO) << "Server listening on " << server_address << std::endl; server->Wait(); } -namespace paddle { -namespace operators { - class RecvOp : public framework::OperatorBase { public: RecvOp(const std::string &type, const framework::VariableNameMap &inputs, const framework::VariableNameMap &outputs, const framework::AttributeMap &attrs) - : OperatorBase(type, inputs, outputs, attrs) {} - void Run(const framework::Scope &scope, - const platform::DeviceContext &dev_ctx) const override { - constexpr RecvOpName = "RecvOp@SendRecvServerImpl"; - auto *var = scope.FindVar(RecvOpName); - if (var == nullptr) { - // create RPC server object if it is not inited. + : OperatorBase(type, inputs, outputs, attrs) { + if (!rpc_service_) { std::string endpoint = Attr("endpoint"); - var = scope.Var(RecvOpName); - SendRecvServerImpl *service = var->GetMutable(); - - // start server in a thread in background - std::thread server_thread(RunServer(*service, endpoit)); + std::thread server_thread(RunServer(rpc_service_, endpoint)); } - SendRecvServerImpl *service = var->Get(); - framework::LoDTensor &t = service->Get(); + } + void Run(const framework::Scope &scope, + const platform::DeviceContext &dev_ctx) const override { + // blocking get one var from client. 
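+    // Server-side flow: block until a tensor arrives, copy it into the
+    // graph input, run the optimize sub-block with an Executor, then Push
+    // the result so SendVariable can return it to the client.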
+ const framework::LoDTensor &t = rpc_service_->Get(); // set graph input var - auto *var = scope.Var(Input("X")); + auto *var = scope.FindVar(Input("X")); auto *tensor = var->GetMutable(); // FIXME(typhoonzero): do not copy tensor->CopyFrom(t, dev_ctx.GetPlace(), dev_ctx); @@ -71,8 +66,12 @@ class RecvOp : public framework::OperatorBase { executor.Run(*program, &scope, block->ID(), false /*create_local_scope*/); auto *out_var = scope.FindVar("Out"); - service->Push(out_var->Get()); + // push back + rpc_service_->Push(out_var->Get()); } + + protected: + std::shared_ptr rpc_service_; }; class RecvOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index fc842c8fcfa478..3c7cc9eb909df2 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -18,6 +18,7 @@ #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" + #include "paddle/operators/detail/send_recv_impl.h" #include "paddle/operators/detail/simple_block_queue.h" @@ -51,8 +52,8 @@ class SendOp : public framework::OperatorBase { } } - private: - std::unique_ptr client_{nullptr}; + protected: + std::shared_ptr client_{nullptr}; }; class SendOpMaker : public framework::OpProtoAndCheckerMaker { From c230adf0084064dc514406ac52f05c60b15e253a Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Tue, 14 Nov 2017 20:48:27 +0800 Subject: [PATCH 13/28] wip unitest --- paddle/operators/CMakeLists.txt | 1 + paddle/operators/recv_op.cc | 14 +- paddle/operators/send_recv_op_test.cc | 118 ++++++++ .../v2/framework/tests/test_send_recv.py | 273 ------------------ 4 files changed, 128 insertions(+), 278 deletions(-) create mode 100644 paddle/operators/send_recv_op_test.cc delete mode 100644 python/paddle/v2/framework/tests/test_send_recv.py diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index f4ca3c993d6756..afce29de321a57 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -239,3 +239,4 @@ if(WITH_GPU) nv_test(nccl_op_test SRCS nccl_op_test.cu DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) +cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op grpc) diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index ddfe41587206cd..4f84d82f94c302 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -18,7 +18,7 @@ #include #include "paddle/framework/data_type.h" -#include "paddle/framework/executer.h" +#include "paddle/framework/executor.h" #include "paddle/framework/framework.pb.h" #include "paddle/framework/lod_tensor.h" #include "paddle/framework/op_registry.h" @@ -46,15 +46,16 @@ class RecvOp : public framework::OperatorBase { : OperatorBase(type, inputs, outputs, attrs) { if (!rpc_service_) { std::string endpoint = Attr("endpoint"); - std::thread server_thread(RunServer(rpc_service_, endpoint)); + std::thread server_thread(RunServer, rpc_service_, endpoint); } } void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { // blocking get one var from client. 
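+    // NewScope() below creates a child scope so variables produced while
+    // running the optimize block stay isolated from the operator's scope.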
const framework::LoDTensor &t = rpc_service_->Get(); + framework::Scope &recv_scope = scope.NewScope(); // set graph input var - auto *var = scope.FindVar(Input("X")); + auto *var = recv_scope.FindVar(Input("X")); auto *tensor = var->GetMutable(); // FIXME(typhoonzero): do not copy tensor->CopyFrom(t, dev_ctx.GetPlace(), dev_ctx); @@ -63,9 +64,10 @@ class RecvOp : public framework::OperatorBase { auto *program = block->Program(); framework::Executor executor(dev_ctx); // Run sub graph to get optimized tensor - executor.Run(*program, &scope, block->ID(), false /*create_local_scope*/); + executor.Run(*program, &recv_scope, block->ID(), + false /*create_local_scope*/); - auto *out_var = scope.FindVar("Out"); + auto *out_var = recv_scope.FindVar("Out"); // push back rpc_service_->Push(out_var->Get()); } @@ -89,6 +91,8 @@ This operator will recv tensor from send_op "IP address to listen on.") .SetDefault("127.0.0.1:6164") .AddCustomChecker([](const std::string &ip) { return !ip.empty(); }); + AddAttr("OptimizeBlock", "type BlockDescBind*", + "optimize network run in server"); } }; diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc new file mode 100644 index 00000000000000..b0c623522c9074 --- /dev/null +++ b/paddle/operators/send_recv_op_test.cc @@ -0,0 +1,118 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. */ + +// TODO(typhoonzero): add python bindings for this test as +// a RemoteOptimizer. 
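+// Test outline: StartServerNet() runs a recv op in a background thread with
+// a one-op "sum" block as the server-side program; the main thread runs a
+// send op against the same endpoint and checks the summed result.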
+ +#include + +#include "gtest/gtest.h" +#include "paddle/framework/op_registry.h" +#include "paddle/framework/operator.h" +#include "paddle/framework/program_desc.h" + +USE_NO_KERNEL_OP(send); +USE_NO_KERNEL_OP(recv); + +void InitTensorsInScope(paddle::framework::Scope &scope, + paddle::platform::CPUPlace &place) { + paddle::platform::CPUDeviceContext ctx(place); + auto var = scope.Var("X"); + auto tensor = var->GetMutable(); + tensor->Resize({10, 10}); + int *expect = tensor->mutable_data(place); + for (int64_t i = 0; i < tensor->numel(); ++i) { + expect[i] = static_cast(i); + } + + auto out_var = scope.Var("Out"); + auto out_tensor = out_var->GetMutable(); + out_tensor->Resize({10, 10}); + tensor->mutable_data(place); // allocate +} + +void AddOp(const std::string &type, + const paddle::framework::VariableNameMap &inputs, + const paddle::framework::VariableNameMap &outputs, + paddle::framework::AttributeMap attrs, + paddle::framework::BlockDescBind *block) { + // insert output + for (auto kv : outputs) { + for (auto v : kv.second) { + auto var = block->Var(v); + var->SetDataType(paddle::framework::DataType::FP32); + } + } + + // insert op + auto op = block->AppendOp(); + op->SetType(type); + for (auto &kv : inputs) { + op->SetInput(kv.first, kv.second); + } + for (auto &kv : outputs) { + op->SetOutput(kv.first, kv.second); + } + op->SetAttrMap(attrs); +} + +void StartServerNet() { + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + InitTensorsInScope(scope, place); + + // sub program run in recv_op, for simple test we use sum + paddle::framework::ProgramDescBind program; + paddle::framework::BlockDescBind *block = program.MutableBlock(0); + AddOp("sum", {{"X", {"X"}}}, {{"Out", {"Out"}}}, {}, block); + + paddle::framework::AttributeMap attrs; + attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); + attrs.insert({"OptimizeBlock", block}); + auto recv_op = paddle::framework::OpRegistry::CreateOp( + "recv", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); + paddle::platform::CPUDeviceContext ctx(place); + recv_op->Run(scope, ctx); +} + +TEST(SendRecvOp, CPU) { + std::thread server_thread(StartServerNet); + + // local net + paddle::framework::Scope scope; + paddle::platform::CPUPlace place; + InitTensorsInScope(scope, place); + // FIXME(typhoonzero): call client side init tensors here. 
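+  // A sketch of that call once it is wired up (values illustrative):
+  //   detail::RPCClient client(grpc::CreateChannel(
+  //       "127.0.0.1:6174", grpc::InsecureChannelCredentials()));
+  //   client.InitVariables(scope, {"X"});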
+ + paddle::framework::AttributeMap attrs; + attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); + + auto send_op = paddle::framework::OpRegistry::CreateOp( + "send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); + paddle::platform::CPUDeviceContext ctx(place); + send_op->Run(scope, ctx); + + auto in_var = scope.Var("X"); + auto tensor = in_var->GetMutable(); + int *expected = tensor->data(); + + auto out_var = scope.Var("Out"); + auto target = out_var->GetMutable(); + int *actual = target->data(); + for (int64_t i = 0; i < target->numel(); ++i) { + // sumed value + EXPECT_EQ(expected[i] * 2, actual[i]); + } + server_thread.join(); +} diff --git a/python/paddle/v2/framework/tests/test_send_recv.py b/python/paddle/v2/framework/tests/test_send_recv.py deleted file mode 100644 index c670ab0a2477d3..00000000000000 --- a/python/paddle/v2/framework/tests/test_send_recv.py +++ /dev/null @@ -1,273 +0,0 @@ -import unittest -import numpy as np -import random -import itertools -import paddle.v2.framework.core as core -import collections -from paddle.v2.framework.backward import append_backward_ops -from paddle.v2.framework.op import Operator -from paddle.v2.framework.executor import Executor -from paddle.v2.framework.framework import Program, OpProtoHolder - - -def create_op(scope, op_type, inputs, outputs, attrs): - kwargs = dict() - - def __create_var__(name, var_name): - scope.var(var_name).get_tensor() - kwargs[name].append(var_name) - - for in_name, in_dup in Operator.get_op_inputs(op_type): - if in_name in inputs: - kwargs[in_name] = [] - if in_dup: - sub_in = inputs[in_name] - for sub_in_name, _ in sub_in: - __create_var__(in_name, sub_in_name) - else: - __create_var__(in_name, in_name) - - for out_name, out_dup in Operator.get_op_outputs(op_type): - if out_name in outputs: - kwargs[out_name] = [] - if out_dup: - sub_out = outputs[out_name] - for sub_out_name, _ in sub_out: - __create_var__(out_name, sub_out_name) - else: - __create_var__(out_name, out_name) - - for attr_name in Operator.get_op_attr_names(op_type): - if attr_name in attrs: - kwargs[attr_name] = attrs[attr_name] - - return Operator(op_type, **kwargs) - - -def set_input(scope, op, inputs, place): - def __set_input__(var_name, var): - if isinstance(var, tuple) or isinstance(var, np.ndarray): - tensor = scope.find_var(var_name).get_tensor() - if isinstance(var, tuple): - tensor.set_lod(var[1]) - var = var[0] - tensor.set_dims(var.shape) - tensor.set(var, place) - elif isinstance(var, float): - scope.find_var(var_name).set_float(var) - elif isinstance(var, int): - scope.find_var(var_name).set_int(var) - - for in_name, in_dup in Operator.get_op_inputs(op.type()): - if in_name in inputs: - if in_dup: - sub_in = inputs[in_name] - for sub_in_name, sub_in_val in sub_in: - __set_input__(sub_in_name, sub_in_val) - else: - __set_input__(in_name, inputs[in_name]) - - -def append_input_output(block, op_proto, np_list, is_input): - '''Insert VarDesc and generate Python variable instance''' - proto_list = op_proto.inputs if is_input else op_proto.outputs - - def create_var(block, name, np_list, var_proto): - if name not in np_list: - assert var_proto.intermediate, "{} not found".format(name) - shape = None - lod_level = None - else: - np_value = np_list[name] - if isinstance(np_value, tuple): - shape = list(np_value[0].shape) - lod_level = len(np_value[1]) - else: - shape = list(np_value.shape) - lod_level = 0 - return block.create_var( - dtype="float32", shape=shape, lod_level=lod_level, name=name) - - var_dict = {} - for var_proto in 
proto_list: - var_name = str(var_proto.name) - if is_input: - if (var_name not in np_list) and var_proto.dispensable: - continue - assert (var_name in np_list) or (var_proto.dispensable), \ - "Missing {} as input".format(var_name) - if var_proto.duplicable: - assert isinstance(np_list[var_name], list), \ - "Duplicable {} should be set as list".format(var_name) - var_list = [] - for (name, np_value) in np_list[var_name]: - var_list.append( - create_var(block, name, {name: np_value}, var_proto)) - var_dict[var_name] = var_list - else: - var_dict[var_name] = create_var(block, var_name, np_list, var_proto) - - return var_dict - - -class SendRecvTest(unittest.TestCase): - def feed_var(self, input_vars, place): - feed_map = {} - for var_name in input_vars: - if isinstance(input_vars[var_name], list): - for name, np_value in self.inputs[var_name]: - tensor = core.LoDTensor() - tensor.set(np_value, place) - feed_map[name] = tensor - else: - tensor = core.LoDTensor() - if isinstance(self.inputs[var_name], tuple): - tensor.set(self.inputs[var_name][0], place) - tensor.set_lod(self.inputs[var_name][1]) - else: - tensor.set(self.inputs[var_name], place) - feed_map[var_name] = tensor - - return feed_map - - def check_output_with_place(self, place, atol): - op_proto = OpProtoHolder.instance().get_op_proto(self.op_type) - - program = Program() - block = program.global_block() - - inputs = append_input_output(block, op_proto, self.inputs, True) - outputs = append_input_output(block, op_proto, self.outputs, False) - - op = block.append_op( - type=self.op_type, - inputs=inputs, - outputs=outputs, - attrs=self.attrs if hasattr(self, "attrs") else dict()) - # infer variable type and infer shape in compile-time - op.desc.infer_var_type(block.desc) - op.desc.infer_shape(block.desc) - - fetch_list = [] - for var_name, var in outputs.iteritems(): - if var_name in self.outputs: - if isinstance(var, list): - for v in var: - fetch_list.append(v) - else: - fetch_list.append(var) - - feed_map = self.feed_var(inputs, place) - - exe = Executor(place) - outs = exe.run(program, feed=feed_map, fetch_list=fetch_list) - - for out_name, out_dup in Operator.get_op_outputs(self.op_type): - if out_name not in self.outputs: - continue - - def find_actual(target_name, fetch_list): - found = [ - i for i, var in enumerate(fetch_list) - if var.name == target_name - ] - self.assertTrue( - len(found) == 1, "Found {} {}".format( - len(found), target_name)) - return found[0] - - if out_dup: - sub_out = self.outputs[out_name] - if not isinstance(sub_out, list): - raise AssertionError("sub_out type %s is not list", - type(sub_out)) - for sub_out_name, expect in sub_out: - idx = find_actual(sub_out_name, fetch_list) - actual = outs[idx] - actual_t = np.array(actual) - expect_t = expect[0] \ - if isinstance(expect, tuple) else expect - self.assertTrue( - np.allclose( - actual_t, expect_t, atol=atol), - "Output (" + sub_out_name + ") has diff at " + - str(place)) - if isinstance(expect, tuple): - self.assertListEqual( - actual.lod(), expect[1], "Output (" + sub_out_name + - ") has different lod at " + str(place)) - else: - idx = find_actual(out_name, fetch_list) - actual = outs[idx] - actual_t = np.array(actual) - expect = self.outputs[out_name] - expect_t = expect[0] if isinstance(expect, tuple) else expect - self.assertTrue( - np.allclose( - actual_t, expect_t, atol=atol), - "Output (" + out_name + ") has diff at " + str(place)) - if isinstance(expect, tuple): - self.assertListEqual(actual.lod(), expect[1], - "Output (" + out_name + - ") 
has different lod at " + str(place)) - - def check_output(self, atol=1e-5): - places = [core.CPUPlace()] - if core.is_compile_gpu() and core.op_support_gpu(self.op_type): - places.append(core.GPUPlace(0)) - for place in places: - self.check_output_with_place(place, atol) - - def __assert_is_close(self, numeric_grads, analytic_grads, names, - max_relative_error, msg_prefix): - - for a, b, name in itertools.izip(numeric_grads, analytic_grads, names): - abs_a = np.abs(a) - abs_a[abs_a < 1e-3] = 1 - - diff_mat = np.abs(a - b) / abs_a - max_diff = np.max(diff_mat) - - def err_msg(): - offset = np.argmax(diff_mat > max_relative_error) - return ("%s Variable %s max gradient diff %f over limit %f, " - "the first error element is %d, %f, %f") % ( - msg_prefix, name, max_diff, max_relative_error, - offset, a.flatten()[offset], b.flatten()[offset]) - - self.assertLessEqual(max_diff, max_relative_error, err_msg()) - - @staticmethod - def _create_var_descs_(block, var_dict): - # FIXME: Try unify with `append_input_output` - for param_name in var_dict: - var = var_dict[param_name] - if not isinstance(var, list) and not isinstance(var, tuple): - var = [(param_name, var, None)] - if not isinstance(var[0], list) and not isinstance(var[0], tuple): - var = [(param_name, var[0], var[1])] - - for i, item in enumerate(var): - if not isinstance(item[0], basestring): - item = [[param_name] + list(item)] - if len(item) == 2: - # only set var name and value, set lod to None - var[i] = list(item) + [None] - - var_descs = [(block.create_var( - name=name, shape=each.shape, dtype=each.dtype), each, lod) - for name, each, lod in var] - - yield param_name, var_descs - - @staticmethod - def _merge_list(iterable): - return reduce(lambda a, b: list(a) + list(b), iterable, []) - - @staticmethod - def _numpy_to_lod_tensor(np_value, lod, place): - tensor = core.LoDTensor() - tensor.set(np_value, place) - if lod is not None: - tensor.set_lod(lod) - return tensor From 453421b8935d8b4b97ae4e08e40ed7aa2b9c7458 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Wed, 15 Nov 2017 18:59:53 +0800 Subject: [PATCH 14/28] add extern cares for linking --- cmake/external/cares.cmake | 41 +++++++++++++++++++++ cmake/external/grpc.cmake | 49 ++++++++++++------------- cmake/external/zlib.cmake | 2 + cmake/generic.cmake | 2 +- paddle/operators/CMakeLists.txt | 6 ++- paddle/operators/recv_op.cc | 11 +++++- paddle/operators/send_recv_op_test.cc | 12 +++++- python/paddle/v2/framework/optimizer.py | 29 ++++++++++++++- 8 files changed, 121 insertions(+), 31 deletions(-) create mode 100644 cmake/external/cares.cmake diff --git a/cmake/external/cares.cmake b/cmake/external/cares.cmake new file mode 100644 index 00000000000000..a1b0fe3bfd902f --- /dev/null +++ b/cmake/external/cares.cmake @@ -0,0 +1,41 @@ +# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +include (ExternalProject) + +# NOTE: c-ares is needed when linking with grpc. 
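+# gRPC uses c-ares for DNS resolution, so targets that link the static gRPC
+# archives must link it too. Illustrative consumer (target name hypothetical):
+#   target_link_libraries(my_rpc_test grpc++_unsecure grpc_unsecure gpr
+#                         cares zlib_target protobuf)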
+ +SET(CARES_SOURCES_DIR ${THIRD_PARTY_PATH}/cares) +SET(CARES_INSTALL_DIR ${THIRD_PARTY_PATH}/install/cares) +SET(CARES_INCLUDE_DIR "${CARES_INSTALL_DIR}/include/" CACHE PATH "cares include directory." FORCE) + +ExternalProject_Add( + extern_cares + GIT_REPOSITORY "https://github.com/c-ares/c-ares.git" + GIT_TAG "cares-1_13_0" + PREFIX ${CARES_SOURCES_DIR} + UPDATE_COMMAND "" + CONFIGURE_COMMAND ./buildconf && ./configure --disable-shared --prefix=${CARES_INSTALL_DIR} + BUILD_IN_SOURCE 1 + BUILD_COMMAND make + INSTALL_COMMAND make install +) + +ADD_LIBRARY(cares STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET cares PROPERTY IMPORTED_LOCATION + "${CARES_INSTALL_DIR}/lib/libcares.a") + +include_directories(${CARES_INCLUDE_DIR}) +ADD_DEPENDENCIES(cares extern_cares) diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index dbca4b2e0b2f46..dd9528eb276820 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -17,41 +17,38 @@ include (ExternalProject) SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc) SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc) -SET(GRPC_INCLUDE_DIR "${GRPC_SOURCES_DIR}/src/extern_grpc/include/" CACHE PATH "grpc include directory." FORCE) -SET(GRPC_LIBRARIES "${GRPC_SOURCES_DIR}/src/extern_grpc-build/libgrpc++_unsecure.a" CACHE FILEPATH "GRPC_LIBRARIES" FORCE) +SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE) SET(GRPC_CPP_PLUGIN "${GRPC_SOURCES_DIR}/src/extern_grpc-build/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE) ExternalProject_Add( extern_grpc - DEPENDS protobuf zlib + # DEPENDS protobuf zlib GIT_REPOSITORY "https://github.com/grpc/grpc.git" GIT_TAG "v1.7.x" PREFIX ${GRPC_SOURCES_DIR} UPDATE_COMMAND "" - BUILD_COMMAND make grpc_cpp_plugin grpc++_unsecure - # TODO(typhoonzero): install into third_party/install - INSTALL_COMMAND "" - CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} - -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS} - -DCMAKE_INSTALL_PREFIX=${GRPC_INSTALL_DIR} - -DCMAKE_INSTALL_LIBDIR=${GRPC_INSTALL_DIR}/lib - -DCMAKE_POSITION_INDEPENDENT_CODE=ON - -DgRPC_BUILD_TESTS=OFF - -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} - ${EXTERNAL_OPTIONAL_ARGS} - CMAKE_CACHE_ARGS - -DCMAKE_BUILD_TYPE:STRING=Release - -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF - -DPROTOBUF_INCLUDE_DIRS:STRING=${PROTOBUF_INCLUDE_DIR} - -DPROTOBUF_LIBRARIES:STRING=${PROTOBUF_LIBRARY} - -DZLIB_ROOT:STRING=${ZLIB_INSTALL_DIR} - -DgRPC_SSL_PROVIDER:STRING=NONE + CONFIGURE_COMMAND "" + BUILD_IN_SOURCE 1 + BUILD_COMMAND make + INSTALL_COMMAND make prefix=${GRPC_INSTALL_DIR} install ) -ADD_LIBRARY(grpc STATIC IMPORTED GLOBAL) -SET_PROPERTY(TARGET grpc PROPERTY IMPORTED_LOCATION ${GRPC_LIBRARIES}) +# FIXME(typhoonzero): hack to get static lib path, try a better way like merge them. 
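+# One option would be merging the installed archives into a single
+# libgrpc_all.a with an `ar -M` (MRI) script at install time; until then,
+# each gRPC static archive is imported as its own target below.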
+ADD_LIBRARY(grpc++_unsecure STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET grpc++_unsecure PROPERTY IMPORTED_LOCATION + "${GRPC_INSTALL_DIR}/lib/libgrpc++_unsecure.a") + +ADD_LIBRARY(grpc++ STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET grpc++ PROPERTY IMPORTED_LOCATION + "${GRPC_INSTALL_DIR}/lib/libgrpc++.a") +ADD_LIBRARY(gpr STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET gpr PROPERTY IMPORTED_LOCATION + "${GRPC_INSTALL_DIR}/lib/libgpr.a") + +ADD_LIBRARY(grpc_unsecure STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET grpc_unsecure PROPERTY IMPORTED_LOCATION + "${GRPC_INSTALL_DIR}/lib/libgrpc_unsecure.a") + include_directories(${GRPC_INCLUDE_DIR}) -ADD_DEPENDENCIES(grpc extern_grpc) +ADD_DEPENDENCIES(grpc++_unsecure extern_grpc) diff --git a/cmake/external/zlib.cmake b/cmake/external/zlib.cmake index a98e069b7cd165..1638cd8fdfc345 100644 --- a/cmake/external/zlib.cmake +++ b/cmake/external/zlib.cmake @@ -50,6 +50,8 @@ ExternalProject_Add( ) LIST(APPEND external_project_dependencies zlib) +ADD_LIBRARY(zlib_target STATIC IMPORTED GLOBAL) +SET_PROPERTY(TARGET zlib_target PROPERTY IMPORTED_LOCATION ${ZLIB_LIBRARIES}) IF(WITH_C_API) INSTALL(DIRECTORY ${ZLIB_INCLUDE_DIR} DESTINATION third_party/zlib) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index cd1cf34340b521..eeac0b2101314e 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -498,7 +498,7 @@ function(grpc_library TARGET_NAME) ${grpc_grpc_srcs} PROPERTIES COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") - cc_library("${TARGET_NAME}_grpc" SRCS "${grpc_grpc_srcs}" DEPS grpc) + cc_library("${TARGET_NAME}_grpc" SRCS "${grpc_grpc_srcs}") set_source_files_properties( ${grpc_library_SRCS} diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index afce29de321a57..a6cc6b1a48ebc6 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -239,4 +239,8 @@ if(WITH_GPU) nv_test(nccl_op_test SRCS nccl_op_test.cu DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) -cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op grpc) +cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) + + +cc_binary(test_send_recv_server SRCS send_recv_op_main_server.cc DEPS send_op recv_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) +cc_binary(test_send_recv_client SRCS send_recv_op_main_client.cc DEPS send_op recv_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index 4f84d82f94c302..65ef70ce3d30ff 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -17,6 +17,9 @@ #include #include +#include +#include + #include "paddle/framework/data_type.h" #include "paddle/framework/executor.h" #include "paddle/framework/framework.pb.h" @@ -45,13 +48,18 @@ class RecvOp : public framework::OperatorBase { const framework::AttributeMap &attrs) : OperatorBase(type, inputs, outputs, attrs) { if (!rpc_service_) { + rpc_service_.reset(new detail::SendRecvServerImpl()); std::string endpoint = Attr("endpoint"); - std::thread server_thread(RunServer, rpc_service_, endpoint); + server_thread_.reset(new std::thread(RunServer, rpc_service_, endpoint)); } } + + virtual ~RecvOp() { server_thread_->join(); } + void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const 
override { // blocking get one var from client. + std::cout << "before get from client..." << std::endl; const framework::LoDTensor &t = rpc_service_->Get(); framework::Scope &recv_scope = scope.NewScope(); // set graph input var @@ -74,6 +82,7 @@ class RecvOp : public framework::OperatorBase { protected: std::shared_ptr rpc_service_; + std::shared_ptr server_thread_; }; class RecvOpMaker : public framework::OpProtoAndCheckerMaker { diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc index b0c623522c9074..fda1a0fe977ca5 100644 --- a/paddle/operators/send_recv_op_test.cc +++ b/paddle/operators/send_recv_op_test.cc @@ -15,6 +15,8 @@ // TODO(typhoonzero): add python bindings for this test as // a RemoteOptimizer. +#include +#include #include #include "gtest/gtest.h" @@ -68,27 +70,34 @@ void AddOp(const std::string &type, } void StartServerNet() { + std::cout << "starting rpc server..." << std::endl; paddle::framework::Scope scope; paddle::platform::CPUPlace place; InitTensorsInScope(scope, place); // sub program run in recv_op, for simple test we use sum + std::cout << "before creating block..." << std::endl; paddle::framework::ProgramDescBind program; paddle::framework::BlockDescBind *block = program.MutableBlock(0); + std::cout << "adding op..." << std::endl; AddOp("sum", {{"X", {"X"}}}, {{"Out", {"Out"}}}, {}, block); paddle::framework::AttributeMap attrs; attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); attrs.insert({"OptimizeBlock", block}); + std::cout << "create recv op..." << std::endl; auto recv_op = paddle::framework::OpRegistry::CreateOp( "recv", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); paddle::platform::CPUDeviceContext ctx(place); + std::cout << "before run..." << std::endl; recv_op->Run(scope, ctx); } TEST(SendRecvOp, CPU) { std::thread server_thread(StartServerNet); - + std::cout << "####server thread started..." << std::endl; + sleep(10); + std::cout << "####starting trainer..." << std::endl; // local net paddle::framework::Scope scope; paddle::platform::CPUPlace place; @@ -101,6 +110,7 @@ TEST(SendRecvOp, CPU) { auto send_op = paddle::framework::OpRegistry::CreateOp( "send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); paddle::platform::CPUDeviceContext ctx(place); + std::cout << "####before send..." << std::endl; send_op->Run(scope, ctx); auto in_var = scope.Var("X"); diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index 5b4cdecf2c4285..2469ad7ae66fe0 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -21,7 +21,7 @@ class Optimizer(object): but need to use one of it's implementation. """ - def __init__(self, global_step=None): + def __init__(self, global_step=None, server_list=None): self._global_step = global_step # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters @@ -29,6 +29,12 @@ def __init__(self, global_step=None): # {accum_name : { paramter_name : accumulator_for_parameter, ...}, ...} self._accumulators = defaultdict(lambda: dict()) self.helper = None + # server_list for send parameter to remote side optimization. + # for parameter server, create local optimization net inside recv_op. + # for trainers, create a single send_op as optimization net. 
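+        # e.g. an optimizer constructed with server_list=["127.0.0.1:6174"]
+        # skips the local update ops entirely: create_optimization_pass
+        # then emits one send_op per (param, grad) pair via
+        # _create_remote_optimization_pass below.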
+ self._server_list = server_list + if server_list: + self.remote_optimize = True def _append_optimize_op(self, block, param_and_grad): """ append optimize operator to block and return all the added optimize_op @@ -129,6 +135,21 @@ def _increment_global_step(self, block): return increment_op + def _create_remote_optimization_pass(self, block, parameters_and_grads): + send_op_list = [] + for param_and_grad in parameters_and_grads: + if param_and_grad[1] is not None: + send_op = block.append_op( + type="send", + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "LearningRate": self._lr + }, + outputs={"ParamOut": param_and_grad[0]}) + send_op_list.append(send_op) + return send_op_list + def create_optimization_pass(self, parameters_and_grads, loss, @@ -153,6 +174,12 @@ def create_optimization_pass(self, # _create_accumulators method if it needs to create accumulators # for parameters and extend _finish_update method to add custom ops. + # NOTE: for remote updates, we only create a send_op here, instead, + # optimization pass will be created at parameter server side. + if self.remote_optimize: + return self._create_remote_optimization_pass(loss.block, + parameters_and_grads) + # Create any accumulators program = loss.block.program self.helper = LayerHelper( From e647ab842a3179677ff3319e15efde0d8c617f57 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Wed, 15 Nov 2017 20:32:18 +0800 Subject: [PATCH 15/28] wip add ut --- paddle/operators/detail/send_impl.cc | 2 +- paddle/operators/recv_op.cc | 4 ++-- paddle/operators/send_op.cc | 3 ++- paddle/operators/send_recv_op_test.cc | 1 + 4 files changed, 6 insertions(+), 4 deletions(-) diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc index 1621b2ebe4aed7..7010eb03f1e4a3 100644 --- a/paddle/operators/detail/send_impl.cc +++ b/paddle/operators/detail/send_impl.cc @@ -61,7 +61,7 @@ bool RPCClient::SendVariable(const framework::Scope& scope, msg.set_varname(inname); msg.set_serialized(oss.str()); Status status = stub_->SendVariable(&context, msg, &out_msg); - if (status.ok()) { + if (!status.ok()) { return false; } std::istringstream iss(out_msg.serialized()); diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index 65ef70ce3d30ff..ca71009217d562 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -63,7 +63,7 @@ class RecvOp : public framework::OperatorBase { const framework::LoDTensor &t = rpc_service_->Get(); framework::Scope &recv_scope = scope.NewScope(); // set graph input var - auto *var = recv_scope.FindVar(Input("X")); + auto *var = recv_scope.Var(Input("RX")); auto *tensor = var->GetMutable(); // FIXME(typhoonzero): do not copy tensor->CopyFrom(t, dev_ctx.GetPlace(), dev_ctx); @@ -89,7 +89,7 @@ class RecvOpMaker : public framework::OpProtoAndCheckerMaker { public: RecvOpMaker(framework::OpProto *proto, framework::OpAttrChecker *op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("X", "(Tensor) Input tensor to be saved"); + AddInput("RX", "(Tensor) Input tensor to be saved"); AddComment(R"DOC( Recv operator diff --git a/paddle/operators/send_op.cc b/paddle/operators/send_op.cc index 3c7cc9eb909df2..a3059847f2d420 100644 --- a/paddle/operators/send_op.cc +++ b/paddle/operators/send_op.cc @@ -45,7 +45,8 @@ class SendOp : public framework::OperatorBase { const platform::DeviceContext &dev_ctx) const override { auto iname = Input("X"); auto oname = Output("Out"); - // TODO(typhoonzero): block until server has initalized. 
+ // TODO(typhoonzero): currently it's non-blocking, + // should block until server responds. bool ret = client_->SendVariable(scope, iname, oname); if (!ret) { LOG(ERROR) << "send variable error"; diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc index fda1a0fe977ca5..56cf2fe50364c2 100644 --- a/paddle/operators/send_recv_op_test.cc +++ b/paddle/operators/send_recv_op_test.cc @@ -26,6 +26,7 @@ USE_NO_KERNEL_OP(send); USE_NO_KERNEL_OP(recv); +USE_OP(sum) void InitTensorsInScope(paddle::framework::Scope &scope, paddle::platform::CPUPlace &place) { From 582c52141a6bd714b093fa561b45c1ad493e3bea Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Thu, 16 Nov 2017 12:18:11 +0800 Subject: [PATCH 16/28] working version send recv --- cmake/generic.cmake | 5 ++-- paddle/operators/CMakeLists.txt | 2 +- paddle/operators/detail/send_recv.proto | 2 +- paddle/operators/recv_op.cc | 19 +++++++++---- paddle/operators/send_recv_op_test.cc | 38 +++++++++++-------------- 5 files changed, 36 insertions(+), 30 deletions(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index eeac0b2101314e..fc65c83772cbcf 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -475,7 +475,7 @@ function(grpc_library TARGET_NAME) set(options "") cmake_parse_arguments(grpc_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - message("processing ${grpc_library_PROTO}") + message(STATUS "generating grpc ${grpc_library_PROTO}") get_filename_component(ABS_PROTO ${grpc_library_PROTO} ABSOLUTE) get_filename_component(PROTO_WE ${grpc_library_PROTO} NAME_WE) @@ -493,7 +493,8 @@ function(grpc_library TARGET_NAME) --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" DEPENDS "${ABS_PROTO}") - # NOTE: grpc generated code do not generate virtual-dtor + # FIXME(typhoonzero): grpc generated code do not generate virtual-dtor, mark it + # as compiler warnings instead of error. Should try remove the warnings also. 
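+  # (The generated *.grpc.pb.cc sources contain classes with virtual
+  # methods but no virtual destructors, which the -Werror build would
+  # otherwise reject outright.)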
set_source_files_properties( ${grpc_grpc_srcs} PROPERTIES diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index a6cc6b1a48ebc6..c31f9fd7cb9f4c 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -239,7 +239,7 @@ if(WITH_GPU) nv_test(nccl_op_test SRCS nccl_op_test.cu DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) -cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) +cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) cc_binary(test_send_recv_server SRCS send_recv_op_main_server.cc DEPS send_op recv_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) diff --git a/paddle/operators/detail/send_recv.proto b/paddle/operators/detail/send_recv.proto index 1ee714c4f30705..baa1b387760526 100644 --- a/paddle/operators/detail/send_recv.proto +++ b/paddle/operators/detail/send_recv.proto @@ -31,7 +31,7 @@ service SendRecvService { // SelectedRows message VariableMessage { string varname = 1; - string serialized = 2; + bytes serialized = 2; } message VoidMessage { diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index ca71009217d562..c07f977d6fa1fc 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -18,7 +18,6 @@ #include #include -#include #include "paddle/framework/data_type.h" #include "paddle/framework/executor.h" @@ -31,12 +30,15 @@ namespace paddle { namespace operators { -void RunServer(std::shared_ptr service, +void RunServer(Server **rpc_server, + std::shared_ptr service, const std::string &server_address) { ServerBuilder builder; builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); builder.RegisterService(service.get()); + // rpc_server.reset(new Server(builder.BuildAndStart()); std::unique_ptr server(builder.BuildAndStart()); + *rpc_server = server.get(); LOG(INFO) << "Server listening on " << server_address << std::endl; server->Wait(); } @@ -50,16 +52,19 @@ class RecvOp : public framework::OperatorBase { if (!rpc_service_) { rpc_service_.reset(new detail::SendRecvServerImpl()); std::string endpoint = Attr("endpoint"); - server_thread_.reset(new std::thread(RunServer, rpc_service_, endpoint)); + server_thread_.reset( + new std::thread(RunServer, &rpc_server_, rpc_service_, endpoint)); } } - virtual ~RecvOp() { server_thread_->join(); } + virtual ~RecvOp() { + rpc_server_->Shutdown(); + server_thread_->join(); + } void Run(const framework::Scope &scope, const platform::DeviceContext &dev_ctx) const override { // blocking get one var from client. - std::cout << "before get from client..." << std::endl; const framework::LoDTensor &t = rpc_service_->Get(); framework::Scope &recv_scope = scope.NewScope(); // set graph input var @@ -81,6 +86,10 @@ class RecvOp : public framework::OperatorBase { } protected: + // grpc server instance to track status and gracefully shut down. + // borrow a pointer from the server thread; it is only valid until Shutdown() and the join in ~RecvOp. + Server *rpc_server_{nullptr}; + // grpc send/recv service implementation to register.
std::shared_ptr rpc_service_; std::shared_ptr server_thread_; }; diff --git a/paddle/operators/send_recv_op_test.cc b/paddle/operators/send_recv_op_test.cc index 56cf2fe50364c2..ac03eb3752e7cd 100644 --- a/paddle/operators/send_recv_op_test.cc +++ b/paddle/operators/send_recv_op_test.cc @@ -16,7 +16,6 @@ // a RemoteOptimizer. #include -#include #include #include "gtest/gtest.h" @@ -26,7 +25,10 @@ USE_NO_KERNEL_OP(send); USE_NO_KERNEL_OP(recv); -USE_OP(sum) +USE_OP(sum); + +// global for simplicity. +std::unique_ptr recv_op; void InitTensorsInScope(paddle::framework::Scope &scope, paddle::platform::CPUPlace &place) { @@ -34,15 +36,15 @@ void InitTensorsInScope(paddle::framework::Scope &scope, auto var = scope.Var("X"); auto tensor = var->GetMutable(); tensor->Resize({10, 10}); - int *expect = tensor->mutable_data(place); + float *expect = tensor->mutable_data(place); for (int64_t i = 0; i < tensor->numel(); ++i) { - expect[i] = static_cast(i); + expect[i] = static_cast(i); } auto out_var = scope.Var("Out"); auto out_tensor = out_var->GetMutable(); out_tensor->Resize({10, 10}); - tensor->mutable_data(place); // allocate + tensor->mutable_data(place); // allocate } void AddOp(const std::string &type, @@ -71,39 +73,32 @@ void AddOp(const std::string &type, } void StartServerNet() { - std::cout << "starting rpc server..." << std::endl; paddle::framework::Scope scope; paddle::platform::CPUPlace place; InitTensorsInScope(scope, place); // sub program run in recv_op, for simple test we use sum - std::cout << "before creating block..." << std::endl; paddle::framework::ProgramDescBind program; paddle::framework::BlockDescBind *block = program.MutableBlock(0); - std::cout << "adding op..." << std::endl; - AddOp("sum", {{"X", {"X"}}}, {{"Out", {"Out"}}}, {}, block); + // X for server side tensors, RX for received tensors; they must be of the same shape. + AddOp("sum", {{"X", {"X", "RX"}}}, {{"Out", {"Out"}}}, {}, block); paddle::framework::AttributeMap attrs; attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); attrs.insert({"OptimizeBlock", block}); - std::cout << "create recv op..." << std::endl; - auto recv_op = paddle::framework::OpRegistry::CreateOp( - "recv", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); + recv_op = paddle::framework::OpRegistry::CreateOp("recv", {{"RX", {"RX"}}}, {{"Out", {"Out"}}}, attrs); paddle::platform::CPUDeviceContext ctx(place); - std::cout << "before run..." << std::endl; recv_op->Run(scope, ctx); } TEST(SendRecvOp, CPU) { std::thread server_thread(StartServerNet); - std::cout << "####server thread started..." << std::endl; - sleep(10); - std::cout << "####starting trainer..." << std::endl; + sleep(5); // wait for the server to start // local net paddle::framework::Scope scope; paddle::platform::CPUPlace place; InitTensorsInScope(scope, place); - // FIXME(typhoonzero): call client side init tensors here. paddle::framework::AttributeMap attrs; attrs.insert({"endpoint", std::string("127.0.0.1:6174")}); @@ -111,19 +106,20 @@ TEST(SendRecvOp, CPU) { auto send_op = paddle::framework::OpRegistry::CreateOp( "send", {{"X", {"X"}}}, {{"Out", {"Out"}}}, attrs); paddle::platform::CPUDeviceContext ctx(place); - std::cout << "####before send..." << std::endl; send_op->Run(scope, ctx); auto in_var = scope.Var("X"); auto tensor = in_var->GetMutable(); - int *expected = tensor->data(); + float *expected = tensor->data(); auto out_var = scope.Var("Out"); auto target = out_var->GetMutable(); - int *actual = target->data(); + // if the send failed, the output tensor is never allocated.
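+  // The server block computes Out = sum(X, RX); both sides fill their
+  // tensors with the same ramp 0, 1, 2, ..., so every element received
+  // back should be exactly twice the local value (checked below).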
+ EXPECT_NE(target->memory_size(), size_t(0)); + float *actual = target->data(); for (int64_t i = 0; i < target->numel(); ++i) { - // sumed value EXPECT_EQ(expected[i] * 2, actual[i]); } + recv_op.reset(); // dtor can shutdown and join server thread. server_thread.join(); } From 138cdef24714cf356beda0d9dbd4b5d80eb57eff Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Thu, 16 Nov 2017 12:22:57 +0800 Subject: [PATCH 17/28] revert optimizer.py --- python/paddle/v2/framework/optimizer.py | 193 ++++++++++-------------- 1 file changed, 79 insertions(+), 114 deletions(-) diff --git a/python/paddle/v2/framework/optimizer.py b/python/paddle/v2/framework/optimizer.py index 2469ad7ae66fe0..d2841df6af7a0d 100644 --- a/python/paddle/v2/framework/optimizer.py +++ b/python/paddle/v2/framework/optimizer.py @@ -1,15 +1,15 @@ from collections import defaultdict -import paddle.v2.framework.framework as framework -from paddle.v2.framework.framework import unique_name, Program -from paddle.v2.framework.backward import append_backward_ops -from paddle.v2.framework.initializer import ConstantInitializer -from paddle.v2.framework.regularizer import append_regularization_ops -from paddle.v2.framework.layer_helper import LayerHelper +import paddle.v2.fluid.framework as framework +from paddle.v2.fluid.framework import unique_name, Program +from paddle.v2.fluid.backward import append_backward_ops +from paddle.v2.fluid.initializer import ConstantInitializer +from paddle.v2.fluid.regularizer import append_regularization_ops +from paddle.v2.fluid.layer_helper import LayerHelper __all__ = [ 'SGDOptimizer', 'MomentumOptimizer', 'AdagradOptimizer', 'AdamOptimizer', - 'AdamaxOptimizer' + 'AdamaxOptimizer', 'DecayedAdagradOptimizer' ] @@ -21,7 +21,7 @@ class Optimizer(object): but need to use one of it's implementation. """ - def __init__(self, global_step=None, server_list=None): + def __init__(self, global_step=None): self._global_step = global_step # Dictionary of accumulators. Some optimizer subclasses need to # allocate and manage extra variables associated with the parameters @@ -29,27 +29,27 @@ def __init__(self, global_step=None, server_list=None): # {accum_name : { paramter_name : accumulator_for_parameter, ...}, ...} self._accumulators = defaultdict(lambda: dict()) self.helper = None - # server_list for send parameter to remote side optimization. - # for parameter server, create local optimization net inside recv_op. - # for trainers, create a single send_op as optimization net. - self._server_list = server_list - if server_list: - self.remote_optimize = True def _append_optimize_op(self, block, param_and_grad): """ append optimize operator to block and return all the added optimize_op """ raise NotImplementedError() - def _initialize_tensors(self, block): - """Create all necessary tensors, that will be shared for all parameter updates. - - Tensors like learning rate should be initialized here. 
- - Args: - block: the block in which the loss variable is present - """ - pass + def _create_param_lr(self, param_and_grad): + # create learning rate variable for every parameter + param = param_and_grad[0] + param_lr = param.optimize_attr['learning_rate'] + param_lr_shape = [1] + param_lr_var = self.helper.create_global_variable( + name=unique_name("learning_rate"), + dtype='float32', + shape=param_lr_shape, + lod_level=1, + persistable=True) + param_lr = param_lr * self._learning_rate + self.helper.set_variable_initializer( + var=param_lr_var, initializer=ConstantInitializer(param_lr)) + return param_lr_var def _create_accumulators(self, block, parameters): """Create all accumulators needed by the parameters @@ -85,7 +85,7 @@ def _add_accumulator(self, name, param, dtype=None, fill_value=0.0): """ if (name in self._accumulators and param.name in self._accumulators[name]): - raise Exception("Accumulator {} already exists for parmeter {}". + raise Exception("Accumulator {} already exists for parameter {}". format(name, param.name)) assert isinstance(self.helper, LayerHelper) @@ -135,21 +135,6 @@ def _increment_global_step(self, block): return increment_op - def _create_remote_optimization_pass(self, block, parameters_and_grads): - send_op_list = [] - for param_and_grad in parameters_and_grads: - if param_and_grad[1] is not None: - send_op = block.append_op( - type="send", - inputs={ - "Param": param_and_grad[0], - "Grad": param_and_grad[1], - "LearningRate": self._lr - }, - outputs={"ParamOut": param_and_grad[0]}) - send_op_list.append(send_op) - return send_op_list - def create_optimization_pass(self, parameters_and_grads, loss, @@ -174,12 +159,6 @@ def create_optimization_pass(self, # _create_accumulators method if it needs to create accumulators # for parameters and extend _finish_update method to add custom ops. - # NOTE: for remote updates, we only create a send_op here, instead, - # optimization pass will be created at parameter server side. 
- if self.remote_optimize: - return self._create_remote_optimization_pass(loss.block, - parameters_and_grads) - # Create any accumulators program = loss.block.program self.helper = LayerHelper( @@ -188,8 +167,6 @@ def create_optimization_pass(self, startup_program=startup_program) self._create_accumulators(loss.block, [p[0] for p in parameters_and_grads]) - # Create any necessary tensors - self._initialize_tensors(loss.block) optimize_ops = [] for param_and_grad in parameters_and_grads: @@ -241,27 +218,16 @@ def __init__(self, learning_rate, global_step=None): self.type = "sgd" self._learning_rate = learning_rate - def _initialize_tensors(self, block): - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _append_optimize_op(self, block, param_and_grad): assert isinstance(block, framework.Block) + # create the optimize op sgd_op = block.append_op( type=self.type, inputs={ "Param": param_and_grad[0], "Grad": param_and_grad[1], - "LearningRate": self._lr + "LearningRate": self._create_param_lr(param_and_grad) }, outputs={"ParamOut": param_and_grad[0]}) @@ -286,19 +252,6 @@ def __init__(self, self._momentum = momentum self._use_nesterov = bool(use_nesterov) - def _initialize_tensors(self, block): - assert isinstance(block, framework.Block) - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -317,7 +270,7 @@ def _append_optimize_op(self, block, param_and_grad): "Param": param_and_grad[0], "Grad": param_and_grad[1], "Velocity": velocity_acc, - "LearningRate": self._lr + "LearningRate": self._create_param_lr(param_and_grad) }, outputs={ "ParamOut": param_and_grad[0], @@ -342,18 +295,6 @@ def __init__(self, learning_rate, epsilon=1.0e-6, global_step=None): self._learning_rate = learning_rate self._epsilon = epsilon - def _initialize_tensors(self, block): - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -366,14 +307,14 @@ def _append_optimize_op(self, block, param_and_grad): moment_acc = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) - # create the adagrad optimizer op + # Create the adagrad optimizer op adagrad_op = block.append_op( type=self.type, inputs={ "Param": param_and_grad[0], "Grad": param_and_grad[1], "Moment": moment_acc, - "LearningRate": self._lr + "LearningRate": self._create_param_lr(param_and_grad) }, outputs={"ParamOut": param_and_grad[0], "MomentOut": moment_acc}, @@ -405,18 +346,6 @@ def __init__(self, self._beta2 = beta2 self._epsilon = epsilon - def _initialize_tensors(self, block): - lr_shape = [1] - # create a variable for learning_rate - self._lr = 
self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _create_accumulators(self, block, parameters): assert isinstance(block, framework.Block) @@ -460,7 +389,7 @@ def _append_optimize_op(self, block, param_and_grad): inputs={ "Param": param_and_grad[0], "Grad": param_and_grad[1], - "LearningRate": self._lr, + "LearningRate": self._create_param_lr(param_and_grad), "Moment1": moment1, "Moment2": moment2, "Beta1Pow": self._beta1_pow_acc, @@ -522,18 +451,6 @@ def __init__(self, self._beta2 = beta2 self._epsilon = epsilon - def _initialize_tensors(self, block): - lr_shape = [1] - # create a variable for learning_rate - self._lr = self.helper.create_global_variable( - name=unique_name("learning_rate"), - dtype='float32', - shape=lr_shape, - lod_level=1, - persistable=True) - self.helper.set_variable_initializer( - var=self._lr, initializer=ConstantInitializer(self._learning_rate)) - def _create_accumulators(self, block, parameters): # Create beta1 power accumulator tensor beta_shape = [1] @@ -563,7 +480,7 @@ def _append_optimize_op(self, block, param_and_grad): inputs={ "Param": param_and_grad[0], "Grad": param_and_grad[1], - "LearningRate": self._lr, + "LearningRate": self._create_param_lr(param_and_grad), "Moment": moment, "InfNorm": inf_norm, "Beta1Pow": self._beta1_pow_acc @@ -593,3 +510,51 @@ def _finish_update(self, block): attrs={"scale": self._beta1}) return [scale_beta1] + + +class DecayedAdagradOptimizer(Optimizer): + """Simple Decayed Adagrad optimizer with moment state + """ + _moment_acc_str = "moment" + + def __init__(self, + learning_rate, + decay=0.95, + epsilon=1.0e-6, + global_step=None): + assert learning_rate is not None + assert decay is not None + assert epsilon is not None + + super(DecayedAdagradOptimizer, self).__init__(global_step) + self.type = "decayed_adagrad" + self._learning_rate = learning_rate + self._decay = decay + self._epsilon = epsilon + + def _create_accumulators(self, block, parameters): + assert isinstance(block, framework.Block) + + for p in parameters: + self._add_accumulator(self._moment_acc_str, p) + + def _append_optimize_op(self, block, param_and_grad): + assert isinstance(block, framework.Block) + + moment_acc = self._get_accumulator(self._moment_acc_str, + param_and_grad[0]) + + # Create the decayed adagrad optimizer op + decayed_adagrad_op = block.append_op( + type=self.type, + inputs={ + "Param": param_and_grad[0], + "Grad": param_and_grad[1], + "Moment": moment_acc, + "LearningRate": self._create_param_lr(param_and_grad) + }, + outputs={"ParamOut": param_and_grad[0], + "MomentOut": moment_acc}, + attrs={"epsilon": self._epsilon}) + + return decayed_adagrad_op From a993bd715b5a2f12f130f1985290ec8b528f4c45 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Thu, 16 Nov 2017 12:35:26 +0800 Subject: [PATCH 18/28] update test cmake --- CMakeLists.txt | 1 + paddle/operators/CMakeLists.txt | 4 ---- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 60cb5674af749a..9f9ee918757112 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -128,6 +128,7 @@ include(external/any) # download libn::any include(external/eigen) # download eigen3 include(external/pybind11) # download pybind11 include(external/nccl) +include(external/cares) include(external/grpc) include(cudnn) # set cudnn libraries, must before configure 
diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index c252d1ba4742a0..094202d0ab5338 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -256,7 +256,3 @@ if(WITH_GPU) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) - - -cc_binary(test_send_recv_server SRCS send_recv_op_main_server.cc DEPS send_op recv_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) -cc_binary(test_send_recv_client SRCS send_recv_op_main_client.cc DEPS send_op recv_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) From 3ab2f6505692f593d086f9e5af08f9c0aebc394e Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Thu, 16 Nov 2017 13:00:26 +0800 Subject: [PATCH 19/28] add libtool to dockerfile --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 150344a8116e2b..857d3f3e5f6479 100644 --- a/Dockerfile +++ b/Dockerfile @@ -29,7 +29,7 @@ RUN apt-get update && \ automake locales clang-format swig doxygen cmake \ liblapack-dev liblapacke-dev libboost-dev \ clang-3.8 llvm-3.8 libclang-3.8-dev \ - net-tools && \ + net-tools libtool && \ apt-get clean -y # Install Go and glide From 3c98a339016498b245cb7d141e8f61ea8108f4c2 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Thu, 16 Nov 2017 16:49:45 +0800 Subject: [PATCH 20/28] update cmake dependency --- cmake/external/grpc.cmake | 2 +- paddle/operators/recv_op.cc | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index dd9528eb276820..48aaf914e72b28 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -22,7 +22,7 @@ SET(GRPC_CPP_PLUGIN "${GRPC_SOURCES_DIR}/src/extern_grpc-build/grpc_cpp_plugin" ExternalProject_Add( extern_grpc - # DEPENDS protobuf zlib + DEPENDS protobuf zlib GIT_REPOSITORY "https://github.com/grpc/grpc.git" GIT_TAG "v1.7.x" PREFIX ${GRPC_SOURCES_DIR} diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index c07f977d6fa1fc..045b77154e72e2 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -36,7 +36,6 @@ void RunServer(Server **rpc_server, ServerBuilder builder; builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); builder.RegisterService(service.get()); - // rpc_server.reset(new Server(builder.BuildAndStart()); std::unique_ptr server(builder.BuildAndStart()); *rpc_server = server.get(); LOG(INFO) << "Server listening on " << server_address << std::endl; From 2d7e3bad940ba27b2d09d38740fc3493f289b698 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Fri, 17 Nov 2017 08:10:54 +0800 Subject: [PATCH 21/28] update cmake depends --- cmake/generic.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index f608aaa6764557..64e49d5fc4c66e 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -491,7 +491,7 @@ function(grpc_library TARGET_NAME) COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" - DEPENDS "${ABS_PROTO}") + DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE}) # FIXME(typhoonzero): grpc generated code do not generate virtual-dtor, mark it # as compiler warnings instead of error. 
Should try remove the warnings also. From 6271d86b821b41ba4999e74868e526ab0fde7a2b Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Fri, 17 Nov 2017 17:56:18 +0800 Subject: [PATCH 22/28] update cmake grpc depends --- cmake/generic.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 64e49d5fc4c66e..1595f3cda13401 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -491,7 +491,7 @@ function(grpc_library TARGET_NAME) COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" - DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE}) + DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} ${GRPC_CPP_PLUGIN}) # FIXME(typhoonzero): grpc generated code do not generate virtual-dtor, mark it # as compiler warnings instead of error. Should try remove the warnings also. From 02a8bd7f7bcb35719cf57e8843408fa8d80f59bd Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 20 Nov 2017 12:02:01 +0800 Subject: [PATCH 23/28] fix cmake dependency --- cmake/generic.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 1595f3cda13401..76b44be168ef7b 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -491,7 +491,7 @@ function(grpc_library TARGET_NAME) COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} ARGS --grpc_out "${CMAKE_CURRENT_BINARY_DIR}" -I "${PROTO_PATH}" --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" "${ABS_PROTO}" - DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} ${GRPC_CPP_PLUGIN}) + DEPENDS "${ABS_PROTO}" ${PROTOBUF_PROTOC_EXECUTABLE} extern_grpc) # FIXME(typhoonzero): grpc generated code do not generate virtual-dtor, mark it # as compiler warnings instead of error. Should try remove the warnings also. From b23624868d7d31ef81cd894c3817cb00abf88f28 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 20 Nov 2017 13:31:40 +0800 Subject: [PATCH 24/28] fix compile error --- cmake/external/grpc.cmake | 2 +- paddle/operators/detail/recv_impl.cc | 16 --------------- paddle/operators/detail/send_impl.cc | 25 ------------------------ paddle/operators/detail/send_recv.proto | 2 -- paddle/operators/detail/send_recv_impl.h | 6 ------ paddle/operators/save_op.cc | 2 +- 6 files changed, 2 insertions(+), 51 deletions(-) diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index 48aaf914e72b28..f6cd7e37e8f2c9 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -18,7 +18,7 @@ include (ExternalProject) SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc) SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc) SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." 
FORCE) -SET(GRPC_CPP_PLUGIN "${GRPC_SOURCES_DIR}/src/extern_grpc-build/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE) +SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE) ExternalProject_Add( extern_grpc diff --git a/paddle/operators/detail/recv_impl.cc b/paddle/operators/detail/recv_impl.cc index 94bc674a5a5e3e..89dc5045221156 100644 --- a/paddle/operators/detail/recv_impl.cc +++ b/paddle/operators/detail/recv_impl.cc @@ -18,22 +18,6 @@ namespace paddle { namespace operators { namespace detail { -Status SendRecvServerImpl::InitVariables( - ServerContext *context, ServerReader *in_var_reader, - VoidMessage *void_ret) { - // set up all variables to run server side block - VariableMessage in_buf; - while (in_var_reader->Read(&in_buf)) { - // create var if not exist - // auto *var = scope_->Var(in_buf.varname()); - // auto *tensor = var->GetMutable(); - // std::istringstream iss(in_buf.serialized()); - // framework::DeserializeFromStream(iss, tensor); - } - *void_ret = VoidMessage(); - return Status::OK; -} - Status SendRecvServerImpl::SendVariable(ServerContext *context, const VariableMessage *in_var, VariableMessage *out_var) { diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc index 7010eb03f1e4a3..c2510ccc9efe14 100644 --- a/paddle/operators/detail/send_impl.cc +++ b/paddle/operators/detail/send_impl.cc @@ -18,31 +18,6 @@ namespace paddle { namespace operators { namespace detail { -bool RPCClient::InitVariables(const framework::Scope& scope, - const std::vector& var_list) { - // write streams of Variable to server - ClientContext context; - VoidMessage void_ret; - std::unique_ptr> writer( - stub_->InitVariables(&context, &void_ret)); - // send vars in scope to server using this stream. - for (auto n = var_list.begin(); n != var_list.end(); n++) { - auto* var = scope.FindVar(*n); - // TODO(typhoonzero): support SelectedRows - PADDLE_ENFORCE(var->IsType(), - "Only support LoDTensor, %s has wrong type", *n); - auto& tensor = var->Get(); - VariableMessage msg; - msg.set_varname(*n); - std::ostringstream oss; - framework::SerializeToStream(oss, tensor, platform::CPUDeviceContext()); - // FIXME(typhoonzero): no copy - msg.set_serialized(oss.str()); - writer->Write(msg); - } - return true; -} - bool RPCClient::SendVariable(const framework::Scope& scope, const std::string& inname, const std::string& outname) { diff --git a/paddle/operators/detail/send_recv.proto b/paddle/operators/detail/send_recv.proto index baa1b387760526..66f84678b3c7d0 100644 --- a/paddle/operators/detail/send_recv.proto +++ b/paddle/operators/detail/send_recv.proto @@ -17,8 +17,6 @@ syntax = "proto3"; package sendrecv; service SendRecvService { - // Init Variables before running server side block. - rpc InitVariables(stream VariableMessage) returns (VoidMessage) {} // For parameter server round-robin like hashing, do not split tensors. 
// Send and recv only one tensor rpc SendVariable(VariableMessage) returns (VariableMessage) {} diff --git a/paddle/operators/detail/send_recv_impl.h b/paddle/operators/detail/send_recv_impl.h index 55063fe0df7a71..b9a5340a8636db 100644 --- a/paddle/operators/detail/send_recv_impl.h +++ b/paddle/operators/detail/send_recv_impl.h @@ -52,10 +52,6 @@ class SendRecvServerImpl final : public SendRecvService::Service { public: explicit SendRecvServerImpl() {} - Status InitVariables(ServerContext *context, - ServerReader *in_var_reader, - VoidMessage *void_ret) override; - Status SendVariable(ServerContext *context, const VariableMessage *in_var, VariableMessage *out_var) override; @@ -79,8 +75,6 @@ class RPCClient { RPCClient(std::shared_ptr channel) : stub_(SendRecvService::NewStub(channel)) {} - bool InitVariables(const framework::Scope &scope, - const std::vector &var_list); bool SendVariable(const framework::Scope &scope, const std::string &inname, const std::string &outname); diff --git a/paddle/operators/save_op.cc b/paddle/operators/save_op.cc index 403d604b77db59..d4921cb80c8d78 100644 --- a/paddle/operators/save_op.cc +++ b/paddle/operators/save_op.cc @@ -88,7 +88,7 @@ class SaveOp : public framework::OperatorBase { "SaveOp only support LoDTensor, %s has wrong type", iname); auto &tensor = var->Get(); - framework::SerializeToStream(fout, tensor); + framework::SerializeToStream(fout, tensor, dev_ctx); } }; From 7b3d0811ce396a05d8f6b4feb32f753cc0864b98 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 20 Nov 2017 14:08:00 +0800 Subject: [PATCH 25/28] fix compile --- paddle/operators/CMakeLists.txt | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index 3503825d6797d3..2088ce53cd228c 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -198,17 +198,19 @@ set(DEPS_OPS gru_op adagrad_op sgd_op + save_op + load_op send_op recv_op) add_subdirectory(detail) -op_library(send_op SRCS send_op.cc DEPS sendrecvop_grpc) +op_library(send_op SRCS send_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf) set_source_files_properties( send_op.cc PROPERTIES COMPILE_FLAGS "-Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor") -op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_grpc) +op_library(recv_op SRCS recv_op.cc DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib_target protobuf) set_source_files_properties( recv_op.cc PROPERTIES @@ -237,6 +239,9 @@ op_library(sequence_pool_op DEPS sequence_pooling) op_library(lstm_op DEPS sequence2batch lstm_compute) op_library(conv_transpose_op DEPS vol2col) op_library(gru_op DEPS sequence2batch gru_compute) +# FIXME(typhoonzero): save/load depends lodtensor serialization functions +op_library(save_op DEPS lod_tensor) +op_library(load_op DEPS lod_tensor) if(WITH_TESTING) op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc DEPS net_op tensor_array gtest) @@ -267,4 +272,4 @@ if(WITH_GPU) cc_test(nccl_op_test SRCS nccl_op_test.cu.cc DEPS nccl_op gpu_info device_context) endif() cc_test(save_load_op_test SRCS save_load_op_test.cc DEPS save_op load_op) -cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor grpc++_unsecure grpc_unsecure gpr protobuf cares zlib_target) +cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS send_op recv_op sum_op executor) From 3b1230f8be87dfc85e4bad0869690af0e5740cec Mon Sep 17 
00:00:00 2001 From: typhoonzero Date: Thu, 23 Nov 2017 17:29:15 +0800 Subject: [PATCH 26/28] follow comments --- .clang-format | 4 ---- cmake/external/cares.cmake | 4 ++++ cmake/external/grpc.cmake | 4 ++++ cmake/generic.cmake | 7 +++++++ 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/.clang-format b/.clang-format index d661ad8f2e61c7..aff93435f58c52 100644 --- a/.clang-format +++ b/.clang-format @@ -24,8 +24,4 @@ Standard: Cpp11 AllowAllParametersOfDeclarationOnNextLine: true BinPackParameters: false BinPackArguments: false ---- -Language: Proto -# Don't format .proto files. -DisableFormat: true ... diff --git a/cmake/external/cares.cmake b/cmake/external/cares.cmake index a1b0fe3bfd902f..e05111ee18efc9 100644 --- a/cmake/external/cares.cmake +++ b/cmake/external/cares.cmake @@ -13,6 +13,10 @@ # limitations under the License. # +IF(MOBILE_INFERENCE) + return() +ENDIF() + include (ExternalProject) # NOTE: c-ares is needed when linking with grpc. diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake index f6cd7e37e8f2c9..f431c037fd52dc 100644 --- a/cmake/external/grpc.cmake +++ b/cmake/external/grpc.cmake @@ -13,6 +13,10 @@ # limitations under the License. # +IF(MOBILE_INFERENCE) + return() +ENDIF() + include (ExternalProject) SET(GRPC_SOURCES_DIR ${THIRD_PARTY_PATH}/grpc) diff --git a/cmake/generic.cmake b/cmake/generic.cmake index 76b44be168ef7b..e4c3b5fbb46175 100644 --- a/cmake/generic.cmake +++ b/cmake/generic.cmake @@ -468,6 +468,13 @@ function(py_test TARGET_NAME) endif() endfunction() +# grpc_library generates grpc code using grpc_cpp_plugin and protoc, +# then builds the generated protobuf and grpc code together with your +# implementation sources. Use the SRCS argument for your +# implementation source files and the PROTO argument for your .proto +# file.
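+# The .proto file is compiled twice: once by protoc for the plain
+# protobuf messages and once through grpc_cpp_plugin for the service
+# stubs; both generated sources are linked into the resulting cc_library.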
+# +# Usage: grpc_library(my_target SRCS my_client.cc PROTO my_target.proto DEPS my_dep) function(grpc_library TARGET_NAME) set(oneValueArgs PROTO) From d966c20bb27671927b2f395840ee728540806558 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 27 Nov 2017 16:06:42 +0800 Subject: [PATCH 27/28] update --- paddle/operators/CMakeLists.txt | 7 ------- 1 file changed, 7 deletions(-) diff --git a/paddle/operators/CMakeLists.txt b/paddle/operators/CMakeLists.txt index adde0fe7484dbb..bf5dc8d9260b88 100644 --- a/paddle/operators/CMakeLists.txt +++ b/paddle/operators/CMakeLists.txt @@ -252,13 +252,6 @@ op_library(recurrent_op SRCS recurrent_op.cc DEPS executor) # FIXME(typhoonzero): save/load depends lodtensor serialization functions op_library(save_op DEPS lod_tensor) op_library(load_op DEPS lod_tensor) -if(WITH_TESTING) - op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS net_op tensor_array gtest) -else() - op_library(dynamic_recurrent_op SRCS dynamic_recurrent_op.cc rnn/recurrent_op_utils.cc - DEPS net_op tensor_array) -endif() list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS}) foreach(src ${GENERAL_OPS}) From 1a309bfa5bbba3b80c98ce6824a1f940b178a6a2 Mon Sep 17 00:00:00 2001 From: typhoonzero Date: Mon, 27 Nov 2017 20:35:55 +0800 Subject: [PATCH 28/28] update copyfrom --- paddle/operators/detail/send_impl.cc | 2 +- paddle/operators/recv_op.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/paddle/operators/detail/send_impl.cc b/paddle/operators/detail/send_impl.cc index c2510ccc9efe14..da1ddf75d2afb8 100644 --- a/paddle/operators/detail/send_impl.cc +++ b/paddle/operators/detail/send_impl.cc @@ -45,7 +45,7 @@ bool RPCClient::SendVariable(const framework::Scope& scope, auto* outvar = scope.FindVar(outname); framework::LoDTensor* out_tensor = outvar->GetMutable(); // FIXME(typhoonzero): do not copy. - out_tensor->CopyFrom(ret_tensor, ctx.GetPlace(), ctx); + framework::CopyFrom(ret_tensor, ctx.GetPlace(), ctx, out_tensor); return true; } diff --git a/paddle/operators/recv_op.cc b/paddle/operators/recv_op.cc index 045b77154e72e2..c69e416e10f2a9 100644 --- a/paddle/operators/recv_op.cc +++ b/paddle/operators/recv_op.cc @@ -70,7 +70,7 @@ class RecvOp : public framework::OperatorBase { auto *var = recv_scope.Var(Input("RX")); auto *tensor = var->GetMutable(); // FIXME(typhoonzero): do not copy - tensor->CopyFrom(t, dev_ctx.GetPlace(), dev_ctx); + framework::CopyFrom(t, dev_ctx.GetPlace(), dev_ctx, tensor); auto *block = Attr("OptimizeBlock"); auto *program = block->Program();
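With the series applied, a standalone client could exercise the RecvOp service roughly as in the sketch below. This is a minimal sketch only: the endpoint and variable names mirror the unit test, grpc::CreateChannel and grpc::InsecureChannelCredentials are stock gRPC, and everything else comes from the headers added in these patches.

// client_sketch.cc -- minimal client, assumes detail::RPCClient from this series.
#include <grpc++/grpc++.h>

#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/scope.h"
#include "paddle/operators/detail/send_recv_impl.h"
#include "paddle/platform/place.h"

int main() {
  paddle::framework::Scope scope;
  // Fill "X" with the same 10x10 ramp the unit test uses.
  auto *tensor = scope.Var("X")->GetMutable<paddle::framework::LoDTensor>();
  tensor->Resize({10, 10});
  float *data = tensor->mutable_data<float>(paddle::platform::CPUPlace());
  for (int64_t i = 0; i < tensor->numel(); ++i) {
    data[i] = static_cast<float>(i);
  }
  scope.Var("Out");  // SendVariable deserializes the reply into "Out".

  paddle::operators::detail::RPCClient client(grpc::CreateChannel(
      "127.0.0.1:6174", grpc::InsecureChannelCredentials()));
  if (!client.SendVariable(scope, "X", "Out")) {
    LOG(ERROR) << "send variable error";
  }
  return 0;
}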