diff --git a/benchmark/fluid/mnist.py b/benchmark/fluid/mnist.py
index 43866da9cb113..dc10ac2ec195a 100644
--- a/benchmark/fluid/mnist.py
+++ b/benchmark/fluid/mnist.py
@@ -139,9 +139,6 @@ def run_benchmark(model, args):
# inference program
inference_program = fluid.default_main_program().clone()
- with fluid.program_guard(inference_program):
- inference_program = fluid.io.get_inference_program(
- target_vars=[batch_acc, batch_size_tensor])
# Optimization
opt = fluid.optimizer.AdamOptimizer(
@@ -161,7 +158,7 @@ def run_benchmark(model, args):
train_reader = paddle.batch(
paddle.dataset.mnist.train(), batch_size=args.batch_size)
- accuracy = fluid.average.WeightedAverage()
+ accuracy = fluid.metrics.Accuracy()
iters, num_samples, start_time = 0, 0, time.time()
for pass_id in range(args.pass_num):
accuracy.reset()
@@ -184,7 +181,7 @@ def run_benchmark(model, args):
"label": y_data},
fetch_list=[avg_cost, batch_acc, batch_size_tensor]
) # The accuracy is the accumulation of batches, but not the current batch.
- accuracy.add(value=outs[1], weight=outs[2])
+ accuracy.update(value=outs[1], weight=outs[2])
iters += 1
num_samples += len(y_data)
loss = np.array(outs[0])
diff --git a/cmake/cblas.cmake b/cmake/cblas.cmake
index 6320b17520a68..52a22c1fbf477 100644
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -62,29 +62,33 @@ endif()
## Then find the reference-cblas. www.netlib.org/blas/
-
-
set(REFERENCE_CBLAS_ROOT $ENV{REFERENCE_CBLAS_ROOT} CACHE PATH
"Folder contains reference-cblas")
-set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
- ${REFERENCE_CBLAS_ROOT}/include
- /usr/include
- /usr/include/cblas
-)
-
-set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
- ${REFERENCE_CBLAS_ROOT}/lib
- /usr/lib
- /usr/lib/blas/reference/
- /usr/lib/reference/
-)
+if(NOT CMAKE_CROSSCOMPILING)
+ set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS
+ ${REFERENCE_CBLAS_ROOT}/include
+ /usr/include
+ /usr/include/cblas
+ )
+
+ set(REFERENCE_CBLAS_LIB_SEARCH_PATHS
+ ${REFERENCE_CBLAS_ROOT}/lib
+ /usr/lib
+ /usr/lib/blas/reference/
+ /usr/lib/reference/
+ )
+else()
+  # Disable searching for reference cblas under the host's system paths
+ set(REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/include)
+ set(REFERENCE_CBLAS_LIB_SEARCH_PATHS ${REFERENCE_CBLAS_ROOT}/lib)
+endif()
find_path(REFERENCE_CBLAS_INCLUDE_DIR NAMES cblas.h PATHS
${REFERENCE_CBLAS_INCLUDE_SEARCH_PATHS})
find_library(REFERENCE_CBLAS_LIBRARY NAMES cblas PATHS
${REFERENCE_CBLAS_LIB_SEARCH_PATHS})
-if (REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
+if(REFERENCE_CBLAS_INCLUDE_DIR AND REFERENCE_CBLAS_LIBRARY)
set(CBLAS_FOUND ON)
set(CBLAS_PROVIDER REFERENCE)
set(CBLAS_INC_DIR ${REFERENCE_CBLAS_INCLUDE_DIR})
diff --git a/cmake/external/grpc.cmake b/cmake/external/grpc.cmake
index 0853b981813c5..aa24915947077 100644
--- a/cmake/external/grpc.cmake
+++ b/cmake/external/grpc.cmake
@@ -24,16 +24,16 @@ SET(GRPC_INSTALL_DIR ${THIRD_PARTY_PATH}/install/grpc)
SET(GRPC_INCLUDE_DIR "${GRPC_INSTALL_DIR}/include/" CACHE PATH "grpc include directory." FORCE)
SET(GRPC_CPP_PLUGIN "${GRPC_INSTALL_DIR}/bin/grpc_cpp_plugin" CACHE FILEPATH "GRPC_CPP_PLUGIN" FORCE)
IF(APPLE)
- SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
+ SET(BUILD_CMD make -n HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin | sed "s/-Werror//g" | sh)
ELSE()
- SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j8 static grpc_cpp_plugin)
+ SET(BUILD_CMD make HAS_SYSTEM_PROTOBUF=false -s -j static grpc_cpp_plugin)
ENDIF()
ExternalProject_Add(
extern_grpc
DEPENDS protobuf zlib
GIT_REPOSITORY "https://github.com/grpc/grpc.git"
- GIT_TAG "v1.8.x"
+ GIT_TAG "v1.11.x"
PREFIX ${GRPC_SOURCES_DIR}
UPDATE_COMMAND ""
CONFIGURE_COMMAND ""
diff --git a/cmake/external/nccl.cmake b/cmake/external/nccl.cmake
deleted file mode 100644
index af5c689c35247..0000000000000
--- a/cmake/external/nccl.cmake
+++ /dev/null
@@ -1,67 +0,0 @@
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-if(NOT WITH_GPU)
- return()
-endif()
-
-include(ExternalProject)
-
-set(NCCL_SOURCE_DIR ${THIRD_PARTY_PATH}/nccl)
-
-include_directories(${NCCL_SOURCE_DIR}/src/extern_nccl/src)
-
-if(WITH_DSO)
- # If we use DSO, we do not build nccl, just download the dependencies
- set(NCCL_BUILD_COMMAND "")
- set(NCCL_INSTALL_COMMAND "")
- set(NCCL_INSTALL_DIR "")
-else()
- # otherwise, we build nccl and link it.
- set(NCCL_INSTALL_DIR ${THIRD_PARTY_PATH}/install/nccl)
- # Note: cuda 8.0 is needed to make nccl
- # When cuda is not installed on the system directory, need to set CUDA_HOME to your cuda root
- set(NCCL_BUILD_COMMAND "make -j 8")
- set(NCCL_INSTALL_COMMAND "make install PREFIX=${NCCL_INSTALL_DIR}")
-endif()
-
-ExternalProject_Add(
- extern_nccl
- ${EXTERNAL_PROJECT_LOG_ARGS}
- GIT_REPOSITORY "https://github.com/NVIDIA/nccl.git"
- GIT_TAG "v1.3.4-1"
- PREFIX "${NCCL_SOURCE_DIR}"
- UPDATE_COMMAND ""
- CONFIGURE_COMMAND ""
- BUILD_COMMAND "${NCCL_BUILD_COMMAND}"
- INSTALL_COMMAND "${NCCL_INSTALL_COMMAND}"
- INSTALL_DIR "${NCCL_INSTALL_DIR}"
- TEST_COMMAND ""
-)
-
-if(WITH_DSO)
- if(${CMAKE_VERSION} VERSION_LESS "3.3.0")
- set(dummyfile ${CMAKE_CURRENT_BINARY_DIR}/lib_nccl_dummy.c)
- file(WRITE ${dummyfile} "const char * dummy_nccl = \"${dummyfile}\";")
- add_library(nccl STATIC ${dummyfile})
- else()
- add_library(nccl INTERFACE)
- endif()
-else()
- add_library(nccl STATIC IMPORTED GLOBAL)
- set_property(TARGET nccl PROPERTY IMPORTED_LOCATION
- ${NCCL_INSTALL_DIR}/lib/libnccl_static.a)
-endif()
-
-add_dependencies(nccl extern_nccl)
diff --git a/cmake/external/snappy.cmake b/cmake/external/snappy.cmake
index 71f54c425d4c3..80282329c6ac6 100644
--- a/cmake/external/snappy.cmake
+++ b/cmake/external/snappy.cmake
@@ -11,19 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-#
-IF(MOBILE_INFERENCE)
+if(MOBILE_INFERENCE OR RPI)
return()
-ENDIF()
+endif()
include (ExternalProject)
# NOTE: snappy is needed when linking with recordio
-SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
-SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
-SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE)
+set(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
+set(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
+set(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include" CACHE PATH "snappy include directory." FORCE)
+
+set(SNAPPY_LIBRARIES "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
ExternalProject_Add(
extern_snappy
@@ -51,8 +52,7 @@ ExternalProject_Add(
)
add_library(snappy STATIC IMPORTED GLOBAL)
-set_property(TARGET snappy PROPERTY IMPORTED_LOCATION
- "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")
+set_property(TARGET snappy PROPERTY IMPORTED_LOCATION ${SNAPPY_LIBRARIES})
include_directories(${SNAPPY_INCLUDE_DIR})
add_dependencies(snappy extern_snappy)
diff --git a/cmake/external/snappystream.cmake b/cmake/external/snappystream.cmake
index 8f7a3bf8eeaef..20a96430823d0 100644
--- a/cmake/external/snappystream.cmake
+++ b/cmake/external/snappystream.cmake
@@ -11,9 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-#
-IF(MOBILE_INFERENCE)
+IF(MOBILE_INFERENCE OR RPI)
return()
ENDIF()
@@ -21,9 +20,11 @@ include (ExternalProject)
# NOTE: snappy is needed when linking with recordio
-SET(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
-SET(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
-SET(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include/" CACHE PATH "snappy stream include directory." FORCE)
+set(SNAPPYSTREAM_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy_stream)
+set(SNAPPYSTREAM_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy_stream)
+set(SNAPPYSTREAM_INCLUDE_DIR "${SNAPPYSTREAM_INSTALL_DIR}/include" CACHE PATH "snappy stream include directory." FORCE)
+
+set(SNAPPYSTREAM_LIBRARIES "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
ExternalProject_Add(
extern_snappystream
@@ -51,8 +52,7 @@ ExternalProject_Add(
)
add_library(snappystream STATIC IMPORTED GLOBAL)
-set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION
- "${SNAPPYSTREAM_INSTALL_DIR}/lib/libsnappystream.a")
+set_property(TARGET snappystream PROPERTY IMPORTED_LOCATION ${SNAPPYSTREAM_LIBRARIES})
include_directories(${SNAPPYSTREAM_INCLUDE_DIR}) # For snappysteam to include its own headers.
include_directories(${THIRD_PARTY_PATH}/install) # For Paddle to include snappy stream headers.
diff --git a/cmake/generic.cmake b/cmake/generic.cmake
index c4c9f77df8d57..1d3e2ade6d393 100644
--- a/cmake/generic.cmake
+++ b/cmake/generic.cmake
@@ -195,14 +195,7 @@ function(cc_library TARGET_NAME)
list(REMOVE_ITEM cc_library_DEPS warpctc)
add_dependencies(${TARGET_NAME} warpctc)
endif()
- if("${cc_library_DEPS}" MATCHES "ARCHIVE_START")
- # Support linking flags: --whole-archive (Linux) / -force_load (MacOS).
- # WARNING: Please don't use ARCHIVE_START&ARCHIVE_END if TARGET_NAME will be linked by other libraries.
- target_circle_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
- list(REMOVE_ITEM cc_library_DEPS ARCHIVE_START ARCHIVE_END)
- else()
- target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
- endif()
+ target_link_libraries(${TARGET_NAME} ${cc_library_DEPS})
add_dependencies(${TARGET_NAME} ${cc_library_DEPS})
endif()
@@ -243,11 +236,7 @@ function(cc_test TARGET_NAME)
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${cc_test_SRCS})
- # Support linking flags: --whole-archive (Linux) / -force_load (MacOS)
- target_circle_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
- if("${cc_test_DEPS}" MATCHES "ARCHIVE_START")
- list(REMOVE_ITEM cc_test_DEPS ARCHIVE_START ARCHIVE_END)
- endif()
+ target_link_libraries(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main memory gtest gflags glog)
add_test(NAME ${TARGET_NAME}
COMMAND ${TARGET_NAME} ${cc_test_ARGS}
diff --git a/cmake/inference_lib.cmake b/cmake/inference_lib.cmake
index 0323cd9698cba..cc758019827b9 100644
--- a/cmake/inference_lib.cmake
+++ b/cmake/inference_lib.cmake
@@ -1,7 +1,22 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
set_property(GLOBAL PROPERTY FLUID_MODULES "")
# find all fluid modules is used for paddle fluid static library
function(find_fluid_modules TARGET_NAME)
get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
+ string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path ${__target_path})
string(FIND "${__target_path}" "fluid" pos)
if(pos GREATER 1)
get_property(fluid_modules GLOBAL PROPERTY FLUID_MODULES)
@@ -77,6 +92,23 @@ elseif (WITH_MKLML)
)
endif()
+if(NOT MOBILE_INFERENCE AND NOT RPI)
+ set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappy")
+ copy(snappy_lib
+ SRCS ${SNAPPY_INCLUDE_DIR} ${SNAPPY_LIBRARIES}
+ DSTS ${dst_dir} ${dst_dir}/lib)
+
+ set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/snappystream")
+ copy(snappystream_lib
+ SRCS ${SNAPPYSTREAM_INCLUDE_DIR} ${SNAPPYSTREAM_LIBRARIES}
+ DSTS ${dst_dir} ${dst_dir}/lib)
+
+ set(dst_dir "${CMAKE_INSTALL_PREFIX}/third_party/install/zlib")
+ copy(zlib_lib
+ SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
+ DSTS ${dst_dir} ${dst_dir}/lib)
+endif()
+
# paddle fluid module
set(src_dir "${PADDLE_SOURCE_DIR}/paddle/fluid")
set(dst_dir "${CMAKE_INSTALL_PREFIX}/paddle/fluid")
diff --git a/doc/fluid/design/motivation/fluid.md b/doc/fluid/design/motivation/fluid.md
index 5e147f8263e68..4b7696cc1bbf5 100644
--- a/doc/fluid/design/motivation/fluid.md
+++ b/doc/fluid/design/motivation/fluid.md
@@ -119,7 +119,7 @@ An actual Fluid example is described [here](https://github.com/PaddlePaddle/Pad
From the example, the Fluid programs look very similar to their PyTorch equivalent programs, except that Fluid's loop structure, wrapped with Python's `with` statement, could run much faster than just a Python loop.
-We have more examples of the [`if-then-else`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/if_else_op.md) structure of Fluid.
+We have more examples of the [`if-then-else`](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/execution/if_else_op.md) structure of Fluid.
## Turing Completeness
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index 23615f8830e99..4231f2bb5cd80 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -65,39 +65,55 @@ PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好D
不使用PaddlePaddle.org工具
--------------------------
-使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即
+使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。该方法与 `从源码编译PaddlePaddle `_ 相似,通过从源码中构建可用于编译PaddlePaddle文档的Docker镜像并运行,在进入Docker容器后使用源码中的脚本构建PaddlePaddle文档,具体步骤如下:
-[TBD]
+.. code-block:: bash
+
+ git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
+
+ # 从源码中构建可用于编译PaddlePaddle文档的Docker镜像
+ docker build -t paddle:dev .
+ docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" -e "WITH_DOC=ON" paddle:dev /bin/bash
+
+ # 进入Docker容器后使用build.sh脚本构建PaddlePaddle文档
+ bash -x /paddle/paddle/scripts/docker/build.sh
+
+注:上述命令把当前目录(源码根目录)映射为 container 里的 :code:`/paddle` 目录。
+
+编译完成后,会产生 ``doc/v2`` 和 ``doc/fluid`` 两个目录,在这两个目录下分别都生成 ``cn/html/`` 、 ``en/html`` 、 ``api/en/html`` 共三个子目录,分别进入这些目录下,执行以下命令:
+
+.. code-block:: bash
+
+ python -m SimpleHTTPServer 8088
+
+在浏览器中输入 http://localhost:8088 就可以看到编译生成的 ``v2`` 和 ``fluid`` 两种版本的中/英文的文档页面和英文的API页面。
如果不想使用Docker,也可以使用以下命令直接构建PaddlePaddle文档,即
.. code-block:: bash
- mkdir paddle
- cd paddle
git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
# 如果只需要构建使用文档,则执行以下命令
- make -j $processors gen_proto_py
- make -j $processors paddle_docs paddle_docs_cn
+ make -j $processors paddle_docs
# 如果只需要构建API,则执行以下命令
- make -j $processors gen_proto_py framework_py_proto
- make -j $processors copy_paddle_pybind
- make -j $processors paddle_api_docs
+ make -j $processors paddle_apis
其中$processors代表启动和CPU核一样多的进程来并行编译,可以根据本机的CPU核数设置相应的值。
-编译完成后,进入 ``doc/v2`` 目录,如果选择构建文档则会在该目录下生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会生成 ``api/en/html`` 目录,分别进入这些目录下,执行以下命令:
+编译完成后,同样会产生 ``doc/v2`` 和 ``doc/fluid`` 两个目录,如果选择构建文档则会在这两个目录下分别都生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会在这两个目录下分别生成 ``api/en/html`` 目录,分别进入这些子目录下,执行以下命令:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-在浏览器中输入 http://localhost:8088 就可以看到编译生成的中/英文的文档页面和英文的API页面,下图为生成的英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
+在浏览器中输入 http://localhost:8088 就可以看到编译生成的 ``v2`` 和 ``fluid`` 两种版本的中/英文的文档页面和英文的API页面。下图为生成的 ``v2`` 英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
.. image:: src/doc_en.png
:align: center
diff --git a/doc/v2/dev/write_docs_en.rst b/doc/v2/dev/write_docs_en.rst
index 15ff0d34ad622..6105455e202e4 100644
--- a/doc/v2/dev/write_docs_en.rst
+++ b/doc/v2/dev/write_docs_en.rst
@@ -68,39 +68,56 @@ Please `click here `_ on how to install Docker. After Docker is installed, you could use the scripts in the source directory to build the documentation.
+To build PaddlePaddle's documentation with Docker, you need to install Docker first. Please refer to `Docker's official website `_ on how to install Docker. This method is quite similar to `Build From Sources `_ : construct, from source code, a Docker image that can be used to build the PaddlePaddle documentation, then enter the Docker container and use the script ``build.sh`` in the source directory to build the documentation. The specific steps are as follows:
-[TBD]
+.. code-block:: bash
+
+ git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
+
+ # Construct a docker image from source code
+ docker build -t paddle:dev .
+ docker run -it -v $PWD:/paddle -e "WITH_GPU=OFF" -e "WITH_TESTING=OFF" -e "WITH_DOC=ON" paddle:dev /bin/bash
+
+ # Use build.sh to build PaddlePaddle documentation
+ bash -x /paddle/paddle/scripts/docker/build.sh
+
+Note: The above commands map the current directory (the source root directory) to the :code:`/paddle` directory in the container.
+
+After compiling, two directories are generated: ``doc/v2`` and ``doc/fluid``, each containing the three subdirectories ``cn/html/``, ``en/html`` and ``api/en/html``. Please enter these directories respectively and execute the following command:
+
+.. code-block:: bash
+
+ python -m SimpleHTTPServer 8088
+
+Open a web browser and navigate to http://localhost:8088 to see the compiled ``v2`` and ``fluid`` Chinese/English documentation pages and the English API pages.
If you do not wish to use Docker, you can also use the following commands to directly build the PaddlePaddle documentation.
.. code-block:: bash
- mkdir paddle
- cd paddle
+
git clone https://github.com/PaddlePaddle/Paddle.git
+ cd Paddle
mkdir -p build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
# If you only need to build documents, use the following commands
- make -j $processors gen_proto_py
- make -j $processors paddle_docs paddle_docs_cn
+ make -j $processors paddle_docs
# If you only need to build APIs, use the following commands
- make -j $processors gen_proto_py framework_py_proto
- make -j $processors copy_paddle_pybind
- make -j $processors paddle_api_docs
+ make -j $processors paddle_apis
$processors indicates that as many processes as the CPU cores are started to compile in parallel. It should be set according to the number of CPU cores of your machine.
-After the compilation is complete, enter the ``doc/v2`` directory. If you chose to build documents, it will generate ``cn/html/`` and ``en/html`` subdirectories under this directory. If you chose to build APIs,it will generate``api/en/html`` subdirectory. Please enter these directories respectively and execute the following commands:
+After compiling, the two directories ``doc/v2`` and ``doc/fluid`` are also generated. If you chose to build the documents, two subdirectories ``cn/html/`` and ``en/html`` will be generated in both directories. If you chose to build the APIs, a subdirectory ``api/en/html`` will be generated. Please enter these directories respectively and execute the following command:
.. code-block:: bash
python -m SimpleHTTPServer 8088
-Use a web browser and navigate to http://localhost:8000, you could see the compiled Chinese/English documents page and the English APIs page. The following figure is an example of the built English documents home page. Note that due to the sphinx's original theme used in the example, the style of the page is not consistent with the official website, but this does not affect the developer's debugging.
+Open a web browser and navigate to http://localhost:8088 to see the compiled ``v2`` and ``fluid`` Chinese/English documentation pages and the English API pages. The following figure shows an example of the generated ``v2`` English documentation home page. Note that because the example uses Sphinx's original theme, the page style is not consistent with the official website, but this does not affect debugging.
.. image:: src/doc_en.png
:align: center
diff --git a/doc/v2/howto/cluster/multi_cluster/k8s_distributed_en.md b/doc/v2/howto/cluster/multi_cluster/k8s_distributed_en.md
index bc3d50b3ffd3b..dee1b7554f97a 100644
--- a/doc/v2/howto/cluster/multi_cluster/k8s_distributed_en.md
+++ b/doc/v2/howto/cluster/multi_cluster/k8s_distributed_en.md
@@ -1,3 +1,372 @@
-# Kubernetes Distributed
+# Distributed Training on Kubernetes
-TBD
+We introduced how to create a PaddlePaddle Job with a single node on Kubernetes in the
+previous document.
+In this article, we will introduce how to create a PaddlePaddle job with multiple nodes
+on a Kubernetes cluster.
+
+## Overall Architecture
+
+Before creating a training job, the user needs to slice the training data and deploy
+the Python scripts along with it into the distributed file system
+(we can use different types of Kubernetes Volumes to mount different distributed
+file systems). Before training starts, the program copies the training data into the
+container, and the models are saved to the same path during training. The overall architecture
+is as follows:
+
+![PaddlePaddle on Kubernetes Architecture](src/k8s-paddle-arch.png)
+
+The above figure describes a distributed training architecture with 3 nodes; each
+Pod mounts a folder of the distributed file system through a Kubernetes Volume to store training data
+and models. Kubernetes creates 3 Pods for this training phase and schedules them onto
+3 nodes, and each Pod runs a PaddlePaddle container. After the containers are created,
+PaddlePaddle starts the communication between the PServer and Trainer processes and reads the
+training data for this training job.
+
+As described above, we can start a PaddlePaddle distributed training job on a
+Kubernetes-ready cluster with the following steps:
+
+1. [Build PaddlePaddle Docker Image](#Build a Docker Image)
+1. [Split training data and upload to the distributed file system](#Upload Training Data)
+1. [Edit a YAML file and create a Kubernetes Job](#Create a Job)
+1. [Check the output](#Check The Output)
+
+We will introduce these steps as follows:
+
+### Build a Docker Image
+
+The training Docker image needs to package the paddle pserver and paddle trainer runtimes, as well as two more steps before we can kick off the training:
+
+- Copying the training data into the container.
+- Generating the initialization arguments for the `Paddle PServer` and `Paddle Trainer` processes.
+
+Since the official paddlepaddle Docker image already contains the runtimes we need, we take it as the base image and add some scripts for the steps mentioned above to build our training image. For more details, please refer to the following link:
+- https://github.com/PaddlePaddle/Paddle/blob/develop/doc/howto/usage/cluster/src/k8s_train/Dockerfile
+
+
+```bash
+$ cd doc/howto/usage/k8s/src/k8s_train
+$ docker build -t [YOUR_REPO]/paddle:mypaddle .
+```
+
+Then upload the new Docker image to a Docker registry:
+
+```bash
+docker push [YOUR_REPO]/paddle:mypaddle
+```
+
+**[NOTE]**: In the above commands, `[YOUR_REPO]` represents your Docker repository;
+replace it with your own repository name. In the rest of this document, `[YOUR_REPO]/paddle:mypaddle`
+refers to the Docker image built in this step.
+
+### Prepare Training Data
+
+We can download and split the training data by creating a Kubernetes Job, or customize the image
+by editing [k8s_train](./src/k8s_train/).
+
+Before creating the Job, we need to bind a [persistentVolumeClaim](https://kubernetes.io/docs/user-guide/persistent-volumes) backed by the
+appropriate file system; the generated dataset will be saved on this volume.
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: paddle-data
+spec:
+ template:
+ metadata:
+ name: pi
+ spec:
+ hostNetwork: true
+ containers:
+ - name: paddle-data
+ image: paddlepaddle/paddle-tutorial:k8s_data
+ imagePullPolicy: Always
+ volumeMounts:
+ - mountPath: "/mnt"
+ name: nfs
+ env:
+ - name: OUT_DIR
+ value: /home/work/mfs/paddle-cluster-job
+ - name: SPLIT_COUNT
+ value: "3"
+ volumes:
+ - name: nfs
+ persistentVolumeClaim:
+ claimName: mfs
+ restartPolicy: Never
+```
+
+Create the Job with the following command:
+
+```bash
+> kubectl create -f xxx.yaml
+```
+
+If the Job is created successfully, you will see output like this:
+
+```bash
+[root@paddle-kubernetes-node0 nfsdir]$ tree -d
+.
+`-- paddle-cluster-job
+ |-- 0
+ | `-- data
+ |-- 1
+ | `-- data
+ |-- 2
+ | `-- data
+ |-- output
+ |-- quick_start
+```
+
+The `paddle-cluster-job` above is the name of this training job; we need 3
+PaddlePaddle training nodes and save the split training data under the `paddle-cluster-job` path.
+The folders `0`, `1` and `2` represent the `training_id` of each node, the `quick_start` folder stores the training data, and the `output` folder stores the models and logs.
+
+
+### Create a Job
+
+Kubernetes allows users to describe objects with YAML files and create them with a command-line
+tool.
+
+The Job YAML file describes which Docker image is used in this training job, how many nodes are created, the startup arguments of the `Paddle PServer/Trainer` processes and the types of Volumes. You can find the details of the YAML fields in the
+[Kubernetes Job API](http://kubernetes.io/docs/api-reference/batch/v1/definitions/#_v1_job).
+The following is an example for this training job:
+
+```yaml
+apiVersion: batch/v1
+kind: Job
+metadata:
+ name: paddle-cluster-job
+spec:
+ parallelism: 3
+ completions: 3
+ template:
+ metadata:
+ name: paddle-cluster-job
+ spec:
+ volumes:
+ - name: jobpath
+ hostPath:
+ path: /home/work/mfs
+ containers:
+ - name: trainer
+ image: [YOUR_REPO]/paddle:mypaddle
+ command: ["bin/bash", "-c", "/root/start.sh"]
+ env:
+ - name: JOB_NAME
+ value: paddle-cluster-job
+ - name: JOB_PATH
+ value: /home/jobpath
+ - name: JOB_NAMESPACE
+ value: default
+ - name: TRAIN_CONFIG_DIR
+ value: recommendation
+ - name: CONF_PADDLE_NIC
+ value: eth0
+ - name: CONF_PADDLE_PORT
+ value: "7164"
+ - name: CONF_PADDLE_PORTS_NUM
+ value: "2"
+ - name: CONF_PADDLE_PORTS_NUM_SPARSE
+ value: "2"
+ - name: CONF_PADDLE_GRADIENT_NUM
+ value: "3"
+ volumeMounts:
+ - name: jobpath
+ mountPath: /home/jobpath
+ restartPolicy: Never
+```
+
+In the above YAML file:
+- `metadata.name`, the job name.
+- `parallelism`, the Kubernetes Job creates `parallelism` Pods at the same time.
+- `completions`, the Job reaches the success status only when the number of successful Pods (exit code 0)
+  is equal to `completions`.
+- `volumeMounts`, the name field `jobpath` is a key, and the `mountPath` field represents
+  the path in the container; we define `jobpath` in the `volumes` field and use `hostPath`
+  to configure the host path we want to mount.
+- `env`, the environment variables in the container; we pass some startup arguments
+  this way. The details are as follows:
+  - JOB_PATH: the mount path in the container
+  - JOB_NAME: the job name
+  - TRAIN_CONFIG_DIR: the job path in the container; we can find the training data path by
+    combining it with JOB_NAME.
+  - CONF_PADDLE_NIC: the argument `--nics` of the `Paddle PServer` process, the network
+    device name.
+  - CONF_PADDLE_PORT: the argument `--port` of the `Paddle PServer` process.
+  - CONF_PADDLE_PORTS_NUM: the argument `--ports_num` of `Paddle PServer`, the number of ports
+    for dense parameter updates.
+  - CONF_PADDLE_PORTS_NUM_SPARSE: the argument `--ports_num_for_sparse` of `Paddle PServer`,
+    the number of ports for sparse parameter updates.
+  - CONF_PADDLE_GRADIENT_NUM: the number of training nodes, the argument
+    `--num_gradient_servers` of `Paddle PServer` and `Paddle Trainer`.
+
+You can find more detailed information
+[here](http://www.paddlepaddle.org/docs/develop/documentation/zh/howto/usage/cmd_parameter/detail_introduction_cn.html).
+
+Once the YAML file is ready, we can use the Kubernetes command-line tool to create the Job:
+
+```bash
+kubectl create -f job.yaml
+```
+
+Upon successful creation, Kubernetes creates 3 Pods as PaddlePaddle training nodes,
+pulls the Docker image and begins training.
+
+
+### Check the Output
+
+During training, we can check the logs and the output models, which are stored in
+the `output` folder.
+
+**NOTE**: `node_0`, `node_1` and `node_2` represent the
+`trainer_id` of the PaddlePaddle training job rather than the node id of Kubernetes.
+
+```bash
+[root@paddle-kubernetes-node0 output]# tree -d
+.
+├── node_0
+│ ├── server.log
+│ └── train.log
+├── node_1
+│ ├── server.log
+│ └── train.log
+├── node_2
+......
+├── pass-00002
+│ ├── done
+│ ├── ___embedding_0__.w0
+│ ├── ___embedding_1__.w0
+......
+```
+
+We can check the status of each training Pod by viewing its logs:
+
+```bash
+[root@paddle-kubernetes-node0 node_0]# cat train.log
+I1116 09:10:17.123121 50 Util.cpp:155] commandline:
+ /usr/local/bin/../opt/paddle/bin/paddle_trainer
+ --nics=eth0 --port=7164
+ --ports_num=2 --comment=paddle_process_by_paddle
+ --pservers=192.168.129.66,192.168.223.143,192.168.129.71
+ --ports_num_for_sparse=2 --config=./trainer_config.py
+ --trainer_count=4 --num_passes=10 --use_gpu=0
+ --log_period=50 --dot_period=10 --saving_period=1
+ --local=0 --trainer_id=0
+ --save_dir=/home/jobpath/paddle-cluster-job/output
+I1116 09:10:17.123440 50 Util.cpp:130] Calling runInitFunctions
+I1116 09:10:17.123764 50 Util.cpp:143] Call runInitFunctions done.
+[WARNING 2016-11-16 09:10:17,227 default_decorators.py:40] please use keyword arguments in paddle config.
+[INFO 2016-11-16 09:10:17,239 networks.py:1282] The input order is [movie_id, title, genres, user_id, gender, age, occupation, rating]
+[INFO 2016-11-16 09:10:17,239 networks.py:1289] The output order is [__square_error_cost_0__]
+I1116 09:10:17.392917 50 Trainer.cpp:170] trainer mode: Normal
+I1116 09:10:17.613910 50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
+I1116 09:10:17.680917 50 PyDataProvider2.cpp:257] loading dataprovider dataprovider::process
+I1116 09:10:17.681543 50 GradientMachine.cpp:134] Initing parameters..
+I1116 09:10:18.012390 50 GradientMachine.cpp:141] Init parameters done.
+I1116 09:10:18.018641 50 ParameterClient2.cpp:122] pserver 0 192.168.129.66:7164
+I1116 09:10:18.018950 50 ParameterClient2.cpp:122] pserver 1 192.168.129.66:7165
+I1116 09:10:18.019069 50 ParameterClient2.cpp:122] pserver 2 192.168.223.143:7164
+I1116 09:10:18.019492 50 ParameterClient2.cpp:122] pserver 3 192.168.223.143:7165
+I1116 09:10:18.019716 50 ParameterClient2.cpp:122] pserver 4 192.168.129.71:7164
+I1116 09:10:18.019836 50 ParameterClient2.cpp:122] pserver 5 192.168.129.71:7165
+```
+
+## Some Additional Details
+
+### Using Environment Variables
+
+Usually we use environment variables to configure the PaddlePaddle Job that runs on
+Kubernetes; `start_paddle.py` is a startup script that converts the environment variables
+into the startup arguments of the PaddlePaddle processes:
+
+```python
+API = "/api/v1/namespaces/"
+JOBSELECTOR = "labelSelector=job-name="
+JOB_PATH = os.getenv("JOB_PATH") + "/" + os.getenv("JOB_NAME")
+JOB_PATH_OUTPUT = JOB_PATH + "/output"
+JOBNAME = os.getenv("JOB_NAME")
+NAMESPACE = os.getenv("JOB_NAMESPACE")
+PADDLE_NIC = os.getenv("CONF_PADDLE_NIC")
+PADDLE_PORT = os.getenv("CONF_PADDLE_PORT")
+PADDLE_PORTS_NUM = os.getenv("CONF_PADDLE_PORTS_NUM")
+PADDLE_PORTS_NUM_SPARSE = os.getenv("CONF_PADDLE_PORTS_NUM_SPARSE")
+PADDLE_SERVER_NUM = os.getenv("CONF_PADDLE_GRADIENT_NUM")
+```
+
+### Communication between Pods
+
+At the beginning of `start_paddle.py`, it initializes and parses the arguments.
+
+```python
+parser = argparse.ArgumentParser(prog="start_paddle.py",
+ description='simple tool for k8s')
+ args, train_args_list = parser.parse_known_args()
+ train_args = refine_unknown_args(train_args_list)
+ train_args_dict = dict(zip(train_args[:-1:2], train_args[1::2]))
+ podlist = getPodList()
+```
+
+It then queries the status of all the other Pods of this Job with the function `getPodList()`, and fetches the `trainer_id` with the function `getIdMap(podlist)` once all the Pods are in the `RUNNING` state.
+
+```python
+ podlist = getPodList()
+ # need to wait until all pods are running
+ while not isPodAllRunning(podlist):
+ time.sleep(10)
+ podlist = getPodList()
+ idMap = getIdMap(podlist)
+```
+
+**NOTE**: `getPodList()` fetches all the Pods in the current namespace; if some unrelated
+Pods are already running, it may cause errors. We will use [StatefulSets](https://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets) instead of
+Kubernetes Pods or ReplicaSets in the future.
+
+The function `getIdMap(podlist)` fetches the IP addresses of the Pods in `podlist` and then sorts them
+to generate the `trainer_id` values.
+
+```python
+def getIdMap(podlist):
+ '''
+    generate trainer_id by ip
+ '''
+ ips = []
+ for pod in podlist["items"]:
+ ips.append(pod["status"]["podIP"])
+ ips.sort()
+ idMap = {}
+ for i in range(len(ips)):
+ idMap[ips[i]] = i
+ return idMap
+```
+
+After getting the `idMap`, we can generate the arguments of `Paddle PServer` and `Paddle Trainer`
+and then start them with `startPaddle(idMap, train_args_dict)`.
+
+### Create Job
+
+The main goal of `startPaddle` is to generate the arguments of the `Paddle PServer` and
+`Paddle Trainer` processes. Taking `Paddle Trainer` as an example, we parse the
+environment variables to get `PADDLE_NIC`, `PADDLE_PORT`, `PADDLE_PORTS_NUM`, etc.,
+and finally look up `trainerId` in `idMap` according to the local IP address.
+
+```python
+ program = 'paddle train'
+ args = " --nics=" + PADDLE_NIC
+ args += " --port=" + str(PADDLE_PORT)
+ args += " --ports_num=" + str(PADDLE_PORTS_NUM)
+ args += " --comment=" + "paddle_process_by_paddle"
+ ip_string = ""
+ for ip in idMap.keys():
+ ip_string += (ip + ",")
+ ip_string = ip_string.rstrip(",")
+ args += " --pservers=" + ip_string
+ args_ext = ""
+ for key, value in train_args_dict.items():
+ args_ext += (' --' + key + '=' + value)
+ localIP = socket.gethostbyname(socket.gethostname())
+ trainerId = idMap[localIP]
+ args += " " + args_ext + " --trainer_id=" + \
+ str(trainerId) + " --save_dir=" + JOB_PATH_OUTPUT
+```
diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index c44f8a8a8ecc1..8b1ca5e165483 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -24,6 +24,6 @@ if(NOT WITH_FLUID_ONLY)
endif()
add_subdirectory(testing)
-if(NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS)
+if(NOT MOBILE_INFERENCE AND NOT RPI)
add_subdirectory(fluid)
endif()
diff --git a/paddle/fluid/CMakeLists.txt b/paddle/fluid/CMakeLists.txt
index d725763b01d59..d274d96c29bdb 100644
--- a/paddle/fluid/CMakeLists.txt
+++ b/paddle/fluid/CMakeLists.txt
@@ -3,6 +3,7 @@ add_subdirectory(platform)
add_subdirectory(framework)
add_subdirectory(operators)
add_subdirectory(pybind)
-add_subdirectory(inference)
add_subdirectory(string)
add_subdirectory(recordio)
+# NOTE: please keep the inference subdirectory last.
+add_subdirectory(inference)
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 3840bbe83b68d..1f3ca24df16cf 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -79,14 +79,12 @@ add_custom_command(TARGET framework_py_proto POST_BUILD
COMMENT "Copy generated python proto into directory paddle/fluid/proto."
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
-cc_library(backward SRCS backward.cc DEPS net_op)
-cc_test(backward_test SRCS backward_test.cc DEPS backward recurrent_op device_context fill_constant_op)
cc_library(lod_rank_table SRCS lod_rank_table.cc DEPS lod_tensor)
cc_library(feed_fetch_method SRCS feed_fetch_method.cc DEPS lod_tensor scope glog)
cc_library(executor SRCS executor.cc DEPS op_registry device_context scope
-framework_proto backward glog lod_rank_table feed_fetch_method)
+framework_proto glog lod_rank_table feed_fetch_method)
cc_library(parallel_executor SRCS parallel_executor.cc DEPS multi_devices_graph_builder threaded_ssa_graph_executor)
diff --git a/paddle/fluid/framework/backward.cc b/paddle/fluid/framework/backward.cc
deleted file mode 100644
index 1314af2b3dab2..0000000000000
--- a/paddle/fluid/framework/backward.cc
+++ /dev/null
@@ -1,585 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/framework/backward.h"
-#include "paddle/fluid/operators/net_op.h"
-
-#include
-#include
-#include
-#include
-
-#include "paddle/fluid/framework/block_desc.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/net_op.h"
-
-namespace paddle {
-namespace framework {
-
-static std::unordered_set* g_ctrl_flow_ops_ = nullptr;
-// Control Flow operators's backward is significantly different from
-// computational operators. Hack Code here.
-// We should design a better way to backward CtrlFlowOps.
-static std::unordered_set& CtrlFlowOps() {
- if (g_ctrl_flow_ops_ == nullptr) {
- g_ctrl_flow_ops_ = new std::unordered_set{
- "increment", "lod_rank_table", "less_than"};
- }
- return *g_ctrl_flow_ops_;
-}
-
-static inline std::unique_ptr CreateGradOp(
- const OperatorBase& op, const std::unordered_set& no_grad_set,
- std::unordered_map* grad_to_var) {
- OpDesc op_desc;
- op_desc.SetInputMap(op.Inputs());
- op_desc.SetOutputMap(op.Outputs());
- op_desc.SetType(op.Type());
- op_desc.SetAttrMap(op.Attrs());
- auto& info = OpInfoMap::Instance().Get(op.Type());
- auto grad_descs = info.GradOpMaker()(op_desc, no_grad_set, grad_to_var, {});
- std::vector> grad_ops;
- grad_ops.reserve(grad_descs.size());
- std::transform(grad_descs.begin(), grad_descs.end(),
- std::back_inserter(grad_ops),
- [](const std::unique_ptr& grad_desc) {
- return OpRegistry::CreateOp(*grad_desc);
- });
- PADDLE_ENFORCE(!grad_ops.empty());
- if (grad_ops.size() == 1) {
- return std::move(grad_ops[0]);
- } else {
- auto net_op = new operators::NetOp();
- for (auto& grad_op : grad_ops) {
- net_op->AppendOp(std::move(grad_op));
- }
- net_op->CompleteAddOp();
- return std::unique_ptr(net_op);
- }
-}
-
-template
-static void ForEachVarName(const Map& names, T callback) {
- for (auto& name : names) {
- for (auto& n : name.second) {
- if (callback(n)) return;
- }
- }
-}
-
-// return whether all the names + suffixes in the set
-static bool AllInSet(
- const std::map>& names,
- const std::string& suffix, const std::unordered_set& set) {
- bool all_in_set = true;
- ForEachVarName(names, [&all_in_set, &set, &suffix](const std::string& n) {
- all_in_set = set.find(n + suffix) != set.end();
- return !all_in_set;
- });
- return all_in_set;
-}
-
-static std::unique_ptr NOP() {
- auto net_op = new operators::NetOp();
- net_op->SetType("@NOP@");
- net_op->CompleteAddOp();
- return std::unique_ptr(net_op);
-}
-
-// Get backward operator from a forward operator, a recursive implementation.
-//
-// no_grad_names the gradient variable names without gradient calculating.
-//
-// uniq_id is a unique index used inside recursively calling
-// BackwardRecursive. use `uid = uniq_id++;` to get the unique index, and
-// pass `uniq_id` through recursive calling.
-//
-// returns The backward operator. In a simple situation, it may be a simple
-// operator, in a complex situation, it maybe a NetOp.
-//
-// See Backward.h for details
-static std::unique_ptr BackwardRecursive(
- const OperatorBase& forwardOp,
- std::unordered_set& no_grad_names,
- std::unordered_map* grad_to_var,
- size_t& uniq_id) {
- // If all input gradients of forwarding operator do not need to calculate,
- // just return an NOP. Not return null ptr because NOP does not take
- // too much time for calculation, but it is useful for simplifying logic.
- if (AllInSet(forwardOp.Inputs() /*names*/, kGradVarSuffix /*suffix*/,
- no_grad_names /*set*/)) {
- return NOP();
- }
-
- // All output gradients of forwarding operator do not need to calculate.
- // Then all input gradients cannot be computed at all, and we put them into
- // `no_grad_names` set. Return an NOP.
- if (AllInSet(forwardOp.Outputs() /*names*/, kGradVarSuffix /*suffix*/,
- no_grad_names /*set*/)) {
- ForEachVarName(forwardOp.Inputs(),
- [&no_grad_names](const std::string& name) -> bool {
- no_grad_names.insert(GradVarName(name));
- return false;
- });
- return NOP();
- }
-
- // Returned gradient network
- auto net = std::unique_ptr(new operators::NetOp());
-
- if (forwardOp.IsNetOp()) {
- // Because forwardOp is a net op, it can static_cast.
- auto& forwardNet = static_cast(forwardOp);
-
- // Map from output gradient variable name to operator's indices in
- // backward net's ops_. That operator generates that variable.
- std::unordered_map> dup_output_ops;
-
- size_t local_op_id = 0;
- // reversely travel forwardNet and collect all duplicate outputs.
- for (auto it = forwardNet.ops_.rbegin(); it != forwardNet.ops_.rend();
- ++it, ++local_op_id) {
- auto& fwd = *it;
- auto bwd = BackwardRecursive(*fwd, no_grad_names, grad_to_var, uniq_id);
- ForEachVarName(bwd->Outputs(),
- [&dup_output_ops, local_op_id](const std::string& out) {
- dup_output_ops[out].emplace_back(local_op_id);
- return false;
- });
- net->AppendOp(std::move(bwd));
- }
- // Get unique ID for this method.
- auto uid = uniq_id++;
- // TODO(dzh): more comment
- // multiple operators which have the same output (y for example) may
- // overwrite the same y variable when backward, special operations are token
- // to handle this case. For each duplicate output, rename it to an alias
- // (original name with a offset), append an `add` op for its operator,
- // and finally sum all the alias variable to the final output variable y.
- using Pos = std::pair>;
- std::list insert_position;
- for (auto& dup_output_op : dup_output_ops) {
- const std::string& name = dup_output_op.first;
- // duplicate @Empty@ don't need to be added
- if (name == kEmptyVarName) continue;
-
- auto& dup_op = dup_output_op.second;
- // no duplicate output
- if (dup_op.size() == 1) continue;
-
- // process the duplicate outputs
- std::vector dup_outputs;
- for (size_t i = 0; i < dup_op.size(); ++i) {
- // rename each duplicate output to an alias
- auto op_offset = dup_op[i];
- dup_outputs.push_back(name + "@RENAME@" + std::to_string(uid) + "@" +
- std::to_string(i));
- net->ops_[op_offset]->Rename(name, dup_outputs.back());
- }
- // collect all the offset for each alias,
- // insert a sum operator to add all aliases to output
- insert_position.push_back(
- {dup_op.back(),
- OpRegistry::CreateOp("sum", {{"X", dup_outputs}}, {{"Out", {name}}},
- AttributeMap{})});
- }
-
- // make sure the inserted `sum` ops follow the BFS order.
- insert_position.sort(
- [](const Pos& l, const Pos& r) { return l.first > r.first; });
-
- for (auto& pos : insert_position) {
- net->InsertOp(pos.first + 1, std::move(pos.second));
- }
- } else {
- std::unique_ptr grad_op(
- CreateGradOp(forwardOp, no_grad_names, grad_to_var));
-
- ForEachVarName(grad_op->Inputs(), [&no_grad_names, &net, &grad_op](
- const std::string& grad_input) {
- if (no_grad_names.count(grad_input)) {
- // +1 for \0
- std::string prefix = grad_input.substr(
- 0, grad_input.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
- grad_op->Rename(grad_input, prefix + kZeroVarSuffix);
-
- // If part of input gradient of that operator is not calculated, fill
- // zero variables to that input gradient.
- net->AppendOp(OpRegistry::CreateOp("fill_zeros_like", {{"X", {prefix}}},
- {{"Out", {grad_input}}},
- AttributeMap{}));
- }
- return false;
- });
-
- ForEachVarName(grad_op->Outputs(),
- [&no_grad_names, &grad_op](const std::string& grad_output) {
- if (no_grad_names.count(grad_output)) {
- grad_op->Rename(grad_output, kEmptyVarName);
- }
- return false;
- });
-
- if (net->ops_.empty()) { // Current no aux op is added to network
- return grad_op;
- }
- net->AppendOp(std::move(grad_op));
- }
- net->SetType("@GENERATED_BACKWARD@");
- net->CompleteAddOp();
- return std::unique_ptr(
- static_cast(net.release()));
-}
-
-// See header for comments
-std::unique_ptr Backward(
- const OperatorBase& forwardOp,
- const std::unordered_set& no_grad_vars) {
- std::unordered_set no_grad_names;
- no_grad_names.reserve(no_grad_vars.size() + 1);
-
- no_grad_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
-
- for (auto& name : no_grad_vars) {
- no_grad_names.insert(name + kGradVarSuffix);
- }
- size_t uid = 0;
- std::unordered_map grad_to_var;
- return BackwardRecursive(forwardOp, no_grad_names, &grad_to_var, uid);
-}
-
-// ==================================== //
-
-static bool AllGradInSet(const std::vector& names,
- const std::unordered_set& set) {
- for (const std::string& name : names) {
- if (!set.count(GradVarName(name))) {
- return false;
- }
- }
- if (VLOG_IS_ON(10)) {
- std::ostringstream sout;
- sout << "All input {";
- for (auto& name : names) {
- sout << name << ",";
- }
- sout << "} is in {";
- for (auto& name : set) {
- sout << name << ",";
- }
- sout << "}";
- VLOG(10) << sout.str();
- }
- return true;
-}
-
-static std::string FwdName(const std::string& grad_name) {
- auto pos = grad_name.find("@GRAD");
- if (pos == std::string::npos) {
- return "";
- } else {
- return grad_name.substr(0, pos);
- }
-}
-
-static void CreateGradVarInBlock(
- size_t grad_op_start_index,
- const std::unordered_map& param_name_map,
- BlockDesc* block_desc,
- std::unordered_map* grad_var_record) {
- auto ops = block_desc->AllOps();
- for (size_t op_index = grad_op_start_index; op_index < ops.size();
- ++op_index) {
- std::unordered_set new_vars;
- auto& ctrl_flow_ops = CtrlFlowOps();
- ForEachVarName(ops[op_index]->Outputs(),
- [&](const std::string& grad_var_name) {
- if (ctrl_flow_ops.find(ops[op_index]->Type()) !=
- ctrl_flow_ops.end()) {
- if (block_desc->HasVarRecursive(grad_var_name)) {
- return false;
- }
- } else {
- if (block_desc->HasVar(grad_var_name)) {
- return false;
- }
- }
- if (grad_var_name == framework::kEmptyVarName) {
- return false;
- }
- auto var = block_desc->Var(grad_var_name);
- VLOG(10) << "Creating Variable " << grad_var_name;
- new_vars.insert(var->Name());
- auto it = param_name_map.find(grad_var_name);
- if (it == param_name_map.end()) {
- return false;
- }
- auto param_var_name = it->second;
- auto& grad_record = (*grad_var_record)[param_var_name];
- grad_record.name_ = grad_var_name;
- grad_record.block_idx_ = block_desc->ID();
- grad_record.op_idx_ = static_cast(op_index);
- return false; /* not break */
- });
- ops[op_index]->InferVarType(block_desc);
- for (auto& arg : ops[op_index]->OutputArgumentNames()) {
- if (new_vars.find(arg) == new_vars.end()) {
- continue;
- }
- auto pname = FwdName(arg);
- auto* param = block_desc->FindVarRecursive(pname);
- auto* grad = block_desc->FindVar(arg);
- if (param == nullptr) {
- grad->SetDataType(proto::VarType::FP32);
- } else {
- grad->SetDataType(param->GetDataType());
- }
- }
- ops[op_index]->InferShape(*block_desc);
- }
-}
-
-std::vector> MakeOpGrad(
- const OpDesc* op_desc, std::unordered_set* no_grad_vars,
- std::unordered_map* grad_to_var,
- const std::vector& grad_block = std::vector()) {
- std::vector> grad_op_descs;
- // All input gradients of forwarding operator do not need to calculate.
- const std::vector& inputs = op_desc->InputArgumentNames();
- if (AllGradInSet(inputs, *no_grad_vars)) {
- VLOG(10) << "Drop operator " << op_desc->Type();
- return grad_op_descs; // empty vector
- }
-
- // All output gradients of forwarding operator do not need to calculate.
- const std::vector& outputs = op_desc->OutputArgumentNames();
-
- if (AllGradInSet(outputs, *no_grad_vars)) {
- VLOG(10) << "Drop operator " << op_desc->Type();
- // FIXME: Hack code here
- auto& ctrl_flow_ops = CtrlFlowOps();
- if (ctrl_flow_ops.find(op_desc->Type()) == ctrl_flow_ops.end()) {
- // Only computational op need drop input's gradient.
- for (const std::string& name : inputs) {
- no_grad_vars->insert(GradVarName(name));
- VLOG(10) << " Also drop " << GradVarName(name);
- }
- }
-
- return grad_op_descs; // empty vector
- }
-
- grad_op_descs =
- OpInfoMap::Instance()
- .Get(op_desc->Type())
- .GradOpMaker()(*op_desc, *no_grad_vars, grad_to_var, grad_block);
-
- std::list> pending_fill_zeros_ops;
- for (auto& desc : grad_op_descs) {
- for (const std::string& in_name : desc->InputArgumentNames()) {
- if (no_grad_vars->count(in_name)) {
- std::string prefix = in_name.substr(
- 0, in_name.size() - sizeof(kGradVarSuffix) / sizeof(char) + 1);
- std::string new_name = prefix + kZeroVarSuffix;
- desc->Rename(in_name, new_name);
- std::unique_ptr fill_zeros_op(
- new OpDesc("fill_zeros_like", {{"X", {prefix}}},
- {{"Out", {new_name}}}, AttributeMap{}));
- pending_fill_zeros_ops.push_back(std::move(fill_zeros_op));
- }
- }
- }
-
- for (auto& p : pending_fill_zeros_ops) {
- grad_op_descs.insert(grad_op_descs.begin(), std::move(p));
- }
- return grad_op_descs;
-}
-
-static BlockDesc* CreateStepBlock(
- ProgramDesc& program_desc, std::unordered_set* no_grad_vars,
- std::unordered_map* grad_to_var,
- int step_block_idx);
-
-std::vector> MakeBlockBackward(
- ProgramDesc& program_desc, int block_idx,
- std::unordered_set* no_grad_vars,
- std::unordered_map* grad_to_var) {
- VLOG(5) << "MakeBlockBackward";
- BlockDesc* cur_block = program_desc.MutableBlock(block_idx);
- std::vector op_descs = cur_block->AllOps();
- std::unordered_map> dup_out_ops;
- size_t grad_desc_idx = 0;
- std::vector> backward_descs;
-
- for (auto it = op_descs.rbegin(); it != op_descs.rend(); ++it) {
- VLOG(5) << "Making backward " << (*it)->Type() << " op";
- std::vector> op_grads;
-
- if ((*it)->Type() == "recurrent" || (*it)->Type() == "while" ||
- (*it)->Type() == "parallel_do") {
- int step_block_idx = (*it)->GetBlockAttr("sub_block");
- BlockDesc* backward_block = CreateStepBlock(program_desc, no_grad_vars,
- grad_to_var, step_block_idx);
- op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
- } else if ((*it)->Type() == "conditional_block") {
- BlockDesc* backward_block =
- CreateStepBlock(program_desc, no_grad_vars, grad_to_var,
- (*it)->GetBlockAttr("sub_block"));
- op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var, {backward_block});
- } else {
- op_grads = MakeOpGrad(*it, no_grad_vars, grad_to_var);
- }
-
- if (VLOG_IS_ON(10)) {
- std::ostringstream sout;
- sout << "Made ";
- for (auto& op_grad : op_grads) {
- sout << op_grad->Type() << " ";
- }
- VLOG(10) << sout.str();
- }
-
- for (const auto& desc : op_grads) {
- for (const std::string& out_name : desc->OutputArgumentNames()) {
- if (out_name.find("@GRAD") == std::string::npos) {
- // Not all outputs of a backward operator is a gradient. Only gradient
- // need to be sum. Skip variables are not gradient.
- continue;
- }
- dup_out_ops[out_name].emplace_back(grad_desc_idx);
- }
- ++grad_desc_idx;
- }
- std::transform(op_grads.begin(), op_grads.end(),
- std::back_inserter(backward_descs),
- [](std::unique_ptr& ptr) { return std::move(ptr); });
- }
-
- VLOG(5) << "Appending Sums";
- // Check whether some variables are written more than once
- std::list>> pending_sum_ops;
- for (const auto& dup : dup_out_ops) {
- const std::string& out_name = dup.first;
- const std::vector dup_op = dup.second;
- if (out_name != kEmptyVarName && dup_op.size() > 1) {
- std::vector sum_op_inputs;
- std::string next_g_name = out_name;
- for (size_t i = 0; i < dup_op.size(); ++i) {
- VLOG(10) << backward_descs[dup_op[i]]->Type() << " has " << out_name
- << " duplicated";
- std::string new_name = out_name + "@RENAME@" + std::to_string(i);
- backward_descs[dup_op[i]]->RenameOutput(out_name, new_name);
- backward_descs[dup_op[i]]->RenameInput(out_name, next_g_name);
- sum_op_inputs.emplace_back(new_name);
- next_g_name = sum_op_inputs.back();
- }
- std::unique_ptr sum_op(new OpDesc("sum", {{"X", sum_op_inputs}},
- {{"Out", {out_name}}},
- AttributeMap{}));
- pending_sum_ops.push_back({dup_op.back(), std::move(sum_op)});
- }
- }
-
- pending_sum_ops.sort([](const std::pair>& a,
- const std::pair>& b) {
- return a.first > b.first;
- });
- for (auto& p : pending_sum_ops) {
- backward_descs.insert(backward_descs.begin() + p.first + 1,
- std::move(p.second));
- }
-
- VLOG(5) << "MakeBlockBackward Finished";
-
- return backward_descs;
-}
-
-static BlockDesc* CreateStepBlock(
- ProgramDesc& program_desc, std::unordered_set* no_grad_vars,
- std::unordered_map* grad_to_var,
- int step_block_idx) {
- auto backward_block_op_descs = MakeBlockBackward(program_desc, step_block_idx,
- no_grad_vars, grad_to_var);
- BlockDesc* backward_block =
- program_desc.AppendBlock(*program_desc.MutableBlock(step_block_idx));
- for (auto& ptr : backward_block_op_descs) {
- backward_block->AppendAllocatedOp(move(ptr));
- }
- return backward_block;
-}
-
-ParamGradInfoMap AppendBackward(
- ProgramDesc& program_desc, const VarDesc& target,
- const std::unordered_set& no_grad_vars) {
- std::unordered_set no_grad_var_names;
- no_grad_var_names.reserve(no_grad_vars.size() + 1);
- no_grad_var_names.insert(std::string(kEmptyVarName) + kGradVarSuffix);
- for (auto& name : no_grad_vars) {
- no_grad_var_names.insert(GradVarName(name));
- }
-
- const int root_block_idx = 0;
- auto root_block = program_desc.MutableBlock(root_block_idx);
-
- std::string fill_one_op_out = GradVarName(target.Name());
- bool is_scalar = target.GetShape() == std::vector{1};
- PADDLE_ENFORCE(is_scalar, "target should be scalar");
- VLOG(3) << "backward from loss=" << target.Name()
- << " data_type=" << target.GetDataType();
- std::unique_ptr fill_one_op(
- new OpDesc("fill_constant", {}, {{"Out", {fill_one_op_out}}},
- {{"shape", std::vector{1}},
- {"value", static_cast(1.0)},
- {"dtype", target.GetDataType()}}));
- // infer var type of fill_one_op
- fill_one_op->InferVarType(root_block);
-
- root_block->AppendAllocatedOp(std::move(fill_one_op));
- size_t forward_op_num = root_block->OpSize();
- size_t forward_block_num = program_desc.Size();
-
- // Insert backward operators
- std::unordered_map grad_to_var;
- auto backward_op_descs = MakeBlockBackward(program_desc, root_block_idx,
- &no_grad_var_names, &grad_to_var);
-
- for (auto& ptr : backward_op_descs) {
- root_block->AppendAllocatedOp(std::move(ptr));
- }
- // Create Variable
-
- // Create target gradient variable
- std::unordered_map retv;
-
- auto var = root_block->Var(fill_one_op_out);
- var->SetDataType(target.GetDataType());
- var->SetShape(target.GetShape());
- auto& target_grad = retv[target.Name()];
- target_grad.name_ = fill_one_op_out;
- target_grad.block_idx_ = root_block_idx;
- target_grad.op_idx_ = static_cast(forward_op_num);
-
- // create grad_var for all blocks in this program
- CreateGradVarInBlock(forward_op_num, grad_to_var, root_block, &retv);
- for (size_t block_index = forward_block_num;
- block_index < program_desc.Size(); ++block_index) {
- CreateGradVarInBlock(0, grad_to_var, program_desc.MutableBlock(block_index),
- &retv);
- }
- return retv;
-}
-
-} // namespace framework
-} // namespace paddle
diff --git a/paddle/fluid/framework/backward.h b/paddle/fluid/framework/backward.h
deleted file mode 100644
index 3a971090c25c8..0000000000000
--- a/paddle/fluid/framework/backward.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-
-#include <string>
-#include <unordered_map>
-#include <unordered_set>
-
-#include "paddle/fluid/framework/operator.h"
-#include "paddle/fluid/framework/program_desc.h"
-
-namespace paddle {
-namespace framework {
-
-// Create the backward operator from a forward operator.
-// TODO(yuyang18): Add more API reference comment.
-extern std::unique_ptr<OperatorBase> Backward(
- const OperatorBase& forwardOp,
-    const std::unordered_set<std::string>& no_grad_vars);
-
-struct GradVarInfo {
- GradVarInfo() {}
- GradVarInfo(const std::string& name, int block_idx, int op_idx)
- : name_(name), block_idx_(block_idx), op_idx_(op_idx) {}
-
- bool operator==(const GradVarInfo& b) const {
- return name_ == b.name_ && block_idx_ == b.block_idx_ &&
- op_idx_ == b.op_idx_;
- }
-
- std::string name_;
- int block_idx_;
- int op_idx_;
-};
-
-using ParamGradInfoMap = std::unordered_map<std::string, GradVarInfo>;
-
-ParamGradInfoMap AppendBackward(
- ProgramDesc& program_desc, const VarDesc& target,
-    const std::unordered_set<std::string>& no_grad_vars);
-
-} // namespace framework
-} // namespace paddle
diff --git a/paddle/fluid/framework/backward_test.cc b/paddle/fluid/framework/backward_test.cc
deleted file mode 100644
index cc1f871360ed3..0000000000000
--- a/paddle/fluid/framework/backward_test.cc
+++ /dev/null
@@ -1,918 +0,0 @@
-// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include "paddle/fluid/framework/backward.h"
-
-#include <gtest/gtest.h>
-#include "paddle/fluid/framework/block_desc.h"
-#include "paddle/fluid/framework/op_desc.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/var_desc.h"
-#include "paddle/fluid/operators/net_op.h"
-
-USE_NO_KERNEL_OP(fill_constant);
-
-namespace paddle {
-namespace framework {
-
-using DeviceContext = platform::DeviceContext;
-
-class NoneOp : public framework::OperatorWithKernel {
- public:
- using framework::OperatorWithKernel::OperatorWithKernel;
-
- protected:
- void InferShape(framework::InferShapeContext *ctx) const override {}
-};
-
-template <typename Place, typename T>
-class NoneKernel : public framework::OpKernel<T> {
- public:
- void Compute(const framework::ExecutionContext &context) const override {}
-};
-
-class RowWiseAddOpMaker : public OpProtoAndCheckerMaker {
- public:
- RowWiseAddOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "Input X of Add");
- AddInput("b", "Bias of Add");
- AddOutput("Out", "Out of Add");
- AddComment("Add Op");
- }
-};
-
-class RowWiseAddGradMaker : public SingleGradOpDescMaker {
- public:
- using SingleGradOpDescMaker::SingleGradOpDescMaker;
-
- protected:
-  std::unique_ptr<OpDesc> Apply() const override {
- auto grad_op = new OpDesc();
- grad_op->SetInput(GradVarName("Out"), OutputGrad("Out"));
- grad_op->SetOutput(GradVarName("X"), InputGrad("X"));
- grad_op->SetOutput(GradVarName("b"), InputGrad("b"));
- grad_op->SetType("rowwise_add_grad");
-    return std::unique_ptr<OpDesc>(grad_op);
- }
-};
-
-class MulOpMaker : public OpProtoAndCheckerMaker {
- public:
- MulOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "A");
- AddInput("Y", "B");
- AddOutput("Out", "Out");
-    AddAttr<int>("x_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
-    AddAttr<int>("y_num_col_dims", "").SetDefault(1).EqualGreaterThan(1);
- AddComment("Mul");
- }
-};
-
-class SigmoidOpMaker : public OpProtoAndCheckerMaker {
- public:
- SigmoidOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "X");
- AddOutput("Out", "Y");
- AddComment("Sigmoid");
- }
-};
-
-class NoGradOpMaker : public OpProtoAndCheckerMaker {
- public:
- NoGradOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "X input");
- AddOutput("Out", "Y output");
- AddComment("NoGradOp, same input output. no Grad");
- }
-};
-
-class FcOp : public operators::NetOp {
- public:
- FcOp(const std::string &type, const VariableNameMap &inputs,
- const VariableNameMap &outputs, const AttributeMap &attrs)
- : NetOp(type, inputs, outputs, attrs) {
- AppendOp(OpRegistry::CreateOp(
- "mul", {{"X", {Input("X")}}, {"Y", {Input("W")}}},
- {{"Out", {Output("mul_result")}}}, AttributeMap{}));
- auto input_b = Inputs("b");
- std::string before_act = "mul_result";
- if (input_b.size() != 0) {
- AppendOp(OpRegistry::CreateOp(
- "rowwise_add", {{"X", {Output("mul_result")}}, {"b", {input_b[0]}}},
- {{"Out", {Output("add_result")}}}, AttributeMap{}));
- before_act = "add_result";
- } else {
- auto out_varname = Output("add_result");
- if (out_varname != kEmptyVarName) {
- this->Rename(out_varname, kEmptyVarName);
- }
- }
-
- AppendOp(OpRegistry::CreateOp("sigmoid", {{"X", {Output(before_act)}}},
- {{"Out", {Output("Out")}}}, AttributeMap{}));
- CompleteAddOp(false);
- }
-};
-
-class FcOpMaker : public OpProtoAndCheckerMaker {
- public:
- FcOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "x");
- AddInput("W", "w");
- AddInput("b", "b");
- AddOutput("mul_result", "").AsIntermediate();
- AddOutput("add_result", "").AsIntermediate();
- AddOutput("Out", "");
- AddComment("");
- }
-};
-
-class ManyOutputOpMaker : public OpProtoAndCheckerMaker {
- public:
- ManyOutputOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("x", "x");
- AddOutput("y", "y");
- AddOutput("z", "z");
- AddComment("");
- }
-};
-
-class FillZeroOpMaker : public OpProtoAndCheckerMaker {
- public:
- FillZeroOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "x");
- AddOutput("Out", "out");
- AddComment("");
- }
-};
-
-class SumOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
- SumOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "the input tensors of sum operator.").AsDuplicable();
- AddOutput("Out", "the output tensor of sum operator.");
- AddComment("");
- }
-};
-
-class MultInOutOpMaker : public OpProtoAndCheckerMaker {
- public:
- MultInOutOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "x");
- AddInput("H", "h");
- AddOutput("Y", "y");
- AddOutput("Z", "z");
- AddComment("");
- }
-};
-
-class MinusGradOpDescMaker : public GradOpDescMakerBase {
- public:
- using GradOpDescMakerBase::GradOpDescMakerBase;
-
-  std::vector<std::unique_ptr<OpDesc>> operator()() const override {
-    std::vector<std::unique_ptr<OpDesc>> retv;
- auto x_g = InputGrad("X");
- if (!x_g.empty()) {
- auto *op_desc = new OpDesc();
- op_desc->SetType("scale");
- op_desc->SetInput("X", OutputGrad("Out"));
- op_desc->SetOutput("Out", x_g);
- op_desc->SetAttr("scale", 1.0f);
- retv.emplace_back(op_desc);
- }
-
- auto y_g = InputGrad("Y");
- if (!y_g.empty()) {
- auto *op_desc = new OpDesc();
- op_desc->SetType("scale");
- op_desc->SetInput("X", OutputGrad("Out"));
- op_desc->SetOutput("Out", y_g);
- op_desc->SetAttr("scale", -1.0f);
- retv.emplace_back(op_desc);
- }
- return retv;
- }
-};
-
-class MinusOpMaker : public OpProtoAndCheckerMaker {
- public:
- MinusOpMaker(OpProto *proto, OpAttrChecker *op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("X", "");
- AddInput("Y", "");
- AddOutput("Out", "");
- AddComment("minus for unittest");
- }
-};
-} // namespace framework
-} // namespace paddle
-
-namespace f = paddle::framework;
-namespace ops = paddle::operators;
-using EnforceNotMet = paddle::platform::EnforceNotMet;
-// rowwise_add
-REGISTER_OPERATOR(rowwise_add, f::NoneOp, f::RowWiseAddOpMaker,
- f::RowWiseAddGradMaker);
-REGISTER_OP_CPU_KERNEL(rowwise_add,
-                       f::NoneKernel<paddle::platform::CPUPlace, float>);
-REGISTER_OPERATOR(rowwise_add_grad, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(rowwise_add_grad,
-                       f::NoneKernel<paddle::platform::CPUPlace, float>);
-// mul
-REGISTER_OP(mul, f::NoneOp, f::MulOpMaker, mul_grad, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(mul, f::NoneKernel<paddle::platform::CPUPlace, float>);
-REGISTER_OP_CPU_KERNEL(mul_grad,
-                       f::NoneKernel<paddle::platform::CPUPlace, float>);
-// sigmoid
-REGISTER_OP(sigmoid, f::NoneOp, f::SigmoidOpMaker, sigmoid_grad, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(sigmoid,
-                       f::NoneKernel<paddle::platform::CPUPlace, float>);
-REGISTER_OP_WITHOUT_GRADIENT(nograd, f::NoneOp, f::NoGradOpMaker);
-// fill_zeros_like
-REGISTER_OP_WITHOUT_GRADIENT(fill_zeros_like, f::NoneOp, f::FillZeroOpMaker);
-REGISTER_OP_CPU_KERNEL(fill_zeros_like,
-                       f::NoneKernel<paddle::platform::CPUPlace, float>);
-// sum
-REGISTER_OP(sum, f::NoneOp, f::SumOpMaker, sum_grad, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(sum, f::NoneKernel<paddle::platform::CPUPlace, float>);
-REGISTER_OP_CPU_KERNEL(sum_grad,
-                       f::NoneKernel<paddle::platform::CPUPlace, float>);
-// fc
-REGISTER_OP_WITHOUT_GRADIENT(fc, f::FcOp, f::FcOpMaker);
-// many_output_op
-REGISTER_OP(many_output_op, f::NoneOp, f::ManyOutputOpMaker,
- many_output_op_grad, f::NoneOp);
-// mult_in_out
-REGISTER_OP(mult_in_out, f::NoneOp, f::MultInOutOpMaker, mult_in_out_grad,
- f::NoneOp);
-REGISTER_OP_CPU_KERNEL(mult_in_out,
-                       f::NoneKernel<paddle::platform::CPUPlace, float>);
-REGISTER_OP_CPU_KERNEL(mult_in_out_grad,
-                       f::NoneKernel<paddle::platform::CPUPlace, float>);
-// minus
-REGISTER_OPERATOR(minus, f::NoneOp, f::MinusOpMaker, f::MinusGradOpDescMaker);
-REGISTER_OP_CPU_KERNEL(minus, f::NoneKernel<paddle::platform::CPUPlace, float>);
-// scale
-REGISTER_OPERATOR(scale, f::NoneOp);
-REGISTER_OP_CPU_KERNEL(scale, f::NoneKernel<paddle::platform::CPUPlace, float>);
-
-TEST(Backward, simple_op_not_need_grad) {
- auto fwd =
- f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
- {{"Out", {"out"}}}, f::AttributeMap{});
- ASSERT_NE(fwd, nullptr);
- auto gop = f::Backward(*fwd, {"x"});
- ASSERT_EQ(gop->Output(f::GradVarName("X")), f::kEmptyVarName);
-
- auto no_input_gop = f::Backward(*fwd, {"x", "b"});
- ASSERT_NE(no_input_gop, nullptr);
- ASSERT_TRUE(no_input_gop->IsNetOp());
-  ASSERT_EQ(0UL, static_cast<ops::NetOp *>(no_input_gop.get())->ops_.size());
-}
-
-TEST(Backward, net_fc_backward_normal) {
-  std::shared_ptr<f::OperatorBase> fwd =
- f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {"b"}}},
- {{"mul_result", {"mul_res"}},
- {"add_result", {"add_re"}},
- {"Out", {"out"}}},
- f::AttributeMap{});
- ASSERT_NE(fwd, nullptr);
-  std::shared_ptr<f::OperatorBase> gop =
-      f::Backward(*fwd, std::unordered_set<std::string>{});
-  ASSERT_TRUE(gop->IsNetOp());
-  auto net = static_cast<ops::NetOp *>(gop.get());
-
- ASSERT_NO_THROW(net->DebugString());
-
- ASSERT_EQ(3UL, net->ops_.size());
-
- f::OperatorBase &d_sigmoid = *net->ops_[0];
- ASSERT_EQ("sigmoid_grad", d_sigmoid.Type());
-
- f::OperatorBase &d_add = *net->ops_[1];
- ASSERT_EQ("rowwise_add_grad", d_add.Type());
-
- f::OperatorBase &d_mul = *net->ops_[2];
- ASSERT_EQ("mul_grad", d_mul.Type());
-}
-
-TEST(Backward, net_fc_backward_not_have_b) {
-  std::shared_ptr<f::OperatorBase> fwd =
- f::OpRegistry::CreateOp("fc", {{"X", {"x"}}, {"W", {"w"}}, {"b", {}}},
- {{"mul_result", {"mul_res"}},
- {"add_result", {"add_res"}},
- {"Out", {"tmp"}}},
- f::AttributeMap{});
- ASSERT_NE(fwd, nullptr);
-  std::shared_ptr<f::OperatorBase> gop =
-      f::Backward(*fwd, std::unordered_set<std::string>{});
-  ASSERT_TRUE(gop->IsNetOp());
-  auto net = static_cast<ops::NetOp *>(gop.get());
-
- ASSERT_NO_THROW(net->DebugString());
-
- ASSERT_EQ(2UL, net->ops_.size());
-
- f::OperatorBase &d_sigmoid = *net->ops_[0];
- ASSERT_EQ("sigmoid_grad", d_sigmoid.Type());
-
- f::OperatorBase &d_mul = *net->ops_[1];
- ASSERT_EQ("mul_grad", d_mul.Type());
-}
-
-TEST(Backward, net_input_of_network_not_need_grad) {
- ops::NetOp net;
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"x"}}, {"W", {"W1"}}, {"b", {"b1"}}},
- {{"mul_result", {"mul_tmp_0"}},
- {"add_result", {"add_tmp_0"}},
- {"Out", {"hidden0"}}},
- f::AttributeMap{}));
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"hidden0"}}, {"W", {"W2"}}, {"b", {"b2"}}},
- {{"mul_result", {"mul_tmp_1"}},
- {"add_result", {"add_tmp_1"}},
- {"Out", {"hidden1"}}},
- f::AttributeMap{}));
- net.CompleteAddOp();
- auto bwd = Backward(net, {"x"}); // x@GRAD is not need.
- ASSERT_TRUE(bwd->IsNetOp());
-  auto bwd_net = static_cast<ops::NetOp *>(bwd.get());
-
-  auto output_vars = bwd_net->OutputVars(true);
-  std::unordered_set<std::string> all_outputs =
-      std::unordered_set<std::string>(output_vars.begin(), output_vars.end());
- all_outputs.erase(f::kEmptyVarName);
-
- for (auto &out : {"W1", "b1", "hidden0", "W2", "b2"}) {
- ASSERT_NE(all_outputs.find(f::GradVarName(out)), all_outputs.end());
- }
-
- // Not Generated X
- ASSERT_EQ(all_outputs.find(f::GradVarName("X")), all_outputs.end());
-
- ASSERT_EQ(2UL, bwd_net->ops_.size());
- ASSERT_TRUE(bwd_net->ops_[1]->IsNetOp());
-  auto first_fc_grad = static_cast<ops::NetOp *>(bwd_net->ops_[1].get());
- ASSERT_EQ(3UL, first_fc_grad->ops_.size());
- ASSERT_EQ(f::kEmptyVarName,
- first_fc_grad->ops_[2]->Output(f::GradVarName("X")));
-}
-
-TEST(Backward, net_shared_weight) {
- ops::NetOp net;
- net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"x"}}, {"Y", {"w"}}},
- {{"Out", {"out"}}}, f::AttributeMap{}));
- net.AppendOp(f::OpRegistry::CreateOp("mul", {{"X", {"out"}}, {"Y", {"w"}}},
- {{"Out", {"FinalOut"}}},
- f::AttributeMap{}));
- net.CompleteAddOp();
-
-  auto bwd = f::Backward(net, std::unordered_set<std::string>{});
-  ASSERT_TRUE(bwd->IsNetOp());
-  auto bwd_net = static_cast<ops::NetOp *>(bwd.get());
- ASSERT_EQ(3UL, bwd_net->ops_.size());
- ASSERT_EQ("sum", bwd_net->ops_[2]->Type());
-}
-
-TEST(Backward, op_all_input_are_not_need) {
- auto fwd =
- f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
- {{"Out", {"out"}}}, f::AttributeMap{});
- auto backward = f::Backward(*fwd, {"x", "b"});
- ASSERT_TRUE(backward->IsNetOp());
-  auto net = static_cast<ops::NetOp *>(backward.get());
- ASSERT_TRUE(net->ops_.empty());
-}
-
-TEST(Backward, op_all_output_are_not_need) {
- auto fwd =
- f::OpRegistry::CreateOp("rowwise_add", {{"X", {"x"}}, {"b", {"b"}}},
- {{"Out", {"out"}}}, f::AttributeMap{});
- auto backward = f::Backward(*fwd, {"out"});
- ASSERT_TRUE(backward->IsNetOp());
-  auto net = static_cast<ops::NetOp *>(backward.get());
- ASSERT_TRUE(net->ops_.empty());
-}
-
-TEST(Backward, op_part_of_output_are_not_need) {
- auto fwd =
- f::OpRegistry::CreateOp("many_output_op", {{"x", {"X"}}},
- {{"y", {"Y"}}, {"z", {"Z"}}}, f::AttributeMap{});
- auto backward = f::Backward(*fwd, {"Z"});
- ASSERT_TRUE(backward->IsNetOp());
-  auto net = static_cast<ops::NetOp *>(backward.get());
- ASSERT_EQ(net->ops_.size(), 2UL);
-
- auto &fill_zero = *net->ops_[0];
- ASSERT_EQ("fill_zeros_like", fill_zero.Type());
- ASSERT_EQ(1UL, fill_zero.Inputs("X").size());
- ASSERT_EQ("Z", fill_zero.Input("X"));
- ASSERT_EQ(1UL, fill_zero.Outputs("Out").size());
- ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix, fill_zero.Output("Out"));
-
- auto &d_many_out = *net->ops_[1];
- ASSERT_EQ("many_output_op_grad", d_many_out.Type());
- ASSERT_EQ(1UL + 2UL + 2UL, d_many_out.Inputs().size()); // I/O/OG
- ASSERT_EQ(std::string("Z") + f::kZeroVarSuffix,
- d_many_out.Input(f::GradVarName("z")));
- ASSERT_EQ(f::GradVarName("Y"), d_many_out.Input(f::GradVarName("y")));
- ASSERT_EQ(f::GradVarName("X"), d_many_out.Output(f::GradVarName("x")));
-}
-
-TEST(Backward, op_part_of_input_are_not_need) {
- auto fwd = f::OpRegistry::CreateOp("mul", {{"X", {"a"}}, {"Y", {"b"}}},
- {{"Out", {"out"}}}, f::AttributeMap{});
- auto backward = f::Backward(*fwd, {"a"});
- auto &grad_mul = *backward;
- ASSERT_EQ(grad_mul.Type(), "mul_grad");
- ASSERT_EQ(grad_mul.Inputs().size(), 2UL + 1UL + 1UL);
- ASSERT_EQ(grad_mul.Outputs().size(), 2UL);
- ASSERT_EQ(grad_mul.Output(f::GradVarName("X")), f::kEmptyVarName);
- ASSERT_EQ(grad_mul.Output(f::GradVarName("Y")), f::GradVarName("b"));
- ASSERT_EQ(grad_mul.Input(f::GradVarName("Out")), f::GradVarName("out"));
- ASSERT_EQ(grad_mul.Input("X"), "a");
- ASSERT_EQ(grad_mul.Input("Y"), "b");
- ASSERT_EQ(grad_mul.Input("Out"), "out");
-}
-
-TEST(Backward, linear_net_intermediate_variable_has_no_grad) {
- ops::NetOp net;
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"x1"}}, {"W", {"w1"}}, {"b", {"b1"}}},
- {{"mul_result", {"mul_out1"}},
- {"add_result", {"add_out1"}},
- {"Out", {"out1"}}},
- f::AttributeMap{}));
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"out1"}}, {"W", {"w2"}}, {"b", {"b2"}}},
- {{"mul_result", {"mul_out2"}},
- {"add_result", {"tmp_out2"}},
- {"Out", {"out2"}}},
- f::AttributeMap{}));
- net.AppendOp(f::OpRegistry::CreateOp(
- "fc", {{"X", {"out2"}}, {"W", {"w3"}}, {"b", {"b3"}}},
- {{"mul_result", {"mul_out3"}},
- {"add_result", {"tmp_out3"}},
- {"Out", {"out3"}}},
- f::AttributeMap{}));
- net.CompleteAddOp();
-
- auto backward = f::Backward(net, {"mul_out2", "tmp_out2", "out2"});
- ASSERT_TRUE(backward->IsNetOp());
-  auto bwd_net = static_cast<ops::NetOp *>(backward.get());
- ASSERT_EQ(bwd_net->ops_.size(), 3UL);
- auto &grad_fc = *bwd_net->ops_[0];
-
- const char *all = paddle::operators::NetOp::kAll;
- EXPECT_EQ(grad_fc.Inputs(all).size(),
- 2UL /* external input number */
- + 1UL /* external output number*/
- + 1UL /* number of gradient of external output*/
- + 2UL /* internal variable number*/
- );
- EXPECT_EQ(grad_fc.Outputs(all).size(),
- 2UL /* input number of mul*/
- + 2UL /* input number of rowwise_add*/
- + 1UL /* input number of sigmod */
- - 1UL /* out2 is not needed*/);
- EXPECT_EQ(bwd_net->ops_[1]->Inputs(all).size(), 0UL);
- EXPECT_EQ(bwd_net->ops_[1]->Outputs(all).size(), 0UL);
- EXPECT_EQ(bwd_net->ops_[2]->Inputs(all).size(), 0UL);
- EXPECT_EQ(bwd_net->ops_[2]->Outputs(all).size(), 0UL);
-}
-
-TEST(Backward, simple_single_op) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
-
- f::OpDesc *op = block->AppendOp();
- op->SetType("rowwise_add");
- op->SetInput("X", {"x"});
- op->SetInput("b", {"b"});
- op->SetOutput("Out", {"out"});
-
- auto target = f::VarDesc("out");
- target.SetShape({1});
- auto var_to_grad =
-      AppendBackward(program, target, std::unordered_set<std::string>{});
-
- ASSERT_EQ(block->AllOps().size(), 3UL);
- f::OpDesc *fill_op = block->AllOps()[1];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op = block->AllOps()[2];
- EXPECT_EQ(grad_op->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out")}));
-  EXPECT_EQ(grad_op->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x")}));
-  EXPECT_EQ(grad_op->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b")}));
-
- EXPECT_EQ(var_to_grad.size(), 3UL);
- EXPECT_EQ(var_to_grad.at("b"), f::GradVarInfo(f::GradVarName("b"), 0, 2));
- EXPECT_EQ(var_to_grad.at("x"), f::GradVarInfo(f::GradVarName("x"), 0, 2));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("b")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("x")));
-}
-
-TEST(Backward, default_attribute) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op = block->AppendOp();
- op->SetType("mul");
- op->SetInput("X", {"x"});
- op->SetInput("Y", {"y"});
- op->SetOutput("Out", {"out"});
- op->CheckAttrs();
-
- auto target = f::VarDesc("out");
- target.SetShape({1});
-  AppendBackward(program, target, std::unordered_set<std::string>{});
-
-  ASSERT_EQ(block->AllOps().size(), 3UL);
-  EXPECT_EQ(boost::get<int>(op->GetAttr("x_num_col_dims")), 1);
-  EXPECT_EQ(boost::get<int>(op->GetAttr("y_num_col_dims")), 1);
-
- f::OpDesc *fill_op = block->AllOps()[1];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op = block->AllOps()[2];
- ASSERT_EQ(grad_op->Type(), "mul_grad");
-  EXPECT_EQ(boost::get<int>(grad_op->GetAttr("x_num_col_dims")), 1);
-  EXPECT_EQ(boost::get<int>(grad_op->GetAttr("y_num_col_dims")), 1);
-}
-
-TEST(Backward, simple_mult_op) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op1 = block->AppendOp();
- op1->SetType("rowwise_add");
- op1->SetInput("X", {"x1"});
- op1->SetInput("b", {"b1"});
- op1->SetOutput("Out", {"out1"});
-
- f::OpDesc *op2 = block->AppendOp();
- op2->SetType("mul");
- op2->SetInput("X", {"out1"});
- op2->SetInput("Y", {"y2"});
- op2->SetOutput("Out", {"out2"});
-
- f::OpDesc *op3 = block->AppendOp();
- op3->SetType("rowwise_add");
- op3->SetInput("X", {"out2"});
- op3->SetInput("b", {"b3"});
- op3->SetOutput("Out", {"out3"});
-
- auto target = f::VarDesc("out3");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad =
-      AppendBackward(program, target, std::unordered_set<std::string>{});
-
- ASSERT_EQ(block->AllOps().size(), 6UL + 1);
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op1 = block->AllOps()[6];
- EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b1")}));
-
- f::OpDesc *grad_op2 = block->AllOps()[5];
- EXPECT_EQ(grad_op2->Type(), "mul_grad");
- ASSERT_EQ(grad_op2->InputNames().size(), 4UL);
- ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op2->Input("X"), std::vector<std::string>({"out1"}));
-  EXPECT_EQ(grad_op2->Input("Y"), std::vector<std::string>({"y2"}));
-  EXPECT_EQ(grad_op2->Input("Out"), std::vector<std::string>({"out2"}));
-  EXPECT_EQ(grad_op2->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out2")}));
-  EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("out1")}));
-  EXPECT_EQ(grad_op2->Output(f::GradVarName("Y")),
-            std::vector<std::string>({f::GradVarName("y2")}));
-
- f::OpDesc *grad_op3 = block->AllOps()[4];
- EXPECT_EQ(grad_op3->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out3")}));
-  EXPECT_EQ(grad_op3->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("out2")}));
-  EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b3")}));
-
- EXPECT_EQ(var_to_grad.size(), 7UL);
- EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("out1"),
- f::GradVarInfo(f::GradVarName("out1"), 0, 5));
- EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
- EXPECT_EQ(var_to_grad.at("out2"),
- f::GradVarInfo(f::GradVarName("out2"), 0, 4));
- EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("out2")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
-}
-
-TEST(Backward, intermedia_var_no_grad) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op1 = block->AppendOp();
- op1->SetType("rowwise_add");
- op1->SetInput("X", {"x1"});
- op1->SetInput("b", {"b1"});
- op1->SetOutput("Out", {"out1"});
-
- f::OpDesc *op2 = block->AppendOp();
- op2->SetType("mul");
- op2->SetInput("X", {"x2"});
- op2->SetInput("Y", {"y2"});
- op2->SetOutput("Out", {"out2"});
-
- f::OpDesc *op3 = block->AppendOp();
- op3->SetType("rowwise_add");
- op3->SetInput("X", {"out2"});
- op3->SetInput("b", {"b3"});
- op3->SetOutput("Out", {"out3"});
-
- f::OpDesc *op4 = block->AppendOp();
- op4->SetType("mul");
- op4->SetInput("X", {"out1"});
- op4->SetInput("Y", {"out3"});
- op4->SetOutput("Out", {"out4"});
-
- auto target = f::VarDesc("out4");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad = AppendBackward(program, target, {"out3"});
-
- ASSERT_EQ(block->AllOps().size(), 7UL);
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op1 = block->AllOps()[6];
- EXPECT_EQ(grad_op1->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b1")}));
-
- f::OpDesc *grad_op4 = block->AllOps()[5];
- EXPECT_EQ(grad_op4->Type(), "mul_grad");
- ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
- ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op4->Input("X"), std::vector<std::string>({"out1"}));
-  EXPECT_EQ(grad_op4->Input("Y"), std::vector<std::string>({"out3"}));
-  EXPECT_EQ(grad_op4->Input("Out"), std::vector<std::string>({"out4"}));
-  EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out4")}));
-  EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("out1")}));
-  EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")), std::vector<std::string>());
-
- EXPECT_EQ(var_to_grad.size(), 4UL);
- EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("out1"),
- f::GradVarInfo(f::GradVarName("out1"), 0, 5));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
-}
-
-TEST(Backward, var_no_grad) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op1 = block->AppendOp();
- op1->SetType("mult_in_out");
- op1->SetInput("X", {"x1"});
- op1->SetInput("H", {"h1"});
- op1->SetOutput("Y", {"y1"});
- op1->SetOutput("Z", {"z1"});
-
- f::OpDesc *op2 = block->AppendOp();
- op2->SetType("mult_in_out");
- op2->SetInput("X", {"y1"});
- op2->SetInput("H", {"z1"});
- op2->SetOutput("Y", {"y2"});
- op2->SetOutput("Z", {"z2"});
-
- auto target = f::VarDesc("z2");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad = AppendBackward(program, target, {"z1"});
-
- ASSERT_EQ(block->AllOps().size(), 6UL);
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op2 = block->AllOps()[3];
- ASSERT_EQ(grad_op2->Type(), "mult_in_out_grad");
- ASSERT_EQ(grad_op2->InputNames().size(), 6UL);
- ASSERT_EQ(grad_op2->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op2->Input("X"), std::vector<std::string>({"y1"}));
-  EXPECT_EQ(grad_op2->Input("H"), std::vector<std::string>({"z1"}));
-  EXPECT_EQ(grad_op2->Input("Y"), std::vector<std::string>({"y2"}));
-  EXPECT_EQ(grad_op2->Input("Z"), std::vector<std::string>({"z2"}));
-  EXPECT_EQ(grad_op2->Input(f::GradVarName("Y")),
-            std::vector<std::string>({f::GradVarName("y2")}));
-  EXPECT_EQ(grad_op2->Input(f::GradVarName("Z")),
-            std::vector<std::string>({f::GradVarName("z2")}));
-  EXPECT_EQ(grad_op2->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("y1")}));
-  EXPECT_EQ(grad_op2->Output(f::GradVarName("H")), std::vector<std::string>());
-
- f::OpDesc *fill_zero_op = block->AllOps()[4];
- ASSERT_EQ(fill_zero_op->Type(), "fill_zeros_like");
- ASSERT_EQ(fill_zero_op->InputNames().size(), 1UL);
- ASSERT_EQ(fill_zero_op->OutputNames().size(), 1UL);
-  EXPECT_EQ(fill_zero_op->Input("X"), std::vector<std::string>({"z1"}));
-  EXPECT_EQ(fill_zero_op->Output("Out"),
-            std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
-
- f::OpDesc *grad_op1 = block->AllOps()[5];
- ASSERT_EQ(grad_op1->Type(), "mult_in_out_grad");
- ASSERT_EQ(grad_op1->InputNames().size(), 6UL);
- ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op1->Input("X"), std::vector<std::string>({"x1"}));
-  EXPECT_EQ(grad_op1->Input("H"), std::vector<std::string>({"h1"}));
-  EXPECT_EQ(grad_op1->Input("Y"), std::vector<std::string>({"y1"}));
-  EXPECT_EQ(grad_op1->Input("Z"), std::vector<std::string>({"z1"}));
-  EXPECT_EQ(grad_op1->Input(f::GradVarName("Y")),
-            std::vector<std::string>({f::GradVarName("y1")}));
-  EXPECT_EQ(grad_op1->Input(f::GradVarName("Z")),
-            std::vector<std::string>({std::string("z1") + f::kZeroVarSuffix}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("H")),
-            std::vector<std::string>({f::GradVarName("h1")}));
-
- EXPECT_EQ(var_to_grad.size(), 4UL);
- EXPECT_EQ(var_to_grad.at("y1"), f::GradVarInfo(f::GradVarName("y1"), 0, 3));
- EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 5));
- EXPECT_EQ(var_to_grad.at("h1"), f::GradVarInfo(f::GradVarName("h1"), 0, 5));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("y1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("h1")));
-}
-
-TEST(Backward, shared_var) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- f::OpDesc *op1 = block->AppendOp();
- op1->SetType("rowwise_add");
- op1->SetInput("X", {"x1"});
- op1->SetInput("b", {"b1"});
- op1->SetOutput("Out", {"out1"});
-
- f::OpDesc *op2 = block->AppendOp();
- op2->SetType("mul");
- op2->SetInput("X", {"out1"});
- op2->SetInput("Y", {"y2"});
- op2->SetOutput("Out", {"out2"});
-
- f::OpDesc *op3 = block->AppendOp();
- op3->SetType("rowwise_add");
- op3->SetInput("X", {"out1"});
- op3->SetInput("b", {"b3"});
- op3->SetOutput("Out", {"out3"});
-
- auto target = f::VarDesc("out3");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad =
-      AppendBackward(program, target, std::unordered_set<std::string>{});
-
- ASSERT_EQ(block->AllOps().size(), 8UL);
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
-
- f::OpDesc *grad_op3 = block->AllOps()[4];
- ASSERT_EQ(grad_op3->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op3->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op3->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op3->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out3")}));
-  EXPECT_EQ(grad_op3->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("out1") + "@RENAME@0"}));
-  EXPECT_EQ(grad_op3->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b3")}));
-
- f::OpDesc *grad_op4 = block->AllOps()[5];
- ASSERT_EQ(grad_op4->Type(), "mul_grad");
- ASSERT_EQ(grad_op4->InputNames().size(), 4UL);
- ASSERT_EQ(grad_op4->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op4->Input("X"), std::vector<std::string>({"out1"}));
-  EXPECT_EQ(grad_op4->Input("Y"), std::vector<std::string>({"y2"}));
-  EXPECT_EQ(grad_op4->Input("Out"), std::vector<std::string>({"out2"}));
-  EXPECT_EQ(grad_op4->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out2")}));
-  EXPECT_EQ(grad_op4->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("out1") + "@RENAME@1"}));
-  EXPECT_EQ(grad_op4->Output(f::GradVarName("Y")),
-            std::vector<std::string>({f::GradVarName("y2")}));
-
- f::OpDesc *sum_op = block->AllOps()[6];
- ASSERT_EQ(sum_op->Type(), "sum");
- ASSERT_EQ(sum_op->InputNames().size(), 1UL);
- ASSERT_EQ(sum_op->OutputNames().size(), 1UL);
- EXPECT_EQ(sum_op->Input("X"),
-            std::vector<std::string>({f::GradVarName("out1") + "@RENAME@0",
- f::GradVarName("out1") + "@RENAME@1"}));
- EXPECT_EQ(sum_op->Output("Out"),
-            std::vector<std::string>({f::GradVarName("out1")}));
-
- f::OpDesc *grad_op1 = block->AllOps()[7];
- ASSERT_EQ(grad_op1->Type(), "rowwise_add_grad");
- ASSERT_EQ(grad_op1->InputNames().size(), 1UL);
- ASSERT_EQ(grad_op1->OutputNames().size(), 2UL);
-  EXPECT_EQ(grad_op1->Input(f::GradVarName("Out")),
-            std::vector<std::string>({f::GradVarName("out1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("X")),
-            std::vector<std::string>({f::GradVarName("x1")}));
-  EXPECT_EQ(grad_op1->Output(f::GradVarName("b")),
-            std::vector<std::string>({f::GradVarName("b1")}));
-
- EXPECT_EQ(var_to_grad.size(), 6UL);
- EXPECT_EQ(var_to_grad.at("b3"), f::GradVarInfo(f::GradVarName("b3"), 0, 4));
- EXPECT_EQ(var_to_grad.at("y2"), f::GradVarInfo(f::GradVarName("y2"), 0, 5));
- EXPECT_EQ(var_to_grad.at("out1"),
- f::GradVarInfo(f::GradVarName("out1"), 0, 6));
- EXPECT_EQ(var_to_grad.at("x1"), f::GradVarInfo(f::GradVarName("x1"), 0, 7));
- EXPECT_EQ(var_to_grad.at("b1"), f::GradVarInfo(f::GradVarName("b1"), 0, 7));
-
- EXPECT_TRUE(block->HasVar(f::GradVarName("b3")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("y2")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("out1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("x1")));
- EXPECT_TRUE(block->HasVar(f::GradVarName("b1")));
-}
-
-TEST(Backward, half_backward) {
- f::ProgramDesc program;
- f::BlockDesc *block = program.MutableBlock(0);
- auto *op1 = block->AppendOp();
- op1->SetType("minus");
- op1->SetInput("X", {"a"});
- op1->SetInput("Y", {"b"});
- op1->SetOutput("Out", {"out"});
-
- auto target = f::VarDesc("out");
- target.SetShape({1});
- size_t forward_len = block->AllOps().size();
- auto var_to_grad = AppendBackward(program, target, {"b"});
- f::OpDesc *fill_op = block->AllOps()[forward_len];
- EXPECT_EQ(fill_op->Type(), "fill_constant");
- auto ops = block->AllOps();
- ASSERT_EQ(3UL, ops.size());
-
- EXPECT_EQ(var_to_grad.size(), 2UL);
- EXPECT_EQ(var_to_grad.at("a"),
- f::GradVarInfo(f::GradVarName("a"), 0, forward_len + 1));
-}
diff --git a/paddle/fluid/framework/block_desc.cc b/paddle/fluid/framework/block_desc.cc
index fbe08349c37c4..b8847e4b909cb 100644
--- a/paddle/fluid/framework/block_desc.cc
+++ b/paddle/fluid/framework/block_desc.cc
@@ -13,11 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/block_desc.h"
+#include
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/program_desc.h"
-#include
-
namespace paddle {
namespace framework {
@@ -147,52 +146,7 @@ void BlockDesc::RemoveOp(size_t s, size_t e) {
if (ops_.begin() + s == ops_.end() || ops_.begin() + e == ops_.end()) {
return;
}
-  auto get_vars = [](std::deque<std::unique_ptr<OpDesc>>::iterator &op,
-                     std::vector<std::string> &v) {
- auto in_names = (*op)->InputArgumentNames();
- v.insert(v.end(), in_names.begin(), in_names.end());
- auto out_names = (*op)->OutputArgumentNames();
- v.insert(v.end(), out_names.begin(), out_names.end());
- std::sort(v.begin(), v.end());
- auto last = std::unique(v.begin(), v.end());
- v.erase(last, v.end());
- };
- need_update_ = true;
-
- for (size_t i = s; i < e; i++) {
- // since remove op one by one, every time remove the first op.
- auto op = ops_.begin() + s;
-
- // collect input and output variables from current delete op
-    std::vector<std::string> cur_vars;
- get_vars(op, cur_vars);
-
- // remove current op
- ops_.erase(ops_.begin() + s);
-
- // collect input and output variables from other ops
-    std::vector<std::string> other_vars;
- for (auto it = ops_.begin(); it != ops_.end(); it++) {
- get_vars(it, other_vars);
- }
-
- // variables should be deleted
-    std::vector<std::string> delete_vars;
- // delete_vars = cur_vars - cur_vars ^ other_input_vars
- std::set_difference(cur_vars.begin(), cur_vars.end(), other_vars.begin(),
- other_vars.end(),
- std::inserter(delete_vars, delete_vars.end()));
- // remove variables
- for (size_t i = 0; i < delete_vars.size(); i++) {
- auto name = delete_vars[i];
- auto it = vars_.find(name);
- PADDLE_ENFORCE(it != vars_.end(),
- "%s is not in variable list, it should not be deleted",
- name);
- vars_.erase(it);
- VLOG(3) << "deleting variable " << name;
- }
- }
+ ops_.erase(ops_.begin() + s, ops_.begin() + e);
}
std::vector<OpDesc *> BlockDesc::AllOps() const {
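
The simplified RemoveOp above now only erases the ops in [s, e) and keeps every variable. The deleted body additionally dropped any variable referenced solely by the erased ops, using a sort-and-set_difference pass. A minimal standalone sketch of that old cleanup rule, with made-up names (removed_vars, kept_vars) rather than code from this patch:

    // Illustrative sketch only: the variable-cleanup rule the old RemoveOp
    // implemented, restated on plain containers.
    #include <algorithm>
    #include <iterator>
    #include <string>
    #include <vector>

    std::vector<std::string> VarsOnlyUsedByErasedOps(
        std::vector<std::string> removed_vars,  // inputs/outputs of erased ops
        std::vector<std::string> kept_vars) {   // inputs/outputs of kept ops
      std::sort(removed_vars.begin(), removed_vars.end());
      std::sort(kept_vars.begin(), kept_vars.end());
      std::vector<std::string> result;
      // Names used only by the erased ops; the old code erased these from vars_.
      std::set_difference(removed_vars.begin(), removed_vars.end(),
                          kept_vars.begin(), kept_vars.end(),
                          std::inserter(result, result.end()));
      return result;
    }
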
diff --git a/paddle/fluid/framework/block_desc.h b/paddle/fluid/framework/block_desc.h
index 873969b2a884f..eef19c4f09c60 100644
--- a/paddle/fluid/framework/block_desc.h
+++ b/paddle/fluid/framework/block_desc.h
@@ -92,7 +92,7 @@ class BlockDesc {
/*
* Remove Op and its input/output variables.
- * Note that for either input or ouput variable, if it is also an input or
+ * Note that for either input or output variable, if it is also an input or
* output variable of other ops, we should remain it.
*/
void RemoveOp(size_t s, size_t e);
diff --git a/paddle/fluid/framework/details/CMakeLists.txt b/paddle/fluid/framework/details/CMakeLists.txt
index 89b5c6847f15b..85b649b2937f6 100644
--- a/paddle/fluid/framework/details/CMakeLists.txt
+++ b/paddle/fluid/framework/details/CMakeLists.txt
@@ -5,6 +5,7 @@ cc_library(fetch_op_handle SRCS fetch_op_handle.cc DEPS op_handle_base scope lod
nv_library(nccl_all_reduce_op_handle SRCS nccl_all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
dynload_cuda)
cc_library(computation_op_handle SRCS computation_op_handle.cc DEPS framework_proto scope place operator op_registry)
+cc_library(send_op_handle SRCS send_op_handle.cc DEPS framework_proto scope place operator op_registry)
cc_library(ssa_graph SRCS ssa_graph.cc DEPS var_handle op_handle_base)
cc_library(ssa_graph_builder SRCS ssa_graph_builder.cc DEPS ssa_graph)
@@ -15,7 +16,7 @@ else()
set(multi_devices_graph_builder_deps)
endif()
cc_library(multi_devices_graph_builder SRCS multi_devices_graph_builder.cc DEPS ssa_graph_builder computation_op_handle
- scale_loss_grad_op_handle ${multi_devices_graph_builder_deps})
+ scale_loss_grad_op_handle send_op_handle ${multi_devices_graph_builder_deps})
cc_library(ssa_graph_executor SRCS ssa_graph_executor.cc DEPS ssa_graph framework_proto)
cc_library(threaded_ssa_graph_executor SRCS threaded_ssa_graph_executor.cc DEPS fetch_op_handle ssa_graph_executor scope
simple_threadpool device_context)
diff --git a/paddle/fluid/framework/details/computation_op_handle.cc b/paddle/fluid/framework/details/computation_op_handle.cc
index 7a1b40c0b60a7..e3f8bbb72f2a1 100644
--- a/paddle/fluid/framework/details/computation_op_handle.cc
+++ b/paddle/fluid/framework/details/computation_op_handle.cc
@@ -14,6 +14,8 @@
#include "paddle/fluid/framework/details/computation_op_handle.h"
+#include
+
namespace paddle {
namespace framework {
namespace details {
@@ -33,7 +35,7 @@ void ComputationOpHandle::RunImpl() {
}
}
-  op_->Run(*scope_->FindVar("@TMP_SCOPE@")->Get<Scope *>(), place_);
+  op_->Run(*scope_->FindVar(kLocalExecScopeName)->Get<Scope *>(), place_);
}
std::string ComputationOpHandle::Name() const { return op_->Type(); }
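
The handle now reaches its per-iteration scope through a Scope* stored under kLocalExecScopeName (declared in op_handle_base.h below) instead of the hard-coded "@TMP_SCOPE@" key that the threaded executor used to populate. A minimal sketch of that indirection, assuming the local scope is prepared elsewhere; PrepareLocalScope is a hypothetical stand-in for that caller:

    // Sketch of the local-scope indirection assumed by this change; only the
    // lookup mirrors the patch, PrepareLocalScope() is a hypothetical caller.
    #include "paddle/fluid/framework/details/op_handle_base.h"
    #include "paddle/fluid/framework/scope.h"

    namespace fw = paddle::framework;

    void PrepareLocalScope(fw::Scope *scope) {
      // Publish a kid scope under the well-known key before any op handle runs.
      fw::Scope &local = scope->NewScope();
      *scope->Var(fw::details::kLocalExecScopeName)->GetMutable<fw::Scope *>() =
          &local;
    }

    const fw::Scope &LocalScopeOf(const fw::Scope &scope) {
      // The lookup each op handle performs (see ComputationOpHandle::RunImpl).
      return *scope.FindVar(fw::details::kLocalExecScopeName)->Get<fw::Scope *>();
    }
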
diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
index 9180903b864d0..e3e7c55d153ae 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.cc
+++ b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -14,6 +14,9 @@
#include "paddle/fluid/framework/details/fetch_op_handle.h"
+#include
+#include
+
namespace paddle {
namespace framework {
namespace details {
@@ -57,7 +60,10 @@ void FetchOpHandle::RunImpl() {
for (size_t i = 0; i < scopes.size(); ++i) {
auto &scope = scopes[i];
-    auto &t = scope->FindVar(var_name)->Get<framework::LoDTensor>();
+ auto &t = scope->FindVar(kLocalExecScopeName)
+                  ->Get<Scope *>()
+ ->FindVar(var_name)
+                  ->Get<framework::LoDTensor>();
if (platform::is_gpu_place(var->place_)) {
#ifdef PADDLE_WITH_CUDA
TensorCopy(t, cpu, *dev_ctxes_[t.place()], &tensors_[i]);
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.cc b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
index e7a0cb678ebfd..e0dd9e6068174 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.cc
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.cc
@@ -15,6 +15,7 @@
#include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/scale_loss_grad_op_handle.h"
+#include "paddle/fluid/framework/details/send_op_handle.h"
#include "paddle/fluid/framework/scope.h"
#ifdef PADDLE_WITH_CUDA
@@ -54,6 +55,27 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
}
}
+void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result, OpDesc *op,
+ const platform::Place &p,
+ const size_t &i) const {
+ auto *op_handle = result->ops_.back().get();
+  op_handle->dev_ctxes_[p] = const_cast<platform::DeviceContext *>(
+ platform::DeviceContextPool::Instance().Get(p));
+
+ auto var_names = op->InputArgumentNames();
+
+ for (auto &each_var_name : var_names) {
+ VarHandle *var = CreateOrGetLatestVarHandle(result, each_var_name, p, i);
+ op_handle->AddInput(var);
+ }
+
+ var_names = op->OutputArgumentNames();
+
+ for (auto &each_var_name : var_names) {
+ CreateOpOutput(result, op_handle, each_var_name, p, i);
+ }
+}
+
std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
const ProgramDesc &program) const {
auto graph = new SSAGraph();
@@ -76,27 +98,28 @@ std::unique_ptr MultiDevSSAGraphBuilder::Build(
}
}
+    // Append the send op when this program is a distributed trainer's main
+    // program, and always place it on the first device.
+ if (!is_forwarding && op->Type() == "send") {
+ auto &p = places_[0];
+ auto *s = local_scopes_[0];
+      // FIXME(wuyi): the send op always copies from GPU 0.
+ result.ops_.emplace_back(new SendOpHandle(*op, s, p));
+      // Create the inputs on the original place; no SSA output is created
+      // for the send op.
+ CreateOpHandleIOs(&result, op, p, 0);
+ continue;
+ }
+
for (size_t i = 0; i < places_.size(); ++i) {
auto &p = places_[i];
auto *s = local_scopes_[i];
result.ops_.emplace_back(new ComputationOpHandle(*op, s, p));
auto *op_handle = result.ops_.back().get();
-    op_handle->dev_ctxes_[p] = const_cast<platform::DeviceContext *>(
- platform::DeviceContextPool::Instance().Get(p));
+ CreateOpHandleIOs(&result, op, p, i);
- auto var_names = op->InputArgumentNames();
-
- for (auto &each_var_name : var_names) {
- VarHandle *var =
- CreateOrGetLatestVarHandle(&result, each_var_name, p, i);
- op_handle->AddInput(var);
- }
- var_names = op->OutputArgumentNames();
-
- for (auto &each_var_name : var_names) {
- CreateOpOutput(&result, op_handle, each_var_name, p, i);
- }
+ auto var_names = op->OutputArgumentNames();
if (is_forwarding) {
if (var_names.size() == 1 && var_names[0] == loss_var_name_) {
diff --git a/paddle/fluid/framework/details/multi_devices_graph_builder.h b/paddle/fluid/framework/details/multi_devices_graph_builder.h
index d3c8e582cf2cd..de34caab1be85 100644
--- a/paddle/fluid/framework/details/multi_devices_graph_builder.h
+++ b/paddle/fluid/framework/details/multi_devices_graph_builder.h
@@ -14,6 +14,9 @@
#pragma once
+#include
+#include
+
#include "paddle/fluid/framework/details/ssa_graph_builder.h"
namespace paddle {
@@ -41,6 +44,10 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
+ private:
+ void CreateOpHandleIOs(SSAGraph *result, OpDesc *op, const platform::Place &p,
+ const size_t &i) const;
+
private:
std::string loss_var_name_;
const std::vector<platform::Place> &places_;
diff --git a/paddle/fluid/framework/details/op_handle_base.h b/paddle/fluid/framework/details/op_handle_base.h
index d7a541ac4bb83..fbdb54ba8d940 100644
--- a/paddle/fluid/framework/details/op_handle_base.h
+++ b/paddle/fluid/framework/details/op_handle_base.h
@@ -24,6 +24,8 @@ namespace paddle {
namespace framework {
namespace details {
+constexpr char kLocalExecScopeName[] = "@LCOAL_SCOPE@";
+
class OpHandleBase {
private:
DISABLE_COPY_AND_ASSIGN(OpHandleBase);
diff --git a/paddle/fluid/framework/details/send_op_handle.cc b/paddle/fluid/framework/details/send_op_handle.cc
new file mode 100644
index 0000000000000..d181607e86372
--- /dev/null
+++ b/paddle/fluid/framework/details/send_op_handle.cc
@@ -0,0 +1,43 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/framework/details/send_op_handle.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+SendOpHandle::SendOpHandle(const framework::OpDesc &op_desc,
+ const Scope *local_scope,
+ const platform::Place &place)
+ : op_(framework::OpRegistry::CreateOp(op_desc)),
+ local_scope_(local_scope),
+ place_(place) {}
+
+void SendOpHandle::RunImpl() {
+  // Wait until all inputs are ready.
+ for (auto *in : inputs_) {
+    auto &p = static_cast<VarHandle *>(in)->place_;
+ if (in->DebugString() == "dummy") { // HACK
+ continue;
+ }
+ in->generated_op_->Wait(dev_ctxes_[p]);
+ }
+ op_->Run(*local_scope_, place_);
+}
+
+std::string SendOpHandle::Name() const { return "send"; }
+} // namespace details
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/details/send_op_handle.h b/paddle/fluid/framework/details/send_op_handle.h
new file mode 100644
index 0000000000000..173f9d726145a
--- /dev/null
+++ b/paddle/fluid/framework/details/send_op_handle.h
@@ -0,0 +1,50 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+
+#include "paddle/fluid/framework/details/op_handle_base.h"
+#include "paddle/fluid/framework/lod_tensor.h"
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/scope.h"
+
+namespace paddle {
+namespace framework {
+namespace details {
+
+struct SendOpHandle : public OpHandleBase {
+  std::unique_ptr<OperatorBase> op_;
+ const Scope* local_scope_;
+ const platform::Place& place_;
+
+ SendOpHandle(const framework::OpDesc& op_desc, const Scope* local_scope,
+ const platform::Place& place);
+
+ std::string Name() const override;
+
+  // Delaying and buffering nccl_all_reduce calls together can significantly
+  // improve performance. Returning false disables that behavior for send.
+ bool IsMultiDeviceTransfer() override { return false; };
+
+ protected:
+ void RunImpl() override;
+};
+
+} // namespace details
+} // namespace framework
+} // namespace paddle
diff --git a/paddle/fluid/framework/details/ssa_graph_executor.h b/paddle/fluid/framework/details/ssa_graph_executor.h
index 3b818b1a45b56..a8833b7388ab9 100644
--- a/paddle/fluid/framework/details/ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/ssa_graph_executor.h
@@ -15,13 +15,15 @@
#pragma once
#include
+#include
+#include
+
#include "paddle/fluid/framework/details/ssa_graph.h"
#include "paddle/fluid/framework/feed_fetch_type.h"
namespace paddle {
namespace framework {
namespace details {
-
class SSAGraphExecutor {
DISABLE_COPY_AND_ASSIGN(SSAGraphExecutor);
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
index 62af4c1d79ded..1ce69ab02b09f 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
@@ -136,12 +136,6 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
ready_ops.clear();
};
- // Create local scopes.
- for (auto &scope : local_scopes_) {
- auto &local_scope = scope->NewScope();
-    *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>() = &local_scope;
- }
-
// Step 3. Execution
while (!pending_vars.empty() || !ready_ops.empty() || !delayed_ops.empty()) {
// 1. Run All Ready ops
@@ -189,34 +183,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
PADDLE_ENFORCE(ready_ops.empty());
PADDLE_ENFORCE(delayed_ops.empty());
PADDLE_ENFORCE(blocked_by_delayed_ops.empty());
- ++computation_count_;
-
- auto sync_computation = [&] {
- computation_count_ = 0;
- // Wait All computational streams
- for (auto p : this->places_) {
- platform::DeviceContextPool::Instance().Get(p)->Wait();
- }
- for (auto &scope : local_scopes_) {
- scope->DropKids();
- }
- };
// Wait FetchOps.
if (!fetch_ops.empty()) {
fetch_ops.clear();
- sync_computation();
- }
-
- if (computation_count_ == max_async_computation) {
- sync_computation();
- }
-
- // NOTE: the temp scope can be dropped lazily if needed.
- // Drop tmp scopes;
- for (auto &scope : local_scopes_) {
-    auto &kid = *scope->Var("@TMP_SCOPE@")->GetMutable<Scope *>();
- kid = nullptr;
}
return fetch_data;
diff --git a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
index 79cfc26b461a3..bb5e837b135c3 100644
--- a/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+++ b/paddle/fluid/framework/details/threaded_ssa_graph_executor.h
@@ -99,9 +99,6 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
std::unique_ptr<platform::EnforceNotMet> exception_;
std::atomic<int> running_ops_;
bool allow_op_delay_;
-
- size_t computation_count_{0};
- size_t max_async_computation{100};
};
} // namespace details
diff --git a/paddle/fluid/framework/lod_tensor.cc b/paddle/fluid/framework/lod_tensor.cc
index 8155cb55a468a..a56674cbe216e 100644
--- a/paddle/fluid/framework/lod_tensor.cc
+++ b/paddle/fluid/framework/lod_tensor.cc
@@ -12,9 +12,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
-#include "paddle/fluid/framework/lod_tensor.h"
+#include
+#include
+#include
+#include
+
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/fluid/memory/memory.h"
@@ -22,11 +27,6 @@ limitations under the License. */
#include "paddle/fluid/recordio/scanner.h"
#include "paddle/fluid/recordio/writer.h"
-#include
-#include
-#include
-#include
-
namespace paddle {
namespace framework {
@@ -294,7 +294,7 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor,
TensorFromStream(is, static_cast<Tensor *>(tensor), dev_ctx);
}
-void WriteToRecordIO(recordio::Writer &writer,
+void WriteToRecordIO(recordio::Writer *writer,
const std::vector<LoDTensor> &tensor,
const platform::DeviceContext &dev_ctx) {
std::stringstream buffer;
@@ -303,18 +303,20 @@ void WriteToRecordIO(recordio::Writer &writer,
for (auto &each : tensor) {
SerializeToStream(buffer, each, dev_ctx);
}
- writer.Write(buffer.str());
+ writer->Write(buffer.str());
}
std::vector<LoDTensor> ReadFromRecordIO(
- recordio::Scanner &scanner, const platform::DeviceContext &dev_ctx) {
- std::istringstream sin(scanner.Next());
- uint32_t sz;
-  sin.read(reinterpret_cast<char *>(&sz), sizeof(uint32_t));
+ recordio::Scanner *scanner, const platform::DeviceContext &dev_ctx) {
std::vector<LoDTensor> result;
- result.resize(sz);
- for (uint32_t i = 0; i < sz; ++i) {
- DeserializeFromStream(sin, &result[i], dev_ctx);
+ if (scanner->HasNext()) {
+ std::istringstream sin(scanner->Next());
+ uint32_t sz;
+    sin.read(reinterpret_cast<char *>(&sz), sizeof(uint32_t));
+ result.resize(sz);
+ for (uint32_t i = 0; i < sz; ++i) {
+ DeserializeFromStream(sin, &result[i], dev_ctx);
+ }
}
return result;
}
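
With the pointer-based signature, ReadFromRecordIO now checks HasNext() itself and returns an empty batch once the scanner is exhausted. A small sketch of how a caller can rely on that, assuming it already holds a Scanner and a DeviceContext; ReadAll is a made-up helper, not part of this patch:

    // Hypothetical helper built on the new API: drain a scanner batch by batch
    // until ReadFromRecordIO reports no more records via an empty result.
    #include <utility>
    #include <vector>

    #include "paddle/fluid/framework/lod_tensor.h"
    #include "paddle/fluid/platform/device_context.h"
    #include "paddle/fluid/recordio/scanner.h"

    namespace fw = paddle::framework;

    std::vector<std::vector<fw::LoDTensor>> ReadAll(
        paddle::recordio::Scanner *scanner,
        const paddle::platform::DeviceContext &dev_ctx) {
      std::vector<std::vector<fw::LoDTensor>> batches;
      while (true) {
        auto batch = fw::ReadFromRecordIO(scanner, dev_ctx);
        if (batch.empty()) break;  // HasNext() was false inside ReadFromRecordIO
        batches.push_back(std::move(batch));
      }
      return batches;
    }
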
diff --git a/paddle/fluid/framework/lod_tensor.h b/paddle/fluid/framework/lod_tensor.h
index 4f130d2659004..1159fee39b073 100644
--- a/paddle/fluid/framework/lod_tensor.h
+++ b/paddle/fluid/framework/lod_tensor.h
@@ -15,6 +15,9 @@ limitations under the License. */
#pragma once
#include
+#include
+#include
+#include
#ifdef PADDLE_WITH_CUDA
#include
#include
@@ -216,12 +219,12 @@ void SerializeToStream(std::ostream& os, const LoDTensor& tensor,
void DeserializeFromStream(std::istream& is, LoDTensor* tensor,
const platform::DeviceContext& dev_ctx);
-extern void WriteToRecordIO(recordio::Writer& writer,
+extern void WriteToRecordIO(recordio::Writer* writer,
const std::vector<LoDTensor>& tensor,
const platform::DeviceContext& dev_ctx);
extern std::vector<LoDTensor> ReadFromRecordIO(
- recordio::Scanner& scanner, const platform::DeviceContext& dev_ctx);
+ recordio::Scanner* scanner, const platform::DeviceContext& dev_ctx);
} // namespace framework
} // namespace paddle
diff --git a/paddle/fluid/framework/lod_tensor_test.cc b/paddle/fluid/framework/lod_tensor_test.cc
index e691e29383d48..97ab98f09b1a9 100644
--- a/paddle/fluid/framework/lod_tensor_test.cc
+++ b/paddle/fluid/framework/lod_tensor_test.cc
@@ -12,17 +12,17 @@
// See the License for the specific language governing permissions and
// limitations under the License.
-#include "paddle/fluid/framework/lod_tensor.h"
-
-#include "paddle/fluid/recordio/scanner.h"
-#include "paddle/fluid/recordio/writer.h"
-
#include
#include
#include
#include
#include
+#include "paddle/fluid/framework/lod_tensor.h"
+
+#include "paddle/fluid/recordio/scanner.h"
+#include "paddle/fluid/recordio/writer.h"
+
namespace paddle {
namespace framework {
@@ -240,8 +240,8 @@ TEST(LoDTensor, RecordIO) {
*platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
{
recordio::Writer writer(stream, recordio::Compressor::kSnappy);
- WriteToRecordIO(writer, {tensor, tensor}, ctx);
- WriteToRecordIO(writer, {tensor, tensor}, ctx);
+ WriteToRecordIO(&writer, {tensor, tensor}, ctx);
+ WriteToRecordIO(&writer, {tensor, tensor}, ctx);
writer.Flush();
}
@@ -254,11 +254,11 @@ TEST(LoDTensor, RecordIO) {
{
std::unique_ptr stream_ptr(stream);
recordio::Scanner scanner(std::move(stream_ptr));
- auto tensors = ReadFromRecordIO(scanner, ctx);
+ auto tensors = ReadFromRecordIO(&scanner, ctx);
ASSERT_EQ(tensors.size(), 2);
assert_tensor_ok(tensors[0]);
assert_tensor_ok(tensors[1]);
- tensors = ReadFromRecordIO(scanner, ctx);
+ tensors = ReadFromRecordIO(&scanner, ctx);
ASSERT_EQ(tensors.size(), 2);
assert_tensor_ok(tensors[0]);
assert_tensor_ok(tensors[1]);
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index a3b4a8c0829ae..f97bd0827428f 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -46,7 +46,8 @@ proto::VarType::Type GetDataTypeOfVar(const Variable* var) {
}
}
-static DDim GetDims(const Scope& scope, const std::string& name) {
+static DDim GetDims(const Scope& scope, const std::string& name,
+ bool get_actual_dim = false) {
Variable* var = scope.FindVar(name);
if (var == nullptr) {
return DDim({-1});
@@ -55,7 +56,11 @@ static DDim GetDims(const Scope& scope, const std::string& name) {
if (var->IsType<LoDTensor>()) {
return var->Get<LoDTensor>().dims();
} else if (var->IsType