PaddlePaddle · sandyhouse · Aug 11, 2021 · Jun 28, 2021 · Jun 28, 2021 · Jul 1, 2021
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
@@ -202,7 +202,7 @@ cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op
 cc_library(version SRCS version.cc)
 cc_test(version_test SRCS version_test.cc DEPS version)
 
-cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc DEPS attribute shape_inference op_info operator glog version)
+cc_library(proto_desc SRCS var_desc.cc op_desc.cc block_desc.cc program_desc.cc process_mesh_desc.cc DEPS attribute shape_inference op_info operator glog version)
 
 cc_library(op_registry SRCS op_registry.cc DEPS op_proto_maker op_info operator glog proto_desc)
 

@@ -38,6 +38,13 @@ enum AttrType {
   FLOAT64S = 12;
 }
 
+message ProcessMeshDesc {
+  required int32 id = 1;  // id of this mesh
+  required int32 parent_id = 2;  // id of the parent mesh
+  repeated int32 topology = 3;  // presumably one size per mesh dimension; TODO confirm
+  repeated int32 process_group = 4;  // presumably the process ranks of the mesh; TODO confirm
+};
+
 // OpDesc describes an instance of a C++ framework::OperatorBase
 // derived class type.
 message OpDesc {
@@ -167,6 +174,15 @@ message VarType {
 }
 
 message VarDesc {
+
+  message Attr {  // one named attribute attached to a variable
+    required string name = 1;  // attribute name
+    required AttrType type = 2;  // presumably selects which value field below is set; TODO confirm
+    optional int32 i = 3;  // integer value
+    optional string s = 4;  // string value
+    repeated int32 ints = 5;  // list-of-int value
+  };
+
   required string name = 1;
   required VarType type = 2;
   optional bool persistable = 3 [ default = false ];
@@ -175,6 +191,7 @@ message VarDesc {
   optional bool need_check_feed = 4 [ default = false ];
   optional bool is_parameter = 5 [ default = false ];
   optional bool stop_gradient = 6 [ default = false ];
+  repeated Attr attrs = 7;
 }
 
 message BlockDesc {

diff --git a/paddle/fluid/framework/process_mesh_desc.cc b/paddle/fluid/framework/process_mesh_desc.cc
@@ -0,0 +1,61 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/fluid/framework/process_mesh_desc.h"
+
+namespace paddle {
+namespace framework {
+
+// Class-wide id counter. It holds the id most recently handed out and
+// starts at -1, so the first constructed mesh receives id 0.
+int32_t ProcessMeshDesc::next_id = -1;
+
+// Builds a mesh description from a topology, a process group and the id of
+// the parent mesh, then registers the new object in ProcessMeshDescMap.
+// Raises AlreadyExists (via ProcessMeshDescMap::Insert) if the id is taken.
+ProcessMeshDesc::ProcessMeshDesc(const std::vector<int32_t> &topo,
+                                 const std::vector<int32_t> &process_group,
+                                 int32_t parent_id) {
+  // Advance the shared counter and use the new value as this mesh's id.
+  next_id += 1;
+  const int32_t mesh_id = next_id;
+  desc_.set_id(mesh_id);
+  desc_.set_parent_id(parent_id);
+
+  // Copy both input vectors into the repeated fields of the message.
+  for (const int32_t dim : topo) {
+    desc_.add_topology(dim);
+  }
+  for (const int32_t rank : process_group) {
+    desc_.add_process_group(rank);
+  }
+
+  // The map stores the raw `this` pointer.
+  // NOTE(review): nothing visible here removes the entry when the object is
+  // destroyed -- confirm the intended lifetime of registered meshes.
+  ProcessMeshDescMap::GetInstance().Insert(mesh_id, this);
+}
+
+// Returns a copy of the repeated `topology` field as a std::vector.
+std::vector<int32_t> ProcessMeshDesc::Topology() const {
+  const auto &dims = desc_.topology();
+  return std::vector<int32_t>(dims.begin(), dims.end());
+}
+
+// Returns a copy of the repeated `process_group` field as a std::vector.
+std::vector<int32_t> ProcessMeshDesc::ProcessGroup() const {
+  const auto &ranks = desc_.process_group();
+  return std::vector<int32_t>(ranks.begin(), ranks.end());
+}
+
+// Returns the shared ProcessMeshDescMap. The instance is a function-local
+// static, so it is created the first time this function is called.
+ProcessMeshDescMap &ProcessMeshDescMap::GetInstance() {
+  static ProcessMeshDescMap instance;
+  return instance;
+}
+
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/process_mesh_desc.h b/paddle/fluid/framework/process_mesh_desc.h
@@ -0,0 +1,65 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include <unordered_map>
+#include <vector>
+
+#include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/proto_desc.h"
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/macros.h"
+
+namespace paddle {
+namespace framework {
+
+class ProcessMeshDesc {  // C++ wrapper around one proto::ProcessMeshDesc
+ public:
+  ProcessMeshDesc(const std::vector<int32_t>& topo,
+                  const std::vector<int32_t>& process_group, int32_t parent_id);
+  // Scalar accessors reading straight from the wrapped message.
+  int32_t ID() const { return desc_.id(); }
+  int32_t Parent() const { return desc_.parent_id(); }
+  // Accessors for the repeated fields; each returns a std::vector by value.
+  std::vector<int32_t> Topology() const;
+  std::vector<int32_t> ProcessGroup() const;
+  // Id counter shared by every instance; public and writable.
+  static int32_t next_id;
+
+ private:
+  proto::ProcessMeshDesc desc_;  // held by value, so owned by this object
+};
+
+// Registry that maps a mesh id to the ProcessMeshDesc registered under it.
+// The constructor is private; the only instance is obtained via GetInstance().
+class ProcessMeshDescMap {
+ public:
+  // Returns the shared instance.
+  static ProcessMeshDescMap& GetInstance();
+
+  // True when `index` has already been registered.
+  bool Has(int32_t index) const { return map_.count(index) != 0; }
+
+  // Registers `mesh` under `index`. Raises AlreadyExists if `index` is
+  // already present, so an existing entry is never overwritten.
+  void Insert(int32_t index, ProcessMeshDesc* mesh) {
+    PADDLE_ENFORCE_NE(
+        Has(index), true,
+        platform::errors::AlreadyExists("Index (%d) has been used.", index));
+    map_.emplace(index, mesh);
+  }
+
+ private:
+  ProcessMeshDescMap() = default;
+  // Raw, non-owning pointers: this class never deletes a registered mesh.
+  std::unordered_map<int32_t, ProcessMeshDesc*> map_;
+  DISABLE_COPY_AND_ASSIGN(ProcessMeshDescMap);
+};
+}  // namespace framework
+}  // namespace paddle
diff --git a/paddle/fluid/framework/proto_desc.h b/paddle/fluid/framework/proto_desc.h
@@ -22,5 +22,13 @@ constexpr int kRootBlockIndex = 0;
 // The Parent Index of root Block, this block does not exist.
 constexpr int kNoneBlockIndex = -1;
 
+// The Parent Index of root ProcessMesh, this ProcessMesh does not exist.
+constexpr int kNoneProcessMeshIndex = -1;
+
+// If an attribute name has a certain suffix, it means that the
+// attribute is a distributed-related attribute for auto parallel.
+// e.g., "mesh_id@PARALLEL".
+constexpr char kAutoParallelSuffix[] = "@PARALLEL";
+
 }  // namespace framework
 }  // namespace paddle
diff --git a/paddle/fluid/framework/var_desc.cc b/paddle/fluid/framework/var_desc.cc
@@ -280,6 +280,46 @@ std::vector<proto::VarType::TensorDesc *> VarDesc::mutable_tensor_descs() {
   }
 }
 
+// Returns the names (map keys) of all attributes currently stored in attrs_.
+std::vector<std::string> VarDesc::AttrNames() const {
+  std::vector<std::string> names;
+  names.reserve(attrs_.size());
+  for (const auto &entry : attrs_) {
+    names.push_back(entry.first);
+  }
+  return names;
+}
+
+// Drops the attribute stored under `name` from attrs_.
+void VarDesc::RemoveAttr(const std::string &name) {
+  attrs_.erase(name);
+}
+
+// Stores `v` under `name`, replacing any previous value.
+//
+// Only int, string and list-of-int values are accepted; any other value
+// type raises InvalidArgument.
+void VarDesc::SetAttr(const std::string &name, const Attribute &v) {
+  // The index of the alternative held by `v`, minus one, is interpreted as
+  // the proto::AttrType of the value.
+  // NOTICE(sandyhouse): pybind11 will take the empty list in python as
+  // the std::vector<int> type in C++. That value already has type INTS, so
+  // it passes the check below and is stored unchanged; no special case is
+  // needed.
+  const auto attr_type = static_cast<proto::AttrType>(v.which() - 1);
+  const bool valid = attr_type == proto::AttrType::INT ||
+                     attr_type == proto::AttrType::STRING ||
+                     attr_type == proto::AttrType::INTS;
+  PADDLE_ENFORCE_EQ(valid, true, platform::errors::InvalidArgument(
+                                     "The value for attr (%s) must be "
+                                     "one of list or int or string.",
+                                     name));
+
+  this->attrs_[name] = v;
+}
+
+// Returns a copy of the attribute stored under `name`.
+// Raises NotFound when no attribute with that name exists.
+Attribute VarDesc::GetAttr(const std::string &name) const {
+  // One lookup serves both the existence check and the return value.
+  const auto it = attrs_.find(name);
+  PADDLE_ENFORCE_NE(it, attrs_.end(), platform::errors::NotFound(
+                                          "Attribute %s is not found.", name));
+  return it->second;
+}
+
 bool operator==(const VarDesc &left, const VarDesc &right) {
   return left.Proto()->SerializeAsString() ==
          right.Proto()->SerializeAsString();

@@ -19,7 +19,9 @@ limitations under the License. */
 #include <vector>
 
 #include "glog/logging.h"
+#include "paddle/fluid/framework/attribute.h"
 #include "paddle/fluid/framework/framework.pb.h"
+#include "paddle/fluid/framework/type_defs.h"
 
 namespace paddle {
 namespace framework {
@@ -137,13 +139,25 @@ class VarDesc {
     desc_.set_need_check_feed(need_check_feed);
   }
 
+  // True when an attribute named `name` is stored in attrs_.
+  bool HasAttr(const std::string &name) const {
+    return attrs_.count(name) != 0;
+  }
+
+  std::vector<std::string> AttrNames() const;  // names of all stored attrs
+  // SetAttr enforces that the value is an int, a string or a list of ints.
+  void SetAttr(const std::string &name, const Attribute &v);
+  void RemoveAttr(const std::string &name);
+  // GetAttr returns the value by copy and enforces that `name` exists.
+  Attribute GetAttr(const std::string &name) const;
+
  private:
   const proto::VarType::TensorDesc &tensor_desc() const;
   std::vector<proto::VarType::TensorDesc> tensor_descs() const;
   proto::VarType::TensorDesc *mutable_tensor_desc();
   std::vector<proto::VarType::TensorDesc *> mutable_tensor_descs();
 
   proto::VarDesc desc_;
+  AttributeMap attrs_;
 };
 
 bool operator==(const VarDesc &left, const VarDesc &right);

diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/ir/node.h"
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/framework/operator.h"
+#include "paddle/fluid/framework/proto_desc.h"
 
 #if defined(PADDLE_WITH_DGC)
 #include "paddle/fluid/framework/details/dgc_const_values.h"
@@ -33,6 +34,9 @@ void BindConstValue(pybind11::module* m) {
   m->def("kControlDepVarName",
          [] { return framework::ir::Node::kControlDepVarName; });
   m->def("kNewGradSuffix", [] { return framework::kNewGradSuffix; });
+  m->def("kAutoParallelSuffix", [] { return framework::kAutoParallelSuffix; });
+  m->def("kNoneProcessMeshIndex",
+         [] { return framework::kNoneProcessMeshIndex; });
 
   auto op_proto_and_checker_maker =
       m->def_submodule("op_proto_and_checker_maker");

diff --git a/paddle/fluid/pybind/protobuf.cc b/paddle/fluid/pybind/protobuf.cc
@@ -20,6 +20,7 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/block_desc.h"
 #include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/process_mesh_desc.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/framework/version.h"
@@ -84,6 +85,17 @@ void BindProgramDesc(pybind11::module *m) {
            [](pd::ProgramDesc &self) -> int64_t { return self.Version(); });
 }
 
+// Registers pd::ProcessMeshDesc with the given module under the python
+// name "ProcessMeshDesc".
+void BindProcessMeshDesc(pybind11::module *m) {
+  pybind11::class_<pd::ProcessMeshDesc> mesh_desc(*m, "ProcessMeshDesc", "");
+
+  // Constructor arguments, in order: topology, process_group, parent_id.
+  mesh_desc.def(pybind11::init<const std::vector<int32_t> &,
+                               const std::vector<int32_t> &, int32_t>());
+
+  // Read-only properties, each backed by the matching C++ accessor.
+  mesh_desc.def_property_readonly("id", &pd::ProcessMeshDesc::ID);
+  mesh_desc.def_property_readonly("parent", &pd::ProcessMeshDesc::Parent);
+  mesh_desc.def_property_readonly("topology", &pd::ProcessMeshDesc::Topology);
+  mesh_desc.def_property_readonly("process_group",
+                                  &pd::ProcessMeshDesc::ProcessGroup);
+}
+
 void BindBlockDesc(pybind11::module *m) {
   pybind11::class_<pd::BlockDesc> blockdesc(*m, "BlockDesc", "");
   g_blockdesc_pytype = (PyTypeObject *)blockdesc.ptr();  // NOLINT
@@ -184,7 +196,12 @@ void BindVarDsec(pybind11::module *m) {
       .def("clear_stop_gradient", &pd::VarDesc::ClearStopGradient)
       .def("has_stop_gradient", &pd::VarDesc::HasStopGradient)
       .def("need_check_feed", &pd::VarDesc::NeedCheckFeed)
-      .def("set_need_check_feed", &pd::VarDesc::SetNeedCheckFeed);
+      .def("set_need_check_feed", &pd::VarDesc::SetNeedCheckFeed)
+      .def("has_attr", &pd::VarDesc::HasAttr)
+      .def("attr_names", &pd::VarDesc::AttrNames)
+      .def("_set_attr", &pd::VarDesc::SetAttr)
+      .def("remove_attr", &pd::VarDesc::RemoveAttr)
+      .def("attr", &pd::VarDesc::GetAttr);
 
   pybind11::enum_<pd::proto::VarType::Type> vartype(var_desc, "VarType", "");
   g_vartype_pytype = (PyTypeObject *)vartype.ptr();  // NOLINT

diff --git a/paddle/fluid/pybind/protobuf.h b/paddle/fluid/pybind/protobuf.h
@@ -30,6 +30,7 @@ void BindProgramDesc(pybind11::module* m);
 void BindBlockDesc(pybind11::module* m);
 void BindVarDsec(pybind11::module* m);
 void BindOpDesc(pybind11::module* m);
+void BindProcessMeshDesc(pybind11::module* m);
 
 }  // namespace pybind
 }  // namespace paddle
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
@@ -2054,6 +2054,7 @@ All parameter, weight, gradient are variables in Paddle.
   BindOpDesc(&m);
   BindConstValue(&m);
   BindGlobalValueGetterSetter(&m);
+  BindProcessMeshDesc(&m);
 
   py::class_<framework::LoDRankTable>(m, "LodRankTable")
       .def("items", [](framework::LoDRankTable &table) {

@@ -36,6 +36,13 @@
 from .collective import send  # noqa: F401
 from .collective import wait  # noqa: F401
 
+from .auto_parallel import shard_tensor  # noqa: F401
+from .auto_parallel import shard_op  # noqa: F401
+from .auto_parallel import set_shard_mask  # noqa: F401
+from .auto_parallel import set_offload_device  # noqa: F401
+from .auto_parallel import set_pipeline_stage  # noqa: F401
+from .auto_parallel import ProcessMesh  # noqa: F401
+
 from .fleet import BoxPSDataset  # noqa: F401
 
 from .entry_attr import ProbabilityEntry  # noqa: F401
@@ -69,5 +76,11 @@
       "ReduceOp",
       "wait",
       "get_rank",
-      "ProbabilityEntry"
+      "ProbabilityEntry",
+      "shard_tensor",
+      "shard_op",
+      "set_shard_mask",
+      "set_offload_device",
+      "set_pipeline_stage",
+      "ProcessMesh",
 ]
diff --git a/python/paddle/distributed/auto_parallel/__init__.py b/python/paddle/distributed/auto_parallel/__init__.py
@@ -0,0 +1,22 @@
+#   Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .interface import shard_tensor  # noqa: F401
+from .interface import shard_op  # noqa: F401
+from .interface import set_shard_mask  # noqa: F401
+from .interface import set_offload_device  # noqa: F401
+from .interface import set_pipeline_stage  # noqa: F401
+from .interface import ProcessMesh  # noqa: F401
+
+__all__ = []