
Commit

add ut
ronny1996 committed Nov 16, 2021
1 parent 4098423 commit c858052
Showing 5 changed files with 142 additions and 7 deletions.
4 changes: 4 additions & 0 deletions python/paddle/distributed/collective.py
@@ -264,6 +264,10 @@ def new_group(ranks=None, backend=None):
place = core.CUDAPlace(genv.device_id)
core.NCCLParallelContext(strategy,
place).init_with_ring_id(ring_id)
elif core.is_compiled_with_npu():
place = core.NPUPlace(genv.device_id)
core.HCCLParallelContext(strategy,
place).init_with_ring_id(ring_id)
else:
assert False, ("no cuda device found")
else:
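For orientation, here is a minimal usage sketch of the path this hunk enables. It assumes a two-card Ascend NPU machine, a Paddle build with NPU support, and that each rank is started by a distributed launcher; it is illustrative, not part of the commit.

import paddle.distributed as dist

# On an NPU build, new_group() now initializes an HCCLParallelContext for the
# sub-group's ring instead of failing the "no cuda device found" assert.
dist.init_parallel_env()
group = dist.new_group(ranks=[0, 1])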
4 changes: 2 additions & 2 deletions python/paddle/fluid/dygraph/nn.py
@@ -207,9 +207,9 @@ def __init__(self,
if core.is_compiled_with_npu():
if (self._num_channels == self._groups and
self._num_channels == self._num_filters):
l_type = 'depthwise_conv2d'
self._l_type = 'depthwise_conv2d'
else:
l_type = 'conv2d'
self._l_type = 'conv2d'

self._num_channels = num_channels
if self._groups is None:
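The point of this change is that the selected op type is stored on the instance (self._l_type) instead of a throwaway local l_type, so the layer's forward pass actually dispatches to the depthwise kernel on NPU builds. A standalone sketch of the same selection rule follows; the function name is hypothetical, not Paddle API.

def select_conv_op_type(num_channels, groups, num_filters):
    # Depthwise convolution applies when every input channel forms its own group
    # and produces exactly one output channel.
    if num_channels == groups and num_channels == num_filters:
        return 'depthwise_conv2d'
    return 'conv2d'

assert select_conv_op_type(32, 32, 32) == 'depthwise_conv2d'
assert select_conv_op_type(32, 1, 64) == 'conv2d'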
5 changes: 4 additions & 1 deletion python/paddle/fluid/dygraph/parallel.py
@@ -62,9 +62,12 @@ def prepare_context(strategy=None):
elif isinstance(place, core.XPUPlace):
parallel_helper._set_parallel_ctx(
core.BKCLParallelContext(strategy, place))
elif isinstance(place, core.NPUPlace):
parallel_helper._set_parallel_ctx(
core.HCCLParallelContext(strategy, place))
else:
# TODO(Yancey1989): add Gloo Parallel Context to support CPU parallel computation
assert ("Only support CUDAPlace or XPUPlace for now.")
assert ("Only support CUDAPlace or XPUPlace or NPUPlace for now.")
parallel_helper._init_parallel_ctx()
return strategy

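A minimal sketch of how the new branch is reached (assumes a Paddle NPU build and the per-rank environment, such as FLAGS_selected_npus, exported by the launcher; illustrative only):

import paddle.fluid as fluid
from paddle.fluid import dygraph

# Under an NPUPlace dygraph guard, prepare_context() now wires up an
# HCCLParallelContext rather than falling through to the assert message.
place = fluid.NPUPlace(0)  # the device id would normally come from FLAGS_selected_npus
with fluid.dygraph.guard(place):
    strategy = dygraph.prepare_context()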
90 changes: 90 additions & 0 deletions (new NPU unit test file)
@@ -0,0 +1,90 @@
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import os
import sys
import unittest
sys.path.append("..")

from test_dist_base import TestDistBase
import paddle.fluid as fluid

flag_name = os.path.splitext(__file__)[0]
rank_table_file = b"""{
    "status": "completed",
    "version": "1.0",
    "server_count": "1",
    "server_list": [
        {
            "server_id": "127.0.0.1",
            "device": [
                {
                    "device_id": "0",
                    "device_ip": "192.1.184.23",
                    "rank_id": "0"
                },
                {
                    "device_id": "1",
                    "device_ip": "192.2.21.93",
                    "rank_id": "1"
                }
            ]
        }
    ]
}"""

need_envs = {
    "ASCEND_AICPU_PATH":
    os.getenv("ASCEND_AICPU_PATH", "/usr/local/Ascend/nnae/latest"),
    "ASCEND_OPP_PATH":
    os.getenv("ASCEND_OPP_PATH", "/usr/local/Ascend/nnae/latest/opp"),
    "HCCL_CONNECT_TIMEOUT": "7200",
    "HCCL_WHITELIST_DISABLE": "1",
    "HCCL_SECURITY_MODE": "1",
    "RANK_TABLE_FILE": "rank_table_file.json",
}


class TestParallelDygraphMnistNPU(TestDistBase):
    def _setup_config(self):
        self._sync_mode = False
        self._hccl_mode = True
        self._dygraph = True
        self._enforce_place = "NPU"

    def test_mnist(self):
        with open("rank_table_file.json", "wb") as f:
            f.write(rank_table_file)
        if fluid.core.is_compiled_with_npu():
            self.check_with_place(
                os.path.abspath('../parallel_dygraph_mnist.py'),
                delta=1e-3,
                check_error_log=True,
                need_envs=need_envs,
                log_name=flag_name)


class TestFleetDygraphMnistNPU(TestParallelDygraphMnistNPU):
    def _setup_config(self):
        self._sync_mode = False
        self._hccl_mode = True
        self._dygraph = True
        self._enforce_place = "NPU"
        self._use_fleet_api = True


if __name__ == "__main__":
    unittest.main()
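A quick sanity check of the rank table above (illustrative only): it describes a single server exposing two Ascend devices with ranks 0 and 1, which matches the two-trainer run the test launches.

import json

table = json.loads(rank_table_file)  # the bytes literal defined in the test above
assert table["server_count"] == "1"
devices = table["server_list"][0]["device"]
assert [d["rank_id"] for d in devices] == ["0", "1"]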
46 changes: 42 additions & 4 deletions python/paddle/fluid/tests/unittests/test_dist_base.py
@@ -551,6 +551,9 @@ def run_trainer(self, args):
elif fluid.core.is_compiled_with_xpu():
device_id = int(os.getenv("FLAGS_selected_xpus", "0"))
place = fluid.XPUPlace(device_id)
elif fluid.core.is_compiled_with_npu():
device_id = int(os.getenv("FLAGS_selected_npus", "0"))
place = fluid.NPUPlace(device_id)
else:
assert ("Only support CUDAPlace or XPUPlace or CPU(Gloo) for now.")

@@ -564,7 +567,7 @@ def run_trainer(self, args):
nranks = len(args.endpoints.split(",")) if args.endpoints else 1

#if args.update_method == "nccl2":
if args.update_method == "nccl2" or args.update_method == "bkcl":
if args.update_method == "nccl2" or args.update_method == "bkcl" or args.update_method == "hccl":
strategy = dygraph.parallel.ParallelStrategy()
strategy.nranks = nranks
strategy.local_rank = args.trainer_id
@@ -671,12 +674,12 @@ def run_use_fleet_api_trainer(self, args):
strategy.find_unused_parameters = True

# 3. init parallel env
if args.update_method == "nccl2" or "bkcl":
if args.update_method == "nccl2" or "bkcl" or "hccl":
fleet.init(is_collective=True, strategy=strategy)

# 4. train model
model, train_reader, opt = self.get_model()
if args.update_method == "nccl2" or "bkcl":
if args.update_method == "nccl2" or "bkcl" or "hccl":
opt = fleet.distributed_optimizer(opt)
model = fleet.distributed_model(model)
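One caveat: args.update_method == "nccl2" or "bkcl" or "hccl" evaluates the bare string literals as truthy, so these two branches are taken for every update method. A stricter membership test would look like this sketch (illustrative, not part of the commit):

if args.update_method in ("nccl2", "bkcl", "hccl"):
    fleet.init(is_collective=True, strategy=strategy)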

@@ -706,7 +709,8 @@ def runtime_main(test_class):
type=str,
default="local",
choices=[
"pserver", "nccl2", "bkcl", "local", "nccl2_reduce_layer", "gloo"
"pserver", "nccl2", "bkcl", "local", "nccl2_reduce_layer", "gloo",
"hccl"
])
parser.add_argument('--trainer_id', type=int, required=False, default=0)
parser.add_argument('--trainers', type=int, required=False, default=1)
@@ -728,6 +732,7 @@ def runtime_main(test_class):
parser.add_argument('--use_cpu', action='store_true')
parser.add_argument('--use_xpu', action='store_true')
parser.add_argument('--use_dgc', action='store_true')
parser.add_argument('--use_npu', action='store_true')
parser.add_argument('--accumulate_gradient', action='store_true')
parser.add_argument('--find_unused_parameters', action='store_true')
parser.add_argument('--use_reduce', action='store_true')
@@ -784,13 +789,21 @@ def _after_setup_config(self):
self.__use_cuda = False
self.__use_xpu = False
self._use_dgc = False
self.__use_npu = False
elif self._enforce_place == "GPU":
self.__use_cuda = True
self.__use_xpu = False
self.__use_npu = False
elif self._enforce_place == "XPU":
self.__use_cuda = False
self.__use_xpu = True
self._use_dgc = False
self.__use_npu = False
elif self._enforce_place == "NPU":
self.__use_cuda = False
self.__use_xpu = False
self._use_dgc = False
self.__use_npu = True
else:
if fluid.core.is_compiled_with_cuda():
self.__use_cuda = True
@@ -815,6 +828,7 @@ def setUp(self):
self._nccl2_mode = False
self._bkcl_mode = False
self._gloo_mode = False # now, support gloo backend
self._hccl_mode = False
self._pipeline_mode = False
self._mp_mode = False
self._diff_batch = False
@@ -953,6 +967,13 @@ def _run_local(self,
"PADDLE_TRAINERS_NUM": "1",
"PADDLE_TRAINER_ID": "0"
}
elif self.__use_npu:
cmd += " --use_npu"
env_local = {
"FLAGS_selected_npus": devices,
"PADDLE_TRAINERS_NUM": "1",
"PADDLE_TRAINER_ID": "0"
}
else:
env_local = {'CPU_NUM': '1'}

@@ -1199,6 +1220,16 @@ def _get_nccl2_trainer_cmd(self, model, ep, update_method, trainer_id,
"PADDLE_CURRENT_ENDPOINT": ep,
"GLOG_v": "2",
})
elif self.__use_npu:
tr_cmd += " --use_npu"
env.update({
"FLAGS_selected_npus": "{}".format(trainer_id),
"PADDLE_TRAINERS_NUM": "{}".format(trainer_num),
"PADDLE_TRAINER_ID": "{}".format(trainer_id),
"PADDLE_TRAINER_ENDPOINTS": self._ps_endpoints,
"PADDLE_CURRENT_ENDPOINT": ep,
"GLOG_v": "2",
})
else:
env.update({'CPU_NUM': '1'})

@@ -1471,6 +1502,13 @@ def check_with_place(self,
update_method='gloo',
check_error_log=check_error_log,
log_name=log_name)
elif self._hccl_mode:
tr0_losses, tr1_losses = self._run_cluster_nccl2(
model_file,
required_envs,
update_method='hccl',
check_error_log=check_error_log,
log_name=log_name)

elif self._pipeline_mode:
tr0_losses, tr1_losses = self._run_pipeline(
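Taken together, a test opts into the new code path by setting _hccl_mode; check_with_place() then reuses _run_cluster_nccl2 with update_method='hccl', and each trainer process selects fluid.NPUPlace from FLAGS_selected_npus. A minimal subclass sketch (the class name is hypothetical):

class TestMyDygraphModelNPU(TestDistBase):
    def _setup_config(self):
        self._dygraph = True
        self._hccl_mode = True       # route check_with_place() through _run_cluster_nccl2(update_method='hccl')
        self._enforce_place = "NPU"  # pick FLAGS_selected_npus / fluid.NPUPlace in the trainer process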

1 comment on commit c858052

paddle-bot-old (bot) commented on c858052 on Nov 16, 2021


🕵️ CI failures summary

🔍 PR: #36285 Commit ID: c858052 contains failed CI.

🔹 Failed: PR-CI-Coverage

Unknown Failed
2021-11-16 22:12:09   File "/paddle/tools/coverage/gcda_clean.py", line 108, in <module>
2021-11-16 22:12:09 clean(pull_id)
2021-11-16 22:12:09 File "/paddle/tools/coverage/gcda_clean.py", line 80, in clean
2021-11-16 22:12:09 for file in get_files(pull_id):
2021-11-16 22:12:09 File "/paddle/tools/coverage/gcda_clean.py", line 63, in get_files
2021-11-16 22:12:09 pull = get_pull(pull_id)
2021-11-16 22:12:09 File "/paddle/tools/coverage/gcda_clean.py", line 49, in get_pull
2021-11-16 22:12:09 pull = repo.get_pull(pull_id)
2021-11-16 22:12:09 UnboundLocalError: local variable 'repo' referenced before assignment
2021-11-16 22:12:09 + exit 101
2021-11-16 22:12:09 + EXCODE=101
2021-11-16 22:12:09 + echo 101
2021-11-16 22:12:09 101
2021-11-16 22:12:09 + echo 'ipipe_log_param_EXCODE: 101'
2021-11-16 22:12:09 ipipe_log_param_EXCODE: 101
2021-11-16 22:12:09 + '[' 101 -ne 0 ']'
2021-11-16 22:12:09 + '[' 101 -ne 9 ']'
2021-11-16 22:12:09 + exit 101
2021-11-16 22:12:09 {build code state=101}
