Skip to content

Commit

Permalink
[AutoParallel] fix unittest with paddle.distributed.launch (PaddlePad…
Browse files Browse the repository at this point in the history
…dle#44439)

* fix unittest

* fix log_dir

* _enable_legacy_dygraph
  • Loading branch information
zhaoyinglia authored and Aurelius84 committed Jul 29, 2022
1 parent d977bf7 commit 7298e5f
Show file tree
Hide file tree
Showing 8 changed files with 21 additions and 18 deletions.
5 changes: 4 additions & 1 deletion python/paddle/distributed/auto_parallel/process_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,12 @@

import paddle
import paddle.fluid.core as core

from ..collective import _get_global_env
from ..collective import _new_ring_id
from ...fluid.framework import _non_static_mode
from ...fluid.layers.tensor import fill_constant
from paddle.fluid.framework import _enable_legacy_dygraph


def get_all_process_groups():
Expand Down Expand Up @@ -134,7 +136,8 @@ def instantiate(self):

# TODO(shenliang03): This is a temporary solution to solve the problem of
# hang caused by cross-creation of new_group
paddle.framework._in_legacy_dygraph()
paddle.disable_static()
_enable_legacy_dygraph()
paddle.set_device('gpu:%d' %
paddle.distributed.ParallelEnv().dev_id)
tmp = paddle.to_tensor(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_relaunch(self):
coverage_args = []

cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--log_dir", self.temp_dir.name,
"-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
"--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
mapping_json_path, "--enable_auto_mapping", "True",
launch_model_path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import tempfile
import unittest
import os
import sys
Expand All @@ -32,18 +33,17 @@ def test_converter(self):
else:
coverage_args = []

tmp_dir = tempfile.TemporaryDirectory()
cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--gpus", "0,1", launch_model_path
"-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
tmp_dir.name, launch_model_path
]

process = subprocess.Popen(cmd)
process.wait()
self.assertEqual(process.returncode, 0)

# Remove unnecessary files
log_path = os.path.join(file_dir, "log")
if os.path.exists(log_path):
shutil.rmtree(log_path)
tmp_dir.cleanup()

def test_input_invalid(self):
with self.assertRaises(ValueError):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def test_engine_api(self):

tmp_dir = tempfile.TemporaryDirectory()
cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name,
launch_model_path
"-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
tmp_dir.name, launch_model_path
]

process = subprocess.Popen(cmd)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ def test_engine_api(self):

tmp_dir = tempfile.TemporaryDirectory()
cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--gpus", "0,1", "--log_dir", tmp_dir.name,
launch_model_path
"-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
tmp_dir.name, launch_model_path
]

process = subprocess.Popen(cmd)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import tempfile
import unittest
import os
import sys
Expand All @@ -31,18 +32,17 @@ def test_dp2(self):
else:
coverage_args = []

tmp_dir = tempfile.TemporaryDirectory()
cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--gpus", "0,1", launch_model_path
"-m", "paddle.distributed.launch", "--devices", "0,1", "--log_dir",
tmp_dir.name, launch_model_path
]

process = subprocess.Popen(cmd)
process.wait()
self.assertEqual(process.returncode, 0)

# Remove unnecessary files
log_path = os.path.join(file_dir, "log")
if os.path.exists(log_path):
shutil.rmtree(log_path)
tmp_dir.cleanup()


if __name__ == "__main__":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_relaunch_with_planner(self):
coverage_args = []

cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--log_dir", self.temp_dir.name,
"-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
"--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
mapping_json_path, "--enable_auto_mapping", "True",
launch_model_path
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def test_relaunch_with_planner(self):
coverage_args = []

cmd = [sys.executable, "-u"] + coverage_args + [
"-m", "launch", "--log_dir", self.temp_dir.name,
"-m", "paddle.distributed.launch", "--log_dir", self.temp_dir.name,
"--cluster_topo_path", cluster_json_path, "--rank_mapping_path",
mapping_json_path, "--enable_auto_mapping", "True",
launch_model_path
Expand Down

0 comments on commit 7298e5f

Please sign in to comment.