From 7a4d921ddc0e2f0ae973c13253174f7476a4ecb2 Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Wed, 4 Nov 2020 19:15:21 -0800
Subject: [PATCH 1/5] [s2s] test_distributed_eval

---
 examples/seq2seq/test_finetune_trainer.py          |  9 ++---
 .../test_seq2seq_examples_multi_gpu.py             | 37 ++++++++++++++++++-
 src/transformers/testing_utils.py                  | 16 ++++++++
 3 files changed, 55 insertions(+), 7 deletions(-)

diff --git a/examples/seq2seq/test_finetune_trainer.py b/examples/seq2seq/test_finetune_trainer.py
index 923ecf6d945831..6da0e240c41959 100644
--- a/examples/seq2seq/test_finetune_trainer.py
+++ b/examples/seq2seq/test_finetune_trainer.py
@@ -2,9 +2,9 @@
 import sys
 from unittest.mock import patch

-from transformers import BertTokenizer, EncoderDecoderModel, is_torch_available
+from transformers import BertTokenizer, EncoderDecoderModel
 from transformers.file_utils import is_datasets_available
-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, slow
+from transformers.testing_utils import TestCasePlus, execute_subprocess_async, get_gpu_count, slow
 from transformers.trainer_callback import TrainerState
 from transformers.trainer_utils import set_seed

@@ -13,9 +13,6 @@
 from .test_seq2seq_examples import MBART_TINY


-if is_torch_available():
-    import torch
-
 set_seed(42)
 MARIAN_MODEL = "sshleifer/student_marian_en_ro_6_1"

@@ -196,7 +193,7 @@ def run_trainer(self, eval_steps: int, max_len: str, model_name: str, num_train_
         """.split()
         # --eval_beams 2

-        n_gpu = torch.cuda.device_count()
+        n_gpu = get_gpu_count()
         if n_gpu > 1:
             distributed_args = f"""
                 -m torch.distributed.launch
diff --git a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
index 463ad1e7d9b8c4..d67a1d93e64696 100644
--- a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
+++ b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
@@ -3,7 +3,14 @@
 import os
 import sys

-from transformers.testing_utils import TestCasePlus, execute_subprocess_async, require_torch_multigpu
+from transformers.testing_utils import (
+    TestCasePlus,
+    execute_subprocess_async,
+    get_gpu_count,
+    require_torch_gpu,
+    require_torch_multigpu,
+    slow,
+)

 from .test_seq2seq_examples import CHEAP_ARGS, make_test_data_dir
 from .utils import load_json
@@ -80,3 +87,31 @@ def convert(k, v):
         self.assertEqual(len(metrics["test"]), 1)
         desired_n_evals = int(args_d["max_epochs"] * (1 / args_d["val_check_interval"]) / 2 + 1)
         self.assertEqual(len(metrics["val"]), desired_n_evals)
+
+    @slow
+    @require_torch_gpu
+    def test_distributed_eval(self):
+        output_dir = self.get_auto_remove_tmp_dir("./xxx")
+        args = f"""
+            --model_name Helsinki-NLP/opus-mt-en-ro
+            --save_dir {output_dir}
+            --data_dir test_data/wmt_en_ro
+        """.split()
+
+        # we want this test to run even if there is only one GPU, but if there are more we use them all
+        n_gpu = get_gpu_count()
+        distributed_args = f"""
+            -m torch.distributed.launch
+            --nproc_per_node={n_gpu}
+            {self.test_file_dir}/run_distributed_eval.py
+        """.split()
+        cmd = [sys.executable] + distributed_args + args
+        execute_subprocess_async(cmd, env=self.get_env())
+
+        metrics_save_path = os.path.join(output_dir, "test_rouge.json")
+        metrics = load_json(metrics_save_path)
+        # print(metrics)
+        self.assertGreaterEqual(metrics["rouge1"], 63)
+        self.assertGreaterEqual(metrics["rouge2"], 40)
+        self.assertGreaterEqual(metrics["rougeL"], 55)
+        self.assertGreaterEqual(metrics["rougeLsum"], 60)
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index 8eb41ac85f8817..02998bcfd656b6 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -297,6 +297,22 @@ def require_ray(test_case):
     return test_case


+def get_gpu_count():
+    """
+    Return the number of available GPUs (regardless of whether torch or tf is used)
+    """
+    if _torch_available:
+        import torch
+
+        return torch.cuda.device_count()
+    elif _tf_available:
+        import tensorflow as tf
+
+        return len(tf.config.list_physical_devices("GPU"))
+    else:
+        return 0
+
+
 def get_tests_dir(append_path=None):
     """
     Args:
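The test added above assembles a torch.distributed.launch command line by hand and passes it to execute_subprocess_async. Below is a minimal standalone sketch of the same launch recipe: plain subprocess.run stands in for the async helper, the save/data paths are placeholders rather than values from the patch, and at least one visible GPU is assumed, as @require_torch_gpu guarantees inside the test.

    # Sketch: run run_distributed_eval.py on every available GPU, mirroring
    # test_distributed_eval above. Paths are placeholders, not from the patch.
    import subprocess
    import sys

    from transformers.testing_utils import get_gpu_count

    n_gpu = get_gpu_count()  # counts GPUs via torch or tf, whichever is installed
    distributed_args = f"""
        -m torch.distributed.launch
        --nproc_per_node={n_gpu}
        examples/seq2seq/run_distributed_eval.py
    """.split()
    args = """
        --model_name Helsinki-NLP/opus-mt-en-ro
        --save_dir /tmp/distributed_eval
        --data_dir examples/seq2seq/test_data/wmt_en_ro
    """.split()
    # check=True raises CalledProcessError if any spawned rank fails, which is
    # roughly the failure mode execute_subprocess_async surfaces in the tests.
    subprocess.run([sys.executable] + distributed_args + args, check=True)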
From 79c790f94232dd477c91756a5c1c8f9f3e94f91a Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Wed, 4 Nov 2020 19:20:17 -0800
Subject: [PATCH 2/5] remove debug

---
 examples/seq2seq/test_seq2seq_examples_multi_gpu.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
index d67a1d93e64696..1ae0d9ce5ec129 100644
--- a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
+++ b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
@@ -91,7 +91,7 @@ def convert(k, v):
     @slow
     @require_torch_gpu
     def test_distributed_eval(self):
-        output_dir = self.get_auto_remove_tmp_dir("./xxx")
+        output_dir = self.get_auto_remove_tmp_dir()
         args = f"""
             --model_name Helsinki-NLP/opus-mt-en-ro
             --save_dir {output_dir}
From fd4c9cdd787b6d96ad052a45b10653b13e999801 Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Thu, 5 Nov 2020 12:34:22 -0800
Subject: [PATCH 3/5] integrate suggestions

---
 examples/seq2seq/test_seq2seq_examples_multi_gpu.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
index 1ae0d9ce5ec129..796f78e33e1158 100644
--- a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
+++ b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
@@ -96,6 +96,9 @@ def test_distributed_eval(self):
             --model_name Helsinki-NLP/opus-mt-en-ro
             --save_dir {output_dir}
             --data_dir test_data/wmt_en_ro
+            --n_obs 100
+            --num_beams 2
+            --task translation
         """.split()

         # we want this test to run even if there is only one GPU, but if there are more we use them all
@@ -108,10 +111,7 @@ def test_distributed_eval(self):
         cmd = [sys.executable] + distributed_args + args
         execute_subprocess_async(cmd, env=self.get_env())

-        metrics_save_path = os.path.join(output_dir, "test_rouge.json")
+        metrics_save_path = os.path.join(output_dir, "test_bleu.json")
         metrics = load_json(metrics_save_path)
         # print(metrics)
-        self.assertGreaterEqual(metrics["rouge1"], 63)
-        self.assertGreaterEqual(metrics["rouge2"], 40)
-        self.assertGreaterEqual(metrics["rougeL"], 55)
-        self.assertGreaterEqual(metrics["rougeLsum"], 60)
+        self.assertGreaterEqual(metrics["bleu"], 25)
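After patch 3 the test exercises the translation task, so run_distributed_eval.py writes its scores to test_bleu.json under --save_dir and the test asserts a single BLEU floor instead of four ROUGE floors. A sketch of that consumption side follows; load_json here is a local stand-in for examples/seq2seq/utils.load_json, and the save_dir value is a placeholder.

    # Sketch: read the metrics file the translation task writes and apply the
    # same sanity threshold the test uses.
    import json
    import os

    def load_json(path):
        # local stand-in for examples/seq2seq/utils.load_json
        with open(path) as f:
            return json.load(f)

    save_dir = "/tmp/distributed_eval"  # placeholder output directory
    metrics = load_json(os.path.join(save_dir, "test_bleu.json"))
    # 25 BLEU is the deliberately loose lower bound the patch picks for
    # opus-mt-en-ro on the bundled wmt_en_ro sample.
    assert metrics["bleu"] >= 25, f"BLEU too low: {metrics['bleu']}"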
From 275f4000de9ba526291d3888e97bf09c800b7ca9 Mon Sep 17 00:00:00 2001
From: Stas Bekman
Date: Thu, 5 Nov 2020 12:42:03 -0800
Subject: [PATCH 4/5] update doc

---
 docs/source/testing.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/source/testing.rst b/docs/source/testing.rst
index aef3b7efc8126d..0a9d3d525bfa9e 100644
--- a/docs/source/testing.rst
+++ b/docs/source/testing.rst
@@ -450,7 +450,8 @@ Inside tests:

 .. code-block:: bash

-    torch.cuda.device_count()
+    from transformers.testing_utils import get_gpu_count
+    n_gpu = get_gpu_count() # works with torch and tf

From 9c34dd1fb67d8bce6f44dbe3f56b0185e365bad4 Mon Sep 17 00:00:00 2001
From: Sam Shleifer
Date: Thu, 5 Nov 2020 15:49:18 -0500
Subject: [PATCH 5/5] Update examples/seq2seq/test_seq2seq_examples_multi_gpu.py

---
 examples/seq2seq/test_seq2seq_examples_multi_gpu.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
index 796f78e33e1158..efc23b5681e040 100644
--- a/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
+++ b/examples/seq2seq/test_seq2seq_examples_multi_gpu.py
@@ -96,7 +96,6 @@ def test_distributed_eval(self):
             --model_name Helsinki-NLP/opus-mt-en-ro
             --save_dir {output_dir}
             --data_dir test_data/wmt_en_ro
-            --n_obs 100
             --num_beams 2
             --task translation
         """.split()
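The series keeps the torch-only require_torch_gpu and require_torch_multigpu guards, but get_gpu_count also makes a framework-agnostic skip easy to build. The decorator below is hypothetical, not something these patches add; it is shown only to illustrate the new helper.

    # Hypothetical framework-agnostic skip decorator built on get_gpu_count;
    # require_min_gpus is NOT part of this patch series or of testing_utils.
    import unittest

    from transformers.testing_utils import get_gpu_count

    def require_min_gpus(n):
        """Skip the decorated test unless at least n GPUs are visible."""
        def decorator(test_case):
            if get_gpu_count() < n:
                return unittest.skip(f"test requires at least {n} GPUs")(test_case)
            return test_case
        return decorator

    class ExampleGpuTest(unittest.TestCase):
        @require_min_gpus(2)
        def test_needs_two_gpus(self):
            self.assertGreaterEqual(get_gpu_count(), 2)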