From 32b1433f1c1846663e9e9913e6dca3f1b0145f61 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 5 Nov 2020 13:07:32 -0800 Subject: [PATCH 1/7] add a multi-gpu job for all example tests --- .github/workflows/self-scheduled.yml | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index decd61f4b09536..7022d6f362af61 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -227,7 +227,7 @@ jobs: python -c "import torch; print('Cuda available:', torch.cuda.is_available())" python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())" - - name: Run all tests on GPU + - name: Run all tests on multi-GPU env: OMP_NUM_THREADS: 1 RUN_SLOW: yes @@ -238,8 +238,20 @@ jobs: - name: Failure short reports if: ${{ always() }} run: cat reports/tests_torch_multiple_gpu_failures_short.txt - - - name: Run all pipeline tests on GPU + + - name: Run all examples tests on multi-GPU + env: + OMP_NUM_THREADS: 1 + RUN_SLOW: yes + run: | + source .env/bin/activate + python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples + + - name: Failure short reports + if: ${{ always() }} + run: cat reports/examples_torch_multiple_gpu_failures_short.txt + + - name: Run all pipeline tests on multi-GPU if: ${{ always() }} env: TF_FORCE_GPU_ALLOW_GROWTH: "true" @@ -306,7 +318,7 @@ jobs: TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))" TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))" - - name: Run all tests on GPU + - name: Run all tests on multi-GPU env: OMP_NUM_THREADS: 1 RUN_SLOW: yes @@ -318,7 +330,7 @@ jobs: if: ${{ always() }} run: cat reports/tests_tf_multiple_gpu_failures_short.txt - - name: Run all pipeline 
tests on GPU + - name: Run all pipeline tests on multi-GPU if: ${{ always() }} env: TF_FORCE_GPU_ALLOW_GROWTH: "true" From ab42b86e7e7e2a4d4ac3695e00d2be781ddcd9ba Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 5 Nov 2020 15:28:24 -0800 Subject: [PATCH 2/7] run only ported tests --- .github/workflows/self-scheduled.yml | 4 ++-- examples/passing-multigpu-tests.txt | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) create mode 100644 examples/passing-multigpu-tests.txt diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 7022d6f362af61..a67710d136ef7e 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -239,13 +239,13 @@ jobs: if: ${{ always() }} run: cat reports/tests_torch_multiple_gpu_failures_short.txt - - name: Run all examples tests on multi-GPU + - name: Run ported examples tests on multi-GPU env: OMP_NUM_THREADS: 1 RUN_SLOW: yes run: | source .env/bin/activate - python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples + python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu $(tr '\n' ' ' < examples/passing-multigpu-tests.txt) - name: Failure short reports if: ${{ always() }} diff --git a/examples/passing-multigpu-tests.txt b/examples/passing-multigpu-tests.txt new file mode 100644 index 00000000000000..2ead41cbd9c12a --- /dev/null +++ b/examples/passing-multigpu-tests.txt @@ -0,0 +1,4 @@ +examples/seq2seq/test_finetune_trainer.py::TestFinetuneTrainer::test_finetune_trainer +examples/seq2seq/test_finetune_trainer.py::TestFinetuneTrainer::test_finetune_trainer_slow +examples/seq2seq/test_seq2seq_examples_multi_gpu.py::TestSummarizationDistillerMultiGPU::test_distributed_eval +examples/seq2seq/test_seq2seq_examples_multi_gpu.py::TestSummarizationDistillerMultiGPU::test_multigpu From 5cc730857b55a11483754fc900af1c3de8f10410 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 5 Nov 2020 15:31:23 
-0800 Subject: [PATCH 3/7] rename --- .github/workflows/self-scheduled.yml | 2 +- .../{passing-multigpu-tests.txt => ported-multigpu-tests.txt} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename examples/{passing-multigpu-tests.txt => ported-multigpu-tests.txt} (100%) diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index a67710d136ef7e..53470096214440 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -245,7 +245,7 @@ jobs: RUN_SLOW: yes run: | source .env/bin/activate - python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu $(tr '\n' ' ' < examples/passing-multigpu-tests.txt) + python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu $(tr '\n' ' ' < examples/ported-multigpu-tests.txt) - name: Failure short reports if: ${{ always() }} diff --git a/examples/passing-multigpu-tests.txt b/examples/ported-multigpu-tests.txt similarity index 100% rename from examples/passing-multigpu-tests.txt rename to examples/ported-multigpu-tests.txt From 28229d5aead8709476a9c787bb792d0adca16e2b Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Thu, 5 Nov 2020 15:49:19 -0800 Subject: [PATCH 4/7] explain why env is re-activated on each step --- .github/workflows/self-scheduled.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 53470096214440..760abb8b15ea8f 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -1,3 +1,8 @@ +# configuration notes: +# +# - `source .env/bin/activate` currently needs to be run as the very first command in each step. Otherwise +# the step uses the system-wide python interpreter. 
+ name: Self-hosted runner (scheduled) on: From 1e2d201b659a8b79ac68de9008e6a597fb4373fe Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Sat, 7 Nov 2020 17:41:34 -0800 Subject: [PATCH 5/7] mark all unported/checked tests with @require_torch_non_multigpu_but_fix_me --- .github/workflows/self-scheduled.yml | 2 +- .../test_run_glue_with_pabee.py | 3 ++- examples/deebert/test_glue_deebert.py | 3 ++- examples/ported-multigpu-tests.txt | 4 ---- examples/rag/test_distributed_retriever.py | 4 ++++ examples/seq2seq/test_bash_script.py | 5 ++++- examples/seq2seq/test_datasets.py | 9 +++++++- examples/seq2seq/test_fsmt_bleu_score.py | 9 +++++++- examples/seq2seq/test_make_student.py | 7 ++++++- examples/seq2seq/test_seq2seq_examples.py | 21 ++++++++++++++++++- examples/seq2seq/test_tatoeba_conversion.py | 4 +++- examples/test_examples.py | 8 ++++++- examples/test_xla_examples.py | 4 +++- .../token-classification/test_ner_examples.py | 4 +++- src/transformers/testing_utils.py | 6 ++++++ 15 files changed, 77 insertions(+), 16 deletions(-) delete mode 100644 examples/ported-multigpu-tests.txt diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 760abb8b15ea8f..1771dd95b586e3 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -250,7 +250,7 @@ jobs: RUN_SLOW: yes run: | source .env/bin/activate - python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu $(tr '\n' ' ' < examples/ported-multigpu-tests.txt) + python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples - name: Failure short reports if: ${{ always() }} diff --git a/examples/bert-loses-patience/test_run_glue_with_pabee.py b/examples/bert-loses-patience/test_run_glue_with_pabee.py index 22c6f4de06f430..eaac5329379c11 100644 --- a/examples/bert-loses-patience/test_run_glue_with_pabee.py +++ b/examples/bert-loses-patience/test_run_glue_with_pabee.py @@ -4,7 +4,7 @@ from 
unittest.mock import patch import run_glue_with_pabee -from transformers.testing_utils import TestCasePlus +from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me logging.basicConfig(level=logging.DEBUG) @@ -20,6 +20,7 @@ def get_setup_file(): class PabeeTests(TestCasePlus): + @require_torch_non_multigpu_but_fix_me def test_run_glue(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) diff --git a/examples/deebert/test_glue_deebert.py b/examples/deebert/test_glue_deebert.py index 59f7f58024f4e9..66faa557c0d0ba 100644 --- a/examples/deebert/test_glue_deebert.py +++ b/examples/deebert/test_glue_deebert.py @@ -5,7 +5,7 @@ from unittest.mock import patch import run_glue_deebert -from transformers.testing_utils import slow +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow logging.basicConfig(level=logging.DEBUG) @@ -26,6 +26,7 @@ def setup(self) -> None: logger.addHandler(stream_handler) @slow + @require_torch_non_multigpu_but_fix_me def test_glue_deebert_train(self): train_args = """ diff --git a/examples/ported-multigpu-tests.txt b/examples/ported-multigpu-tests.txt deleted file mode 100644 index 2ead41cbd9c12a..00000000000000 --- a/examples/ported-multigpu-tests.txt +++ /dev/null @@ -1,4 +0,0 @@ -examples/seq2seq/test_finetune_trainer.py::TestFinetuneTrainer::test_finetune_trainer -examples/seq2seq/test_finetune_trainer.py::TestFinetuneTrainer::test_finetune_trainer_slow -examples/seq2seq/test_seq2seq_examples_multi_gpu.py::TestSummarizationDistillerMultiGPU::test_distributed_eval -examples/seq2seq/test_seq2seq_examples_multi_gpu.py::TestSummarizationDistillerMultiGPU::test_multigpu diff --git a/examples/rag/test_distributed_retriever.py b/examples/rag/test_distributed_retriever.py index 80d8362d1edd1a..be874c83e8b37e 100644 --- a/examples/rag/test_distributed_retriever.py +++ b/examples/rag/test_distributed_retriever.py @@ -16,6 +16,7 @@ from 
transformers.configuration_rag import RagConfig from transformers.file_utils import is_datasets_available, is_faiss_available, is_psutil_available, is_torch_available from transformers.retrieval_rag import CustomHFIndex +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me from transformers.tokenization_bart import BartTokenizer from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer @@ -178,6 +179,7 @@ def get_dummy_custom_hf_index_retriever(self, init_retrieval: bool, from_disk: b retriever.init_retrieval(port) return retriever + @require_torch_non_multigpu_but_fix_me def test_pytorch_distributed_retriever_retrieve(self): n_docs = 1 retriever = self.get_dummy_pytorch_distributed_retriever(init_retrieval=True) @@ -193,6 +195,7 @@ def test_pytorch_distributed_retriever_retrieve(self): self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc self.assertListEqual(doc_ids.tolist(), [[1], [0]]) + @require_torch_non_multigpu_but_fix_me def test_custom_hf_index_retriever_retrieve(self): n_docs = 1 retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=False) @@ -208,6 +211,7 @@ def test_custom_hf_index_retriever_retrieve(self): self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc self.assertListEqual(doc_ids.tolist(), [[1], [0]]) + @require_torch_non_multigpu_but_fix_me def test_custom_pytorch_distributed_retriever_retrieve_from_disk(self): n_docs = 1 retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=True) diff --git a/examples/seq2seq/test_bash_script.py b/examples/seq2seq/test_bash_script.py index 71861ef4dbc6a3..3e30af217baf2d 100644 --- a/examples/seq2seq/test_bash_script.py +++ b/examples/seq2seq/test_bash_script.py @@ -15,7 +15,7 @@ from finetune import SummarizationModule, main from test_seq2seq_examples 
import CUDA_AVAILABLE, MBART_TINY from transformers import BartForConditionalGeneration, MarianMTModel -from transformers.testing_utils import TestCasePlus, slow +from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, slow from utils import load_json @@ -26,6 +26,7 @@ class TestAll(TestCasePlus): @slow @pytest.mark.skipif(not CUDA_AVAILABLE, reason="too slow to run on CPU") + @require_torch_non_multigpu_but_fix_me def test_model_download(self): """This warms up the cache so that we can time the next test without including download time, which varies between machines.""" BartForConditionalGeneration.from_pretrained(MODEL_NAME) @@ -34,6 +35,7 @@ def test_model_download(self): @timeout_decorator.timeout(120) @slow @pytest.mark.skipif(not CUDA_AVAILABLE, reason="too slow to run on CPU") + @require_torch_non_multigpu_but_fix_me def test_train_mbart_cc25_enro_script(self): data_dir = "examples/seq2seq/test_data/wmt_en_ro" env_vars_to_replace = { @@ -110,6 +112,7 @@ def test_train_mbart_cc25_enro_script(self): @timeout_decorator.timeout(600) @slow @pytest.mark.skipif(not CUDA_AVAILABLE, reason="too slow to run on CPU") + @require_torch_non_multigpu_but_fix_me def test_opus_mt_distill_script(self): data_dir = "examples/seq2seq/test_data/wmt_en_ro" env_vars_to_replace = { diff --git a/examples/seq2seq/test_datasets.py b/examples/seq2seq/test_datasets.py index 4b5c95ed4e5770..625b6da347d3c1 100644 --- a/examples/seq2seq/test_datasets.py +++ b/examples/seq2seq/test_datasets.py @@ -11,7 +11,7 @@ from test_seq2seq_examples import ARTICLES, BART_TINY, MARIAN_TINY, MBART_TINY, SUMMARIES, T5_TINY, make_test_data_dir from transformers import AutoTokenizer from transformers.modeling_bart import shift_tokens_right -from transformers.testing_utils import TestCasePlus, slow +from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, slow from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, 
LegacySeq2SeqDataset, Seq2SeqDataset @@ -30,6 +30,7 @@ class TestAll(TestCasePlus): ], ) @slow + @require_torch_non_multigpu_but_fix_me def test_seq2seq_dataset_truncation(self, tok_name): tokenizer = AutoTokenizer.from_pretrained(tok_name) tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir()) @@ -69,6 +70,7 @@ def test_seq2seq_dataset_truncation(self, tok_name): break # No need to test every batch @parameterized.expand([BART_TINY, BERT_BASE_CASED]) + @require_torch_non_multigpu_but_fix_me def test_legacy_dataset_truncation(self, tok): tokenizer = AutoTokenizer.from_pretrained(tok) tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir()) @@ -93,6 +95,7 @@ def test_legacy_dataset_truncation(self, tok): assert max_len_target > trunc_target # Truncated break # No need to test every batch + @require_torch_non_multigpu_but_fix_me def test_pack_dataset(self): tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25") @@ -111,6 +114,7 @@ def test_pack_dataset(self): assert orig_paths == new_paths @pytest.mark.skipif(not FAIRSEQ_AVAILABLE, reason="This test requires fairseq") + @require_torch_non_multigpu_but_fix_me def test_dynamic_batch_size(self): if not FAIRSEQ_AVAILABLE: return @@ -135,6 +139,7 @@ def test_dynamic_batch_size(self): if failures: raise AssertionError(f"too many tokens in {len(failures)} batches") + @require_torch_non_multigpu_but_fix_me def test_sortish_sampler_reduces_padding(self): ds, _, tokenizer = self._get_dataset(max_len=512) bs = 2 @@ -174,6 +179,7 @@ def _get_dataset(self, n_obs=1000, max_len=128): ) return ds, max_tokens, tokenizer + @require_torch_non_multigpu_but_fix_me def test_distributed_sortish_sampler_splits_indices_between_procs(self): ds, max_tokens, tokenizer = self._get_dataset() ids1 = set(DistributedSortishSampler(ds, 256, num_replicas=2, rank=0, add_extra_examples=False)) @@ -189,6 +195,7 @@ def test_distributed_sortish_sampler_splits_indices_between_procs(self): PEGASUS_XSUM, ], ) + 
@require_torch_non_multigpu_but_fix_me def test_dataset_kwargs(self, tok_name): tokenizer = AutoTokenizer.from_pretrained(tok_name) if tok_name == MBART_TINY: diff --git a/examples/seq2seq/test_fsmt_bleu_score.py b/examples/seq2seq/test_fsmt_bleu_score.py index beb7f2bc9857fd..2be6b7d5285282 100644 --- a/examples/seq2seq/test_fsmt_bleu_score.py +++ b/examples/seq2seq/test_fsmt_bleu_score.py @@ -19,7 +19,13 @@ from parameterized import parameterized from transformers import FSMTForConditionalGeneration, FSMTTokenizer -from transformers.testing_utils import get_tests_dir, require_torch, slow, torch_device +from transformers.testing_utils import ( + get_tests_dir, + require_torch, + require_torch_non_multigpu_but_fix_me, + slow, + torch_device, +) from utils import calculate_bleu @@ -48,6 +54,7 @@ def get_model(self, mname): ] ) @slow + @require_torch_non_multigpu_but_fix_me def test_bleu_scores(self, pair, min_bleu_score): # note: this test is not testing the best performance since it only evals a small batch # but it should be enough to detect a regression in the output quality diff --git a/examples/seq2seq/test_make_student.py b/examples/seq2seq/test_make_student.py index 0a1688a95cc11e..28b5672f0e3a6d 100644 --- a/examples/seq2seq/test_make_student.py +++ b/examples/seq2seq/test_make_student.py @@ -4,7 +4,7 @@ from make_student import create_student_by_copying_alternating_layers from transformers import AutoConfig from transformers.file_utils import cached_property -from transformers.testing_utils import require_torch +from transformers.testing_utils import require_torch, require_torch_non_multigpu_but_fix_me TINY_BART = "sshleifer/bart-tiny-random" @@ -17,23 +17,28 @@ class MakeStudentTester(unittest.TestCase): def teacher_config(self): return AutoConfig.from_pretrained(TINY_BART) + @require_torch_non_multigpu_but_fix_me def test_valid_t5(self): student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1) 
self.assertEqual(student.config.num_hidden_layers, 1) + @require_torch_non_multigpu_but_fix_me def test_asymmetric_t5(self): student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None) + @require_torch_non_multigpu_but_fix_me def test_same_decoder_small_encoder(self): student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None) self.assertEqual(student.config.encoder_layers, 1) self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers) + @require_torch_non_multigpu_but_fix_me def test_small_enc_small_dec(self): student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1) self.assertEqual(student.config.encoder_layers, 1) self.assertEqual(student.config.decoder_layers, 1) + @require_torch_non_multigpu_but_fix_me def test_raises_assert(self): with self.assertRaises(AssertionError): create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None) diff --git a/examples/seq2seq/test_seq2seq_examples.py b/examples/seq2seq/test_seq2seq_examples.py index 09b34e552a925b..9afa6ab0f20d19 100644 --- a/examples/seq2seq/test_seq2seq_examples.py +++ b/examples/seq2seq/test_seq2seq_examples.py @@ -19,7 +19,14 @@ from run_eval_search import run_search from transformers import AutoConfig, AutoModelForSeq2SeqLM from transformers.hf_api import HfApi -from transformers.testing_utils import CaptureStderr, CaptureStdout, TestCasePlus, require_torch_gpu, slow +from transformers.testing_utils import ( + CaptureStderr, + CaptureStdout, + TestCasePlus, + require_torch_gpu, + require_torch_non_multigpu_but_fix_me, + slow, +) from utils import ROUGE_KEYS, label_smoothed_nll_loss, lmap, load_json @@ -126,6 +133,7 @@ def setUpClass(cls): @slow @require_torch_gpu + @require_torch_non_multigpu_but_fix_me def test_hub_configs(self): """I put require_torch_gpu cause I only want this to run with self-scheduled.""" @@ -143,10 
+151,12 @@ def test_hub_configs(self): failures.append(m) assert not failures, f"The following models could not be loaded through AutoConfig: {failures}" + @require_torch_non_multigpu_but_fix_me def test_distill_no_teacher(self): updates = dict(student_encoder_layers=2, student_decoder_layers=1, no_teacher=True) self._test_distiller_cli(updates) + @require_torch_non_multigpu_but_fix_me def test_distill_checkpointing_with_teacher(self): updates = dict( student_encoder_layers=2, @@ -171,6 +181,7 @@ def test_distill_checkpointing_with_teacher(self): convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new) assert os.path.exists(os.path.join(out_path_new, "pytorch_model.bin")) + @require_torch_non_multigpu_but_fix_me def test_loss_fn(self): model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True) input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"] @@ -191,6 +202,7 @@ def test_loss_fn(self): # TODO: understand why this breaks self.assertEqual(nll_loss, model_computed_loss) + @require_torch_non_multigpu_but_fix_me def test_distill_mbart(self): updates = dict( student_encoder_layers=2, @@ -215,6 +227,7 @@ def test_distill_mbart(self): assert len(all_files) > 2 self.assertEqual(len(transformer_ckpts), 2) + @require_torch_non_multigpu_but_fix_me def test_distill_t5(self): updates = dict( student_encoder_layers=1, @@ -296,18 +309,21 @@ def run_eval_tester(self, model): # test one model to quickly (no-@slow) catch simple problems and do an # extensive testing of functionality with multiple models as @slow separately + @require_torch_non_multigpu_but_fix_me def test_run_eval(self): self.run_eval_tester(T5_TINY) # any extra models should go into the list here - can be slow @parameterized.expand([BART_TINY, MBART_TINY]) @slow + @require_torch_non_multigpu_but_fix_me def test_run_eval_slow(self, model): self.run_eval_tester(model) # testing with 2 models to validate: 1. translation (t5) 2. 
summarization (mbart) @parameterized.expand([T5_TINY, MBART_TINY]) @slow + @require_torch_non_multigpu_but_fix_me def test_run_eval_search(self, model): input_file_name = Path(self.get_auto_remove_tmp_dir()) / "utest_input.source" output_file_name = input_file_name.parent / "utest_output.txt" @@ -358,6 +374,7 @@ def test_run_eval_search(self, model): @parameterized.expand( [T5_TINY, BART_TINY, MBART_TINY, MARIAN_TINY, FSMT_TINY], ) + @require_torch_non_multigpu_but_fix_me def test_finetune(self, model): args_d: dict = CHEAP_ARGS.copy() task = "translation" if model in [MBART_TINY, MARIAN_TINY, FSMT_TINY] else "summarization" @@ -409,6 +426,7 @@ def test_finetune(self, model): assert isinstance(example_batch, dict) assert len(example_batch) >= 4 + @require_torch_non_multigpu_but_fix_me def test_finetune_extra_model_args(self): args_d: dict = CHEAP_ARGS.copy() @@ -459,6 +477,7 @@ def test_finetune_extra_model_args(self): model = main(args) assert str(excinfo.value) == f"model config doesn't have a `{unsupported_param}` attribute" + @require_torch_non_multigpu_but_fix_me def test_finetune_lr_schedulers(self): args_d: dict = CHEAP_ARGS.copy() diff --git a/examples/seq2seq/test_tatoeba_conversion.py b/examples/seq2seq/test_tatoeba_conversion.py index 73a4f660fc1f16..4f97eca133ccd3 100644 --- a/examples/seq2seq/test_tatoeba_conversion.py +++ b/examples/seq2seq/test_tatoeba_conversion.py @@ -4,7 +4,7 @@ from transformers.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter from transformers.file_utils import cached_property -from transformers.testing_utils import slow +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.") @@ -15,10 +15,12 @@ def resolver(self): return TatoebaConverter(save_dir=tmp_dir) @slow + @require_torch_non_multigpu_but_fix_me def test_resolver(self): self.resolver.convert_models(["heb-eng"]) @slow + 
@require_torch_non_multigpu_but_fix_me def test_model_card(self): content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True) assert mmeta["long_pair"] == "heb-eng" diff --git a/examples/test_examples.py b/examples/test_examples.py index 4eda398537d715..4a8bc94e0af3e3 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -23,7 +23,7 @@ import torch from transformers.file_utils import is_apex_available -from transformers.testing_utils import TestCasePlus, torch_device +from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, torch_device SRC_DIRS = [ @@ -60,6 +60,7 @@ def is_cuda_and_apex_available(): class ExamplesTests(TestCasePlus): + @require_torch_non_multigpu_but_fix_me def test_run_glue(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -92,6 +93,7 @@ def test_run_glue(self): for value in result.values(): self.assertGreaterEqual(value, 0.75) + @require_torch_non_multigpu_but_fix_me def test_run_pl_glue(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -129,6 +131,7 @@ def test_run_pl_glue(self): # self.assertGreaterEqual(v, 0.75, f"({k})") # + @require_torch_non_multigpu_but_fix_me def test_run_clm(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -160,6 +163,7 @@ def test_run_clm(self): result = run_clm.main() self.assertLess(result["perplexity"], 100) + @require_torch_non_multigpu_but_fix_me def test_run_mlm(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -185,6 +189,7 @@ def test_run_mlm(self): result = run_mlm.main() self.assertLess(result["perplexity"], 42) + @require_torch_non_multigpu_but_fix_me def test_run_squad(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -213,6 +218,7 @@ def test_run_squad(self): self.assertGreaterEqual(result["f1"], 25) 
self.assertGreaterEqual(result["exact"], 21) + @require_torch_non_multigpu_but_fix_me def test_generation(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) diff --git a/examples/test_xla_examples.py b/examples/test_xla_examples.py index ed1458a010ff36..f8026554b73cac 100644 --- a/examples/test_xla_examples.py +++ b/examples/test_xla_examples.py @@ -20,7 +20,7 @@ from time import time from unittest.mock import patch -from transformers.testing_utils import require_torch_tpu +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, require_torch_tpu logging.basicConfig(level=logging.DEBUG) @@ -30,6 +30,7 @@ @require_torch_tpu class TorchXLAExamplesTests(unittest.TestCase): + @require_torch_non_multigpu_but_fix_me def test_run_glue(self): import xla_spawn @@ -81,6 +82,7 @@ def test_run_glue(self): # Assert that the script takes less than 300 seconds to make sure it doesn't hang. self.assertLess(end - start, 500) + @require_torch_non_multigpu_but_fix_me def test_trainer_tpu(self): import xla_spawn diff --git a/examples/token-classification/test_ner_examples.py b/examples/token-classification/test_ner_examples.py index d6bb0b25fa3bca..3772d780f62766 100644 --- a/examples/token-classification/test_ner_examples.py +++ b/examples/token-classification/test_ner_examples.py @@ -4,7 +4,7 @@ from unittest.mock import patch import run_ner -from transformers.testing_utils import slow +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow logging.basicConfig(level=logging.INFO) @@ -14,6 +14,7 @@ class ExamplesTests(unittest.TestCase): @slow + @require_torch_non_multigpu_but_fix_me def test_run_ner(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -34,6 +35,7 @@ def test_run_ner(self): result = run_ner.main() self.assertLess(result["eval_loss"], 1.5) + @require_torch_non_multigpu_but_fix_me def test_run_ner_pl(self): stream_handler = 
logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 02998bcfd656b6..119ff433df5627 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -227,6 +227,12 @@ def require_torch_non_multigpu(test_case): return test_case +# this is a decorator identical to require_torch_non_multigpu, but it is used as a quick band-aid to +# allow all of the examples to be run on multi-gpu CI, and it reminds us that tests decorated with it +# still need to be ported; they are not non-multi-gpu by design. +require_torch_non_multigpu_but_fix_me = require_torch_non_multigpu + + def require_torch_tpu(test_case): """ Decorator marking a test that requires a TPU (in PyTorch). From 02287ee3b69b8dd1ce6359e4678432546168f0de Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Sat, 7 Nov 2020 17:50:08 -0800 Subject: [PATCH 6/7] style --- examples/seq2seq/test_bash_script.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/seq2seq/test_bash_script.py b/examples/seq2seq/test_bash_script.py index bc354e1e5cfb80..fffe6c4be73f13 100644 --- a/examples/seq2seq/test_bash_script.py +++ b/examples/seq2seq/test_bash_script.py @@ -13,7 +13,7 @@ from finetune import SummarizationModule, main from transformers import MarianMTModel from transformers.file_utils import cached_path -from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, require_torch_gpu, slow +from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multigpu_but_fix_me, slow from utils import load_json From 07f65f6d1c3dc16f7de84caecff1edbbd8942070 Mon Sep 17 00:00:00 2001 From: Stas Bekman Date: Sun, 8 Nov 2020 17:26:03 -0800 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Sam Shleifer --- .github/workflows/self-scheduled.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index 1771dd95b586e3..6033a9e4e8322a 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -244,7 +244,7 @@ jobs: if: ${{ always() }} run: cat reports/tests_torch_multiple_gpu_failures_short.txt - - name: Run ported examples tests on multi-GPU + - name: Run examples tests on multi-GPU env: OMP_NUM_THREADS: 1 RUN_SLOW: yes