diff --git a/.github/workflows/self-scheduled.yml b/.github/workflows/self-scheduled.yml index decd61f4b09536..6033a9e4e8322a 100644 --- a/.github/workflows/self-scheduled.yml +++ b/.github/workflows/self-scheduled.yml @@ -1,3 +1,8 @@ +# configuration notes: +# +# - `source .env/bin/activate` currently needs to be run first thing in each step. Otherwise +# the step uses the system-wide python interpreter. + name: Self-hosted runner (scheduled) on: @@ -227,7 +232,7 @@ jobs: python -c "import torch; print('Cuda available:', torch.cuda.is_available())" python -c "import torch; print('Number of GPUs available:', torch.cuda.device_count())" - - name: Run all tests on GPU + - name: Run all tests on multi-GPU env: OMP_NUM_THREADS: 1 RUN_SLOW: yes @@ -238,8 +243,20 @@ jobs: - name: Failure short reports if: ${{ always() }} run: cat reports/tests_torch_multiple_gpu_failures_short.txt - - - name: Run all pipeline tests on GPU + + - name: Run examples tests on multi-GPU + env: + OMP_NUM_THREADS: 1 + RUN_SLOW: yes + run: | + source .env/bin/activate + python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples + + - name: Failure short reports + if: ${{ always() }} + run: cat reports/examples_torch_multiple_gpu_failures_short.txt + + - name: Run all pipeline tests on multi-GPU if: ${{ always() }} env: TF_FORCE_GPU_ALLOW_GROWTH: "true" @@ -306,7 +323,7 @@ jobs: TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('TF GPUs available:', bool(tf.config.list_physical_devices('GPU')))" TF_CPP_MIN_LOG_LEVEL=3 python -c "import tensorflow as tf; print('Number of TF GPUs available:', len(tf.config.list_physical_devices('GPU')))" - - name: Run all tests on GPU + - name: Run all tests on multi-GPU env: OMP_NUM_THREADS: 1 RUN_SLOW: yes @@ -318,7 +335,7 @@ if: ${{ always() }} run: cat reports/tests_tf_multiple_gpu_failures_short.txt - - name: Run all pipeline tests on GPU + - name: Run all pipeline tests on multi-GPU if: ${{ always() }} env: TF_FORCE_GPU_ALLOW_GROWTH: "true" diff --git a/examples/bert-loses-patience/test_run_glue_with_pabee.py b/examples/bert-loses-patience/test_run_glue_with_pabee.py index 22c6f4de06f430..eaac5329379c11 100644 --- a/examples/bert-loses-patience/test_run_glue_with_pabee.py +++ b/examples/bert-loses-patience/test_run_glue_with_pabee.py @@ -4,7 +4,7 @@ from unittest.mock import patch import run_glue_with_pabee -from transformers.testing_utils import TestCasePlus +from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me logging.basicConfig(level=logging.DEBUG) @@ -20,6 +20,7 @@ def get_setup_file(): class PabeeTests(TestCasePlus): + @require_torch_non_multigpu_but_fix_me def test_run_glue(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) diff --git a/examples/deebert/test_glue_deebert.py b/examples/deebert/test_glue_deebert.py index 59f7f58024f4e9..66faa557c0d0ba 100644 --- a/examples/deebert/test_glue_deebert.py +++ b/examples/deebert/test_glue_deebert.py @@ -5,7 +5,7 @@ from unittest.mock import patch import run_glue_deebert -from transformers.testing_utils import slow +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow logging.basicConfig(level=logging.DEBUG) @@ -26,6 +26,7 @@ def setup(self) -> None: logger.addHandler(stream_handler) @slow + @require_torch_non_multigpu_but_fix_me def test_glue_deebert_train(self): train_args = """ diff --git a/examples/rag/test_distributed_retriever.py
b/examples/rag/test_distributed_retriever.py index 80d8362d1edd1a..be874c83e8b37e 100644 --- a/examples/rag/test_distributed_retriever.py +++ b/examples/rag/test_distributed_retriever.py @@ -16,6 +16,7 @@ from transformers.configuration_rag import RagConfig from transformers.file_utils import is_datasets_available, is_faiss_available, is_psutil_available, is_torch_available from transformers.retrieval_rag import CustomHFIndex +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me from transformers.tokenization_bart import BartTokenizer from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer @@ -178,6 +179,7 @@ def get_dummy_custom_hf_index_retriever(self, init_retrieval: bool, from_disk: b retriever.init_retrieval(port) return retriever + @require_torch_non_multigpu_but_fix_me def test_pytorch_distributed_retriever_retrieve(self): n_docs = 1 retriever = self.get_dummy_pytorch_distributed_retriever(init_retrieval=True) @@ -193,6 +195,7 @@ def test_pytorch_distributed_retriever_retrieve(self): self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc self.assertListEqual(doc_ids.tolist(), [[1], [0]]) + @require_torch_non_multigpu_but_fix_me def test_custom_hf_index_retriever_retrieve(self): n_docs = 1 retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=False) @@ -208,6 +211,7 @@ def test_custom_hf_index_retriever_retrieve(self): self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc self.assertListEqual(doc_ids.tolist(), [[1], [0]]) + @require_torch_non_multigpu_but_fix_me def test_custom_pytorch_distributed_retriever_retrieve_from_disk(self): n_docs = 1 retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=True) diff --git a/examples/seq2seq/test_bash_script.py b/examples/seq2seq/test_bash_script.py index 7a7cd0806794a8..fffe6c4be73f13 100644 --- a/examples/seq2seq/test_bash_script.py +++ b/examples/seq2seq/test_bash_script.py @@ -13,7 +13,7 @@ from finetune import SummarizationModule, main from transformers import MarianMTModel from transformers.file_utils import cached_path -from transformers.testing_utils import TestCasePlus, require_torch_gpu, slow +from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multigpu_but_fix_me, slow from utils import load_json @@ -32,6 +32,7 @@ def setUp(self): @slow @require_torch_gpu + @require_torch_non_multigpu_but_fix_me def test_model_download(self): """This warms up the cache so that we can time the next test without including download time, which varies between machines.""" MarianMTModel.from_pretrained(MARIAN_MODEL) @@ -39,6 +40,7 @@ def test_model_download(self): # @timeout_decorator.timeout(1200) @slow @require_torch_gpu + @require_torch_non_multigpu_but_fix_me def test_train_mbart_cc25_enro_script(self): env_vars_to_replace = { "$MAX_LEN": 64, @@ -127,6 +129,7 @@ class TestDistilMarianNoTeacher(TestCasePlus): @timeout_decorator.timeout(600) @slow @require_torch_gpu + @require_torch_non_multigpu_but_fix_me def test_opus_mt_distill_script(self): data_dir = f"{self.test_file_dir_str}/test_data/wmt_en_ro" env_vars_to_replace = { diff --git a/examples/seq2seq/test_datasets.py b/examples/seq2seq/test_datasets.py index 4b5c95ed4e5770..625b6da347d3c1 100644 --- a/examples/seq2seq/test_datasets.py +++ b/examples/seq2seq/test_datasets.py @@ -11,7 +11,7 @@ from 
test_seq2seq_examples import ARTICLES, BART_TINY, MARIAN_TINY, MBART_TINY, SUMMARIES, T5_TINY, make_test_data_dir from transformers import AutoTokenizer from transformers.modeling_bart import shift_tokens_right -from transformers.testing_utils import TestCasePlus, slow +from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, slow from utils import FAIRSEQ_AVAILABLE, DistributedSortishSampler, LegacySeq2SeqDataset, Seq2SeqDataset @@ -30,6 +30,7 @@ class TestAll(TestCasePlus): ], ) @slow + @require_torch_non_multigpu_but_fix_me def test_seq2seq_dataset_truncation(self, tok_name): tokenizer = AutoTokenizer.from_pretrained(tok_name) tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir()) @@ -69,6 +70,7 @@ def test_seq2seq_dataset_truncation(self, tok_name): break # No need to test every batch @parameterized.expand([BART_TINY, BERT_BASE_CASED]) + @require_torch_non_multigpu_but_fix_me def test_legacy_dataset_truncation(self, tok): tokenizer = AutoTokenizer.from_pretrained(tok) tmp_dir = make_test_data_dir(tmp_dir=self.get_auto_remove_tmp_dir()) @@ -93,6 +95,7 @@ def test_legacy_dataset_truncation(self, tok): assert max_len_target > trunc_target # Truncated break # No need to test every batch + @require_torch_non_multigpu_but_fix_me def test_pack_dataset(self): tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25") @@ -111,6 +114,7 @@ def test_pack_dataset(self): assert orig_paths == new_paths @pytest.mark.skipif(not FAIRSEQ_AVAILABLE, reason="This test requires fairseq") + @require_torch_non_multigpu_but_fix_me def test_dynamic_batch_size(self): if not FAIRSEQ_AVAILABLE: return @@ -135,6 +139,7 @@ def test_dynamic_batch_size(self): if failures: raise AssertionError(f"too many tokens in {len(failures)} batches") + @require_torch_non_multigpu_but_fix_me def test_sortish_sampler_reduces_padding(self): ds, _, tokenizer = self._get_dataset(max_len=512) bs = 2 @@ -174,6 +179,7 @@ def _get_dataset(self, n_obs=1000, max_len=128): ) return ds, max_tokens, tokenizer + @require_torch_non_multigpu_but_fix_me def test_distributed_sortish_sampler_splits_indices_between_procs(self): ds, max_tokens, tokenizer = self._get_dataset() ids1 = set(DistributedSortishSampler(ds, 256, num_replicas=2, rank=0, add_extra_examples=False)) @@ -189,6 +195,7 @@ def test_distributed_sortish_sampler_splits_indices_between_procs(self): PEGASUS_XSUM, ], ) + @require_torch_non_multigpu_but_fix_me def test_dataset_kwargs(self, tok_name): tokenizer = AutoTokenizer.from_pretrained(tok_name) if tok_name == MBART_TINY: diff --git a/examples/seq2seq/test_fsmt_bleu_score.py b/examples/seq2seq/test_fsmt_bleu_score.py index beb7f2bc9857fd..2be6b7d5285282 100644 --- a/examples/seq2seq/test_fsmt_bleu_score.py +++ b/examples/seq2seq/test_fsmt_bleu_score.py @@ -19,7 +19,13 @@ from parameterized import parameterized from transformers import FSMTForConditionalGeneration, FSMTTokenizer -from transformers.testing_utils import get_tests_dir, require_torch, slow, torch_device +from transformers.testing_utils import ( + get_tests_dir, + require_torch, + require_torch_non_multigpu_but_fix_me, + slow, + torch_device, +) from utils import calculate_bleu @@ -48,6 +54,7 @@ def get_model(self, mname): ] ) @slow + @require_torch_non_multigpu_but_fix_me def test_bleu_scores(self, pair, min_bleu_score): # note: this test is not testing the best performance since it only evals a small batch # but it should be enough to detect a regression in the output quality diff --git 
a/examples/seq2seq/test_make_student.py b/examples/seq2seq/test_make_student.py index 0a1688a95cc11e..28b5672f0e3a6d 100644 --- a/examples/seq2seq/test_make_student.py +++ b/examples/seq2seq/test_make_student.py @@ -4,7 +4,7 @@ from make_student import create_student_by_copying_alternating_layers from transformers import AutoConfig from transformers.file_utils import cached_property -from transformers.testing_utils import require_torch +from transformers.testing_utils import require_torch, require_torch_non_multigpu_but_fix_me TINY_BART = "sshleifer/bart-tiny-random" @@ -17,23 +17,28 @@ class MakeStudentTester(unittest.TestCase): def teacher_config(self): return AutoConfig.from_pretrained(TINY_BART) + @require_torch_non_multigpu_but_fix_me def test_valid_t5(self): student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=1) self.assertEqual(student.config.num_hidden_layers, 1) + @require_torch_non_multigpu_but_fix_me def test_asymmetric_t5(self): student, *_ = create_student_by_copying_alternating_layers(TINY_T5, tempfile.mkdtemp(), e=1, d=None) + @require_torch_non_multigpu_but_fix_me def test_same_decoder_small_encoder(self): student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=None) self.assertEqual(student.config.encoder_layers, 1) self.assertEqual(student.config.decoder_layers, self.teacher_config.encoder_layers) + @require_torch_non_multigpu_but_fix_me def test_small_enc_small_dec(self): student, *_ = create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=1, d=1) self.assertEqual(student.config.encoder_layers, 1) self.assertEqual(student.config.decoder_layers, 1) + @require_torch_non_multigpu_but_fix_me def test_raises_assert(self): with self.assertRaises(AssertionError): create_student_by_copying_alternating_layers(TINY_BART, tempfile.mkdtemp(), e=None, d=None) diff --git a/examples/seq2seq/test_seq2seq_examples.py b/examples/seq2seq/test_seq2seq_examples.py index 09b34e552a925b..9afa6ab0f20d19 100644 --- a/examples/seq2seq/test_seq2seq_examples.py +++ b/examples/seq2seq/test_seq2seq_examples.py @@ -19,7 +19,14 @@ from run_eval_search import run_search from transformers import AutoConfig, AutoModelForSeq2SeqLM from transformers.hf_api import HfApi -from transformers.testing_utils import CaptureStderr, CaptureStdout, TestCasePlus, require_torch_gpu, slow +from transformers.testing_utils import ( + CaptureStderr, + CaptureStdout, + TestCasePlus, + require_torch_gpu, + require_torch_non_multigpu_but_fix_me, + slow, +) from utils import ROUGE_KEYS, label_smoothed_nll_loss, lmap, load_json @@ -126,6 +133,7 @@ def setUpClass(cls): @slow @require_torch_gpu + @require_torch_non_multigpu_but_fix_me def test_hub_configs(self): """I put require_torch_gpu cause I only want this to run with self-scheduled.""" @@ -143,10 +151,12 @@ def test_hub_configs(self): failures.append(m) assert not failures, f"The following models could not be loaded through AutoConfig: {failures}" + @require_torch_non_multigpu_but_fix_me def test_distill_no_teacher(self): updates = dict(student_encoder_layers=2, student_decoder_layers=1, no_teacher=True) self._test_distiller_cli(updates) + @require_torch_non_multigpu_but_fix_me def test_distill_checkpointing_with_teacher(self): updates = dict( student_encoder_layers=2, @@ -171,6 +181,7 @@ def test_distill_checkpointing_with_teacher(self): convert_pl_to_hf(ckpts[0], transformer_ckpts[0].parent, out_path_new) assert os.path.exists(os.path.join(out_path_new, 
"pytorch_model.bin")) + @require_torch_non_multigpu_but_fix_me def test_loss_fn(self): model = AutoModelForSeq2SeqLM.from_pretrained(BART_TINY, return_dict=True) input_ids, mask = model.dummy_inputs["input_ids"], model.dummy_inputs["attention_mask"] @@ -191,6 +202,7 @@ def test_loss_fn(self): # TODO: understand why this breaks self.assertEqual(nll_loss, model_computed_loss) + @require_torch_non_multigpu_but_fix_me def test_distill_mbart(self): updates = dict( student_encoder_layers=2, @@ -215,6 +227,7 @@ def test_distill_mbart(self): assert len(all_files) > 2 self.assertEqual(len(transformer_ckpts), 2) + @require_torch_non_multigpu_but_fix_me def test_distill_t5(self): updates = dict( student_encoder_layers=1, @@ -296,18 +309,21 @@ def run_eval_tester(self, model): # test one model to quickly (no-@slow) catch simple problems and do an # extensive testing of functionality with multiple models as @slow separately + @require_torch_non_multigpu_but_fix_me def test_run_eval(self): self.run_eval_tester(T5_TINY) # any extra models should go into the list here - can be slow @parameterized.expand([BART_TINY, MBART_TINY]) @slow + @require_torch_non_multigpu_but_fix_me def test_run_eval_slow(self, model): self.run_eval_tester(model) # testing with 2 models to validate: 1. translation (t5) 2. summarization (mbart) @parameterized.expand([T5_TINY, MBART_TINY]) @slow + @require_torch_non_multigpu_but_fix_me def test_run_eval_search(self, model): input_file_name = Path(self.get_auto_remove_tmp_dir()) / "utest_input.source" output_file_name = input_file_name.parent / "utest_output.txt" @@ -358,6 +374,7 @@ def test_run_eval_search(self, model): @parameterized.expand( [T5_TINY, BART_TINY, MBART_TINY, MARIAN_TINY, FSMT_TINY], ) + @require_torch_non_multigpu_but_fix_me def test_finetune(self, model): args_d: dict = CHEAP_ARGS.copy() task = "translation" if model in [MBART_TINY, MARIAN_TINY, FSMT_TINY] else "summarization" @@ -409,6 +426,7 @@ def test_finetune(self, model): assert isinstance(example_batch, dict) assert len(example_batch) >= 4 + @require_torch_non_multigpu_but_fix_me def test_finetune_extra_model_args(self): args_d: dict = CHEAP_ARGS.copy() @@ -459,6 +477,7 @@ def test_finetune_extra_model_args(self): model = main(args) assert str(excinfo.value) == f"model config doesn't have a `{unsupported_param}` attribute" + @require_torch_non_multigpu_but_fix_me def test_finetune_lr_schedulers(self): args_d: dict = CHEAP_ARGS.copy() diff --git a/examples/seq2seq/test_tatoeba_conversion.py b/examples/seq2seq/test_tatoeba_conversion.py index 73a4f660fc1f16..4f97eca133ccd3 100644 --- a/examples/seq2seq/test_tatoeba_conversion.py +++ b/examples/seq2seq/test_tatoeba_conversion.py @@ -4,7 +4,7 @@ from transformers.convert_marian_tatoeba_to_pytorch import DEFAULT_REPO, TatoebaConverter from transformers.file_utils import cached_property -from transformers.testing_utils import slow +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow @unittest.skipUnless(os.path.exists(DEFAULT_REPO), "Tatoeba directory does not exist.") @@ -15,10 +15,12 @@ def resolver(self): return TatoebaConverter(save_dir=tmp_dir) @slow + @require_torch_non_multigpu_but_fix_me def test_resolver(self): self.resolver.convert_models(["heb-eng"]) @slow + @require_torch_non_multigpu_but_fix_me def test_model_card(self): content, mmeta = self.resolver.write_model_card("opus-mt-he-en", dry_run=True) assert mmeta["long_pair"] == "heb-eng" diff --git a/examples/test_examples.py b/examples/test_examples.py index 
ad4c5ffe27e260..e2c0ae42dea418 100644 --- a/examples/test_examples.py +++ b/examples/test_examples.py @@ -23,7 +23,7 @@ import torch from transformers.file_utils import is_apex_available -from transformers.testing_utils import TestCasePlus, torch_device +from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me, torch_device SRC_DIRS = [ @@ -67,6 +67,7 @@ def is_cuda_and_apex_available(): class ExamplesTests(TestCasePlus): + @require_torch_non_multigpu_but_fix_me def test_run_glue(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -99,6 +100,7 @@ def test_run_glue(self): for value in result.values(): self.assertGreaterEqual(value, 0.75) + @require_torch_non_multigpu_but_fix_me def test_run_pl_glue(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -136,6 +138,7 @@ def test_run_pl_glue(self): # self.assertGreaterEqual(v, 0.75, f"({k})") # + @require_torch_non_multigpu_but_fix_me def test_run_clm(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -167,6 +170,7 @@ def test_run_clm(self): result = run_clm.main() self.assertLess(result["perplexity"], 100) + @require_torch_non_multigpu_but_fix_me def test_run_mlm(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -192,6 +196,7 @@ def test_run_mlm(self): result = run_mlm.main() self.assertLess(result["perplexity"], 42) + @require_torch_non_multigpu_but_fix_me def test_run_ner(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -222,6 +227,7 @@ def test_run_ner(self): self.assertGreaterEqual(result["eval_precision"], 0.75) self.assertLess(result["eval_loss"], 0.5) + @require_torch_non_multigpu_but_fix_me def test_run_squad(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -250,6 +256,7 @@ def test_run_squad(self): self.assertGreaterEqual(result["f1"], 25) self.assertGreaterEqual(result["exact"], 21) + @require_torch_non_multigpu_but_fix_me def test_generation(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) diff --git a/examples/test_xla_examples.py b/examples/test_xla_examples.py index ed1458a010ff36..f8026554b73cac 100644 --- a/examples/test_xla_examples.py +++ b/examples/test_xla_examples.py @@ -20,7 +20,7 @@ from time import time from unittest.mock import patch -from transformers.testing_utils import require_torch_tpu +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, require_torch_tpu logging.basicConfig(level=logging.DEBUG) @@ -30,6 +30,7 @@ @require_torch_tpu class TorchXLAExamplesTests(unittest.TestCase): + @require_torch_non_multigpu_but_fix_me def test_run_glue(self): import xla_spawn @@ -81,6 +82,7 @@ def test_run_glue(self): # Assert that the script takes less than 300 seconds to make sure it doesn't hang. 
self.assertLess(end - start, 500) + @require_torch_non_multigpu_but_fix_me def test_trainer_tpu(self): import xla_spawn diff --git a/examples/token-classification/test_ner_examples.py b/examples/token-classification/test_ner_examples.py index 6ecb421a7dbbe5..d8ba83983ff64c 100644 --- a/examples/token-classification/test_ner_examples.py +++ b/examples/token-classification/test_ner_examples.py @@ -4,7 +4,7 @@ from unittest.mock import patch import run_ner_old as run_ner -from transformers.testing_utils import slow +from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow logging.basicConfig(level=logging.INFO) @@ -14,6 +14,7 @@ class ExamplesTests(unittest.TestCase): @slow + @require_torch_non_multigpu_but_fix_me def test_run_ner(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) @@ -34,6 +35,7 @@ def test_run_ner(self): result = run_ner.main() self.assertLess(result["eval_loss"], 1.5) + @require_torch_non_multigpu_but_fix_me def test_run_ner_pl(self): stream_handler = logging.StreamHandler(sys.stdout) logger.addHandler(stream_handler) diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py index 02998bcfd656b6..119ff433df5627 100644 --- a/src/transformers/testing_utils.py +++ b/src/transformers/testing_utils.py @@ -227,6 +227,12 @@ def require_torch_non_multigpu(test_case): return test_case +# This is a decorator identical to require_torch_non_multigpu, but it is used as a quick band-aid to +# allow all of the examples to run on multi-GPU CI. It also reminds us that tests decorated with it +# still need to be ported to multi-GPU and are not restricted to a single GPU by design. +require_torch_non_multigpu_but_fix_me = require_torch_non_multigpu + + def require_torch_tpu(test_case): """ Decorator marking a test that requires a TPU (in PyTorch).
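
Note on the new decorator: the patch only aliases it, so here is a minimal, self-contained sketch of the intended usage pattern for readers unfamiliar with `require_torch_non_multigpu`. The decorator body below is an assumption (the real implementation lives in `src/transformers/testing_utils.py` and is not shown in this diff), and the `_sketch` names and the example test class are hypothetical, used purely for illustration.

```python
# Minimal sketch, not the actual transformers implementation: it assumes the decorator
# skips a test whenever more than one CUDA device is visible.
import unittest


def require_torch_non_multigpu_sketch(test_case):
    """Skip `test_case` unless the machine has 0 or 1 GPU (assumed behavior)."""
    try:
        import torch
    except ImportError:
        return unittest.skip("test requires PyTorch")(test_case)

    if torch.cuda.device_count() > 1:
        return unittest.skip("test requires 0 or 1 GPU")(test_case)
    return test_case


# The band-aid introduced by this patch is just an alias: identical behavior, but the
# distinct name lets a grep for require_torch_non_multigpu_but_fix_me list every example
# test that still needs to be ported to multi-GPU.
require_torch_non_multigpu_but_fix_me_sketch = require_torch_non_multigpu_sketch


class ExampleGlueTests(unittest.TestCase):  # hypothetical test class, for illustration only
    @require_torch_non_multigpu_but_fix_me_sketch
    def test_run_glue(self):
        # On a multi-GPU runner this test is skipped instead of failing; once the example
        # supports multi-GPU, the decorator is simply removed.
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()
```

The intent, per the comment added to `testing_utils.py`, is that these decorators are removed one by one as each example gains real multi-GPU support.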