using multi_gpu consistently (#8446)
* s|multiple_gpu|multi_gpu|g; s|multigpu|multi_gpu|g'

* doc
stas00 authored Nov 10, 2020
1 parent b935694 commit 02bdfc0
Showing 22 changed files with 117 additions and 117 deletions.
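
The first bullet of the commit message records the rename as two textual substitutions. A minimal Python sketch of how such a repo-wide rename could be applied is shown below; the file-extension filter and the in-place rewrite are assumptions for illustration, not the command that was actually run:

.. code-block:: python

    import re
    from pathlib import Path

    # The two substitutions from the commit message.
    SUBSTITUTIONS = [
        (re.compile(r"multiple_gpu"), "multi_gpu"),
        (re.compile(r"multigpu"), "multi_gpu"),
    ]
    # Assumed set of text files to touch; the real commit edited 22 files.
    EXTENSIONS = {".py", ".rst", ".yml", ".yaml", ".md"}

    for path in Path(".").rglob("*"):
        if not path.is_file() or path.suffix not in EXTENSIONS:
            continue
        original = path.read_text(encoding="utf-8")
        updated = original
        for pattern, replacement in SUBSTITUTIONS:
            updated = pattern.sub(replacement, updated)
        if updated != original:
            path.write_text(updated, encoding="utf-8")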
16 changes: 8 additions & 8 deletions .github/workflows/self-push.yml
@@ -135,7 +135,7 @@ jobs:
name: run_all_tests_tf_gpu_test_reports
path: reports

run_tests_torch_multiple_gpu:
run_tests_torch_multi_gpu:
runs-on: [self-hosted, multi-gpu]
steps:
- uses: actions/checkout@v2
@@ -154,7 +154,7 @@ jobs:
id: cache
with:
path: .env
key: v1.1-tests_torch_multiple_gpu-${{ hashFiles('setup.py') }}
key: v1.1-tests_torch_multi_gpu-${{ hashFiles('setup.py') }}

- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
run: |
@@ -181,11 +181,11 @@ jobs:
OMP_NUM_THREADS: 1
run: |
source .env/bin/activate
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multiple_gpu tests
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
- name: Failure short reports
if: ${{ always() }}
run: cat reports/tests_torch_multiple_gpu_failures_short.txt
run: cat reports/tests_torch_multi_gpu_failures_short.txt

- name: Test suite reports artifacts
if: ${{ always() }}
@@ -194,7 +194,7 @@ jobs:
name: run_all_tests_torch_multi_gpu_test_reports
path: reports

run_tests_tf_multiple_gpu:
run_tests_tf_multi_gpu:
runs-on: [self-hosted, multi-gpu]
steps:
- uses: actions/checkout@v2
@@ -213,7 +213,7 @@ jobs:
id: cache
with:
path: .env
key: v1.1-tests_tf_multiple_gpu-${{ hashFiles('setup.py') }}
key: v1.1-tests_tf_multi_gpu-${{ hashFiles('setup.py') }}

- name: Create new python env (on self-hosted runners we have to handle isolation ourselves)
run: |
@@ -240,11 +240,11 @@ jobs:
OMP_NUM_THREADS: 1
run: |
source .env/bin/activate
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multiple_gpu tests
python -m pytest -n 2 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
- name: Failure short reports
if: ${{ always() }}
run: cat reports/tests_tf_multiple_gpu_failures_short.txt
run: cat reports/tests_tf_multi_gpu_failures_short.txt

- name: Test suite reports artifacts
if: ${{ always() }}
24 changes: 12 additions & 12 deletions .github/workflows/self-scheduled.yml
@@ -187,7 +187,7 @@ jobs:
name: run_all_tests_tf_gpu_test_reports
path: reports

run_all_tests_torch_multiple_gpu:
run_all_tests_torch_multi_gpu:
runs-on: [self-hosted, multi-gpu]
steps:
- uses: actions/checkout@v2
@@ -238,23 +238,23 @@ jobs:
RUN_SLOW: yes
run: |
source .env/bin/activate
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multiple_gpu tests
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_torch_multi_gpu tests
- name: Failure short reports
if: ${{ always() }}
run: cat reports/tests_torch_multiple_gpu_failures_short.txt
run: cat reports/tests_torch_multi_gpu_failures_short.txt

- name: Run examples tests on multi-GPU
env:
OMP_NUM_THREADS: 1
RUN_SLOW: yes
run: |
source .env/bin/activate
python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multiple_gpu examples
python -m pytest -n 1 --dist=loadfile -s --make-reports=examples_torch_multi_gpu examples
- name: Failure short reports
if: ${{ always() }}
run: cat reports/examples_torch_multiple_gpu_failures_short.txt
run: cat reports/examples_torch_multi_gpu_failures_short.txt

- name: Run all pipeline tests on multi-GPU
if: ${{ always() }}
@@ -265,11 +265,11 @@ jobs:
RUN_PIPELINE_TESTS: yes
run: |
source .env/bin/activate
python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multiple_gpu tests
python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_torch_pipeline_multi_gpu tests
- name: Failure short reports
if: ${{ always() }}
run: cat reports/tests_torch_pipeline_multiple_gpu_failures_short.txt
run: cat reports/tests_torch_pipeline_multi_gpu_failures_short.txt

- name: Test suite reports artifacts
if: ${{ always() }}
@@ -278,7 +278,7 @@ jobs:
name: run_all_tests_torch_multi_gpu_test_reports
path: reports

run_all_tests_tf_multiple_gpu:
run_all_tests_tf_multi_gpu:
runs-on: [self-hosted, multi-gpu]
steps:
- uses: actions/checkout@v2
@@ -329,11 +329,11 @@ jobs:
RUN_SLOW: yes
run: |
source .env/bin/activate
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multiple_gpu tests
python -m pytest -n 1 --dist=loadfile -s --make-reports=tests_tf_multi_gpu tests
- name: Failure short reports
if: ${{ always() }}
run: cat reports/tests_tf_multiple_gpu_failures_short.txt
run: cat reports/tests_tf_multi_gpu_failures_short.txt

- name: Run all pipeline tests on multi-GPU
if: ${{ always() }}
@@ -344,11 +344,11 @@ jobs:
RUN_PIPELINE_TESTS: yes
run: |
source .env/bin/activate
python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_multiple_gpu tests
python -m pytest -n 1 --dist=loadfile -s -m is_pipeline_test --make-reports=tests_tf_pipelines_multi_gpu tests
- name: Failure short reports
if: ${{ always() }}
run: cat reports/tests_tf_multiple_gpu_pipelines_failures_short.txt
run: cat reports/tests_tf_multi_gpu_pipelines_failures_short.txt

- name: Test suite reports artifacts
if: ${{ always() }}
32 changes: 16 additions & 16 deletions docs/source/testing.rst
@@ -405,32 +405,32 @@ decorators are used to set the requirements of tests CPU/GPU/TPU-wise:

* ``require_torch`` - this test will run only under torch
* ``require_torch_gpu`` - as ``require_torch`` plus requires at least 1 GPU
* ``require_torch_multigpu`` - as ``require_torch`` plus requires at least 2 GPUs
* ``require_torch_non_multigpu`` - as ``require_torch`` plus requires 0 or 1 GPUs
* ``require_torch_multi_gpu`` - as ``require_torch`` plus requires at least 2 GPUs
* ``require_torch_non_multi_gpu`` - as ``require_torch`` plus requires 0 or 1 GPUs
* ``require_torch_tpu`` - as ``require_torch`` plus requires at least 1 TPU

Let's depict the GPU requirements in the following table:


+----------+---------------------------------+
| n gpus | decorator |
+==========+=================================+
| ``>= 0`` | ``@require_torch`` |
+----------+---------------------------------+
| ``>= 1`` | ``@require_torch_gpu`` |
+----------+---------------------------------+
| ``>= 2`` | ``@require_torch_multigpu`` |
+----------+---------------------------------+
| ``< 2`` | ``@require_torch_non_multigpu`` |
+----------+---------------------------------+
+----------+----------------------------------+
| n gpus | decorator |
+==========+==================================+
| ``>= 0`` | ``@require_torch`` |
+----------+----------------------------------+
| ``>= 1`` | ``@require_torch_gpu`` |
+----------+----------------------------------+
| ``>= 2`` | ``@require_torch_multi_gpu`` |
+----------+----------------------------------+
| ``< 2`` | ``@require_torch_non_multi_gpu`` |
+----------+----------------------------------+


For example, here is a test that must be run only when there are 2 or more GPUs available and pytorch is installed:

.. code-block:: python
@require_torch_multigpu
def test_example_with_multigpu():
@require_torch_multi_gpu
def test_example_with_multi_gpu():
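
For reference, a self-contained version of the example above might look as follows; the class name, test body, and assertion are illustrative additions, only the decorator usage comes from the docs:

.. code-block:: python

    import unittest

    import torch  # assumes torch is installed in the test environment

    from transformers.testing_utils import require_torch_multi_gpu


    class ExampleMultiGpuTest(unittest.TestCase):
        @require_torch_multi_gpu
        def test_example_with_multi_gpu(self):
            # Only reached on a machine with at least 2 GPUs; otherwise the
            # decorator marks the test as skipped.
            self.assertGreaterEqual(torch.cuda.device_count(), 2)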
If a test requires ``tensorflow`` use the ``require_tf`` decorator. For example:

@@ -454,7 +454,7 @@ last for them to work correctly. Here is an example of the correct usage:
.. code-block:: python
@parameterized.expand(...)
@require_torch_multigpu
@require_torch_multi_gpu
def test_integration_foo():
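
Filled out, the ordering shown above could look like the sketch below; the parameter values and the test body are made up for illustration, only the decorator order matters:

.. code-block:: python

    import unittest

    import torch  # assumes torch is installed in the test environment
    from parameterized import parameterized

    from transformers.testing_utils import require_torch_multi_gpu


    class IntegrationTest(unittest.TestCase):
        # ``@parameterized.expand`` comes first and the skip decorator is
        # listed last, as the docs require, so every expanded test can still
        # be skipped on machines with fewer than 2 GPUs.
        @parameterized.expand([("fp32",), ("fp16",)])
        @require_torch_multi_gpu
        def test_integration_foo(self, precision):
            self.assertGreaterEqual(torch.cuda.device_count(), 2)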
This order problem doesn't exist with ``@pytest.mark.parametrize``, you can put it first or last and it will still
4 changes: 2 additions & 2 deletions examples/bert-loses-patience/test_run_glue_with_pabee.py
@@ -4,7 +4,7 @@
from unittest.mock import patch

import run_glue_with_pabee
from transformers.testing_utils import TestCasePlus, require_torch_non_multigpu_but_fix_me
from transformers.testing_utils import TestCasePlus, require_torch_non_multi_gpu_but_fix_me


logging.basicConfig(level=logging.DEBUG)
@@ -20,7 +20,7 @@ def get_setup_file():


class PabeeTests(TestCasePlus):
@require_torch_non_multigpu_but_fix_me
@require_torch_non_multi_gpu_but_fix_me
def test_run_glue(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
4 changes: 2 additions & 2 deletions examples/deebert/test_glue_deebert.py
@@ -5,7 +5,7 @@
from unittest.mock import patch

import run_glue_deebert
from transformers.testing_utils import require_torch_non_multigpu_but_fix_me, slow
from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me, slow


logging.basicConfig(level=logging.DEBUG)
@@ -26,7 +26,7 @@ def setup(self) -> None:
logger.addHandler(stream_handler)

@slow
@require_torch_non_multigpu_but_fix_me
@require_torch_non_multi_gpu_but_fix_me
def test_glue_deebert_train(self):

train_args = """
8 changes: 4 additions & 4 deletions examples/rag/test_distributed_retriever.py
@@ -16,7 +16,7 @@
from transformers.configuration_rag import RagConfig
from transformers.file_utils import is_datasets_available, is_faiss_available, is_psutil_available, is_torch_available
from transformers.retrieval_rag import CustomHFIndex
from transformers.testing_utils import require_torch_non_multigpu_but_fix_me
from transformers.testing_utils import require_torch_non_multi_gpu_but_fix_me
from transformers.tokenization_bart import BartTokenizer
from transformers.tokenization_bert import VOCAB_FILES_NAMES as DPR_VOCAB_FILES_NAMES
from transformers.tokenization_dpr import DPRQuestionEncoderTokenizer
@@ -179,7 +179,7 @@ def get_dummy_custom_hf_index_retriever(self, init_retrieval: bool, from_disk: b
retriever.init_retrieval(port)
return retriever

@require_torch_non_multigpu_but_fix_me
@require_torch_non_multi_gpu_but_fix_me
def test_pytorch_distributed_retriever_retrieve(self):
n_docs = 1
retriever = self.get_dummy_pytorch_distributed_retriever(init_retrieval=True)
@@ -195,7 +195,7 @@ def test_pytorch_distributed_retriever_retrieve(self):
self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc
self.assertListEqual(doc_ids.tolist(), [[1], [0]])

@require_torch_non_multigpu_but_fix_me
@require_torch_non_multi_gpu_but_fix_me
def test_custom_hf_index_retriever_retrieve(self):
n_docs = 1
retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=False)
@@ -211,7 +211,7 @@ def test_custom_hf_index_retriever_retrieve(self):
self.assertEqual(doc_dicts[1]["id"][0], "0") # max inner product is reached with first doc
self.assertListEqual(doc_ids.tolist(), [[1], [0]])

@require_torch_non_multigpu_but_fix_me
@require_torch_non_multi_gpu_but_fix_me
def test_custom_pytorch_distributed_retriever_retrieve_from_disk(self):
n_docs = 1
retriever = self.get_dummy_custom_hf_index_retriever(init_retrieval=True, from_disk=True)
12 changes: 6 additions & 6 deletions examples/seq2seq/test_bash_script.py
@@ -13,7 +13,7 @@
from finetune import SummarizationModule, main
from transformers import MarianMTModel
from transformers.file_utils import cached_path
from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multigpu_but_fix_me, slow
from transformers.testing_utils import TestCasePlus, require_torch_gpu, require_torch_non_multi_gpu_but_fix_me, slow
from utils import load_json


@@ -32,15 +32,15 @@ def setUp(self):

@slow
@require_torch_gpu
@require_torch_non_multigpu_but_fix_me
@require_torch_non_multi_gpu_but_fix_me
def test_model_download(self):
"""This warms up the cache so that we can time the next test without including download time, which varies between machines."""
MarianMTModel.from_pretrained(MARIAN_MODEL)

# @timeout_decorator.timeout(1200)
@slow
@require_torch_gpu
@require_torch_non_multigpu_but_fix_me
@require_torch_non_multi_gpu_but_fix_me
def test_train_mbart_cc25_enro_script(self):
env_vars_to_replace = {
"$MAX_LEN": 64,
@@ -75,7 +75,7 @@ def test_train_mbart_cc25_enro_script(self):
--num_sanity_val_steps 0
--eval_beams 2
""".split()
# XXX: args.gpus > 1 : handle multigpu in the future
# XXX: args.gpus > 1 : handle multi_gpu in the future

testargs = ["finetune.py"] + bash_script.split() + args
with patch.object(sys, "argv", testargs):
@@ -129,7 +129,7 @@ class TestDistilMarianNoTeacher(TestCasePlus):
@timeout_decorator.timeout(600)
@slow
@require_torch_gpu
@require_torch_non_multigpu_but_fix_me
@require_torch_non_multi_gpu_but_fix_me
def test_opus_mt_distill_script(self):
data_dir = f"{self.test_file_dir_str}/test_data/wmt_en_ro"
env_vars_to_replace = {
@@ -172,7 +172,7 @@ def test_opus_mt_distill_script(self):
parser = pl.Trainer.add_argparse_args(parser)
parser = BartSummarizationDistiller.add_model_specific_args(parser, os.getcwd())
args = parser.parse_args()
# assert args.gpus == gpus THIS BREAKS for multigpu
# assert args.gpus == gpus THIS BREAKS for multi_gpu

model = distill_main(args)

(Diffs for the remaining changed files in this commit are not shown here.)
