diff --git a/docs/source/en/testing.mdx b/docs/source/en/testing.mdx
index cb03a57b0413..4663b8ac4d93 100644
--- a/docs/source/en/testing.mdx
+++ b/docs/source/en/testing.mdx
@@ -176,6 +176,15 @@ If you want to include only tests that include both patterns, `and` is to be use
 ```bash
 pytest -k "test and ada" tests/test_optimization.py
 ```
+
+### Run `accelerate` tests
+
+Sometimes you need to run `accelerate` tests for your models. To do so, just add `-m accelerate_tests` to your command. For example, to run these tests on `OPT`, run:
+```bash
+RUN_SLOW=1 pytest -m accelerate_tests tests/models/opt/test_modeling_opt.py
+```
+
+
 ### Run documentation tests
 
 In order to test whether the documentation examples are correct, you should check that the `doctests` are passing.
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index eddf5033344d..5bfb8c1f2200 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -32,6 +32,7 @@
 import numpy as np
 from huggingface_hub import HfFolder, delete_repo, set_access_token
 from huggingface_hub.file_download import http_get
+from pytest import mark
 from requests.exceptions import HTTPError
 
 import transformers
@@ -2455,6 +2456,7 @@ def check_device_map_is_respected(self, model, device_map):
                 self.assertEqual(param.device, torch.device(param_device))
 
     @require_accelerate
+    @mark.accelerate_tests
     @require_torch_gpu
     def test_disk_offload(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -2490,6 +2492,7 @@ def test_disk_offload(self):
                 self.assertTrue(torch.allclose(base_output[0], new_output[0]))
 
     @require_accelerate
+    @mark.accelerate_tests
     @require_torch_gpu
     def test_cpu_offload(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -2525,6 +2528,7 @@ def test_cpu_offload(self):
                 self.assertTrue(torch.allclose(base_output[0], new_output[0]))
 
     @require_accelerate
+    @mark.accelerate_tests
     @require_torch_multi_gpu
     def test_model_parallelism(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
@@ -3164,6 +3168,7 @@ def test_checkpoint_variant_save_load(self):
         self.assertIsNotNone(model)
 
     @require_accelerate
+    @mark.accelerate_tests
     def test_from_pretrained_low_cpu_mem_usage_functional(self):
         # test that we can use `from_pretrained(..., low_cpu_mem_usage=True)` with normal and
         # sharded models
@@ -3177,6 +3182,7 @@ def test_from_pretrained_low_cpu_mem_usage_functional(self):
 
     @require_usr_bin_time
     @require_accelerate
+    @mark.accelerate_tests
     def test_from_pretrained_low_cpu_mem_usage_measured(self):
         # test that `from_pretrained(..., low_cpu_mem_usage=True)` uses less cpu memory than default
 
@@ -3216,6 +3222,7 @@ def test_from_pretrained_low_cpu_mem_usage_measured(self):
     # cuda memory tracking and then we should be able to do a much more precise test.
 
     @require_accelerate
+    @mark.accelerate_tests
     @require_torch_multi_gpu
     @slow
     def test_model_parallelism_gpt2(self):
@@ -3233,6 +3240,7 @@ def test_model_parallelism_gpt2(self):
         self.assertEqual(text_output, "Hello, my name is John. I'm a writer, and I'm a writer. I'm")
 
     @require_accelerate
+    @mark.accelerate_tests
     @require_torch_gpu
     def test_from_pretrained_disk_offload_task_model(self):
         model = AutoModel.from_pretrained("hf-internal-testing/tiny-random-gpt2")
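
Aside (not part of the diff): for readers unfamiliar with custom pytest marks, the sketch below shows how a marker like `accelerate_tests` is typically wired up so that `pytest -m accelerate_tests` selects tests cleanly. Where `transformers` actually registers the marker is not shown in this diff, so the `conftest.py` placement and the test name below are assumptions for illustration only.

```python
# conftest.py -- minimal sketch: register the custom marker. Without
# registration, marked tests still run, but pytest emits
# PytestUnknownMarkWarning (and fails outright under --strict-markers).
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "accelerate_tests: marks tests that exercise the accelerate integration"
    )


# test_example.py -- applying the marker (hypothetical test, for illustration)
from pytest import mark


@mark.accelerate_tests
def test_offload_roundtrip():
    # Selected by `pytest -m accelerate_tests`; excluded by
    # `pytest -m "not accelerate_tests"`, since -m accepts boolean expressions.
    pass
```

Because `-m` takes boolean expressions, a regular CI run can skip these tests with `-m "not accelerate_tests"` while a dedicated job runs only them, which is the point of adding the marker throughout `test_modeling_common.py` above.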