diff --git a/benchmarks/test_sharded_parity.py b/benchmarks/test_sharded_parity.py
index 1240710674c59..c3a14d0616d18 100644
--- a/benchmarks/test_sharded_parity.py
+++ b/benchmarks/test_sharded_parity.py
@@ -105,14 +105,14 @@ def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', reason="test should be run outside of pytest")
-@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 32")
+@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 32")
 def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
     plugin_parity_test(
         gpus=args.gpus,
         precision=args.precision,
-        accelerator=args.distributed_backend,
+        accelerator=args.accelerator,
         plugin=DDPShardedPlugin(),
-        model_cls=SeedTrainLoaderModel
+        model_cls=SeedTrainLoaderModel,
     )
 
 
@@ -120,14 +120,14 @@ def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', reason="test should be run outside of pytest")
-@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 16")
+@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 16")
 def test_ddp_sharded_plugin_correctness_amp_multi_gpu_ddp(tmpdir, args=None):
     plugin_parity_test(
         gpus=args.gpus,
         precision=args.precision,
-        accelerator=args.distributed_backend,
+        accelerator=args.accelerator,
         plugin=DDPShardedPlugin(),
-        model_cls=SeedTrainLoaderModel
+        model_cls=SeedTrainLoaderModel,
     )
diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py
index ba4b292fb4af0..f02b6dc0952d7 100644
--- a/pl_examples/domain_templates/imagenet.py
+++ b/pl_examples/domain_templates/imagenet.py
@@ -210,7 +210,7 @@ def main(args: Namespace) -> None:
     if args.seed is not None:
         pl.seed_everything(args.seed)
 
-    if args.distributed_backend == 'ddp':
+    if args.accelerator == 'ddp':
         # When using a single GPU per process and per
         # DistributedDataParallel, we need to divide the batch size
         # ourselves based on the total number of GPUs we have
diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py
index c0599dc74c5a9..4b01f83e36639 100644
--- a/pl_examples/domain_templates/reinforce_learn_Qnet.py
+++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py
@@ -341,7 +341,7 @@ def main(args) -> None:
 
     trainer = pl.Trainer(
         gpus=1,
-        distributed_backend='dp',
+        accelerator='dp',
         val_check_interval=100
     )
diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py
index af37575bbfafc..4ca1ebc2aec76 100644
--- a/pl_examples/domain_templates/semantic_segmentation.py
+++ b/pl_examples/domain_templates/semantic_segmentation.py
@@ -214,7 +214,7 @@ def main(hparams: Namespace):
         logger=logger,
         max_epochs=hparams.epochs,
         accumulate_grad_batches=hparams.grad_batches,
-        distributed_backend=hparams.distributed_backend,
+        accelerator=hparams.accelerator,
         precision=16 if hparams.use_amp else 32,
     )
diff --git a/pytorch_lightning/accelerators/accelerator_connector.py b/pytorch_lightning/accelerators/accelerator_connector.py
index 9d36f76876a08..1436e37dbdeaa 100644
--- a/pytorch_lightning/accelerators/accelerator_connector.py
+++ b/pytorch_lightning/accelerators/accelerator_connector.py
@@ -87,7 +87,7 @@ def on_trainer_init(
         self.trainer.tpu_id = self.trainer.tpu_cores[0] if isinstance(self.trainer.tpu_cores, list) else None
 
         if num_processes != 1 and distributed_backend != "ddp_cpu":
-            rank_zero_warn("num_processes is only used for distributed_backend=\"ddp_cpu\". Ignoring it.")
+            rank_zero_warn("num_processes is only used for `accelerator='ddp_cpu'`. Ignoring it.")
         self.trainer.num_processes = num_processes
 
         # override with environment flag
@@ -276,7 +276,7 @@ def select_accelerator(self):
             accelerator_backend = accelerators.CPUAccelerator(self.trainer, cluster_env)
         else:
             raise MisconfigurationException(
-                f'Trainer(distributed_backend={self.trainer.distributed_backend} is not a supported backend'
+                f'Trainer(accelerator={self.trainer.distributed_backend} is not a supported backend'
             )
         return accelerator_backend
@@ -299,8 +299,8 @@ def set_distributed_mode(self):
         elif self.trainer.num_gpus > 1:
             rank_zero_warn(
                 'You requested multiple GPUs but did not specify a backend, e.g.'
-                ' Trainer(distributed_backend="dp"|"ddp"|"ddp2").'
-                ' Setting distributed_backend="ddp_spawn" for you.'
+                ' `Trainer(accelerator="dp"|"ddp"|"ddp2")`.'
+                ' Setting `accelerator="ddp_spawn"` for you.'
             )
             self.trainer.distributed_backend = "ddp_spawn"
@@ -342,7 +342,7 @@ def set_distributed_mode(self):
         if self.trainer.num_nodes > 1 and not (self.trainer.use_ddp2 or self.trainer.use_ddp):
             raise MisconfigurationException(
                 'DataParallel does not support num_nodes > 1. Switching to DistributedDataParallel for you. '
-                'To silence this warning set distributed_backend=ddp or distributed_backend=ddp2'
+                'To silence this warning set `accelerator="ddp"` or `accelerator="ddp2"`'
             )
 
         rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self.trainer.on_gpu}')
@@ -366,7 +366,7 @@ def check_horovod(self):
         """Raises a `MisconfigurationException` if the Trainer is not configured correctly for Horovod."""
         if not HOROVOD_AVAILABLE:
             raise MisconfigurationException(
-                'Requested `distributed_backend="horovod"`, but Horovod is not installed.'
+                'Requested `accelerator="horovod"`, but Horovod is not installed.'
                 'Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]'
             )
diff --git a/pytorch_lightning/accelerators/ddp_accelerator.py b/pytorch_lightning/accelerators/ddp_accelerator.py
index 942d66bc029e9..9789247ac24ce 100644
--- a/pytorch_lightning/accelerators/ddp_accelerator.py
+++ b/pytorch_lightning/accelerators/ddp_accelerator.py
@@ -174,7 +174,7 @@ def _check_can_spawn_children(self):
         if self._has_spawned_children:
             raise RuntimeError(
                 "You tried to run `.fit` or `.test` multiple times in the same script."
-                " This is not supported in DDP mode, switch to `distributed_backend='ddp_spawn'` instead."
+                " This is not supported in DDP mode, switch to `accelerator='ddp_spawn'` instead."
             )
 
     def set_world_ranks(self, process_idx):
diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py
index 3bb444622cebc..f2bcb1d1760d6 100644
--- a/pytorch_lightning/trainer/data_loading.py
+++ b/pytorch_lightning/trainer/data_loading.py
@@ -69,12 +69,12 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
         if dataloader.num_workers > 0 and using_spawn:
             rank_zero_warn('Dataloader(num_workers>0) and ddp_spawn do not mix well!'
                            ' Your performance might suffer dramatically.'
-                           ' Please consider setting distributed_backend=ddp to use num_workers > 0'
+                           ' Please consider setting accelerator=ddp to use num_workers > 0'
                            ' (this is a bottleneck of Python .spawn() and PyTorch')
 
         elif dataloader.num_workers == 0 and using_spawn:
-            rank_zero_warn('You are using `distributed_backend=ddp_spawn` with num_workers=0.'
-                           ' For much faster performance, switch to `distributed_backend=ddp`'
+            rank_zero_warn('You are using `accelerator=ddp_spawn` with num_workers=0.'
+                           ' For much faster performance, switch to `accelerator=ddp`'
                            ' and set `num_workers>0`')
 
         elif dataloader.num_workers <= 2 and multiprocessing.cpu_count() > 2 and not using_spawn:
diff --git a/tests/backends/ddp_model.py b/tests/backends/ddp_model.py
index 32b30c05538be..685dae0e8ef28 100644
--- a/tests/backends/ddp_model.py
+++ b/tests/backends/ddp_model.py
@@ -33,7 +33,7 @@ def main():
     parser.add_argument('--tmpdir')
     parser.add_argument('--workdir')
     parser.set_defaults(gpus=2)
-    parser.set_defaults(distributed_backend="ddp")
+    parser.set_defaults(accelerator="ddp")
     args = parser.parse_args()
 
     model = EvalModelTemplate()
diff --git a/tests/backends/test_accelerator_connector.py b/tests/backends/test_accelerator_connector.py
index 551de95c7e480..704a701153f18 100644
--- a/tests/backends/test_accelerator_connector.py
+++ b/tests/backends/test_accelerator_connector.py
@@ -47,8 +47,8 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
-        callbacks=[CB()]
+        accelerator='ddp_cpu',
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -68,9 +68,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -90,9 +90,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -120,9 +120,9 @@ def on_fit_start(self, trainer, pl_module):
    model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -152,9 +152,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp2',
+        accelerator='ddp2',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -181,9 +181,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -210,9 +210,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp2',
+        accelerator='ddp2',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -239,9 +239,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -267,9 +267,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -304,9 +304,9 @@ def on_fit_start(self, trainer, pl_module):
     trainer = Trainer(
         plugins=[CustomCluster()],
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
diff --git a/tests/backends/test_ddp.py b/tests/backends/test_ddp.py
index f87827fdca4ea..19585d4e8caab 100644
--- a/tests/backends/test_ddp.py
+++ b/tests/backends/test_ddp.py
@@ -22,7 +22,7 @@
 
 
 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
@@ -38,7 +38,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
 
 
 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
@@ -54,7 +54,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
 
 
 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
diff --git a/tests/backends/test_ddp_spawn.py b/tests/backends/test_ddp_spawn.py
index ed4bb140a5ba9..de22622c85e2e 100644
--- a/tests/backends/test_ddp_spawn.py
+++ b/tests/backends/test_ddp_spawn.py
@@ -34,7 +34,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
     )
 
     model = EvalModelTemplate()
@@ -51,8 +51,8 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
-        progress_bar_refresh_rate=0
+        accelerator='ddp_spawn',
+        progress_bar_refresh_rate=0,
     )
 
     model = EvalModelTemplate()
@@ -79,7 +79,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
         limit_train_batches=0.2,
         limit_val_batches=0.2,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn'
+        accelerator='ddp_spawn',
     )
     result = trainer.fit(model, **fit_options)
     assert result == 1, "DDP doesn't work with dataloaders passed to fit()."
diff --git a/tests/backends/test_dp.py b/tests/backends/test_dp.py
index c051b442cb7a7..c0e03efceca82 100644
--- a/tests/backends/test_dp.py
+++ b/tests/backends/test_dp.py
@@ -37,7 +37,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
     )
 
     model = EvalModelTemplate()
@@ -54,8 +54,8 @@ def test_multi_gpu_model_dp(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
-        progress_bar_refresh_rate=0
+        accelerator='dp',
+        progress_bar_refresh_rate=0,
     )
 
     model = EvalModelTemplate()
@@ -80,7 +80,7 @@ def test_dp_test(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
     )
     trainer.fit(model)
 
     assert 'ckpt' in trainer.checkpoint_callback.best_model_path
diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py
index 3e683025e8867..c60553c9f8921 100644
--- a/tests/core/test_datamodules.py
+++ b/tests/core/test_datamodules.py
@@ -356,7 +356,7 @@ def test_full_loop_dp(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=3,
         weights_summary=None,
-        distributed_backend='dp',
+        accelerator='dp',
         gpus=2,
         deterministic=True,
     )
diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py
index 5fd166afce0f3..ba5791c7b9f4a 100644
--- a/tests/loggers/test_all.py
+++ b/tests/loggers/test_all.py
@@ -316,7 +316,7 @@ def _test_logger_created_on_rank_zero_only(tmpdir, logger_class):
     trainer = Trainer(
         logger=logger,
         default_root_dir=tmpdir,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=2,
         max_steps=1,
         checkpoint_callback=True,
diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py
index d5a05057a42dd..b6900e322f0bb 100644
--- a/tests/models/data/horovod/train_default_model.py
+++ b/tests/models/data/horovod/train_default_model.py
@@ -79,7 +79,7 @@ def run_test_from_config(trainer_options):
     trainer.checkpoint_connector.hpc_load(ckpt_path, on_gpu=args.on_gpu)
 
     if args.on_gpu:
-        trainer = Trainer(gpus=1, distributed_backend='horovod', max_epochs=1)
+        trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1)
         # Test the root_gpu property
         assert trainer.root_gpu == hvd.local_rank()
diff --git a/tests/models/test_amp.py b/tests/models/test_amp.py
index 9c2ce2ccfffcf..79238c92e712d 100644
--- a/tests/models/test_amp.py
+++ b/tests/models/test_amp.py
@@ -37,7 +37,7 @@ def test_amp_single_gpu_dp(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=1,
-        distributed_backend='dp',
+        accelerator='dp',
         precision=16,
     )
 
@@ -56,7 +56,7 @@ def test_amp_single_gpu_ddp_spawn(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         precision=16,
     )
 
@@ -77,7 +77,7 @@ def test_amp_multi_gpu_dp(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=2,
-        distributed_backend='dp',
+        accelerator='dp',
         precision=16,
     )
 
@@ -96,7 +96,7 @@ def test_amp_multi_gpu_ddp_spawn(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=2,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         precision=16,
     )
 
@@ -127,7 +127,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=[0],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         precision=16,
         checkpoint_callback=checkpoint,
         logger=logger,
diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py
index 19bc5f063faf9..f8403e460d327 100644
--- a/tests/models/test_cpu.py
+++ b/tests/models/test_cpu.py
@@ -143,7 +143,7 @@ def test_multi_cpu_model_ddp(enable_pl_optimizer, tmpdir):
         limit_val_batches=0.2,
         gpus=None,
         num_processes=2,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         enable_pl_optimizer=enable_pl_optimizer,
     )
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 5eb49ead9d1ed..2393b42d27191 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -38,12 +38,11 @@ def test_multi_gpu_none_backend(tmpdir):
     tutils.set_random_master_port()
 
     trainer_options = dict(
         default_root_dir=tmpdir,
-        distributed_backend=None,
         progress_bar_refresh_rate=0,
         max_epochs=1,
         limit_train_batches=0.2,
         limit_val_batches=0.2,
-        gpus=2
+        gpus=2,
     )
 
     model = EvalModelTemplate()
@@ -93,7 +92,7 @@ def device_count():
     pytest.param(3, 3, "ddp", id="3rd gpu - 1 gpu to use (backend:ddp)")
 ])
 def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, distributed_backend):
-    assert Trainer(gpus=gpus, distributed_backend=distributed_backend).num_gpus == expected_num_gpus
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
 
 
 @pytest.mark.gpus_param_tests
@@ -102,7 +101,7 @@ def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, distrib
     pytest.param(None, 0, "ddp", id="None - expect 0 gpu to use."),
 ])
 def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distributed_backend):
-    assert Trainer(gpus=gpus, distributed_backend=distributed_backend).num_gpus == expected_num_gpus
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
 
 
 @pytest.mark.gpus_param_tests
@@ -115,7 +114,7 @@ def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distr
     pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0.(backend:ddp)")
 ])
 def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distributed_backend):
-    assert Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu == expected_root_gpu
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
 
 
 @pytest.mark.gpus_param_tests
@@ -125,7 +124,7 @@ def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distrib
     pytest.param(0, None, "ddp", id="None is None"),
 ])
 def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
-    assert Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu == expected_root_gpu
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
 
 
 # Asking for a gpu when non are available will result in a MisconfigurationException
@@ -141,7 +140,7 @@ def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_
 ])
 def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
     with pytest.raises(MisconfigurationException):
-        Trainer(gpus=gpus, distributed_backend=distributed_backend)
+        Trainer(gpus=gpus, accelerator=distributed_backend)
 
 
 @pytest.mark.gpus_param_tests
diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py
index 1a38b12d37ba0..f47c13021edde 100644
--- a/tests/models/test_horovod.py
+++ b/tests/models/test_horovod.py
@@ -80,7 +80,7 @@ def test_horovod_cpu(enable_pl_optimizer, tmpdir):
         max_epochs=1,
         limit_train_batches=0.4,
         limit_val_batches=0.2,
-        distributed_backend='horovod',
+        accelerator='horovod',
         deterministic=True,
         enable_pl_optimizer=enable_pl_optimizer,
     )
@@ -120,7 +120,7 @@ def test_horovod_multi_gpu(tmpdir):
         limit_val_batches=0.2,
         gpus=2,
         deterministic=True,
-        distributed_backend='horovod'
+        accelerator='horovod',
     )
     _run_horovod(trainer_options, on_gpu=True)
@@ -141,7 +141,7 @@ def test_horovod_apex(tmpdir):
         limit_val_batches=0.2,
         gpus=2,
         deterministic=True,
-        distributed_backend='horovod',
+        accelerator='horovod',
         amp_backend='apex',
         precision=16,
     )
@@ -165,7 +165,7 @@ def test_horovod_amp(tmpdir):
         limit_val_batches=0.2,
         gpus=2,
         deterministic=True,
-        distributed_backend='horovod',
+        accelerator='horovod',
         amp_backend='native',
         precision=16,
     )
@@ -200,7 +200,7 @@ def validation_step(self, batch, *args, **kwargs):
         limit_val_batches=0.2,
         gpus=1,
         deterministic=True,
-        distributed_backend='horovod'
+        accelerator='horovod',
     )
     tpipes.run_model_test_without_loggers(trainer_options, model)
@@ -218,7 +218,7 @@ def test_horovod_multi_optimizer(enable_pl_optimizer, tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         deterministic=True,
-        distributed_backend='horovod',
+        accelerator='horovod',
         enable_pl_optimizer=enable_pl_optimizer,
     )
     result = trainer.fit(model)
@@ -308,7 +308,7 @@ def sk_metric(preds, target):
     def _compute_batch():
         trainer = Trainer(
             fast_dev_run=True,
-            distributed_backend='horovod',
+            accelerator='horovod',
         )
 
         accelerator_backend = trainer.accelerator_connector.select_accelerator()
diff --git a/tests/models/test_onnx.py b/tests/models/test_onnx.py
index 5d3cf7d6bdffc..a3919a6a8a7dd 100644
--- a/tests/models/test_onnx.py
+++ b/tests/models/test_onnx.py
@@ -85,8 +85,8 @@ def test_model_saves_on_multi_gpu(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
-        progress_bar_refresh_rate=0
+        accelerator='ddp_spawn',
+        progress_bar_refresh_rate=0,
     )
 
     model = EvalModelTemplate()
diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py
index b350e8391143d..e8703b9496f65 100644
--- a/tests/models/test_restore.py
+++ b/tests/models/test_restore.py
@@ -162,7 +162,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
         checkpoint_callback=checkpoint,
         logger=logger,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
         default_root_dir=tmpdir,
     )
 
@@ -212,7 +212,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
         checkpoint_callback=checkpoint,
         logger=logger,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         default_root_dir=tmpdir,
     )
 
@@ -332,7 +332,7 @@ def test_dp_resume(tmpdir):
     hparams = EvalModelTemplate.get_default_hparams()
     model = EvalModelTemplate(**hparams)
 
-    trainer_options = dict(max_epochs=1, gpus=2, distributed_backend='dp', default_root_dir=tmpdir,)
+    trainer_options = dict(max_epochs=1, gpus=2, accelerator='dp', default_root_dir=tmpdir)
 
     # get logger
     logger = tutils.get_default_logger(tmpdir)
diff --git a/tests/models/test_sync_batchnorm.py b/tests/models/test_sync_batchnorm.py
index d99e0a205292b..fd771c98635ab 100644
--- a/tests/models/test_sync_batchnorm.py
+++ b/tests/models/test_sync_batchnorm.py
@@ -102,7 +102,7 @@ def test_sync_batchnorm_ddp(tmpdir):
     trainer = Trainer(
         gpus=2,
         num_nodes=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         max_epochs=1,
         max_steps=3,
         sync_batchnorm=True,
diff --git a/tests/plugins/test_amp_plugin.py b/tests/plugins/test_amp_plugin.py
index 724ebe7c82999..aafc48b8350a5 100644
--- a/tests/plugins/test_amp_plugin.py
+++ b/tests/plugins/test_amp_plugin.py
@@ -37,8 +37,8 @@ def on_fit_start(self, trainer, pl_module):
         amp_backend='native',
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
-        callbacks=[CB()]
+        accelerator=ddp_backend,
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -73,9 +73,9 @@ def on_fit_start(self, trainer, pl_module):
         amp_backend='native',
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[MyNativeAMP()],
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -101,11 +101,11 @@ def test_amp_gradient_unscale(tmpdir):
         limit_test_batches=2,
         limit_val_batches=2,
         amp_backend='native',
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=2,
         precision=16,
         track_grad_norm=2,
-        log_every_n_steps=1
+        log_every_n_steps=1,
     )
     trainer.fit(model)
@@ -130,7 +130,7 @@ def test_amp_gradient_unscale_accumulate_grad_batches(tmpdir):
         limit_test_batches=2,
         limit_val_batches=2,
         amp_backend='native',
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=2,
         precision=16,
         track_grad_norm=2,
diff --git a/tests/plugins/test_apex_plugin.py b/tests/plugins/test_apex_plugin.py
index f1e9a5b4f73c5..e833dff98dfb7 100644
--- a/tests/plugins/test_apex_plugin.py
+++ b/tests/plugins/test_apex_plugin.py
@@ -34,8 +34,8 @@ def on_fit_start(self, trainer, pl_module):
         amp_backend='apex',
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
-        callbacks=[CB()]
+        accelerator=ddp_backend,
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -70,9 +70,9 @@ def on_fit_start(self, trainer, pl_module):
         amp_backend='apex',
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[MyApexPlugin()],
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
diff --git a/tests/plugins/test_ddp_plugin.py b/tests/plugins/test_ddp_plugin.py
index f80a31c975e9b..ef6abf6be5c4c 100644
--- a/tests/plugins/test_ddp_plugin.py
+++ b/tests/plugins/test_ddp_plugin.py
@@ -40,7 +40,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         callbacks=[CB()],
     )
 
@@ -78,7 +78,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[MyDDP()],
         callbacks=[CB()],
     )
@@ -116,7 +116,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins='ddp_sharded',
         callbacks=[CB()],
     )
@@ -147,7 +147,7 @@ def test_ddp_invalid_choice_string_ddp_cpu(tmpdir, ddp_backend, gpus, num_proces
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins='invalid',
     )
@@ -183,7 +183,7 @@ class MyDDP(DDPPlugin):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=['ddp_sharded', MyDDP()],
     )
@@ -220,7 +220,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[MyDDP(broadcast_buffers=False, find_unused_parameters=True)],
         callbacks=[CB()],
     )
diff --git a/tests/plugins/test_plugin.py b/tests/plugins/test_plugin.py
index 9c2bffdff72e1..be9d95f09f03f 100644
--- a/tests/plugins/test_plugin.py
+++ b/tests/plugins/test_plugin.py
@@ -70,7 +70,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[CustomPlugin()],
         callbacks=[CB()],
     )
@@ -120,6 +120,6 @@ def required_plugins(self, amp_backend: AMPType, trainer: Trainer) -> list:
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[CustomPlugin(), NativeAMPPlugin()],
     )
diff --git a/tests/plugins/test_sharded_plugin.py b/tests/plugins/test_sharded_plugin.py
index 5010c39de7a80..09453f6e24600 100644
--- a/tests/plugins/test_sharded_plugin.py
+++ b/tests/plugins/test_sharded_plugin.py
@@ -46,7 +46,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[DDPShardedPlugin()],
         callbacks=[CB()],
     )
@@ -66,10 +66,10 @@ def test_invalid_apex_sharded(tmpdir):
     with pytest.raises(MisconfigurationException, match='Sharded Plugin is not supported with Apex AMP'):
         trainer = Trainer(
             fast_dev_run=True,
-            distributed_backend='ddp_spawn',
+            accelerator='ddp_spawn',
             plugins=[DDPShardedPlugin()],
             precision=16,
-            amp_backend='apex'
+            amp_backend='apex',
         )
 
         trainer.fit(model)
@@ -110,7 +110,7 @@ def on_fit_start(self, trainer, pl_module):
         gpus=gpus,
         precision=16,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[DDPShardedPlugin()],
         callbacks=[CB()],
     )
diff --git a/tests/trainer/legacy_deprecate_flow_log_tests/test_trainer_steps_scalar_return.py b/tests/trainer/legacy_deprecate_flow_log_tests/test_trainer_steps_scalar_return.py
index 436f25b0eb7d8..8f3b39ea7e117 100644
--- a/tests/trainer/legacy_deprecate_flow_log_tests/test_trainer_steps_scalar_return.py
+++ b/tests/trainer/legacy_deprecate_flow_log_tests/test_trainer_steps_scalar_return.py
@@ -222,7 +222,7 @@ def test_dpp_reduce_mean_pbar(tmpdir):
         limit_train_batches=10,
         limit_test_batches=2,
         limit_val_batches=2,
-        distributed_backend=distributed_backend,
+        accelerator=distributed_backend,
         gpus=2,
         precision=32)
diff --git a/tests/trainer/logging_tests/test_distributed_logging.py b/tests/trainer/logging_tests/test_distributed_logging.py
index fe357d4faa407..72975d17d17a2 100644
--- a/tests/trainer/logging_tests/test_distributed_logging.py
+++ b/tests/trainer/logging_tests/test_distributed_logging.py
@@ -41,7 +41,7 @@ def test_global_zero_only_logging_ddp_cpu(tmpdir):
     model = TestModel()
     model.training_epoch_end = None
     trainer = Trainer(
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=2,
         default_root_dir=tmpdir,
         limit_train_batches=1,
@@ -60,7 +60,7 @@ def test_global_zero_only_logging_ddp_spawn(tmpdir):
     model = TestModel()
     model.training_epoch_end = None
     trainer = Trainer(
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=2,
         default_root_dir=tmpdir,
         limit_train_batches=1,
diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py
index 71e1c088ece14..ef8a39c5d8abf 100644
--- a/tests/trainer/test_dataloaders.py
+++ b/tests/trainer/test_dataloaders.py
@@ -705,7 +705,7 @@ def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
     trainer = Trainer(
         gpus=[0, 1],
         num_nodes=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         default_root_dir=tmpdir,
     )
@@ -763,10 +763,10 @@ def test_dataloader_distributed_sampler(tmpdir):
     trainer = Trainer(
         gpus=[0, 1],
         num_nodes=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         default_root_dir=tmpdir,
         max_steps=1,
-        callbacks=[DistribSamplerCallback()]
+        callbacks=[DistribSamplerCallback()],
     )
     trainer.fit(model)
     trainer.test(ckpt_path=None)
@@ -795,7 +795,7 @@ def test_dataloader_distributed_sampler_already_attached(tmpdir):
     trainer = Trainer(
         gpus=[0, 1],
         num_nodes=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         default_root_dir=tmpdir,
         max_steps=100,
         callbacks=[DistribSamplerCallback()],
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 328b2c0a0f859..b7db30e398535 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -1118,7 +1118,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
     "trainer_kwargs,expected",
     [
         pytest.param(
-            dict(distributed_backend=None, gpus=None),
+            dict(accelerator=None, gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1130,7 +1130,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="dp", gpus=None),
+            dict(accelerator="dp", gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1142,7 +1142,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="dp", gpus=None),
+            dict(accelerator="dp", gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1154,7 +1154,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp", gpus=None),
+            dict(accelerator="ddp", gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1166,7 +1166,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp", num_processes=2, gpus=None),
+            dict(accelerator="ddp", num_processes=2, gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1178,7 +1178,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp", num_nodes=2, gpus=None),
+            dict(accelerator="ddp", num_nodes=2, gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1190,7 +1190,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp_cpu", num_processes=2, gpus=None),
+            dict(accelerator="ddp_cpu", num_processes=2, gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1202,7 +1202,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp2", gpus=None),
+            dict(accelerator="ddp2", gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1214,7 +1214,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend=None, gpus=1),
+            dict(accelerator=None, gpus=1),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1227,7 +1227,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend="dp", gpus=1),
+            dict(accelerator="dp", gpus=1),
             dict(
                 use_dp=True,
                 use_ddp=False,
@@ -1240,7 +1240,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp", gpus=1),
+            dict(accelerator="ddp", gpus=1),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1253,7 +1253,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp_cpu", num_processes=2, gpus=1),
+            dict(accelerator="ddp_cpu", num_processes=2, gpus=1),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1266,7 +1266,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp2", gpus=1),
+            dict(accelerator="ddp2", gpus=1),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1279,7 +1279,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend=None, gpus=2),
+            dict(accelerator=None, gpus=2),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1292,7 +1292,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
         ),
         pytest.param(
-            dict(distributed_backend="dp", gpus=2),
+            dict(accelerator="dp", gpus=2),
             dict(
                 use_dp=True,
                 use_ddp=False,
@@ -1305,7 +1305,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp", gpus=2),
+            dict(accelerator="ddp", gpus=2),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1318,7 +1318,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp2", gpus=2),
+            dict(accelerator="ddp2", gpus=2),
             dict(
                 use_dp=False,
                 use_ddp=False,
diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py
index c39d643aed407..db8df3ef0d5cd 100644
--- a/tests/trainer/test_trainer_cli.py
+++ b/tests/trainer/test_trainer_cli.py
@@ -125,7 +125,7 @@ def _raise():
         "min_steps": None,
         "max_steps": None,
         "log_gpu_memory": None,
-        "distributed_backend": None,
+        "accelerator": None,
         "weights_save_path": None,
         "truncated_bptt_steps": None,
         "resume_from_checkpoint": None,
diff --git a/tests/trainer/test_trainer_test_loop.py b/tests/trainer/test_trainer_test_loop.py
index 6ccbbd6c0b557..26f6710d09f7d 100644
--- a/tests/trainer/test_trainer_test_loop.py
+++ b/tests/trainer/test_trainer_test_loop.py
@@ -58,7 +58,7 @@ def test_ddp_spawn_test(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
     )
     trainer.fit(model)
 
     assert 'ckpt' in trainer.checkpoint_callback.best_model_path
diff --git a/tests/utilities/test_dtype_device_mixin.py b/tests/utilities/test_dtype_device_mixin.py
index 31ce37839dc58..0c2b13d686834 100644
--- a/tests/utilities/test_dtype_device_mixin.py
+++ b/tests/utilities/test_dtype_device_mixin.py
@@ -80,7 +80,7 @@ def test_submodules_multi_gpu_dp(tmpdir):
     model = TopModule()
     trainer = Trainer(
         default_root_dir=tmpdir,
-        distributed_backend='dp',
+        accelerator='dp',
         gpus=2,
         callbacks=[DeviceAssertCallback()],
         max_steps=1,
@@ -93,7 +93,7 @@ def test_submodules_multi_gpu_ddp_spawn(tmpdir):
     model = TopModule()
     trainer = Trainer(
         default_root_dir=tmpdir,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=2,
         callbacks=[DeviceAssertCallback()],
         max_steps=1,