diff --git a/benchmarks/test_sharded_parity.py b/benchmarks/test_sharded_parity.py
index 1240710674c59..c3a14d0616d18 100644
--- a/benchmarks/test_sharded_parity.py
+++ b/benchmarks/test_sharded_parity.py
@@ -105,14 +105,14 @@ def test_ddp_string_sharded_plugin_correctness_amp_multi_gpu():
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', reason="test should be run outside of pytest")
-@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 32")
+@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 32")
 def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
     plugin_parity_test(
         gpus=args.gpus,
         precision=args.precision,
-        accelerator=args.distributed_backend,
+        accelerator=args.accelerator,
         plugin=DDPShardedPlugin(),
-        model_cls=SeedTrainLoaderModel
+        model_cls=SeedTrainLoaderModel,
     )
 
 
@@ -120,14 +120,14 @@ def test_ddp_sharded_plugin_correctness_multi_gpu_ddp(tmpdir, args=None):
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 @pytest.mark.skipif(not os.getenv("PL_RUNNING_SPECIAL_TESTS", '0') == '1', reason="test should be run outside of pytest")
-@DDPLauncher.run("--distributed_backend ddp --gpus 2 --precision 16")
+@DDPLauncher.run("--accelerator ddp --gpus 2 --precision 16")
 def test_ddp_sharded_plugin_correctness_amp_multi_gpu_ddp(tmpdir, args=None):
     plugin_parity_test(
         gpus=args.gpus,
         precision=args.precision,
-        accelerator=args.distributed_backend,
+        accelerator=args.accelerator,
         plugin=DDPShardedPlugin(),
-        model_cls=SeedTrainLoaderModel
+        model_cls=SeedTrainLoaderModel,
     )
diff --git a/pl_examples/domain_templates/imagenet.py b/pl_examples/domain_templates/imagenet.py
index ba4b292fb4af0..f02b6dc0952d7 100644
--- a/pl_examples/domain_templates/imagenet.py
+++ b/pl_examples/domain_templates/imagenet.py
@@ -210,7 +210,7 @@ def main(args: Namespace) -> None:
     if args.seed is not None:
         pl.seed_everything(args.seed)
 
-    if args.distributed_backend == 'ddp':
+    if args.accelerator == 'ddp':
         # When using a single GPU per process and per
         # DistributedDataParallel, we need to divide the batch size
         # ourselves based on the total number of GPUs we have
diff --git a/pl_examples/domain_templates/reinforce_learn_Qnet.py b/pl_examples/domain_templates/reinforce_learn_Qnet.py
index c0599dc74c5a9..4b01f83e36639 100644
--- a/pl_examples/domain_templates/reinforce_learn_Qnet.py
+++ b/pl_examples/domain_templates/reinforce_learn_Qnet.py
@@ -341,7 +341,7 @@ def main(args) -> None:
 
     trainer = pl.Trainer(
         gpus=1,
-        distributed_backend='dp',
+        accelerator='dp',
         val_check_interval=100
     )
diff --git a/pl_examples/domain_templates/semantic_segmentation.py b/pl_examples/domain_templates/semantic_segmentation.py
index af37575bbfafc..4ca1ebc2aec76 100644
--- a/pl_examples/domain_templates/semantic_segmentation.py
+++ b/pl_examples/domain_templates/semantic_segmentation.py
@@ -214,7 +214,7 @@ def main(hparams: Namespace):
         logger=logger,
         max_epochs=hparams.epochs,
         accumulate_grad_batches=hparams.grad_batches,
-        distributed_backend=hparams.distributed_backend,
+        accelerator=hparams.accelerator,
         precision=16 if hparams.use_amp else 32,
     )
diff --git a/pytorch_lightning/accelerators/accelerator_connector.py b/pytorch_lightning/accelerators/accelerator_connector.py
index 9d36f76876a08..1436e37dbdeaa 100644
--- a/pytorch_lightning/accelerators/accelerator_connector.py
+++ b/pytorch_lightning/accelerators/accelerator_connector.py
@@ -87,7 +87,7 @@ def on_trainer_init(
         self.trainer.tpu_id = self.trainer.tpu_cores[0] if isinstance(self.trainer.tpu_cores, list) else None
 
         if num_processes != 1 and distributed_backend != "ddp_cpu":
-            rank_zero_warn("num_processes is only used for distributed_backend=\"ddp_cpu\". Ignoring it.")
+            rank_zero_warn("num_processes is only used for `accelerator='ddp_cpu'`. Ignoring it.")
         self.trainer.num_processes = num_processes
 
         # override with environment flag
@@ -276,7 +276,7 @@ def select_accelerator(self):
             accelerator_backend = accelerators.CPUAccelerator(self.trainer, cluster_env)
         else:
             raise MisconfigurationException(
-                f'Trainer(distributed_backend={self.trainer.distributed_backend} is not a supported backend'
+                f'Trainer(accelerator={self.trainer.distributed_backend} is not a supported backend'
             )
         return accelerator_backend
@@ -299,8 +299,8 @@ def set_distributed_mode(self):
         elif self.trainer.num_gpus > 1:
             rank_zero_warn(
                 'You requested multiple GPUs but did not specify a backend, e.g.'
-                ' Trainer(distributed_backend="dp"|"ddp"|"ddp2").'
-                ' Setting distributed_backend="ddp_spawn" for you.'
+                ' `Trainer(accelerator="dp"|"ddp"|"ddp2")`.'
+                ' Setting `accelerator="ddp_spawn"` for you.'
             )
             self.trainer.distributed_backend = "ddp_spawn"
@@ -342,7 +342,7 @@ def set_distributed_mode(self):
         if self.trainer.num_nodes > 1 and not (self.trainer.use_ddp2 or self.trainer.use_ddp):
             raise MisconfigurationException(
                 'DataParallel does not support num_nodes > 1. Switching to DistributedDataParallel for you. '
-                'To silence this warning set distributed_backend=ddp or distributed_backend=ddp2'
+                'To silence this warning set `accelerator="ddp"` or `accelerator="ddp2"`'
             )
 
         rank_zero_info(f'GPU available: {torch.cuda.is_available()}, used: {self.trainer.on_gpu}')
@@ -366,7 +366,7 @@ def check_horovod(self):
         """Raises a `MisconfigurationException` if the Trainer is not configured correctly for Horovod."""
         if not HOROVOD_AVAILABLE:
             raise MisconfigurationException(
-                'Requested `distributed_backend="horovod"`, but Horovod is not installed.'
+                'Requested `accelerator="horovod"`, but Horovod is not installed.'
                 'Install with \n $HOROVOD_WITH_PYTORCH=1 pip install horovod[pytorch]'
             )
diff --git a/pytorch_lightning/accelerators/ddp_accelerator.py b/pytorch_lightning/accelerators/ddp_accelerator.py
index 942d66bc029e9..9789247ac24ce 100644
--- a/pytorch_lightning/accelerators/ddp_accelerator.py
+++ b/pytorch_lightning/accelerators/ddp_accelerator.py
@@ -174,7 +174,7 @@ def _check_can_spawn_children(self):
         if self._has_spawned_children:
             raise RuntimeError(
                 "You tried to run `.fit` or `.test` multiple times in the same script."
-                " This is not supported in DDP mode, switch to `distributed_backend='ddp_spawn'` instead."
+                " This is not supported in DDP mode, switch to `accelerator='ddp_spawn'` instead."
             )
 
     def set_world_ranks(self, process_idx):
diff --git a/pytorch_lightning/trainer/data_loading.py b/pytorch_lightning/trainer/data_loading.py
index 3bb444622cebc..f2bcb1d1760d6 100644
--- a/pytorch_lightning/trainer/data_loading.py
+++ b/pytorch_lightning/trainer/data_loading.py
@@ -69,12 +69,12 @@ def _worker_check(self, dataloader: DataLoader, name: str) -> None:
         if dataloader.num_workers > 0 and using_spawn:
             rank_zero_warn('Dataloader(num_workers>0) and ddp_spawn do not mix well!'
                            ' Your performance might suffer dramatically.'
-                           ' Please consider setting distributed_backend=ddp to use num_workers > 0'
+                           ' Please consider setting accelerator=ddp to use num_workers > 0'
                            ' (this is a bottleneck of Python .spawn() and PyTorch')
 
         elif dataloader.num_workers == 0 and using_spawn:
-            rank_zero_warn('You are using `distributed_backend=ddp_spawn` with num_workers=0.'
-                           ' For much faster performance, switch to `distributed_backend=ddp`'
+            rank_zero_warn('You are using `accelerator=ddp_spawn` with num_workers=0.'
+                           ' For much faster performance, switch to `accelerator=ddp`'
                            ' and set `num_workers>0`')
 
         elif dataloader.num_workers <= 2 and multiprocessing.cpu_count() > 2 and not using_spawn:
diff --git a/tests/backends/ddp_model.py b/tests/backends/ddp_model.py
index 32b30c05538be..685dae0e8ef28 100644
--- a/tests/backends/ddp_model.py
+++ b/tests/backends/ddp_model.py
@@ -33,7 +33,7 @@ def main():
     parser.add_argument('--tmpdir')
     parser.add_argument('--workdir')
     parser.set_defaults(gpus=2)
-    parser.set_defaults(distributed_backend="ddp")
+    parser.set_defaults(accelerator="ddp")
     args = parser.parse_args()
 
     model = EvalModelTemplate()
diff --git a/tests/backends/test_accelerator_connector.py b/tests/backends/test_accelerator_connector.py
index 551de95c7e480..704a701153f18 100644
--- a/tests/backends/test_accelerator_connector.py
+++ b/tests/backends/test_accelerator_connector.py
@@ -47,8 +47,8 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
-        callbacks=[CB()]
+        accelerator='ddp_cpu',
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -68,9 +68,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -90,9 +90,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -120,9 +120,9 @@ def on_fit_start(self, trainer, pl_module):
    model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -152,9 +152,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp2',
+        accelerator='ddp2',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -181,9 +181,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp',
+        accelerator='ddp',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -210,9 +210,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp2',
+        accelerator='ddp2',
         gpus=2,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -239,9 +239,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -267,9 +267,9 @@ def on_fit_start(self, trainer, pl_module):
     model = BoringModel()
     trainer = Trainer(
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -304,9 +304,9 @@ def on_fit_start(self, trainer, pl_module):
     trainer = Trainer(
         plugins=[CustomCluster()],
         fast_dev_run=True,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=1,
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
diff --git a/tests/backends/test_ddp.py b/tests/backends/test_ddp.py
index f87827fdca4ea..19585d4e8caab 100644
--- a/tests/backends/test_ddp.py
+++ b/tests/backends/test_ddp.py
@@ -22,7 +22,7 @@
 
 
 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
@@ -38,7 +38,7 @@ def test_multi_gpu_model_ddp_fit_only(tmpdir, cli_args):
 
 
 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
@@ -54,7 +54,7 @@ def test_multi_gpu_model_ddp_test_only(tmpdir, cli_args):
 
 
 @pytest.mark.parametrize('cli_args', [
-    pytest.param('--max_epochs 1 --gpus 2 --distributed_backend ddp'),
+    pytest.param('--max_epochs 1 --gpus 2 --accelerator ddp'),
 ])
 @pytest.mark.skipif(torch.cuda.device_count() < 2, reason="test requires multi-GPU machine")
 def test_multi_gpu_model_ddp_fit_test(tmpdir, cli_args):
diff --git a/tests/backends/test_ddp_spawn.py b/tests/backends/test_ddp_spawn.py
index ed4bb140a5ba9..de22622c85e2e 100644
--- a/tests/backends/test_ddp_spawn.py
+++ b/tests/backends/test_ddp_spawn.py
@@ -34,7 +34,7 @@ def test_multi_gpu_early_stop_ddp_spawn(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
     )
 
     model = EvalModelTemplate()
@@ -51,8 +51,8 @@ def test_multi_gpu_model_ddp_spawn(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
-        progress_bar_refresh_rate=0
+        accelerator='ddp_spawn',
+        progress_bar_refresh_rate=0,
     )
 
     model = EvalModelTemplate()
@@ -79,7 +79,7 @@ def test_ddp_all_dataloaders_passed_to_fit(tmpdir):
         limit_train_batches=0.2,
         limit_val_batches=0.2,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn'
+        accelerator='ddp_spawn',
     )
     result = trainer.fit(model, **fit_options)
     assert result == 1, "DDP doesn't work with dataloaders passed to fit()."
diff --git a/tests/backends/test_dp.py b/tests/backends/test_dp.py
index c051b442cb7a7..c0e03efceca82 100644
--- a/tests/backends/test_dp.py
+++ b/tests/backends/test_dp.py
@@ -37,7 +37,7 @@ def test_multi_gpu_early_stop_dp(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
     )
 
     model = EvalModelTemplate()
@@ -54,8 +54,8 @@ def test_multi_gpu_model_dp(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
-        progress_bar_refresh_rate=0
+        accelerator='dp',
+        progress_bar_refresh_rate=0,
     )
 
     model = EvalModelTemplate()
@@ -80,7 +80,7 @@ def test_dp_test(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
     )
     trainer.fit(model)
 
     assert 'ckpt' in trainer.checkpoint_callback.best_model_path
diff --git a/tests/core/test_datamodules.py b/tests/core/test_datamodules.py
index 3e683025e8867..c60553c9f8921 100644
--- a/tests/core/test_datamodules.py
+++ b/tests/core/test_datamodules.py
@@ -356,7 +356,7 @@ def test_full_loop_dp(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=3,
         weights_summary=None,
-        distributed_backend='dp',
+        accelerator='dp',
         gpus=2,
         deterministic=True,
     )
diff --git a/tests/loggers/test_all.py b/tests/loggers/test_all.py
index 5fd166afce0f3..ba5791c7b9f4a 100644
--- a/tests/loggers/test_all.py
+++ b/tests/loggers/test_all.py
@@ -316,7 +316,7 @@ def _test_logger_created_on_rank_zero_only(tmpdir, logger_class):
     trainer = Trainer(
         logger=logger,
         default_root_dir=tmpdir,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=2,
         max_steps=1,
         checkpoint_callback=True,
diff --git a/tests/models/data/horovod/train_default_model.py b/tests/models/data/horovod/train_default_model.py
index d5a05057a42dd..b6900e322f0bb 100644
--- a/tests/models/data/horovod/train_default_model.py
+++ b/tests/models/data/horovod/train_default_model.py
@@ -79,7 +79,7 @@ def run_test_from_config(trainer_options):
     trainer.checkpoint_connector.hpc_load(ckpt_path, on_gpu=args.on_gpu)
 
     if args.on_gpu:
-        trainer = Trainer(gpus=1, distributed_backend='horovod', max_epochs=1)
+        trainer = Trainer(gpus=1, accelerator='horovod', max_epochs=1)
         # Test the root_gpu property
         assert trainer.root_gpu == hvd.local_rank()
diff --git a/tests/models/test_amp.py b/tests/models/test_amp.py
index 9c2ce2ccfffcf..79238c92e712d 100644
--- a/tests/models/test_amp.py
+++ b/tests/models/test_amp.py
@@ -37,7 +37,7 @@ def test_amp_single_gpu_dp(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=1,
-        distributed_backend='dp',
+        accelerator='dp',
         precision=16,
     )
 
@@ -56,7 +56,7 @@ def test_amp_single_gpu_ddp_spawn(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         precision=16,
     )
 
@@ -77,7 +77,7 @@ def test_amp_multi_gpu_dp(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=2,
-        distributed_backend='dp',
+        accelerator='dp',
         precision=16,
     )
 
@@ -96,7 +96,7 @@ def test_amp_multi_gpu_ddp_spawn(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=2,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         precision=16,
     )
 
@@ -127,7 +127,7 @@ def test_amp_gpu_ddp_slurm_managed(tmpdir):
         default_root_dir=tmpdir,
         max_epochs=1,
         gpus=[0],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         precision=16,
         checkpoint_callback=checkpoint,
         logger=logger,
diff --git a/tests/models/test_cpu.py b/tests/models/test_cpu.py
index 19bc5f063faf9..f8403e460d327 100644
--- a/tests/models/test_cpu.py
+++ b/tests/models/test_cpu.py
@@ -143,7 +143,7 @@ def test_multi_cpu_model_ddp(enable_pl_optimizer, tmpdir):
         limit_val_batches=0.2,
         gpus=None,
         num_processes=2,
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         enable_pl_optimizer=enable_pl_optimizer,
     )
diff --git a/tests/models/test_gpu.py b/tests/models/test_gpu.py
index 5eb49ead9d1ed..2393b42d27191 100644
--- a/tests/models/test_gpu.py
+++ b/tests/models/test_gpu.py
@@ -38,12 +38,11 @@ def test_multi_gpu_none_backend(tmpdir):
     tutils.set_random_master_port()
 
     trainer_options = dict(
         default_root_dir=tmpdir,
-        distributed_backend=None,
         progress_bar_refresh_rate=0,
         max_epochs=1,
         limit_train_batches=0.2,
         limit_val_batches=0.2,
-        gpus=2
+        gpus=2,
     )
 
     model = EvalModelTemplate()
@@ -93,7 +92,7 @@ def device_count():
     pytest.param(3, 3, "ddp", id="3rd gpu - 1 gpu to use (backend:ddp)")
 ])
 def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, distributed_backend):
-    assert Trainer(gpus=gpus, distributed_backend=distributed_backend).num_gpus == expected_num_gpus
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
 
 
 @pytest.mark.gpus_param_tests
@@ -102,7 +101,7 @@ def test_trainer_gpu_parse(mocked_device_count, gpus, expected_num_gpus, distrib
     pytest.param(None, 0, "ddp", id="None - expect 0 gpu to use."),
 ])
 def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distributed_backend):
-    assert Trainer(gpus=gpus, distributed_backend=distributed_backend).num_gpus == expected_num_gpus
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).num_gpus == expected_num_gpus
 
 
 @pytest.mark.gpus_param_tests
@@ -115,7 +114,7 @@ def test_trainer_num_gpu_0(mocked_device_count_0, gpus, expected_num_gpus, distr
     pytest.param(3, 0, "ddp", id="3 gpus, expect gpu root device to be 0.(backend:ddp)")
 ])
 def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distributed_backend):
-    assert Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu == expected_root_gpu
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
 
 
 @pytest.mark.gpus_param_tests
@@ -125,7 +124,7 @@ def test_root_gpu_property(mocked_device_count, gpus, expected_root_gpu, distrib
     pytest.param(0, None, "ddp", id="None is None"),
 ])
 def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
-    assert Trainer(gpus=gpus, distributed_backend=distributed_backend).root_gpu == expected_root_gpu
+    assert Trainer(gpus=gpus, accelerator=distributed_backend).root_gpu == expected_root_gpu
 
 
 # Asking for a gpu when non are available will result in a MisconfigurationException
@@ -141,7 +140,7 @@ def test_root_gpu_property_0_passing(mocked_device_count_0, gpus, expected_root_
 ])
 def test_root_gpu_property_0_raising(mocked_device_count_0, gpus, expected_root_gpu, distributed_backend):
     with pytest.raises(MisconfigurationException):
-        Trainer(gpus=gpus, distributed_backend=distributed_backend)
+        Trainer(gpus=gpus, accelerator=distributed_backend)
 
 
 @pytest.mark.gpus_param_tests
diff --git a/tests/models/test_horovod.py b/tests/models/test_horovod.py
index 1a38b12d37ba0..f47c13021edde 100644
--- a/tests/models/test_horovod.py
+++ b/tests/models/test_horovod.py
@@ -80,7 +80,7 @@ def test_horovod_cpu(enable_pl_optimizer, tmpdir):
         max_epochs=1,
         limit_train_batches=0.4,
         limit_val_batches=0.2,
-        distributed_backend='horovod',
+        accelerator='horovod',
         deterministic=True,
         enable_pl_optimizer=enable_pl_optimizer,
     )
@@ -120,7 +120,7 @@ def test_horovod_multi_gpu(tmpdir):
         limit_val_batches=0.2,
         gpus=2,
         deterministic=True,
-        distributed_backend='horovod'
+        accelerator='horovod',
     )
     _run_horovod(trainer_options, on_gpu=True)
@@ -141,7 +141,7 @@ def test_horovod_apex(tmpdir):
         limit_val_batches=0.2,
         gpus=2,
         deterministic=True,
-        distributed_backend='horovod',
+        accelerator='horovod',
         amp_backend='apex',
         precision=16,
     )
@@ -165,7 +165,7 @@ def test_horovod_amp(tmpdir):
         limit_val_batches=0.2,
         gpus=2,
         deterministic=True,
-        distributed_backend='horovod',
+        accelerator='horovod',
         amp_backend='native',
         precision=16,
     )
@@ -200,7 +200,7 @@ def validation_step(self, batch, *args, **kwargs):
         limit_val_batches=0.2,
         gpus=1,
         deterministic=True,
-        distributed_backend='horovod'
+        accelerator='horovod',
     )
     tpipes.run_model_test_without_loggers(trainer_options, model)
@@ -218,7 +218,7 @@ def test_horovod_multi_optimizer(enable_pl_optimizer, tmpdir):
         limit_train_batches=0.4,
         limit_val_batches=0.2,
         deterministic=True,
-        distributed_backend='horovod',
+        accelerator='horovod',
         enable_pl_optimizer=enable_pl_optimizer,
     )
     result = trainer.fit(model)
@@ -308,7 +308,7 @@ def sk_metric(preds, target):
     def _compute_batch():
         trainer = Trainer(
             fast_dev_run=True,
-            distributed_backend='horovod',
+            accelerator='horovod',
         )
 
         accelerator_backend = trainer.accelerator_connector.select_accelerator()
diff --git a/tests/models/test_onnx.py b/tests/models/test_onnx.py
index 5d3cf7d6bdffc..a3919a6a8a7dd 100644
--- a/tests/models/test_onnx.py
+++ b/tests/models/test_onnx.py
@@ -85,8 +85,8 @@ def test_model_saves_on_multi_gpu(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
-        progress_bar_refresh_rate=0
+        accelerator='ddp_spawn',
+        progress_bar_refresh_rate=0,
     )
 
     model = EvalModelTemplate()
diff --git a/tests/models/test_restore.py b/tests/models/test_restore.py
index b350e8391143d..e8703b9496f65 100644
--- a/tests/models/test_restore.py
+++ b/tests/models/test_restore.py
@@ -162,7 +162,7 @@ def test_running_test_pretrained_model_distrib_dp(tmpdir):
         checkpoint_callback=checkpoint,
         logger=logger,
         gpus=[0, 1],
-        distributed_backend='dp',
+        accelerator='dp',
         default_root_dir=tmpdir,
     )
 
@@ -212,7 +212,7 @@ def test_running_test_pretrained_model_distrib_ddp_spawn(tmpdir):
         checkpoint_callback=checkpoint,
         logger=logger,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         default_root_dir=tmpdir,
     )
 
@@ -332,7 +332,7 @@ def test_dp_resume(tmpdir):
     hparams = EvalModelTemplate.get_default_hparams()
     model = EvalModelTemplate(**hparams)
 
-    trainer_options = dict(max_epochs=1, gpus=2, distributed_backend='dp', default_root_dir=tmpdir,)
+    trainer_options = dict(max_epochs=1, gpus=2, accelerator='dp', default_root_dir=tmpdir)
 
     # get logger
     logger = tutils.get_default_logger(tmpdir)
diff --git a/tests/models/test_sync_batchnorm.py b/tests/models/test_sync_batchnorm.py
index d99e0a205292b..fd771c98635ab 100644
--- a/tests/models/test_sync_batchnorm.py
+++ b/tests/models/test_sync_batchnorm.py
@@ -102,7 +102,7 @@ def test_sync_batchnorm_ddp(tmpdir):
     trainer = Trainer(
         gpus=2,
         num_nodes=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         max_epochs=1,
         max_steps=3,
         sync_batchnorm=True,
diff --git a/tests/plugins/test_amp_plugin.py b/tests/plugins/test_amp_plugin.py
index 724ebe7c82999..aafc48b8350a5 100644
--- a/tests/plugins/test_amp_plugin.py
+++ b/tests/plugins/test_amp_plugin.py
@@ -37,8 +37,8 @@ def on_fit_start(self, trainer, pl_module):
         amp_backend='native',
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
-        callbacks=[CB()]
+        accelerator=ddp_backend,
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -73,9 +73,9 @@ def on_fit_start(self, trainer, pl_module):
         amp_backend='native',
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[MyNativeAMP()],
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -101,11 +101,11 @@ def test_amp_gradient_unscale(tmpdir):
         limit_test_batches=2,
         limit_val_batches=2,
         amp_backend='native',
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=2,
         precision=16,
         track_grad_norm=2,
-        log_every_n_steps=1
+        log_every_n_steps=1,
     )
     trainer.fit(model)
@@ -130,7 +130,7 @@ def test_amp_gradient_unscale_accumulate_grad_batches(tmpdir):
         limit_test_batches=2,
         limit_val_batches=2,
         amp_backend='native',
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=2,
         precision=16,
         track_grad_norm=2,
diff --git a/tests/plugins/test_apex_plugin.py b/tests/plugins/test_apex_plugin.py
index f1e9a5b4f73c5..e833dff98dfb7 100644
--- a/tests/plugins/test_apex_plugin.py
+++ b/tests/plugins/test_apex_plugin.py
@@ -34,8 +34,8 @@ def on_fit_start(self, trainer, pl_module):
         amp_backend='apex',
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
-        callbacks=[CB()]
+        accelerator=ddp_backend,
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
@@ -70,9 +70,9 @@ def on_fit_start(self, trainer, pl_module):
         amp_backend='apex',
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[MyApexPlugin()],
-        callbacks=[CB()]
+        callbacks=[CB()],
     )
 
     with pytest.raises(SystemExit):
diff --git a/tests/plugins/test_ddp_plugin.py b/tests/plugins/test_ddp_plugin.py
index f80a31c975e9b..ef6abf6be5c4c 100644
--- a/tests/plugins/test_ddp_plugin.py
+++ b/tests/plugins/test_ddp_plugin.py
@@ -40,7 +40,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         callbacks=[CB()],
     )
 
@@ -78,7 +78,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[MyDDP()],
         callbacks=[CB()],
     )
@@ -116,7 +116,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins='ddp_sharded',
         callbacks=[CB()],
     )
@@ -147,7 +147,7 @@ def test_ddp_invalid_choice_string_ddp_cpu(tmpdir, ddp_backend, gpus, num_proces
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins='invalid',
     )
@@ -183,7 +183,7 @@ class MyDDP(DDPPlugin):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=['ddp_sharded', MyDDP()],
     )
@@ -220,7 +220,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[MyDDP(broadcast_buffers=False, find_unused_parameters=True)],
         callbacks=[CB()],
     )
diff --git a/tests/plugins/test_plugin.py b/tests/plugins/test_plugin.py
index 9c2bffdff72e1..be9d95f09f03f 100644
--- a/tests/plugins/test_plugin.py
+++ b/tests/plugins/test_plugin.py
@@ -70,7 +70,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[CustomPlugin()],
         callbacks=[CB()],
     )
@@ -120,6 +120,6 @@ def required_plugins(self, amp_backend: AMPType, trainer: Trainer) -> list:
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[CustomPlugin(), NativeAMPPlugin()],
     )
diff --git a/tests/plugins/test_sharded_plugin.py b/tests/plugins/test_sharded_plugin.py
index 5010c39de7a80..09453f6e24600 100644
--- a/tests/plugins/test_sharded_plugin.py
+++ b/tests/plugins/test_sharded_plugin.py
@@ -46,7 +46,7 @@ def on_fit_start(self, trainer, pl_module):
         fast_dev_run=True,
         gpus=gpus,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[DDPShardedPlugin()],
         callbacks=[CB()],
     )
@@ -66,10 +66,10 @@ def test_invalid_apex_sharded(tmpdir):
     with pytest.raises(MisconfigurationException, match='Sharded Plugin is not supported with Apex AMP'):
         trainer = Trainer(
             fast_dev_run=True,
-            distributed_backend='ddp_spawn',
+            accelerator='ddp_spawn',
             plugins=[DDPShardedPlugin()],
             precision=16,
-            amp_backend='apex'
+            amp_backend='apex',
         )
 
         trainer.fit(model)
@@ -110,7 +110,7 @@ def on_fit_start(self, trainer, pl_module):
         gpus=gpus,
         precision=16,
         num_processes=num_processes,
-        distributed_backend=ddp_backend,
+        accelerator=ddp_backend,
         plugins=[DDPShardedPlugin()],
         callbacks=[CB()],
     )
diff --git a/tests/trainer/legacy_deprecate_flow_log_tests/test_trainer_steps_scalar_return.py b/tests/trainer/legacy_deprecate_flow_log_tests/test_trainer_steps_scalar_return.py
index 436f25b0eb7d8..8f3b39ea7e117 100644
--- a/tests/trainer/legacy_deprecate_flow_log_tests/test_trainer_steps_scalar_return.py
+++ b/tests/trainer/legacy_deprecate_flow_log_tests/test_trainer_steps_scalar_return.py
@@ -222,7 +222,7 @@ def test_dpp_reduce_mean_pbar(tmpdir):
         limit_train_batches=10,
         limit_test_batches=2,
         limit_val_batches=2,
-        distributed_backend=distributed_backend,
+        accelerator=distributed_backend,
         gpus=2,
         precision=32)
diff --git a/tests/trainer/logging_tests/test_distributed_logging.py b/tests/trainer/logging_tests/test_distributed_logging.py
index fe357d4faa407..72975d17d17a2 100644
--- a/tests/trainer/logging_tests/test_distributed_logging.py
+++ b/tests/trainer/logging_tests/test_distributed_logging.py
@@ -41,7 +41,7 @@ def test_global_zero_only_logging_ddp_cpu(tmpdir):
     model = TestModel()
     model.training_epoch_end = None
     trainer = Trainer(
-        distributed_backend='ddp_cpu',
+        accelerator='ddp_cpu',
         num_processes=2,
         default_root_dir=tmpdir,
         limit_train_batches=1,
@@ -60,7 +60,7 @@ def test_global_zero_only_logging_ddp_spawn(tmpdir):
     model = TestModel()
     model.training_epoch_end = None
     trainer = Trainer(
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=2,
         default_root_dir=tmpdir,
         limit_train_batches=1,
diff --git a/tests/trainer/test_dataloaders.py b/tests/trainer/test_dataloaders.py
index 71e1c088ece14..ef8a39c5d8abf 100644
--- a/tests/trainer/test_dataloaders.py
+++ b/tests/trainer/test_dataloaders.py
@@ -705,7 +705,7 @@ def __init__(self, dataset, batch_size=1, shuffle=False, sampler=None,
     trainer = Trainer(
         gpus=[0, 1],
         num_nodes=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         default_root_dir=tmpdir,
     )
@@ -763,10 +763,10 @@ def test_dataloader_distributed_sampler(tmpdir):
     trainer = Trainer(
         gpus=[0, 1],
         num_nodes=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         default_root_dir=tmpdir,
         max_steps=1,
-        callbacks=[DistribSamplerCallback()]
+        callbacks=[DistribSamplerCallback()],
     )
     trainer.fit(model)
     trainer.test(ckpt_path=None)
@@ -795,7 +795,7 @@ def test_dataloader_distributed_sampler_already_attached(tmpdir):
     trainer = Trainer(
         gpus=[0, 1],
         num_nodes=1,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         default_root_dir=tmpdir,
         max_steps=100,
         callbacks=[DistribSamplerCallback()],
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 328b2c0a0f859..b7db30e398535 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -1118,7 +1118,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
     "trainer_kwargs,expected",
     [
         pytest.param(
-            dict(distributed_backend=None, gpus=None),
+            dict(accelerator=None, gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1130,7 +1130,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="dp", gpus=None),
+            dict(accelerator="dp", gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1142,7 +1142,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="dp", gpus=None),
+            dict(accelerator="dp", gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1154,7 +1154,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp", gpus=None),
+            dict(accelerator="ddp", gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1166,7 +1166,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp", num_processes=2, gpus=None),
+            dict(accelerator="ddp", num_processes=2, gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1178,7 +1178,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp", num_nodes=2, gpus=None),
+            dict(accelerator="ddp", num_nodes=2, gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1190,7 +1190,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp_cpu", num_processes=2, gpus=None),
+            dict(accelerator="ddp_cpu", num_processes=2, gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1202,7 +1202,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend="ddp2", gpus=None),
+            dict(accelerator="ddp2", gpus=None),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1214,7 +1214,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             ),
         ),
         pytest.param(
-            dict(distributed_backend=None, gpus=1),
+            dict(accelerator=None, gpus=1),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1227,7 +1227,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend="dp", gpus=1),
+            dict(accelerator="dp", gpus=1),
             dict(
                 use_dp=True,
                 use_ddp=False,
@@ -1240,7 +1240,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp", gpus=1),
+            dict(accelerator="ddp", gpus=1),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1253,7 +1253,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp_cpu", num_processes=2, gpus=1),
+            dict(accelerator="ddp_cpu", num_processes=2, gpus=1),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1266,7 +1266,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp2", gpus=1),
+            dict(accelerator="ddp2", gpus=1),
             dict(
                 use_dp=False,
                 use_ddp=False,
@@ -1279,7 +1279,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() == 0, reason="GPU needed")],
         ),
         pytest.param(
-            dict(distributed_backend=None, gpus=2),
+            dict(accelerator=None, gpus=2),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1292,7 +1292,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
         ),
         pytest.param(
-            dict(distributed_backend="dp", gpus=2),
+            dict(accelerator="dp", gpus=2),
             dict(
                 use_dp=True,
                 use_ddp=False,
@@ -1305,7 +1305,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp", gpus=2),
+            dict(accelerator="ddp", gpus=2),
             dict(
                 use_dp=False,
                 use_ddp=True,
@@ -1318,7 +1318,7 @@ def test_num_sanity_val_steps_neg_one(tmpdir, limit_val_batches):
             marks=[pytest.mark.skipif(torch.cuda.device_count() < 2, reason="Multiple GPUs needed")],
         ),
         pytest.param(
-            dict(distributed_backend="ddp2", gpus=2),
+            dict(accelerator="ddp2", gpus=2),
             dict(
                 use_dp=False,
                 use_ddp=False,
diff --git a/tests/trainer/test_trainer_cli.py b/tests/trainer/test_trainer_cli.py
index c39d643aed407..db8df3ef0d5cd 100644
--- a/tests/trainer/test_trainer_cli.py
+++ b/tests/trainer/test_trainer_cli.py
@@ -125,7 +125,7 @@ def _raise():
         "min_steps": None,
         "max_steps": None,
         "log_gpu_memory": None,
-        "distributed_backend": None,
+        "accelerator": None,
         "weights_save_path": None,
         "truncated_bptt_steps": None,
         "resume_from_checkpoint": None,
diff --git a/tests/trainer/test_trainer_test_loop.py b/tests/trainer/test_trainer_test_loop.py
index 6ccbbd6c0b557..26f6710d09f7d 100644
--- a/tests/trainer/test_trainer_test_loop.py
+++ b/tests/trainer/test_trainer_test_loop.py
@@ -58,7 +58,7 @@ def test_ddp_spawn_test(tmpdir):
         limit_train_batches=10,
         limit_val_batches=10,
         gpus=[0, 1],
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
     )
     trainer.fit(model)
 
     assert 'ckpt' in trainer.checkpoint_callback.best_model_path
diff --git a/tests/utilities/test_dtype_device_mixin.py b/tests/utilities/test_dtype_device_mixin.py
index 31ce37839dc58..0c2b13d686834 100644
--- a/tests/utilities/test_dtype_device_mixin.py
+++ b/tests/utilities/test_dtype_device_mixin.py
@@ -80,7 +80,7 @@ def test_submodules_multi_gpu_dp(tmpdir):
     model = TopModule()
     trainer = Trainer(
         default_root_dir=tmpdir,
-        distributed_backend='dp',
+        accelerator='dp',
         gpus=2,
         callbacks=[DeviceAssertCallback()],
         max_steps=1,
@@ -93,7 +93,7 @@ def test_submodules_multi_gpu_ddp_spawn(tmpdir):
     model = TopModule()
     trainer = Trainer(
         default_root_dir=tmpdir,
-        distributed_backend='ddp_spawn',
+        accelerator='ddp_spawn',
         gpus=2,
         callbacks=[DeviceAssertCallback()],
         max_steps=1,