diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3643655d55ec4..278a4363de9e8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,9 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).
 
 - Fixed `toggle_optimizer` to reset `requieres_grad` state ([#5574](https://github.com/PyTorchLightning/pytorch-lightning/pull/5574))
 
+- Fixed FileNotFoundError for best checkpoint when using DDP with Hydra ([#5629](https://github.com/PyTorchLightning/pytorch-lightning/pull/5629))
+
+
 - Fixed an error when logging a progress bar metric with a reserved name ([#5620](https://github.com/PyTorchLightning/pytorch-lightning/pull/5620))
 
 
diff --git a/pytorch_lightning/accelerators/ddp_accelerator.py b/pytorch_lightning/accelerators/ddp_accelerator.py
index 24b7c295eb0df..84830ae3e88e6 100644
--- a/pytorch_lightning/accelerators/ddp_accelerator.py
+++ b/pytorch_lightning/accelerators/ddp_accelerator.py
@@ -134,11 +134,16 @@ def _call_children_scripts(self):
                 del env_copy['PL_GLOBAL_SEED']
 
             # start process
-            # if hydra is available and initialized, make sure to set the cwd correctly
+            # if hydra is available and initialized, make sure to set the original cwd correctly
+            # and pass current cwd for ddp processes (which hydra has overridden)
             cwd: Optional[str] = None
             if HYDRA_AVAILABLE:
                 if HydraConfig.initialized():
                     cwd = get_original_cwd()
+                    command += [
+                        f'hydra.run.dir={os.getcwd()}',
+                        f'hydra.job.name=train_ddp_process_{local_rank}'
+                    ]
             proc = subprocess.Popen(command, env=env_copy, cwd=cwd)
             self.interactive_ddp_procs.append(proc)
 
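
For context on the fix: when Hydra is active, each spawned DDP worker would otherwise initialize its own Hydra run directory, so the best-checkpoint path recorded by rank 0 does not exist from the workers' point of view, hence the FileNotFoundError. The patch appends Hydra command-line overrides (`hydra.run.dir`, `hydra.job.name`) to the child-process command so every rank writes into the parent's run directory. Below is a minimal standalone sketch of the same idea, not the accelerator's actual code; the `launch_child` helper and the use of `sys.argv` as the re-run command are illustrative assumptions.

```python
# Sketch: spawn a DDP-style child process that reuses the parent's Hydra run dir
# by appending Hydra overrides to the command line (assumes Hydra is installed).
import os
import subprocess
import sys
from typing import List, Optional

try:
    from hydra.core.hydra_config import HydraConfig
    from hydra.utils import get_original_cwd
    HYDRA_AVAILABLE = True
except ImportError:
    HYDRA_AVAILABLE = False


def launch_child(local_rank: int) -> subprocess.Popen:
    # re-run the same script in a subprocess, one per additional rank
    command: List[str] = [sys.executable] + sys.argv
    env_copy = os.environ.copy()
    env_copy['LOCAL_RANK'] = str(local_rank)

    cwd: Optional[str] = None
    if HYDRA_AVAILABLE and HydraConfig.initialized():
        # Hydra changed the parent's cwd; start the child from the original cwd,
        # but pin its run dir (and job name) to the parent's current run dir so
        # all ranks resolve checkpoint paths against the same directory.
        cwd = get_original_cwd()
        command += [
            f'hydra.run.dir={os.getcwd()}',
            f'hydra.job.name=train_ddp_process_{local_rank}',
        ]
    return subprocess.Popen(command, env=env_copy, cwd=cwd)
```

Passing `hydra.run.dir` as an override, rather than only restoring the original cwd, is what keeps checkpoint paths consistent across ranks; the per-rank `hydra.job.name` mainly keeps the workers' Hydra logs distinguishable.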