open-mmlab · Junjun2016 · Sep 11, 2021 · Sep 10, 2021 · Sep 10, 2021
diff --git a/tools/train.py b/tools/train.py
@@ -7,7 +7,7 @@
 
 import mmcv
 import torch
-from mmcv.runner import init_dist
+from mmcv.runner import get_dist_info, init_dist
 from mmcv.utils import Config, DictAction, get_git_hash
 
 from mmseg import __version__
@@ -94,6 +94,9 @@ def main():
     else:
         distributed = True
         init_dist(args.launcher, **cfg.dist_params)
+        # gpu_ids is used to calculate iter when resuming from a checkpoint.
+        _, world_size = get_dist_info()
+        cfg.gpu_ids = range(world_size)
 
     # create work_dir
     mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))