From ddf33b5e64c9dc84eb8ff0df29468208d5e1e4ba Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Fri, 1 Apr 2022 15:48:28 +0000
Subject: [PATCH 1/3] Add DDP support test for prototype datasets

---
 test/test_prototype_builtin_datasets.py | 28 +++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py
index f8dc3a0542b..763e87dc215 100644
--- a/test/test_prototype_builtin_datasets.py
+++ b/test/test_prototype_builtin_datasets.py
@@ -8,6 +8,7 @@
 from builtin_dataset_mocks import parametrize_dataset_mocks, DATASET_MOCKS
 from torch.testing._comparison import assert_equal, TensorLikePair, ObjectPair
 from torch.utils.data.graph import traverse
+from torch.utils.data import DataLoader
 from torch.utils.data.graph_settings import get_all_graph_pipes
 from torchdata.datapipes.iter import Shuffler, ShardingFilter
 from torchvision._utils import sequence_to_str
@@ -109,7 +110,7 @@ def test_transformable(self, test_home, dataset_mock, config):
 
         next(iter(dataset.map(transforms.Identity())))
 
-    @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237")
+    # @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237")
     @parametrize_dataset_mocks(DATASET_MOCKS)
     def test_serializable(self, test_home, dataset_mock, config):
         dataset_mock.prepare(test_home, config)
@@ -118,10 +119,33 @@ def test_serializable(self, test_home, dataset_mock, config):
 
         pickle.dumps(dataset)
 
+    @parametrize_dataset_mocks(DATASET_MOCKS)
+    def test_ddp(self, test_home, dataset_mock, config,):
+        dataset_mock.prepare(test_home, config)
+
+        import os
+        if not torch.distributed.is_initialized():
+            os.environ["MASTER_ADDR"] = "localhost"
+            os.environ["MASTER_PORT"] = "29501"
+            torch.distributed.init_process_group(backend="gloo", world_size=1, rank=0)
+            torch.distributed.barrier()
+
+        dataset = datasets.load(dataset_mock.name, **config)
+
+        # Ugly hack: custom collate_fn because the default one doesn't handle None values
+        from torch.utils.data import default_collate
+        def collate_fn(batch):
+            return default_collate([x["image"] for x in batch])
+
+        dl = DataLoader(dataset, collate_fn=collate_fn)
+
+        next(iter(dl))
+        # TODO: Do we need to manually shut down DPP now??
+
     # TODO: we need to enforce not only that both a Shuffler and a ShardingFilter are part of the datapipe, but also
     # that the Shuffler comes before the ShardingFilter. Early commits in https://github.com/pytorch/vision/pull/5680
     # contain a custom test for that, but we opted to wait for a potential solution / test from torchdata for now.
-    @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237")
+    # @pytest.mark.xfail(reason="See https://github.com/pytorch/data/issues/237")
     @parametrize_dataset_mocks(DATASET_MOCKS)
     @pytest.mark.parametrize("annotation_dp_type", (Shuffler, ShardingFilter))
     def test_has_annotations(self, test_home, dataset_mock, config, annotation_dp_type):

From e1919dc061f02e03d3d7ff02a49dfbefd87d38f8 Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Mon, 4 Apr 2022 16:11:24 +0100
Subject: [PATCH 2/3] Add Fixture with proper shutdown

---
 test/test_prototype_builtin_datasets.py | 31 ++++++++++++++++---------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py
index 763e87dc215..63ada3528e4 100644
--- a/test/test_prototype_builtin_datasets.py
+++ b/test/test_prototype_builtin_datasets.py
@@ -1,5 +1,6 @@
 import functools
 import io
+import os
 import pickle
 from pathlib import Path
 
@@ -7,8 +8,8 @@
 import torch
 from builtin_dataset_mocks import parametrize_dataset_mocks, DATASET_MOCKS
 from torch.testing._comparison import assert_equal, TensorLikePair, ObjectPair
+from torch.utils.data import DataLoader, default_collate
 from torch.utils.data.graph import traverse
-from torch.utils.data import DataLoader
 from torch.utils.data.graph_settings import get_all_graph_pipes
 from torchdata.datapipes.iter import Shuffler, ShardingFilter
 from torchvision._utils import sequence_to_str
@@ -31,6 +32,23 @@ def test_home(mocker, tmp_path):
     yield tmp_path
 
 
+@pytest.fixture
+def ddp_fixture():
+    # Note: we only test DDP with world_size=1, but it should be enough for our purpose.
+    # If we ever need to go full DDP, we'll need to implement a much more complex logic, similar to
+    # MultiProcessTestCase from torch core.
+
+    os.environ["MASTER_ADDR"] = "localhost"
+    os.environ["MASTER_PORT"] = "29501"
+    torch.distributed.init_process_group(backend="gloo", world_size=1, rank=0)
+    torch.distributed.barrier()
+
+    yield
+
+    torch.distributed.barrier()
+    torch.distributed.destroy_process_group()
+
+
 def test_coverage():
     untested_datasets = set(datasets.list_datasets()) - DATASET_MOCKS.keys()
     if untested_datasets:
@@ -120,27 +138,18 @@ def test_serializable(self, test_home, dataset_mock, config):
         pickle.dumps(dataset)
 
     @parametrize_dataset_mocks(DATASET_MOCKS)
-    def test_ddp(self, test_home, dataset_mock, config,):
+    def test_ddp(self, test_home, dataset_mock, config, ddp_fixture):
         dataset_mock.prepare(test_home, config)
 
-        import os
-        if not torch.distributed.is_initialized():
-            os.environ["MASTER_ADDR"] = "localhost"
-            os.environ["MASTER_PORT"] = "29501"
-            torch.distributed.init_process_group(backend="gloo", world_size=1, rank=0)
-            torch.distributed.barrier()
-
         dataset = datasets.load(dataset_mock.name, **config)
 
         # Ugly hack: custom collate_fn because the default one doesn't handle None values
-        from torch.utils.data import default_collate
         def collate_fn(batch):
             return default_collate([x["image"] for x in batch])
 
         dl = DataLoader(dataset, collate_fn=collate_fn)
 
         next(iter(dl))
-        # TODO: Do we need to manually shut down DPP now??
 
     # TODO: we need to enforce not only that both a Shuffler and a ShardingFilter are part of the datapipe, but also
     # that the Shuffler comes before the ShardingFilter. Early commits in https://github.com/pytorch/vision/pull/5680

From 9aac9ee82a0e4831ee52780ca511dd7a8db76eec Mon Sep 17 00:00:00 2001
From: Nicolas Hug
Date: Mon, 4 Apr 2022 16:15:00 +0100
Subject: [PATCH 3/3] Simpler collate_fn

---
 test/test_prototype_builtin_datasets.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/test/test_prototype_builtin_datasets.py b/test/test_prototype_builtin_datasets.py
index 63ada3528e4..06f86c0a016 100644
--- a/test/test_prototype_builtin_datasets.py
+++ b/test/test_prototype_builtin_datasets.py
@@ -8,7 +8,7 @@
 import torch
 from builtin_dataset_mocks import parametrize_dataset_mocks, DATASET_MOCKS
 from torch.testing._comparison import assert_equal, TensorLikePair, ObjectPair
-from torch.utils.data import DataLoader, default_collate
+from torch.utils.data import DataLoader
 from torch.utils.data.graph import traverse
 from torch.utils.data.graph_settings import get_all_graph_pipes
 from torchdata.datapipes.iter import Shuffler, ShardingFilter
@@ -143,11 +143,7 @@ def test_ddp(self, test_home, dataset_mock, config, ddp_fixture):
 
         dataset = datasets.load(dataset_mock.name, **config)
 
-        # Ugly hack: custom collate_fn because the default one doesn't handle None values
-        def collate_fn(batch):
-            return default_collate([x["image"] for x in batch])
-
-        dl = DataLoader(dataset, collate_fn=collate_fn)
+        dl = DataLoader(dataset, collate_fn=lambda batch: batch)
 
         next(iter(dl))
 