Add support to traverse all python collection objects (#773)
Summary:
Pull Request resolved: #773

Fixes #752

This PR makes the `traverse` function support more collection data structures from Python. The `getstate_hook` is now invoked after the custom `__getstate__` logic, receiving the assembled state rather than the DataPipe itself (each diff below applies this reordering).
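
As a rough illustration of the traversal behavior (a simplified sketch only; the actual implementation lives in `torch.utils.data.graph`, see the X-link below, and `_find_datapipes` is a hypothetical helper, not a library API):

# Sketch: recursively collect DataPipes nested inside common Python collections.
from torch.utils.data import IterDataPipe, MapDataPipe

def _find_datapipes(obj, found=None):
    """Walk lists/tuples/sets/dicts and collect any DataPipe instances."""
    if found is None:
        found = []
    if isinstance(obj, (IterDataPipe, MapDataPipe)):
        found.append(obj)
    elif isinstance(obj, dict):
        for value in obj.values():
            _find_datapipes(value, found)
    elif isinstance(obj, (list, tuple, set, frozenset)):
        for item in obj:
            _find_datapipes(item, found)
    return found

With this kind of recursion, a DataPipe whose state keeps its inputs in, say, a dict or a set is still discovered during graph traversal.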

X-link: pytorch/pytorch#84079

Reviewed By: NivekT

Differential Revision: D39357886

Pulled By: ejguan

fbshipit-source-id: 69d71e3bb2eb5ef127efbe397ce6af9fb45fba86
ejguan authored and facebook-github-bot committed Sep 22, 2022
1 parent b87e016 commit 50787ca
Showing 7 changed files with 14 additions and 18 deletions.
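
Every `__getstate__` change below follows the same reordering: build the state tuple first, then pass it to `IterDataPipe.getstate_hook` if one is registered (previously the hook was called with `self` before the state existed). Schematically, for a hypothetical DataPipe (illustration only, not any specific class in this commit):

from torch.utils.data import IterDataPipe

class _ExampleIterDataPipe(IterDataPipe):
    def __init__(self, datapipe, some_attr=None):
        self.datapipe = datapipe
        self.some_attr = some_attr

    def __iter__(self):
        yield from self.datapipe

    def __getstate__(self):
        # Assemble the picklable state first...
        state = (self.datapipe, self.some_attr)
        # ...then let a registered hook post-process it. Before this commit
        # the hook was invoked with `self`, before `state` was built.
        if IterDataPipe.getstate_hook is not None:
            return IterDataPipe.getstate_hook(state)
        return state

    def __setstate__(self, state):
        self.datapipe, self.some_attr = state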
7 changes: 2 additions & 5 deletions test/test_adapter.py
@@ -4,8 +4,6 @@
 # This source code is licensed under the BSD-style license found in the
 # LICENSE file in the root directory of this source tree.
 
-import warnings
-
 from unittest import TestCase
 
 from torchdata.dataloader2 import DataLoader2, MultiProcessingReadingService, ReadingServiceInterface
@@ -21,10 +19,9 @@ def test_shuffle(self) -> None:
         dl = DataLoader2(datapipe=dp)
         self.assertEqual(list(range(size)), list(dl))
 
-        with warnings.catch_warnings(record=True) as wa:
+        with self.assertWarns(Warning, msg="`shuffle=True` was set, but the datapipe does not contain a `Shuffler`."):
             dl = DataLoader2(datapipe=dp, datapipe_adapter_fn=Shuffle(True))
-            self.assertNotEqual(list(range(size)), list(dl))
-            self.assertEqual(1, len(wa))
+        self.assertNotEqual(list(range(size)), list(dl))
 
         dp = IterableWrapper(range(size)).shuffle()
 
4 changes: 2 additions & 2 deletions torchdata/datapipes/iter/transform/bucketbatcher.py
@@ -71,8 +71,6 @@ def __len__(self) -> int:
         return len(self.datapipe)
 
     def __getstate__(self):
-        if IterDataPipe.getstate_hook is not None:
-            return IterDataPipe.getstate_hook(self)
         state = (
             self.datapipe,
             self._enabled,
@@ -81,6 +79,8 @@ def __getstate__(self):
             self._valid_iterator_id,
             self._number_of_samples_yielded,
         )
+        if IterDataPipe.getstate_hook is not None:
+            return IterDataPipe.getstate_hook(state)
         return state
 
     def __setstate__(self, state):
4 changes: 2 additions & 2 deletions torchdata/datapipes/iter/util/combining.py
@@ -127,8 +127,6 @@ def reset(self) -> None:
         self.buffer = OrderedDict()
 
     def __getstate__(self):
-        if IterDataPipe.getstate_hook is not None:
-            return IterDataPipe.getstate_hook(self)
         state = (
             self.source_datapipe,
             self.ref_datapipe,
@@ -138,6 +136,8 @@ def __getstate__(self):
             self.merge_fn,
             self.buffer_size,
         )
+        if IterDataPipe.getstate_hook is not None:
+            return IterDataPipe.getstate_hook(state)
         return state
 
     def __setstate__(self, state):
5 changes: 2 additions & 3 deletions torchdata/datapipes/iter/util/dataframemaker.py
@@ -150,14 +150,13 @@ def __iter__(self):
                 yield torcharrow.from_arrow(row_group, dtype=self.dtype)
 
     def __getstate__(self):
-        if IterDataPipe.getstate_hook is not None:
-            return IterDataPipe.getstate_hook(self)
-
         if DILL_AVAILABLE:
             dill_dtype = dill.dumps(self.dtype)
         else:
             dill_dtype = self.dtype
         state = (self.source_dp, dill_dtype, self.columns, self.device, self.use_threads)
+        if IterDataPipe.getstate_hook is not None:
+            return IterDataPipe.getstate_hook(state)
         return state
 
     def __setstate__(self, state):
4 changes: 2 additions & 2 deletions torchdata/datapipes/iter/util/paragraphaggregator.py
@@ -70,9 +70,9 @@ def reset(self) -> None:
         self.buffer = []
 
     def __getstate__(self):
-        if IterDataPipe.getstate_hook is not None:
-            return IterDataPipe.getstate_hook(self)
         state = (self.source_datapipe, self.joiner)
+        if IterDataPipe.getstate_hook is not None:
+            return IterDataPipe.getstate_hook(state)
         return state
 
     def __setstate__(self, state):
4 changes: 2 additions & 2 deletions torchdata/datapipes/iter/util/prefetch.py
@@ -203,12 +203,12 @@ def reset(self):
         self._done_callback = False
 
     def __getstate__(self):
-        if IterDataPipe.getstate_hook is not None:
-            return IterDataPipe.getstate_hook(self)
         state = (
             self.datapipe,
             self.timeout,
         )
+        if IterDataPipe.getstate_hook is not None:
+            return IterDataPipe.getstate_hook(state)
         return state
 
     def __setstate__(self, state):
4 changes: 2 additions & 2 deletions torchdata/datapipes/iter/util/randomsplitter.py
@@ -132,8 +132,6 @@ def override_seed(self, seed):
         return self
 
     def __getstate__(self):
-        if IterDataPipe.getstate_hook is not None:
-            return IterDataPipe.getstate_hook(self)
         state = (
             self.source_datapipe,
             self.total_length,
@@ -144,6 +142,8 @@ def __getstate__(self):
             self.weights,
             self._rng.getstate(),
         )
+        if IterDataPipe.getstate_hook is not None:
+            return IterDataPipe.getstate_hook(state)
         return state
 
     def __setstate__(self, state):
