From bcee0e45653e83be6b6e091a2368534bdb7e7af6 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 22:44:47 +0800 Subject: [PATCH 01/22] feat: naive impl of list_file for fsspec --- torchdata/datapipes/iter/load/fsspec.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/torchdata/datapipes/iter/load/fsspec.py b/torchdata/datapipes/iter/load/fsspec.py index 2ded99eee..48cb9ee04 100644 --- a/torchdata/datapipes/iter/load/fsspec.py +++ b/torchdata/datapipes/iter/load/fsspec.py @@ -100,6 +100,9 @@ def __iter__(self) -> Iterator[str]: if not starts_with: yield abs_path + def list_files(self) -> List[str]: + return [fp for fp in self] + @functional_datapipe("open_files_by_fsspec") class FSSpecFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]): From 2a09e7710fda49a47c01b18d61a7a22df7ae6386 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 22:48:48 +0800 Subject: [PATCH 02/22] test: test for list_file() for FSSpecFileLister --- test/test_fsspec.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index 7e596ef6e..52efd1ac5 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -82,6 +82,16 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): # check all file paths within sub_folder are listed self.assertEqual(file_lister, temp_files) + @skipIfNoFSSpec + def test_fsspec_file_lister_iterdatapipe_list_file(self): + datapipe = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) + + for path in datapipe.list_files(): + self.assertIn( + path.split("://")[1], + {fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files}, + ) + @skipIfNoFSSpec def test_fsspec_file_loader_iterdatapipe(self): datapipe1 = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) From 225189a350d1db6b289f4f022bef74bdeb2fa926 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 23:04:56 +0800 Subject: [PATCH 03/22] test: consistency with test_fsspec_file_lister_iterdatapipe_with_list --- test/test_fsspec.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index 52efd1ac5..1da378d5c 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -86,12 +86,28 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): def test_fsspec_file_lister_iterdatapipe_list_file(self): datapipe = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) + # Should be consistent with test_fsspec_file_lister_iterdatapipe for path in datapipe.list_files(): self.assertIn( path.split("://")[1], {fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files}, ) + @skipIfNoFSSpec + def test_fsspec_file_lister_iterdatapipe_with_list_list_file(self): + datapipe = FSSpecFileLister(root=["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name]) + + paths = datapipe.list_files() + paths = sorted(map(lambda path: path.split("://")[1], paths)) + temp_files = list( + map( + lambda file: fsspec.implementations.local.make_path_posix(file), + self.temp_sub_files + self.temp_sub_files_2, + ) + ) + temp_files.sort() + self.assertEqual(paths, temp_files) + @skipIfNoFSSpec def test_fsspec_file_loader_iterdatapipe(self): datapipe1 = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) From 8da9374db2f45aae56e7f3167db54bf0ef451017 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 23:07:23 +0800 Subject: [PATCH 04/22] test: ensure protocol is present --- test/test_fsspec.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index 1da378d5c..5f37e478a 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -108,6 +108,12 @@ def test_fsspec_file_lister_iterdatapipe_with_list_list_file(self): temp_files.sort() self.assertEqual(paths, temp_files) + @skipIfNoFSSpec + def test_fsspec_iterdatapipe_list_file_has_protocol(self): + datapipe = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) + for path in datapipe.list_files(): + self.assertIn("file://", path) + @skipIfNoFSSpec def test_fsspec_file_loader_iterdatapipe(self): datapipe1 = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) From ab915248059c458b1d924d0d0df7827a5e4c768a Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 23:10:22 +0800 Subject: [PATCH 05/22] feat: naive impl of list_file for iopath --- torchdata/datapipes/iter/load/iopath.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/torchdata/datapipes/iter/load/iopath.py b/torchdata/datapipes/iter/load/iopath.py index 0a04d6517..6c2b81444 100644 --- a/torchdata/datapipes/iter/load/iopath.py +++ b/torchdata/datapipes/iter/load/iopath.py @@ -95,6 +95,9 @@ def __iter__(self) -> Iterator[str]: if match_masks(file_name, self.masks): yield os.path.join(path, file_name) + def list_files(self) -> List[str]: + return [path for path in self] + @functional_datapipe("open_files_by_iopath") class IoPathFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]): From aafaa71429d2ba62db312d9d70414a33e449b75b Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 23:15:32 +0800 Subject: [PATCH 06/22] test: test for list_file() of iopath --- test/test_local_io.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/test_local_io.py b/test/test_local_io.py index 3be37a5ed..2f343356e 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -660,6 +660,16 @@ def test_io_path_file_lister_iterdatapipe(self): for path in datapipe: self.assertTrue(path in self.temp_sub_files) + @skipIfNoIoPath + def test_io_path_list_files_iterdatapipe(self): + datapipe = IoPathFileLister(root=self.temp_sub_dir.name) + + listed = datapipe.list_files() + listed.sort() + tmp_files = [*self.temp_sub_files] + tmp_files.sort() + self.assertEqual(listed, tmp_files) + @skipIfNoIoPath def test_io_path_file_lister_iterdatapipe_with_list(self): datapipe = IoPathFileLister(root=[self.temp_sub_dir.name, self.temp_sub_dir_2.name]) From 774a6ac3b9455cd677cadd6e52115cf3c572c852 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 23:16:58 +0800 Subject: [PATCH 07/22] test: consistency for iopath --- test/test_local_io.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_local_io.py b/test/test_local_io.py index 2f343356e..ce6aeb4f7 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -682,6 +682,17 @@ def test_io_path_file_lister_iterdatapipe_with_list(self): # check all file paths within sub_folder are listed self.assertEqual(file_lister, all_temp_files) + @skipIfNoIoPath + def test_io_path_list_files_iterdatapipe_with_list(self): + datapipe = IoPathFileLister(root=[self.temp_sub_dir.name, self.temp_sub_dir_2.name]) + + listed = datapipe.list_files() + listed.sort() + all_temp_files = [*self.temp_sub_files, *self.temp_sub_files_2] + all_temp_files.sort() + + self.assertEqual(listed, all_temp_files) + @skipIfNoIoPath def test_io_path_file_loader_iterdatapipe(self): datapipe1 = IoPathFileLister(root=self.temp_sub_dir.name) From 2c52f9db028dba31690db4b7493466e135190082 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 23:42:14 +0800 Subject: [PATCH 08/22] feat: functional_datapipe list_file_by --- torchdata/datapipes/iter/load/fsspec.py | 4 +--- torchdata/datapipes/iter/load/iopath.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/torchdata/datapipes/iter/load/fsspec.py b/torchdata/datapipes/iter/load/fsspec.py index 48cb9ee04..ea16b9dca 100644 --- a/torchdata/datapipes/iter/load/fsspec.py +++ b/torchdata/datapipes/iter/load/fsspec.py @@ -33,6 +33,7 @@ def _assert_fsspec() -> None: ) +@functional_datapipe("list_file_by_fsspec") class FSSpecFileListerIterDataPipe(IterDataPipe[str]): r""" Lists the contents of the directory at the provided ``root`` pathname or URL, @@ -100,9 +101,6 @@ def __iter__(self) -> Iterator[str]: if not starts_with: yield abs_path - def list_files(self) -> List[str]: - return [fp for fp in self] - @functional_datapipe("open_files_by_fsspec") class FSSpecFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]): diff --git a/torchdata/datapipes/iter/load/iopath.py b/torchdata/datapipes/iter/load/iopath.py index 6c2b81444..35f720ff0 100644 --- a/torchdata/datapipes/iter/load/iopath.py +++ b/torchdata/datapipes/iter/load/iopath.py @@ -39,6 +39,7 @@ def _create_default_pathmanager(): return pathmgr +@functional_datapipe("list_file_by_iopath") class IoPathFileListerIterDataPipe(IterDataPipe[str]): r""" Lists the contents of the directory at the provided ``root`` pathname or URL, @@ -95,9 +96,6 @@ def __iter__(self) -> Iterator[str]: if match_masks(file_name, self.masks): yield os.path.join(path, file_name) - def list_files(self) -> List[str]: - return [path for path in self] - @functional_datapipe("open_files_by_iopath") class IoPathFileOpenerIterDataPipe(IterDataPipe[Tuple[str, StreamWrapper]]): From 44909dd65d0996aa218e6b7d86797587c397079e Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 25 May 2022 23:47:43 +0800 Subject: [PATCH 09/22] test: update tests for fsspec and iopath for functional api --- test/test_fsspec.py | 9 +++++---- test/test_local_io.py | 8 ++++---- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index 5f37e478a..eebf08324 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -83,21 +83,22 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): self.assertEqual(file_lister, temp_files) @skipIfNoFSSpec - def test_fsspec_file_lister_iterdatapipe_list_file(self): + def test_fsspec_functional_list_file(self): datapipe = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) # Should be consistent with test_fsspec_file_lister_iterdatapipe - for path in datapipe.list_files(): + paths = list(datapipe.list_file_by_fsspec()) + for path in paths: self.assertIn( path.split("://")[1], {fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files}, ) @skipIfNoFSSpec - def test_fsspec_file_lister_iterdatapipe_with_list_list_file(self): + def test_fsspec_functional_list_file(self): datapipe = FSSpecFileLister(root=["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name]) - paths = datapipe.list_files() + paths = list(datapipe.list_file_by_fsspec()) paths = sorted(map(lambda path: path.split("://")[1], paths)) temp_files = list( map( diff --git a/test/test_local_io.py b/test/test_local_io.py index ce6aeb4f7..f07de32d7 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -661,10 +661,10 @@ def test_io_path_file_lister_iterdatapipe(self): self.assertTrue(path in self.temp_sub_files) @skipIfNoIoPath - def test_io_path_list_files_iterdatapipe(self): + def test_io_path_functional_list_files_iterdatapipe(self): datapipe = IoPathFileLister(root=self.temp_sub_dir.name) - listed = datapipe.list_files() + listed = list(datapipe.list_file_by_iopath()) listed.sort() tmp_files = [*self.temp_sub_files] tmp_files.sort() @@ -683,10 +683,10 @@ def test_io_path_file_lister_iterdatapipe_with_list(self): self.assertEqual(file_lister, all_temp_files) @skipIfNoIoPath - def test_io_path_list_files_iterdatapipe_with_list(self): + def test_io_path_functional_list_files_iterdatapipe_with_list(self): datapipe = IoPathFileLister(root=[self.temp_sub_dir.name, self.temp_sub_dir_2.name]) - listed = datapipe.list_files() + listed = list(datapipe.list_file_by_iopath()) listed.sort() all_temp_files = [*self.temp_sub_files, *self.temp_sub_files_2] all_temp_files.sort() From e777d267fbaea38944d448c3aa34c0bbaabc2b40 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Thu, 26 May 2022 00:05:33 +0800 Subject: [PATCH 10/22] test: merge functional API tests with regular tests --- test/test_fsspec.py | 39 ++++++++++++++------------------------- test/test_local_io.py | 19 ++++--------------- 2 files changed, 18 insertions(+), 40 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index eebf08324..bb0680adf 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -64,6 +64,15 @@ def test_fsspec_file_lister_iterdatapipe(self): {fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files}, ) + # checks for functional API + datapipe = IterableWrapper([self.temp_sub_dir.name]) + listed = list(datapipe.list_file_by_fsspec()) + for path in listed: + self.assertIn( + path, + {fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files}, + ) + @skipIfNoFSSpec def test_fsspec_file_lister_iterdatapipe_with_list(self): datapipe = FSSpecFileLister(root=["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name]) @@ -82,32 +91,12 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): # check all file paths within sub_folder are listed self.assertEqual(file_lister, temp_files) - @skipIfNoFSSpec - def test_fsspec_functional_list_file(self): - datapipe = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) - - # Should be consistent with test_fsspec_file_lister_iterdatapipe - paths = list(datapipe.list_file_by_fsspec()) - for path in paths: - self.assertIn( - path.split("://")[1], - {fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files}, - ) + # checks for functional API + datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) + listed = list(datapipe.list_file_by_fsspec()) + listed.sort() + self.assertEqual(listed, temp_files) - @skipIfNoFSSpec - def test_fsspec_functional_list_file(self): - datapipe = FSSpecFileLister(root=["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name]) - - paths = list(datapipe.list_file_by_fsspec()) - paths = sorted(map(lambda path: path.split("://")[1], paths)) - temp_files = list( - map( - lambda file: fsspec.implementations.local.make_path_posix(file), - self.temp_sub_files + self.temp_sub_files_2, - ) - ) - temp_files.sort() - self.assertEqual(paths, temp_files) @skipIfNoFSSpec def test_fsspec_iterdatapipe_list_file_has_protocol(self): diff --git a/test/test_local_io.py b/test/test_local_io.py index f07de32d7..11e37a64d 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -660,15 +660,10 @@ def test_io_path_file_lister_iterdatapipe(self): for path in datapipe: self.assertTrue(path in self.temp_sub_files) - @skipIfNoIoPath - def test_io_path_functional_list_files_iterdatapipe(self): - datapipe = IoPathFileLister(root=self.temp_sub_dir.name) - + datapipe = IterableWrapper([self.temp_sub_dir.name]) listed = list(datapipe.list_file_by_iopath()) - listed.sort() - tmp_files = [*self.temp_sub_files] - tmp_files.sort() - self.assertEqual(listed, tmp_files) + for path in listed: + self.assertTrue(path in self.temp_sub_files) @skipIfNoIoPath def test_io_path_file_lister_iterdatapipe_with_list(self): @@ -682,15 +677,9 @@ def test_io_path_file_lister_iterdatapipe_with_list(self): # check all file paths within sub_folder are listed self.assertEqual(file_lister, all_temp_files) - @skipIfNoIoPath - def test_io_path_functional_list_files_iterdatapipe_with_list(self): - datapipe = IoPathFileLister(root=[self.temp_sub_dir.name, self.temp_sub_dir_2.name]) - + datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) listed = list(datapipe.list_file_by_iopath()) listed.sort() - all_temp_files = [*self.temp_sub_files, *self.temp_sub_files_2] - all_temp_files.sort() - self.assertEqual(listed, all_temp_files) @skipIfNoIoPath From 04b17d0ce18022ae710983dd1f5afea07a30d411 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Thu, 26 May 2022 00:07:24 +0800 Subject: [PATCH 11/22] test: delete unnecessary test --- test/test_fsspec.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index bb0680adf..e175a2b28 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -97,13 +97,6 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): listed.sort() self.assertEqual(listed, temp_files) - - @skipIfNoFSSpec - def test_fsspec_iterdatapipe_list_file_has_protocol(self): - datapipe = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) - for path in datapipe.list_files(): - self.assertIn("file://", path) - @skipIfNoFSSpec def test_fsspec_file_loader_iterdatapipe(self): datapipe1 = FSSpecFileLister(root="file://" + self.temp_sub_dir.name) From 4d88bedf3d1cf45bdc45df077071a3897ccc0c67 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Thu, 26 May 2022 09:23:41 +0800 Subject: [PATCH 12/22] style: change variable names for functional lister tests --- test/test_fsspec.py | 10 +++++----- test/test_local_io.py | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index e175a2b28..fc4a9480b 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -66,8 +66,8 @@ def test_fsspec_file_lister_iterdatapipe(self): # checks for functional API datapipe = IterableWrapper([self.temp_sub_dir.name]) - listed = list(datapipe.list_file_by_fsspec()) - for path in listed: + datapipe = list(datapipe.list_file_by_fsspec()) + for path in datapipe: self.assertIn( path, {fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files}, @@ -93,9 +93,9 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): # checks for functional API datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) - listed = list(datapipe.list_file_by_fsspec()) - listed.sort() - self.assertEqual(listed, temp_files) + datapipe = list(datapipe.list_file_by_fsspec()) + datapipe.sort() + self.assertEqual(datapipe, temp_files) @skipIfNoFSSpec def test_fsspec_file_loader_iterdatapipe(self): diff --git a/test/test_local_io.py b/test/test_local_io.py index 11e37a64d..993188324 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -661,8 +661,8 @@ def test_io_path_file_lister_iterdatapipe(self): self.assertTrue(path in self.temp_sub_files) datapipe = IterableWrapper([self.temp_sub_dir.name]) - listed = list(datapipe.list_file_by_iopath()) - for path in listed: + datapipe = list(datapipe.list_file_by_iopath()) + for path in datapipe: self.assertTrue(path in self.temp_sub_files) @skipIfNoIoPath @@ -678,9 +678,9 @@ def test_io_path_file_lister_iterdatapipe_with_list(self): self.assertEqual(file_lister, all_temp_files) datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) - listed = list(datapipe.list_file_by_iopath()) - listed.sort() - self.assertEqual(listed, all_temp_files) + datapipe = list(datapipe.list_file_by_iopath()) + datapipe.sort() + self.assertEqual(datapipe, all_temp_files) @skipIfNoIoPath def test_io_path_file_loader_iterdatapipe(self): From 0971a6e7cce3d95c3d2c3ccf77606fb719de3c8c Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Fri, 27 May 2022 11:43:56 +0800 Subject: [PATCH 13/22] chore: directly iterate over datapipe --- test/test_fsspec.py | 2 +- test/test_local_io.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index fc4a9480b..22b54ff0b 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -66,7 +66,7 @@ def test_fsspec_file_lister_iterdatapipe(self): # checks for functional API datapipe = IterableWrapper([self.temp_sub_dir.name]) - datapipe = list(datapipe.list_file_by_fsspec()) + datapipe = datapipe.list_file_by_fsspec() for path in datapipe: self.assertIn( path, diff --git a/test/test_local_io.py b/test/test_local_io.py index 993188324..1f7e69886 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -661,7 +661,7 @@ def test_io_path_file_lister_iterdatapipe(self): self.assertTrue(path in self.temp_sub_files) datapipe = IterableWrapper([self.temp_sub_dir.name]) - datapipe = list(datapipe.list_file_by_iopath()) + datapipe = datapipe.list_file_by_iopath() for path in datapipe: self.assertTrue(path in self.temp_sub_files) From 0155dc1caceefcfe8f23a1da12017769fafcc6ea Mon Sep 17 00:00:00 2001 From: Robert Date: Tue, 31 May 2022 23:25:59 +0800 Subject: [PATCH 14/22] refactor: fix grammar Co-authored-by: Kevin Tse --- torchdata/datapipes/iter/load/iopath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchdata/datapipes/iter/load/iopath.py b/torchdata/datapipes/iter/load/iopath.py index 35f720ff0..61a3db2dd 100644 --- a/torchdata/datapipes/iter/load/iopath.py +++ b/torchdata/datapipes/iter/load/iopath.py @@ -39,7 +39,7 @@ def _create_default_pathmanager(): return pathmgr -@functional_datapipe("list_file_by_iopath") +@functional_datapipe("list_files_by_iopath") class IoPathFileListerIterDataPipe(IterDataPipe[str]): r""" Lists the contents of the directory at the provided ``root`` pathname or URL, From 7c892014b9bb50509f6724ccaba84ee5d7edc39d Mon Sep 17 00:00:00 2001 From: Robert Date: Tue, 31 May 2022 23:26:04 +0800 Subject: [PATCH 15/22] refactor: fix grammar Co-authored-by: Kevin Tse --- torchdata/datapipes/iter/load/fsspec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/torchdata/datapipes/iter/load/fsspec.py b/torchdata/datapipes/iter/load/fsspec.py index ea16b9dca..14e9128d6 100644 --- a/torchdata/datapipes/iter/load/fsspec.py +++ b/torchdata/datapipes/iter/load/fsspec.py @@ -33,7 +33,7 @@ def _assert_fsspec() -> None: ) -@functional_datapipe("list_file_by_fsspec") +@functional_datapipe("list_files_by_fsspec") class FSSpecFileListerIterDataPipe(IterDataPipe[str]): r""" Lists the contents of the directory at the provided ``root`` pathname or URL, From 492c9b39cf811d59ff11d19d26f33bd249f0ffe3 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Tue, 31 May 2022 23:27:54 +0800 Subject: [PATCH 16/22] test: update tests with new functional API --- test/test_fsspec.py | 4 ++-- test/test_local_io.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index 22b54ff0b..067563d82 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -66,7 +66,7 @@ def test_fsspec_file_lister_iterdatapipe(self): # checks for functional API datapipe = IterableWrapper([self.temp_sub_dir.name]) - datapipe = datapipe.list_file_by_fsspec() + datapipe = datapipe.list_files_by_fsspec() for path in datapipe: self.assertIn( path, @@ -93,7 +93,7 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): # checks for functional API datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) - datapipe = list(datapipe.list_file_by_fsspec()) + datapipe = datapipe.list_files_by_fsspec() datapipe.sort() self.assertEqual(datapipe, temp_files) diff --git a/test/test_local_io.py b/test/test_local_io.py index 1f7e69886..f034a91b1 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -661,7 +661,7 @@ def test_io_path_file_lister_iterdatapipe(self): self.assertTrue(path in self.temp_sub_files) datapipe = IterableWrapper([self.temp_sub_dir.name]) - datapipe = datapipe.list_file_by_iopath() + datapipe = datapipe.list_files_by_iopath() for path in datapipe: self.assertTrue(path in self.temp_sub_files) @@ -678,7 +678,7 @@ def test_io_path_file_lister_iterdatapipe_with_list(self): self.assertEqual(file_lister, all_temp_files) datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) - datapipe = list(datapipe.list_file_by_iopath()) + datapipe = datapipe.list_files_by_iopath() datapipe.sort() self.assertEqual(datapipe, all_temp_files) From 626385a47c4c59d45724f1647264eeeafce4e154 Mon Sep 17 00:00:00 2001 From: Robert Date: Tue, 31 May 2022 23:43:13 +0800 Subject: [PATCH 17/22] Update test/test_fsspec.py Co-authored-by: Erjia Guan <68879799+ejguan@users.noreply.github.com> --- test/test_fsspec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index 067563d82..bb2c6d769 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -94,8 +94,8 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): # checks for functional API datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) datapipe = datapipe.list_files_by_fsspec() - datapipe.sort() - self.assertEqual(datapipe, temp_files) + res = list(datapipe).sort() + self.assertEqual(res, temp_files) @skipIfNoFSSpec def test_fsspec_file_loader_iterdatapipe(self): From 6e3c661bfeea325c0ee6cc44e590ac224559f758 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 1 Jun 2022 08:16:42 +0800 Subject: [PATCH 18/22] test: fix failing tests due to missing file:// prefix --- test/test_fsspec.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index bb2c6d769..058f3b962 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -65,7 +65,7 @@ def test_fsspec_file_lister_iterdatapipe(self): ) # checks for functional API - datapipe = IterableWrapper([self.temp_sub_dir.name]) + datapipe = IterableWrapper(["file://" + self.temp_sub_dir.name]) datapipe = datapipe.list_files_by_fsspec() for path in datapipe: self.assertIn( @@ -92,7 +92,7 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): self.assertEqual(file_lister, temp_files) # checks for functional API - datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) + datapipe = IterableWrapper(["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name]) datapipe = datapipe.list_files_by_fsspec() res = list(datapipe).sort() self.assertEqual(res, temp_files) From 3d1702e5d8c84e6ab1fc89e9405863be520ba9bc Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Wed, 1 Jun 2022 08:17:02 +0800 Subject: [PATCH 19/22] test: fix breaking test due to unsortable datapipe --- test/test_local_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/test_local_io.py b/test/test_local_io.py index f034a91b1..e6f9afa27 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -679,8 +679,8 @@ def test_io_path_file_lister_iterdatapipe_with_list(self): datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) datapipe = datapipe.list_files_by_iopath() - datapipe.sort() - self.assertEqual(datapipe, all_temp_files) + results = list(datapipe).sort() + self.assertEqual(results, all_temp_files) @skipIfNoIoPath def test_io_path_file_loader_iterdatapipe(self): From cf1085c68804beeda0c95e174471d6a20e10df84 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Thu, 2 Jun 2022 09:44:52 +0800 Subject: [PATCH 20/22] test: fix failing test_fsspec_file_lister_iterdp_with_lsit --- test/test_fsspec.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index 058f3b962..29b7eb7af 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -94,7 +94,13 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): # checks for functional API datapipe = IterableWrapper(["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name]) datapipe = datapipe.list_files_by_fsspec() - res = list(datapipe).sort() + res = list(map(lambda path: path.split("://")[1], datapipe)).sort() + temp_files = list( + map( + lambda file: fsspec.implementations.local.make_path_posix(file), + self.temp_sub_dir_files + self.temp_sub_dir_files_2, + ) + ) self.assertEqual(res, temp_files) @skipIfNoFSSpec From 2a4efb4fc677302f893cb6d0690b2858ee0c4d8d Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Thu, 2 Jun 2022 22:34:00 +0800 Subject: [PATCH 21/22] test: fix failing tests due to .sort() being in-place I completely forgot that sort() was in place and returns None, oops --- test/test_fsspec.py | 6 ++++-- test/test_local_io.py | 3 ++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index 29b7eb7af..de6f61875 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -94,13 +94,15 @@ def test_fsspec_file_lister_iterdatapipe_with_list(self): # checks for functional API datapipe = IterableWrapper(["file://" + self.temp_sub_dir.name, "file://" + self.temp_sub_dir_2.name]) datapipe = datapipe.list_files_by_fsspec() - res = list(map(lambda path: path.split("://")[1], datapipe)).sort() + res = list(map(lambda path: path.split("://")[1], datapipe)) + res.sort() temp_files = list( map( lambda file: fsspec.implementations.local.make_path_posix(file), - self.temp_sub_dir_files + self.temp_sub_dir_files_2, + self.temp_sub_files + self.temp_sub_files_2, ) ) + temp_files.sort() self.assertEqual(res, temp_files) @skipIfNoFSSpec diff --git a/test/test_local_io.py b/test/test_local_io.py index e6f9afa27..29f8de3b5 100644 --- a/test/test_local_io.py +++ b/test/test_local_io.py @@ -679,7 +679,8 @@ def test_io_path_file_lister_iterdatapipe_with_list(self): datapipe = IterableWrapper([self.temp_sub_dir.name, self.temp_sub_dir_2.name]) datapipe = datapipe.list_files_by_iopath() - results = list(datapipe).sort() + results = list(datapipe) + results.sort() self.assertEqual(results, all_temp_files) @skipIfNoIoPath From ff13537494b6b4661cce9892fa33d3ad7fd519a2 Mon Sep 17 00:00:00 2001 From: Robert Xiu Date: Thu, 2 Jun 2022 22:35:18 +0800 Subject: [PATCH 22/22] test: fix failing tests due to missing file:// prefix again --- test/test_fsspec.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_fsspec.py b/test/test_fsspec.py index de6f61875..2397ead97 100644 --- a/test/test_fsspec.py +++ b/test/test_fsspec.py @@ -69,7 +69,7 @@ def test_fsspec_file_lister_iterdatapipe(self): datapipe = datapipe.list_files_by_fsspec() for path in datapipe: self.assertIn( - path, + path.split("://")[1], {fsspec.implementations.local.make_path_posix(file) for file in self.temp_sub_files}, )