From f5e1eb8e05482096543560ee45451edcf335b3b5 Mon Sep 17 00:00:00 2001 From: Brian Hannafious <32105697+xbrianh@users.noreply.github.com> Date: Fri, 11 Oct 2024 10:39:27 -0700 Subject: [PATCH] Implement file_uris (#46) --- tests/test_xdlake.py | 9 +++++++++ xdlake/__init__.py | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/tests/test_xdlake.py b/tests/test_xdlake.py index 883c152..ff93b33 100644 --- a/tests/test_xdlake.py +++ b/tests/test_xdlake.py @@ -332,5 +332,14 @@ def gen_frames(): xdl.to_pyarrow_table() ) + def test_file_uris(self): + number_of_writes = 3 + xdl = xdlake.DeltaTable(f"{self.scratch_folder}/{uuid4()}") + for _ in range(number_of_writes): + xdl = xdl.write(self.gen_table()) + self.assertEqual(number_of_writes, len(xdl.file_uris())) + for uri in xdl.file_uris(): + self.assertTrue(uri.startswith("file://")) + if __name__ == '__main__': unittest.main() diff --git a/xdlake/__init__.py b/xdlake/__init__.py index d48cdf1..d3caef9 100644 --- a/xdlake/__init__.py +++ b/xdlake/__init__.py @@ -119,6 +119,11 @@ def get_fragments(self) -> dict[str, list[pyarrow.dataset.Fragment]]: fragments[loc.fs].append(fragment) return dict(fragments) + def file_uris(self) -> list[str]: + """Return the URI for each file in the current version of the table.""" + return [storage.absloc(add.path, self.loc).url + for add in self.adds.values()] + def to_pyarrow_dataset(self) -> pyarrow.dataset.Dataset: """Return arrow dataset.""" datasets = list()