Skip to content

Commit

Permalink
fix open file method for tar files (#412)
Browse files Browse the repository at this point in the history
  • Loading branch information
Dave Berenbaum authored Sep 9, 2024
1 parent 823f9f8 commit ca36654
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 9 deletions.
2 changes: 1 addition & 1 deletion examples/multimodal/wds.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
)

wds_images = (
- DataChain.from_storage(IMAGE_TARS)
+ DataChain.from_storage(IMAGE_TARS, type="image")
.settings(cache=True)
.gen(laion=process_webdataset(spec=WDSLaion), params="file")
)
Expand Down
17 changes: 9 additions & 8 deletions src/datachain/lib/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,15 @@ def open(self, mode: Literal["rb", "r"] = "rb"):
with VFileRegistry.resolve(self, self.location) as f: # type: ignore[arg-type]
yield f

- uid = self.get_uid()
- client = self._catalog.get_client(self.source)
- if self._caching_enabled:
- client.download(uid, callback=self._download_cb)
- with client.open_object(
- uid, use_cache=self._caching_enabled, cb=self._download_cb
- ) as f:
- yield io.TextIOWrapper(f) if mode == "r" else f
+ else:
+ uid = self.get_uid()
+ client = self._catalog.get_client(self.source)
+ if self._caching_enabled:
+ client.download(uid, callback=self._download_cb)
+ with client.open_object(
+ uid, use_cache=self._caching_enabled, cb=self._download_cb
+ ) as f:
+ yield io.TextIOWrapper(f) if mode == "r" else f

def read(self, length: int = -1):
"""Returns file contents."""
Expand Down

0 comments on commit ca36654

Please sign in to comment.