File tree Expand file tree Collapse file tree 1 file changed +9
-4
lines changed
python/ray/data/_internal/planner Expand file tree Collapse file tree 1 file changed +9
-4
lines changed Original file line number Diff line number Diff line change @@ -189,14 +189,19 @@ def download_bytes_threaded(
189189 def load_uri_bytes (uri_path_iterator ):
190190 """Function that takes an iterator of URI paths and yields downloaded bytes for each."""
191191 for uri_path in uri_path_iterator :
192+ # Handle both file and stream for uri download:
192193 try :
193- with fs .open_input_file (uri_path ) as f :
194+ with fs .open_input_stream (uri_path ) as f :
194195 yield f .read ()
195196 except OSError as e :
196- logger .debug (
197- f"Failed to download URI '{ uri_path } ' from column '{ uri_column_name } ' with error: { e } "
198- )
197+ logger .debug (f"OSError: '{ uri_path } ' from column '{ uri_column_name } ' with error: { e } " )
199198 yield None
199+ except Exception as e :
200+ # Catch unexpected errors such as pyarrow.lib.ArrowInvalid, raised for an invalid URI like `foo://bar`.
201+ # This makes sure we don't fail the entire dataset download because of one invalid URI.
202+ logger .error (f"Unexpected error in load_uri_bytes: { e } " )
203+ yield None
204+
200205
201206 # Use make_async_gen to download URI bytes concurrently
202207 # This preserves the order of results to match the input URIs
You can’t perform that action at this time.
0 commit comments