Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: set fsspec as default source #1023

Merged
merged 40 commits into from
Nov 15, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
57c0a3b
feat: add fsspec as required dependency (#1021)
lobis Nov 14, 2023
bfefbb2
remove deprecated handlers from docs
lobis Nov 14, 2023
aa641b0
simplify source selection
lobis Nov 14, 2023
7910038
return object source
lobis Nov 14, 2023
faf416d
pickle executor
lobis Nov 15, 2023
1560be5
rename test
lobis Nov 15, 2023
a31830d
test more handlers
lobis Nov 15, 2023
f674ece
option to check writeable file-like object
lobis Nov 15, 2023
b6b9c9b
rename test
lobis Nov 15, 2023
307a5ab
explicitly set handler
lobis Nov 15, 2023
e76fdbb
fix s3 source
lobis Nov 15, 2023
f9ebf41
rename test
lobis Nov 15, 2023
91b7c80
Revert "fix s3 source"
lobis Nov 15, 2023
4f9a5d2
separate PR for s3 fix (https://github.com/scikit-hep/uproot5/pull/1…
lobis Nov 15, 2023
24cf716
strip file://
lobis Nov 15, 2023
f620750
rename test
lobis Nov 15, 2023
2aef261
rename tests
lobis Nov 15, 2023
39e7673
add aiohttp skip
lobis Nov 15, 2023
a168498
attempt to parse windows paths
lobis Nov 15, 2023
4c1c8a5
test ci
lobis Nov 15, 2023
41cdb5d
Merge branch 'main-fsspec' into handler-argument
lobis Nov 15, 2023
ec26fe7
Revert "test ci"
lobis Nov 15, 2023
8b9dbef
Merge remote-tracking branch 'origin/handler-argument' into handler-a…
lobis Nov 15, 2023
e25448a
rename test
lobis Nov 15, 2023
af34e45
remove fsspec from test
lobis Nov 15, 2023
99b2f11
remove *_handler options
lobis Nov 15, 2023
74d4d81
update defaults
lobis Nov 15, 2023
d8ecb94
do not override default s3
lobis Nov 15, 2023
f459c07
do not use fsspec for multiprocessing
lobis Nov 15, 2023
abaa198
rename test
lobis Nov 15, 2023
a3db12b
fix not selecting object source
lobis Nov 15, 2023
fbdc741
missing import
lobis Nov 15, 2023
633d94c
normalize doc
lobis Nov 15, 2023
24b8ce8
remove helper
lobis Nov 15, 2023
ac9fa9e
never return None as source
lobis Nov 15, 2023
4418dad
fix conflicts
lobis Nov 15, 2023
2feebdb
Merge branch 'main-fsspec' into handler-argument
lobis Nov 15, 2023
3b63353
remove unnecessary xrootd source default override since fsspec is def…
lobis Nov 15, 2023
5a66cdf
rename test
lobis Nov 15, 2023
6c618f3
add empty class to pass old pickle test
lobis Nov 15, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions src/uproot/_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,7 @@ def dask(

Options (type; default):

* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* handler (:doc:`uproot.source.chunk.Source` class; `None`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down
219 changes: 37 additions & 182 deletions src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,16 @@
import re
import warnings
from collections.abc import Iterable
from urllib.parse import unquote, urlparse
from typing import IO
from urllib.parse import urlparse

import numpy
import packaging.version

import uproot.source.chunk
import uproot.source.fsspec
import uproot.source.object

win = platform.system().lower().startswith("win")


Expand Down Expand Up @@ -56,6 +61,21 @@ def isnum(x):
)


def is_file_like(
    obj, readable: bool = True, writeable: bool = False, seekable: bool = True
) -> bool:
    """
    Duck-type check for whether ``obj`` can be used as a file object.

    Args:
        obj: Any object to inspect.
        readable (bool): If true and ``obj`` has a ``readable`` method, that
            method must return a truthy value.
        writeable (bool): If true and ``obj`` has a ``writable`` method, that
            method must return a truthy value. (The argument is spelled
            ``writeable``; the method that is consulted is ``writable``.)
        seekable (bool): If true and ``obj`` has a ``seekable`` method, that
            method must return a truthy value.

    Regardless of the flags, ``obj`` must expose callable ``read``, ``write``,
    ``seek``, ``tell`` and ``flush`` attributes; otherwise the result is False.
    An object that lacks one of the ``readable``/``writable``/``seekable``
    predicates is given the benefit of the doubt for that capability.
    """
    # Structural requirement: all five methods must exist, whatever the flags.
    required_methods = ("read", "write", "seek", "tell", "flush")
    for method_name in required_methods:
        if not callable(getattr(obj, method_name, None)):
            return False

    # Capability predicates are consulted only when requested, in this order.
    capability_checks = (
        ("readable", readable),
        ("writable", writeable),
        ("seekable", seekable),
    )
    verdict = True
    for predicate_name, requested in capability_checks:
        if requested and hasattr(obj, predicate_name):
            verdict = getattr(obj, predicate_name)()
        else:
            verdict = True
        if not verdict:
            # Hand back the predicate's own falsy value, unconverted.
            return verdict
    return verdict


def ensure_str(x):
"""
Ensures that ``x`` is a string (decoding with 'surrogateescape' if necessary).
Expand Down Expand Up @@ -344,200 +364,35 @@ def _split_path(path: str) -> list[str]:
return path, obj


def file_path_to_source_class(file_path, options):
def file_path_to_source_class(file_path: str | IO, options: dict):
"""
Use a file path to get the :doc:`uproot.source.chunk.Source` class that would read it.

Returns a tuple of (class, file_path) where the class is a subclass of :doc:`uproot.source.chunk.Source`.

The "handler" option is the preferred way to specify a custom source class.
The "*_handler" options are for backwards compatibility and will override the "handler" option if set.
"""
import uproot.source.chunk

file_path = regularize_path(file_path)

out = options["handler"]
if out is not None:
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
f"'handler' is not a class object inheriting from Source: {out!r}"
)
# check if "object_handler" is set
if (
options["object_handler"] is not None
or options["file_handler"] is not None
or options["xrootd_handler"] is not None
or options["s3_handler"] is not None
or options["http_handler"] is not None
):
# These options will override the "handler" option for backwards compatibility
warnings.warn(
"""In version 5.2.0, the '*_handler' argument ('http_handler`, 's3_handler', etc.) will be removed from 'uproot.open'. Use 'handler' instead.""",
stacklevel=1,
)
else:
return out, file_path

if (
not isinstance(file_path, str)
and hasattr(file_path, "read")
and hasattr(file_path, "seek")
):
out = options["object_handler"]
if out is None:
out = uproot.source.object.ObjectSource
handler_cls = options["handler"]
if handler_cls is None:
if isinstance(file_path, str):
handler_cls = uproot.source.fsspec.FSSpecSource
elif uproot._util.is_file_like(file_path):
handler_cls = uproot.source.object.ObjectSource
else:
warnings.warn(
f"""In version 5.2.0, the 'object_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r})
instead.

To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
f"'object_handler' is not a class object inheriting from Source: {out!r}"
f"file_path must be a string or file-like object, not {file_path!r}"
)

return out, file_path

windows_absolute_path = None
if win and _windows_absolute_path_pattern.match(file_path) is not None:
windows_absolute_path = file_path

parsed_url = urlparse(file_path)
if parsed_url.scheme.upper() == "FILE":
parsed_url_path = unquote(parsed_url.path)
else:
parsed_url_path = parsed_url.path
file_path = regularize_path(file_path)

if win and windows_absolute_path is None:
if _windows_absolute_path_pattern.match(parsed_url_path) is not None:
windows_absolute_path = parsed_url_path
elif _windows_absolute_path_pattern_slash.match(parsed_url_path) is not None:
windows_absolute_path = parsed_url_path[1:]

scheme = parsed_url.scheme.lower()
if (
scheme == "file"
or len(parsed_url.scheme) == 0
or windows_absolute_path is not None
if not (
isinstance(handler_cls, type)
and issubclass(handler_cls, uproot.source.chunk.Source)
):
if windows_absolute_path is None:
if parsed_url.netloc.lower() == "localhost":
file_path = parsed_url_path
else:
file_path = parsed_url.netloc + parsed_url_path
else:
file_path = windows_absolute_path

out = options["file_handler"]
if out is None:
out = uproot.source.file.MemmapSource
else:
warnings.warn(
f"""In version 5.2.0, the 'file_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.

To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)

if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'file_handler' is not a class object inheriting from Source: "
+ repr(out)
)
return out, os.path.expanduser(file_path)

elif scheme == "root":
out = options["xrootd_handler"]
if out is None:
out = uproot.source.xrootd.XRootDSource
else:
warnings.warn(
f"""In version 5.2.0, the 'xrootd_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.

To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'xrootd_handler' is not a class object inheriting from Source: "
+ repr(out)
)
return out, file_path

elif scheme == "s3":
out = options["s3_handler"]
if out is None:
out = uproot.source.s3.S3Source
else:
warnings.warn(
f"""In version 5.2.0, the 's3_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.

To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'s3' is not a class object inheriting from Source: " + repr(out)
)
return out, file_path

elif scheme in ("http", "https"):
out = options["http_handler"]
if out is None:
out = uproot.source.http.HTTPSource
else:
warnings.warn(
f"""In version 5.2.0, the 'http_handler' argument will be removed from 'uproot.open'. Use
uproot.open(..., handler={out!r}
instead.

To raise these warnings as errors (and get stack traces to find out where they're called), run
import warnings
warnings.filterwarnings("error", module="uproot.*")
after the first `import uproot` or use `@pytest.mark.filterwarnings("error:::uproot.*")` in pytest.""",
DeprecationWarning,
stacklevel=1,
)
if not (isinstance(out, type) and issubclass(out, uproot.source.chunk.Source)):
raise TypeError(
"'http_handler' is not a class object inheriting from Source: "
+ repr(out)
)
return out, file_path

else:
# try to use fsspec before raising an error
if scheme in _schemes:
return uproot.source.fsspec.FSSpecSource, file_path
raise TypeError(
f"'handler' is not a class object inheriting from Source: {handler_cls!r}"
)

raise ValueError(f"URI scheme not recognized: {file_path}")
return handler_cls, file_path


if isinstance(__builtins__, dict):
Expand Down
14 changes: 2 additions & 12 deletions src/uproot/behaviors/TBranch.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,12 +152,7 @@ def iterate(

Options (type; default):

* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* handler (:doc:`uproot.source.chunk.Source` class; `None`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down Expand Up @@ -326,12 +321,7 @@ def concatenate(

Options (type; default):

* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* handler (:doc:`uproot.source.chunk.Source` class; `None`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down
28 changes: 7 additions & 21 deletions src/uproot/reading.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

import uproot
import uproot.behaviors.TBranch
import uproot.source.fsspec
from uproot._util import no_filter


Expand All @@ -42,7 +43,7 @@ def open(
``"rel/file.root:tdirectory/ttree"``, ``Path("rel:/file.root")``,
``Path("/abs/path:stuff.root")``
object_cache (None, MutableMapping, or int): Cache of objects drawn
from ROOT directories (e.g histograms, TTrees, other directories);
from ROOT directories (e.g. histograms, TTrees, other directories);
if None, do not use a cache; if an int, create a new cache of this
size.
array_cache (None, MutableMapping, or memory size): Cache of arrays
Expand Down Expand Up @@ -76,12 +77,7 @@ def open(

Options (type; default):

* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* handler (:doc:`uproot.source.chunk.Source` class; `None`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down Expand Up @@ -178,12 +174,7 @@ def __getitem__(self, where):

open.defaults = _OpenDefaults(
{
"handler": None, # To be updated to fsspec source
"file_handler": None, # Deprecated
"s3_handler": None, # Deprecated
"http_handler": None, # Deprecated
"object_handler": None, # Deprecated
"xrootd_handler": None, # Deprecated
"handler": None,
"timeout": 30,
"max_num_elements": None,
"num_workers": 1,
Expand Down Expand Up @@ -535,12 +526,7 @@ class ReadOnlyFile(CommonFileMethods):

Options (type; default):

* handler (:doc:`uproot.source.chunk.Source` class; None)
* file_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* xrootd_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* s3_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* http_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* object_handler (:doc:`uproot.source.chunk.Source` class; None) (Deprecated: Use `handler` instead. If set, this will take precedence over `handler`)
* handler (:doc:`uproot.source.chunk.Source` class; `None`)
* timeout (float for HTTP, int for XRootD; 30)
* max_num_elements (None or int; None)
* num_workers (int; 1)
Expand Down Expand Up @@ -581,10 +567,10 @@ def __init__(

self.hook_before_create_source()

Source, file_path = uproot._util.file_path_to_source_class(
source_cls, file_path = uproot._util.file_path_to_source_class(
file_path, self._options
)
self._source = Source(file_path, **self._options)
self._source = source_cls(file_path, **self._options)

self.hook_before_get_chunks()

Expand Down
Loading
Loading