Skip to content

Commit

Permalink
Merge branch 'main' into fsspec-optional-backends
Browse files Browse the repository at this point in the history
  • Loading branch information
lobis authored Oct 19, 2023
2 parents e397896 + 7eec772 commit a93eb6c
Show file tree
Hide file tree
Showing 12 changed files with 423 additions and 107 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ repos:


- repo: https://github.com/asottile/pyupgrade
rev: v3.13.0
rev: v3.15.0
hooks:
- id: pyupgrade
args: ["--py38-plus"]
89 changes: 58 additions & 31 deletions src/uproot/_dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -885,8 +885,9 @@ def __call__(self, form: Form) -> tuple[Form, TrivialFormMappingInfo]:


class UprootReadMixin:
form_mapping: ImplementsFormMapping
base_form: Form
expected_form: Form
form_mapping_info: ImplementsFormMappingInfo
common_keys: frozenset[str]
interp_options: dict[str, Any]

Expand All @@ -898,24 +899,24 @@ def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
awkward = uproot.extras.awkward()
nplike = Numpy.instance()

form, form_info = self.form_mapping(self.base_form)

# The remap implementation should correctly populate the generated
# buffer mapping in __call__, such that the high-level form can be
# used in `from_buffers`
mapping = form_info.load_buffers(
mapping = self.form_mapping_info.load_buffers(
tree, self.common_keys, start, stop, self.interp_options
)

# Populate container with placeholders if keys aren't required
# Otherwise, read from disk
container = {}
for buffer_key, dtype in form.expected_from_buffers(
buffer_key=form_info.buffer_key
for buffer_key, dtype in self.expected_form.expected_from_buffers(
buffer_key=self.form_mapping_info.buffer_key
).items():
# Which key(s) does this buffer require? This code permits the caller
# to require multiple keys to compute a single buffer.
keys_for_buffer = form_info.keys_for_buffer_keys(frozenset({buffer_key}))
keys_for_buffer = self.form_mapping_info.keys_for_buffer_keys(
frozenset({buffer_key})
)
# If reading this buffer loads a permitted key, read from the tree
# We might not have _all_ keys if e.g. buffer A requires one
# but not two of the keys required for buffer B
Expand All @@ -930,20 +931,19 @@ def read_tree(self, tree: HasBranches, start: int, stop: int) -> AwkArray:
)

return awkward.from_buffers(
form,
self.expected_form,
stop - start,
container,
behavior=form_info.behavior,
buffer_key=form_info.buffer_key,
behavior=self.form_mapping_info.behavior,
buffer_key=self.form_mapping_info.buffer_key,
)

def mock(self) -> AwkArray:
awkward = uproot.extras.awkward()
high_level_form, form_info = self.form_mapping(self.base_form)
return awkward.typetracer.typetracer_from_form(
high_level_form,
self.expected_form,
highlevel=True,
behavior=form_info.behavior,
behavior=self.form_mapping_info.behavior,
)

def prepare_for_projection(self) -> tuple[AwkArray, TypeTracerReport, dict]:
Expand All @@ -952,25 +952,24 @@ def prepare_for_projection(self) -> tuple[AwkArray, TypeTracerReport, dict]:

# A form mapping may remap the base form into a new form
# The remapped form can be queried for structural information
high_level_form, form_info = self.form_mapping(self.base_form)

# Build typetracer and associated report object
meta, report = awkward.typetracer.typetracer_with_report(
high_level_form,
self.expected_form,
highlevel=True,
behavior=form_info.behavior,
buffer_key=form_info.buffer_key,
behavior=self.form_mapping_info.behavior,
buffer_key=self.form_mapping_info.buffer_key,
)

return (
meta,
report,
{
"trace": dask_awkward.lib.utils.trace_form_structure(
high_level_form,
buffer_key=form_info.buffer_key,
self.expected_form,
buffer_key=self.form_mapping_info.buffer_key,
),
"form_info": form_info,
"form_info": self.form_mapping_info,
},
)

Expand Down Expand Up @@ -1016,20 +1015,27 @@ class _UprootRead(UprootReadMixin):
def __init__(
self,
ttrees,
common_keys,
interp_options,
form_mapping: ImplementsFormMapping,
base_form,
common_keys: frozenset[str],
interp_options: dict[str, Any],
base_form: Form,
expected_form: Form,
form_mapping_info: ImplementsFormMappingInfo,
) -> None:
self.ttrees = ttrees
self.common_keys = frozenset(common_keys)
self.interp_options = interp_options
self.form_mapping = form_mapping
self.base_form = base_form
self.expected_form = expected_form
self.form_mapping_info = form_mapping_info

def project_keys(self: T, keys: frozenset[str]) -> T:
return _UprootRead(
self.ttrees, keys, self.interp_options, self.form_mapping, self.base_form
self.ttrees,
keys,
self.interp_options,
self.base_form,
self.expected_form,
self.form_mapping_info,
)

def __call__(self, i_start_stop) -> AwkArray:
Expand All @@ -1046,16 +1052,18 @@ def __init__(
real_options,
common_keys,
interp_options,
form_mapping: ImplementsFormMapping,
base_form: Form,
expected_form: Form,
form_mapping_info: ImplementsFormMappingInfo,
) -> None:
self.custom_classes = custom_classes
self.allow_missing = allow_missing
self.real_options = real_options
self.common_keys = frozenset(common_keys)
self.interp_options = interp_options
self.form_mapping = form_mapping
self.base_form = base_form
self.expected_form = expected_form
self.form_mapping_info = form_mapping_info

def __call__(self, blockwise_args) -> AwkArray:
(
Expand Down Expand Up @@ -1104,8 +1112,9 @@ def project_keys(self: T, keys: frozenset[str]) -> T:
self.real_options,
keys,
self.interp_options,
self.form_mapping,
self.base_form,
self.expected_form,
self.form_mapping_info,
)


Expand Down Expand Up @@ -1289,13 +1298,22 @@ def real_filter_branch(branch):
divisions.append(0)
partition_args.append((0, 0, 0))

if form_mapping is None:
expected_form = dask_awkward.lib.utils.form_with_unique_keys(
base_form, "<root>"
)
form_mapping_info = TrivialFormMappingInfo(expected_form)
else:
expected_form, form_mapping_info = form_mapping(base_form)

return dask_awkward.from_map(
_UprootRead(
ttrees,
common_keys,
interp_options,
form_mapping=TrivialFormMapping() if form_mapping is None else form_mapping,
base_form=base_form,
expected_form=expected_form,
form_mapping_info=form_mapping_info,
),
partition_args,
divisions=tuple(divisions),
Expand Down Expand Up @@ -1370,15 +1388,24 @@ def _get_dak_array_delay_open(
)
)

if form_mapping is None:
expected_form = dask_awkward.lib.utils.form_with_unique_keys(
base_form, "<root>"
)
form_mapping_info = TrivialFormMappingInfo(expected_form)
else:
expected_form, form_mapping_info = form_mapping(base_form)

return dask_awkward.from_map(
_UprootOpenAndRead(
custom_classes,
allow_missing,
real_options,
common_keys,
interp_options,
form_mapping=TrivialFormMapping() if form_mapping is None else form_mapping,
base_form=base_form,
expected_form=expected_form,
form_mapping_info=form_mapping_info,
),
partition_args,
divisions=None if divisions is None else tuple(divisions),
Expand Down
46 changes: 25 additions & 21 deletions src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,31 +285,35 @@ def regularize_path(path):
def file_object_path_split(path):
"""
Split a path with a colon into a file path and an object-in-file path.
"""
path = regularize_path(path)
try:
index = path.rindex(":")
except ValueError:
return path, None
else:
file_path, object_path = path[:index], path[index + 1 :]
Args:
path: The path to split. Example: ``"https://localhost:8888/file.root:tree"``
if (
_might_be_port.match(object_path) is not None
and urlparse(file_path).path == ""
):
return path, None
Returns:
A tuple of the file path and the object-in-file path. If there is no
object-in-file path, the second element is ``None``.
Example: ``("https://localhost:8888/file.root", "tree")``
"""

file_path = file_path.rstrip()
object_path = object_path.lstrip()
path: str = regularize_path(path)
# remove whitespace
path = path.strip()

# split url into parts
parsed_url = urlparse(path)

parts = parsed_url.path.split(":")
if len(parts) == 1:
obj = None
elif len(parts) == 2:
obj = parts[1]
# remove the object from the path (including the colon)
path = path[: -len(obj) - 1]
obj = obj.strip()
else:
raise ValueError(f"too many colons in file path: {path} for url {parsed_url}")

if file_path.upper() in _schemes:
return path, None
elif win and _windows_drive_letter_ending.match(file_path) is not None:
return path, None
else:
return file_path, object_path
return path, obj


def file_path_to_source_class(file_path, options):
Expand Down
51 changes: 36 additions & 15 deletions src/uproot/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,25 @@
7: "float64",
8: "float32",
9: "float16",
10: "int64",
11: "int32",
12: "int16",
13: "int8",
14: "uint32", # SplitIndex64 delta encoding
15: "uint64", # SplitIndex32 delta encoding
10: "uint64",
11: "uint32",
12: "uint16",
13: "uint8",
14: "uint64", # SplitIndex64 delta encoding
15: "uint32", # SplitIndex32 delta encoding
16: "float64", # split
17: "float32", # split
18: "float16", # split
19: "int64", # split
20: "int32", # split
21: "int16", # split
19: "uint64", # split
20: "uint32", # split
21: "uint16", # split
22: "int64",
23: "int32",
24: "int16",
25: "int8",
26: "int64", # split + zigzag encoding
27: "int32", # split + zigzag encoding
28: "int16", # split + zigzag encoding
}
rntuple_col_num_to_size_dict = {
1: 64,
Expand All @@ -156,14 +163,21 @@
11: 32,
12: 16,
13: 8,
14: 32, # SplitIndex64 delta encoding
15: 64, # SplitIndex32 delta encoding
14: 64, # SplitIndex64 delta encoding
15: 32, # SplitIndex32 delta encoding
16: 64, # split
17: 32, # split
18: 16, # split
19: 64, # split
20: 32, # split
21: 16, # split
22: 64,
23: 32,
24: 16,
25: 8,
26: 64, # split + zigzag encoding
27: 32, # split + zigzag encoding
28: 16, # split + zigzag encoding
}

rntuple_col_type_to_num_dict = {
Expand All @@ -176,10 +190,10 @@
"real64": 7,
"real32": 8,
"real16": 9,
"int64": 10,
"int32": 11,
"int16": 12,
"int8": 13,
"uint64": 10,
"uint32": 11,
"uint16": 12,
"uint8": 13,
"splitindex64": 14,
"splitindex32": 15,
"splitreal64": 16,
Expand All @@ -188,6 +202,13 @@
"splitin64": 19,
"splitint32": 20,
"splitint16": 21,
"int64": 22,
"int32": 23,
"int16": 24,
"int8": 25,
"splitzigzagint64": 26,
"splitzigzagint32": 27,
"splitzigzagint16": 28,
}

rntuple_role_leaf = 0
Expand Down
Loading

0 comments on commit a93eb6c

Please sign in to comment.