Skip to content

Commit

Permalink
delphes schema passes tests
Browse files: browse the repository at this point in the history
  • Loading branch information
lgray committed Mar 15, 2023
1 parent f31dbf1 commit 77e43ae
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 27 deletions.
6 changes: 5 additions & 1 deletion coffea/nanoevents/mapping/base.py
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
from abc import abstractmethod
from cachetools import LRUCache
from collections.abc import Mapping
import awkward
import numpy
from coffea.nanoevents import transforms
from coffea.nanoevents.util import key_to_tuple, tuple_to_key
Expand Down Expand Up @@ -79,6 +80,9 @@ def __getitem__(self, key):
if skip:
skip = False
continue
elif node[0] == "@":
skip = True
continue
elif node == "!skip":
skip = True
continue
Expand All @@ -103,7 +107,7 @@ def __getitem__(self, key):
raise RuntimeError(f"Syntax error in form key {nodes}")
out = stack.pop()
try:
out = numpy.array(out)
out = numpy.array(awkward.Array(out))
except ValueError:
if self._debug:
print(out)
Expand Down
59 changes: 33 additions & 26 deletions coffea/nanoevents/schemas/delphes.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -192,9 +192,12 @@ def __init__(self, base_form, version="latest"):
pass
else:
pass
self._form["fields"], self._form["contents"] = self._build_collections(
self._form["fields"], self._form["contents"]
)
old_style_form = {
k: v for k, v in zip(self._form["fields"], self._form["contents"])
}
output = self._build_collections(old_style_form)
self._form["fields"] = [k for k in output.keys()]
self._form["contents"] = [v for v in output.values()]
self._form["parameters"]["metadata"]["version"] = self._version

@classmethod
Expand All @@ -206,19 +209,24 @@ def v1(cls, base_form):
"""
return cls(base_form, version="1")

def _build_collections(self, branch_fields, branch_forms):
zipped_branch_forms = {k: v for k, v in zip(branch_fields, branch_forms)}

def _build_collections(self, branch_forms):
def _tlorentz_vectorize(objname, form):
# first handle RecordArray
if {"fE", "fP"} == form.get("fields", []):
print(form)
if {
"@instance_version",
"@num_bytes",
"@fUniqueID",
"@fBits",
"@pidf",
"fE",
"fP",
} == set(form.get("fields", [])):
return zip_forms(
{
"x": form["contents"][0]["contents"][0],
"y": form["contents"][0]["contents"][1],
"z": form["contents"][0]["contents"][2],
"t": form["contents"][1],
"x": form["contents"][5]["contents"][0],
"y": form["contents"][5]["contents"][1],
"z": form["contents"][5]["contents"][2],
"t": form["contents"][6],
},
objname,
"LorentzVector",
Expand All @@ -232,38 +240,37 @@ def _tlorentz_vectorize(objname, form):
return form

# preprocess lorentz vectors properly (and recursively)
for objname, form in zipped_branch_forms.items():
zipped_branch_forms[objname] = _tlorentz_vectorize(objname, form)
for objname, form in branch_forms.items():
branch_forms[objname] = _tlorentz_vectorize(objname, form)

# parse into high-level records (collections, list collections, and singletons)
collections = set(k.split("/")[0] for k in zipped_branch_forms)
collections = set(k.split("/")[0] for k in branch_forms)
collections -= set(k for k in collections if k.endswith("_size"))

# Create offsets virtual arrays
for name in collections:
if f"{name}_size" in zipped_branch_forms:
zipped_branch_forms[f"o{name}"] = transforms.counts2offsets_form(
zipped_branch_forms[f"{name}_size"]
if f"{name}_size" in branch_forms:
branch_forms[f"o{name}"] = transforms.counts2offsets_form(
branch_forms[f"{name}_size"]
)

output = {}
for name in collections:
output[f"{name}.offsets"] = zipped_branch_forms[f"o{name}"]
output[f"{name}.offsets"] = branch_forms[f"o{name}"]
mixin = self.mixins.get(name, "NanoCollection")

# Every delphes collection is a list
offsets = zipped_branch_forms["o" + name]
offsets = branch_forms["o" + name]
content = {
k[2 * len(name) + 2 :]: zipped_branch_forms[k]
for k in zipped_branch_forms
k[2 * len(name) + 2 :]: branch_forms[k]
for k in branch_forms
if k.startswith(name + "/" + name)
}
output[name] = zip_forms(content, name, record_name=mixin, offsets=offsets)

# update docstrings as needed
# NB: must be before flattening for easier logic
for index, parameter in enumerate(output[name]["content"]["fields"]):
print(output[name]["content"]["contents"][index])
if "parameters" not in output[name]["content"]["contents"][index]:
continue
output[name]["content"]["contents"][index]["parameters"][
Expand All @@ -276,10 +283,10 @@ def _tlorentz_vectorize(objname, form):
)

# handle branches named like [4] and [5]
output[name]["content"]["fields"] = {
output[name]["content"]["fields"] = [
k.replace("[", "_").replace("]", "")
for k in output[name]["content"]["fields"]
}
]
output[name]["content"]["parameters"].update(
{
"__doc__": offsets["parameters"]["__doc__"],
Expand All @@ -292,7 +299,7 @@ def _tlorentz_vectorize(objname, form):
# upwards, effectively hiding one nested dimension
output[name] = output[name]["content"]

return output.keys(), output.values()
return output

@property
def behavior(self):
Expand Down

0 comments on commit 77e43ae

Please sign in to comment.