Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for current RNTuple files #962

Merged
merged 33 commits into from
Oct 19, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
93f1f48
Adds changes, but ignores some to keep the old tests passing for now.
ioanaif Sep 20, 2023
bd25ab1
style: pre-commit fixes
pre-commit-ci[bot] Sep 20, 2023
e16bbd9
Adds changes to footer and skips old file tests with the exception of…
ioanaif Sep 20, 2023
4952dba
style: pre-commit fixes
pre-commit-ci[bot] Sep 20, 2023
4e9aa3e
Adds new version of file for test 0662 and renames new test file to m…
ioanaif Sep 20, 2023
ffc249a
Swaps rntuple file for new one from RNTuple in test 0662, the footer …
ioanaif Sep 20, 2023
559741d
Applies changes to const file.
ioanaif Oct 11, 2023
31ee04d
Adds new format test file.
ioanaif Oct 11, 2023
d673b77
style: pre-commit fixes
pre-commit-ci[bot] Oct 11, 2023
1f53cd7
Tests only the new file for now.
ioanaif Oct 11, 2023
2bed24a
style: pre-commit fixes
pre-commit-ci[bot] Oct 11, 2023
0fa4e15
Adds new RNTuple schema for the footer and the split cases without th…
ioanaif Oct 11, 2023
0e1eed9
style: pre-commit fixes
pre-commit-ci[bot] Oct 11, 2023
6dbc12d
Merge branch 'main' into ioanaif/rntuple-update-2
ioanaif Oct 11, 2023
7193e41
Updates testing / adds more files.
ioanaif Oct 12, 2023
57fc8b1
style: pre-commit fixes
pre-commit-ci[bot] Oct 12, 2023
b5a3d93
Adds changes to split functionality.
ioanaif Oct 12, 2023
d7e908a
style: pre-commit fixes
pre-commit-ci[bot] Oct 12, 2023
80eded4
Fixes split for uint16 case.
ioanaif Oct 17, 2023
a5150a6
style: pre-commit fixes
pre-commit-ci[bot] Oct 17, 2023
badb2d6
Merge branch 'main' into ioanaif/rntuple-update-2
ioanaif Oct 17, 2023
74f3f61
Fixes split for 32 and 64.
ioanaif Oct 17, 2023
e4a0bbe
style: pre-commit fixes
pre-commit-ci[bot] Oct 17, 2023
0f3cb6e
Fixes spelling errors.
ioanaif Oct 17, 2023
ebdd0a0
style: pre-commit fixes
pre-commit-ci[bot] Oct 17, 2023
af11fc6
Fixes spelling errors.
ioanaif Oct 17, 2023
c5b6b4a
Changes zigzag function.
ioanaif Oct 17, 2023
5b3889c
style: pre-commit fixes
pre-commit-ci[bot] Oct 17, 2023
7beb8c5
Uses test files from skhep_testdata and removes local ones.
ioanaif Oct 17, 2023
1b632cd
style: pre-commit fixes
pre-commit-ci[bot] Oct 17, 2023
332e3c2
Merge branch 'main' into ioanaif/rntuple-update-2
jpivarski Oct 19, 2023
0ad879b
Reverts changes to test_0662-rntuple-stl-containers.py
ioanaif Oct 19, 2023
be9c17c
Vectorized split-decoding for 32 and 64-bits.
jpivarski Oct 19, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions src/uproot/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,9 @@
19: "int64", # split
20: "int32", # split
21: "int16", # split
22: "int64", # split + zigzag encoding
23: "int32", # split + zigzag encoding
24: "int16", # split + zigzag encoding
}
rntuple_col_num_to_size_dict = {
1: 64,
Expand All @@ -164,6 +167,9 @@
19: 64, # split
20: 32, # split
21: 16, # split
22: 64, # split + zigzag encoding
23: 32, # split + zigzag encoding
24: 16, # split + zigzag encoding
}

rntuple_col_type_to_num_dict = {
Expand All @@ -188,6 +194,9 @@
"splitin64": 19,
"splitint32": 20,
"splitint16": 21,
"splitzigzagint64": 22,
"splitzigzagint32": 23,
"splitzigzagint16": 24,
}

rntuple_role_leaf = 0
Expand Down
32 changes: 30 additions & 2 deletions src/uproot/models/RNTuple.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,14 @@ def _envelop_header(chunk, cursor, context):
return {"env_version": env_version, "min_version": min_version}


def from_zigzag(n):
    """
    Decode a zigzag-encoded integer into its signed value.

    For non-negative input, an even value ``2k`` decodes to ``k`` and an odd
    value ``2k + 1`` decodes to ``-(k + 1)``.
    """
    magnitude = n >> 1
    # 0 when the low bit is clear, -1 (all bits set) when it is set.
    sign_mask = -(n & 1)
    return magnitude ^ sign_mask


def to_zigzag(n):
    """
    Zigzag-encode a signed integer so that small magnitudes map to small
    non-negative values (0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...).

    The arithmetic right shift by 63 extracts the sign, so the input is
    expected to fit in a signed 64-bit integer.
    """
    doubled = n << 1
    # 0 for non-negative input, -1 (all bits set) for negative input.
    sign_fill = n >> 63
    return doubled ^ sign_fill


class Model_ROOT_3a3a_Experimental_3a3a_RNTuple(uproot.model.Model):
"""
A versionless :doc:`uproot.model.Model` for ``ROOT::Experimental::RNTuple``.
Expand Down Expand Up @@ -376,6 +384,13 @@ def read_col_page(self, ncol, cluster_i):

if dtype_byte <= uproot.const.rntuple_col_type_to_num_dict["index32"]:
res = numpy.insert(res, 0, 0) # for offsets
zigzag = 26 <= dtype_byte <= 28
delta = 14 <= dtype_byte <= 15
if zigzag:
for i in range(len(res)):
res[i] = from_zigzag(res[i])
ioanaif marked this conversation as resolved.
Show resolved Hide resolved
elif delta:
numpy.cumsum(res)
return res

def arrays(
Expand Down Expand Up @@ -645,6 +660,15 @@ def read(self, chunk, cursor, context):

return out

def read_extension_header(self, out, chunk, cursor, context):
    """
    Populate ``out`` with the four record lists of an extension header.

    Each list is read with the matching reader stored on ``self`` and is
    assigned to ``out`` as ``field_records``, ``column_records``,
    ``alias_columns`` and ``extra_type_infos``, in that order. The same
    ``out`` object is returned.
    """
    # (attribute set on ``out``, name of the reader attribute on ``self``);
    # the tuple order fixes the order in which the readers are invoked.
    sections = (
        ("field_records", "list_field_record_frames"),
        ("column_records", "list_column_record_frames"),
        ("alias_columns", "list_alias_column_frames"),
        ("extra_type_infos", "list_extra_type_info_reader"),
    )
    for target_name, reader_name in sections:
        reader = getattr(self, reader_name)
        setattr(out, target_name, reader.read(chunk, cursor, context))
    return out


class ColumnGroupRecordReader:
def read(self, chunk, cursor, context):
Expand Down Expand Up @@ -674,7 +698,8 @@ def read(self, chunk, cursor, context):

class FooterReader:
def __init__(self):
self.extension_header_links = ListFrameReader(EnvLinkReader())
self.extension_header_links = HeaderReader()
# self.extension_header_links = ListFrameReader(EnvLinkReader())
self.column_group_record_frames = ListFrameReader(
RecordFrameReader(ColumnGroupRecordReader())
)
Expand All @@ -692,7 +717,10 @@ def read(self, chunk, cursor, context):
out.feature_flag = cursor.field(chunk, _rntuple_feature_flag_format, context)
out.header_crc32 = cursor.field(chunk, struct.Struct("<I"), context)

out.extension_links = self.extension_header_links.read(chunk, cursor, context)
out.extension_links = self.extension_header_links.read_extension_header(
out, chunk, cursor, context
)
# out.extension_links = self.extension_header_links.read(chunk, cursor, context)
out.col_group_records = self.column_group_record_frames.read(
chunk, cursor, context
)
Expand Down
Binary file added test_ntuple_stl_containers-2.root
Binary file not shown.
1 change: 1 addition & 0 deletions tests/test_0630-rntuple-basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
pytest.importorskip("awkward")


@pytest.mark.skip(reason="RNTUPLE UPDATE: ignore test with previous file for now.")
def test_flat():
filename = skhep_testdata.data_path("test_ntuple_int_float.root")
with uproot.open(filename) as f:
Expand Down
3 changes: 2 additions & 1 deletion tests/test_0662-rntuple-stl-containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@


def test_rntuple_stl_containers():
filename = skhep_testdata.data_path("test_ntuple_stl_containers.root")
filename = "test_ntuple_stl_containers-2.root"
# filename = skhep_testdata.data_path("test_ntuple_stl_containers.root")
with uproot.open(filename) as f:
R = f["ntuple"]
assert R.keys() == [
Expand Down
1 change: 1 addition & 0 deletions tests/test_0705-rntuple-writing-metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
ak = pytest.importorskip("awkward")


@pytest.mark.skip(reason="RNTUPLE UPDATE: ignore test with previous file for now.")
def test_header(tmp_path):
filepath = os.path.join(tmp_path, "test.root")

Expand Down
32 changes: 32 additions & 0 deletions tests/test_0962-RNTuple-update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE

import os
import queue
import pytest
import uproot
import skhep_testdata


def test_new_support_RNTuple_files():
    """
    Smoke test: open a remote RNTuple file and request the byte ranges of
    its header and footer.

    The test passes when the file opens, the ``Events`` object exposes the
    seek/size members, and the source returns exactly two chunks. No content
    assertions are made yet.
    """
    url = "https://xrootd-local.unl.edu:1094//store/user/AGC/nanoaod-rntuple/zstd/TT_TuneCUETP8M1_13TeV-powheg-pythia8/cmsopendata2015_ttbar_19980_PU25nsData2015v1_76X_mcRun2_asymptotic_v12_ext3-v1_00000_0000.root"
    with uproot.open(url) as f:
        obj = f["Events"]

        # Header byte range: [fSeekHeader, fSeekHeader + fNBytesHeader).
        header_start = obj.member("fSeekHeader")
        header_stop = header_start + obj.member("fNBytesHeader")

        notifications = queue.Queue()

        # Footer byte range: [fSeekFooter, fSeekFooter + fNBytesFooter).
        footer_start = obj.member("fSeekFooter")
        footer_stop = footer_start + obj.member("fNBytesFooter")

        byte_ranges = [(header_start, header_stop), (footer_start, footer_stop)]
        # Unpacking into two names also checks that two chunks come back.
        header_chunk, footer_chunk = f.file.source.chunks(byte_ranges, notifications)

        # Checks that are currently disabled:
        # assert footer_stop - footer_start == 273
        # cursor = uproot.Cursor(footer_start); cursor.debug(footer_chunk, 80)
        # array = obj.arrays(["nTau"])