Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Write a jagged array. #414

Merged
merged 15 commits into from
Aug 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/uproot/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def ensure_str(x):
raise TypeError("expected a string, not {0}".format(type(x)))


def ensure_numpy(array, types=(numpy.integer, numpy.floating)):
def ensure_numpy(array, types=(numpy.bool_, numpy.integer, numpy.floating)):
"""
Returns an ``np.ndarray`` if ``array`` can be converted to an array of the
desired type and raises TypeError if it cannot.
Expand Down
701 changes: 598 additions & 103 deletions src/uproot/_writing.py

Large diffs are not rendered by default.

101 changes: 89 additions & 12 deletions src/uproot/writing.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,12 +671,18 @@ def _get_del_search(self, where, isget):
return self._del(item, cycle)

def __getitem__(self, where):
    """
    Look up ``where`` by delegating to ``self._get_del_search`` in "get" mode.

    Raises:
        ValueError: if the sink of the underlying file is already closed.
    """
    sink = self._file.sink
    if not sink.closed:
        # Second positional argument is ``isget``; True selects the lookup path.
        return self._get_del_search(where, True)
    raise ValueError("cannot get data from a closed file")

def __setitem__(self, where, what):
    """
    Store ``what`` under ``where`` by forwarding a one-item mapping to
    ``self.update``.

    Raises:
        ValueError: if the sink of the underlying file is already closed.
    """
    sink = self._file.sink
    if sink.closed:
        raise ValueError("cannot write data to a closed file")
    # Build the mapping only after the closed-file check, as before.
    pairs = {where: what}
    self.update(pairs)

def __delitem__(self, where):
    """
    Remove ``where`` by delegating to ``self._get_del_search`` with
    ``isget`` set to False.

    Raises:
        ValueError: if the sink of the underlying file is already closed.
    """
    sink = self._file.sink
    if not sink.closed:
        # False for ``isget`` selects the delete path of the shared search.
        return self._get_del_search(where, False)
    raise ValueError("cannot delete data from a closed file")

def _get(self, name, cycle):
Expand Down Expand Up @@ -857,6 +863,9 @@ def _subdir(self, key):
return self._subdirs[name]

def mkdir(self, name, initial_directory_bytes=None):
if self._file.sink.closed:
raise ValueError("cannot create a TDirectory in a closed file")

stripped = name.strip("/")
try:
at = stripped.index("/")
Expand Down Expand Up @@ -900,11 +909,16 @@ def mkdir(self, name, initial_directory_bytes=None):
def mktree(
self,
name,
title,
branch_types,
title="",
counter_name=lambda counted: "N" + counted,
field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner,
initial_basket_capacity=10,
resize_factor=10.0,
):
if self._file.sink.closed:
raise ValueError("cannot create a TTree in a closed file")

try:
at = name.rindex("/")
except ValueError:
Expand All @@ -924,6 +938,8 @@ def mktree(
treename,
title,
branch_types,
counter_name,
field_name,
initial_basket_capacity,
resize_factor,
),
Expand Down Expand Up @@ -1070,28 +1086,89 @@ def update(self, pairs=None, **more_pairs):
if isinstance(v, pandas.DataFrame) and v.index.is_numeric():
v = uproot._writing.dataframe_to_dict(v)

if module_name == "awkward" or module_name.startswith("awkward."):
import awkward

if isinstance(v, awkward.Array):
v = {"": v}

if isinstance(v, numpy.ndarray) and v.dtype.fields is not None:
v = uproot._writing.recarray_to_dict(v)

if isinstance(v, Mapping) and all(uproot._util.isstr(x) for x in v):
data = {}
metadata = {}
for branch_name, branch_array in v.items():
try:
branch_array = uproot._util.ensure_numpy(branch_array)
except TypeError:
break
data[branch_name] = branch_array
branch_dtype = branch_array.dtype
branch_shape = branch_array.shape[1:]
if branch_shape != ():
branch_dtype = numpy.dtype((branch_dtype, branch_shape))
metadata[branch_name] = branch_dtype
module_name = type(branch_array).__module__

if module_name == "pandas" or module_name.startswith("pandas."):
branch_array = uproot._writing.dataframe_to_dict(branch_array)

if (
isinstance(branch_array, numpy.ndarray)
and branch_array.dtype.fields is not None
):
branch_array = uproot._writing.recarray_to_dict(branch_array)

if isinstance(branch_array, Mapping) and all(
uproot._util.isstr(x) for x in branch_array
):
okay = True
datum = {}
metadatum = {}
for kk, vv in branch_array.items():
try:
vv = uproot._util.ensure_numpy(vv)
except TypeError:
okay = False
datum[kk] = vv
branch_dtype = vv.dtype
branch_shape = vv.shape[1:]
if branch_shape != ():
branch_dtype = numpy.dtype((branch_dtype, branch_shape))
metadatum[kk] = branch_dtype

if not okay:
break

data[branch_name] = datum
metadata[branch_name] = metadatum

else:
try:
branch_array = uproot._util.ensure_numpy(branch_array)
except TypeError:
module_name = type(branch_array).__module__
if module_name == "awkward" or module_name.startswith(
"awkward."
):
data[branch_name] = branch_array
metadata[branch_name] = branch_array.type
else:
try:
import awkward
except ImportError:
break
try:
branch_array = awkward.from_iter(branch_array)
except Exception:
break
else:
data[branch_name] = branch_array
metadata[branch_name] = awkward.type(branch_array)
else:
data[branch_name] = branch_array
branch_dtype = branch_array.dtype
branch_shape = branch_array.shape[1:]
if branch_shape != ():
branch_dtype = numpy.dtype((branch_dtype, branch_shape))
metadata[branch_name] = branch_dtype

else:
is_ttree = True

if is_ttree:
tree = directory.mktree(name, "", metadata)
tree = directory.mktree(name, metadata)
tree.extend(data)

else:
Expand Down
4 changes: 2 additions & 2 deletions tests/test_0303-empty-jagged-array.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ def test_numpy():


def test_awkward():
ak = pytest.importorskip("awkward")
awkward = pytest.importorskip("awkward")

with uproot.open(skhep_testdata.data_path("uproot-HZZ.root")) as f:
a = f["events/Muon_Px"].array(entry_start=1, entry_stop=1)
assert isinstance(a, ak.Array)
assert isinstance(a, awkward.Array)
assert len(a) == 0
16 changes: 16 additions & 0 deletions tests/test_0405-write-a-histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ def test_copy(tmp_path):
assert h3.GetBinContent(7) == pytest.approx(7.7)
assert h3.GetBinContent(8) == pytest.approx(8.8)
assert h3.GetBinContent(9) == pytest.approx(9.9)
f3.Close()


def test_from_old(tmp_path):
Expand All @@ -74,6 +75,7 @@ def test_from_old(tmp_path):
assert h1.GetBinContent(9) == 289
assert h1.GetBinContent(10) == 76
assert h1.GetBinContent(11) == 0
f1.Close()


def test_new_name(tmp_path):
Expand All @@ -99,6 +101,7 @@ def test_new_name(tmp_path):
assert h1.GetBinContent(9) == 289
assert h1.GetBinContent(10) == 76
assert h1.GetBinContent(11) == 0
f1.Close()


@pytest.mark.parametrize("cls", [ROOT.TH1C, ROOT.TH1D, ROOT.TH1F, ROOT.TH1I, ROOT.TH1S])
Expand Down Expand Up @@ -136,6 +139,7 @@ def test_all_TH1(tmp_path, cls):
assert h3.GetBinError(1) == pytest.approx(1.4142135623730951)
assert h3.GetBinError(2) == pytest.approx(5)
assert h3.GetBinError(3) == pytest.approx(4)
f3.Close()


@pytest.mark.parametrize("cls", [ROOT.TH2C, ROOT.TH2D, ROOT.TH2F, ROOT.TH2I, ROOT.TH2S])
Expand Down Expand Up @@ -175,6 +179,7 @@ def test_all_TH2(tmp_path, cls):
pytest.approx([0, 5, 0, 0, 0]),
pytest.approx([0, 0, 0, 4, 0]),
]
f3.Close()


@pytest.mark.parametrize("cls", [ROOT.TH3C, ROOT.TH3D, ROOT.TH3F, ROOT.TH3I, ROOT.TH3S])
Expand Down Expand Up @@ -221,6 +226,7 @@ def test_all_TH3(tmp_path, cls):
[[0, 0, 0], approx([0, 5, 0]), [0, 0, 0], approx([0, 0, 0]), [0, 0, 0]],
[[0, 0, 0], approx([0, 0, 0]), [0, 0, 0], approx([0, 4, 0]), [0, 0, 0]],
]
f3.Close()


def test_TProfile(tmp_path):
Expand Down Expand Up @@ -257,6 +263,7 @@ def test_TProfile(tmp_path):
assert h3.GetBinError(1) == pytest.approx(np.sqrt(12.5))
assert h3.GetBinError(2) == pytest.approx(0)
assert h3.GetBinError(3) == pytest.approx(0)
f3.Close()


def test_TProfile2D(tmp_path):
Expand Down Expand Up @@ -301,6 +308,7 @@ def test_TProfile2D(tmp_path):
pytest.approx([0, 0, 0, 0, 0]),
pytest.approx([0, 0, 0, 0, 0]),
]
f3.Close()


def test_TProfile3D(tmp_path):
Expand Down Expand Up @@ -355,6 +363,7 @@ def test_TProfile3D(tmp_path):
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
]
f3.Close()


def test_ex_nihilo_TH1(tmp_path):
Expand Down Expand Up @@ -396,6 +405,7 @@ def test_ex_nihilo_TH1(tmp_path):
assert h3.GetBinError(1) == pytest.approx(1.4142135623730951)
assert h3.GetBinError(2) == pytest.approx(5)
assert h3.GetBinError(3) == pytest.approx(4)
f3.Close()


def test_ex_nihilo_TH2(tmp_path):
Expand Down Expand Up @@ -453,6 +463,7 @@ def test_ex_nihilo_TH2(tmp_path):
pytest.approx([0, 5, 0, 0, 0]),
pytest.approx([0, 0, 0, 4, 0]),
]
f3.Close()


def test_ex_nihilo_TH3(tmp_path):
Expand Down Expand Up @@ -534,6 +545,7 @@ def test_ex_nihilo_TH3(tmp_path):
[[0, 0, 0], approx([0, 5, 0]), [0, 0, 0], approx([0, 0, 0]), [0, 0, 0]],
[[0, 0, 0], approx([0, 0, 0]), [0, 0, 0], approx([0, 4, 0]), [0, 0, 0]],
]
f3.Close()


def test_ex_nihilo_TProfile(tmp_path):
Expand Down Expand Up @@ -579,6 +591,7 @@ def test_ex_nihilo_TProfile(tmp_path):
assert h3.GetBinError(1) == pytest.approx(np.sqrt(12.5))
assert h3.GetBinError(2) == pytest.approx(0)
assert h3.GetBinError(3) == pytest.approx(0)
f3.Close()


def test_ex_nihilo_TProfile2D(tmp_path):
Expand Down Expand Up @@ -649,6 +662,7 @@ def test_ex_nihilo_TProfile2D(tmp_path):
pytest.approx([0, 0, 0, 0, 0]),
pytest.approx([0, 0, 0, 0, 0]),
]
f3.Close()


def test_ex_nihilo_TProfile3D(tmp_path):
Expand Down Expand Up @@ -748,6 +762,7 @@ def test_ex_nihilo_TProfile3D(tmp_path):
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0]],
]
f3.Close()


def test_delete(tmp_path):
Expand Down Expand Up @@ -793,3 +808,4 @@ def test_delete(tmp_path):
assert h3.GetBinError(1) == pytest.approx(1.4142135623730951)
assert h3.GetBinError(2) == pytest.approx(5)
assert h3.GetBinError(3) == pytest.approx(4)
f3.Close()
Loading