From 9d38d8d575f6fb9511cd885b5860e23a776ba94f Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Fri, 20 Aug 2021 11:46:33 -0500 Subject: [PATCH 1/7] Start developing compression for Uproot writing. --- src/uproot/compression.py | 132 ++++++++++++++++----- src/uproot/writing/writable.py | 2 +- tests/test_0414-write-jagged-arrays.py | 48 ++++---- tests/test_0416-writing-compressed-data.py | 71 +++++++++++ 4 files changed, 200 insertions(+), 53 deletions(-) create mode 100644 tests/test_0416-writing-compressed-data.py diff --git a/src/uproot/compression.py b/src/uproot/compression.py index f6441f009..3c3e4bc7d 100644 --- a/src/uproot/compression.py +++ b/src/uproot/compression.py @@ -87,7 +87,18 @@ def level(self, value): self._level = int(value) -class ZLIB(Compression): +class DecompressZLIB(object): + """ + FIXME: docstring + """ + + def decompress(self, data, uncompressed_bytes=None): + import zlib + + return zlib.decompress(data) + + +class ZLIB(Compression, DecompressZLIB): """ Args: level (int, 0-9): Compression level: 0 is uncompressed, 1 is minimally @@ -98,14 +109,27 @@ class ZLIB(Compression): Uproot uses ``zlib`` from the Python standard library. """ - @classmethod - def decompress(cls, data, uncompressed_bytes=None): + def __init__(self, level): + DecompressZLIB.__init__(self) + Compression.__init__(self, level) + + def compress(self, data): import zlib - return zlib.decompress(data) + return zlib.compress(data, level=self._level) -class LZMA(Compression): +class DecompressLZMA(object): + """ + FIXME: docstring + """ + + def decompress(self, data, uncompressed_bytes=None): + lzma = uproot.extras.lzma() + return lzma.decompress(data) + + +class LZMA(Compression, DecompressLZMA): """ Args: level (int, 0-9): Compression level: 0 is uncompressed, 1 is minimally @@ -118,13 +142,30 @@ class LZMA(Compression): In Python 2, ``backports.lzma`` must be installed. 
""" - @classmethod - def decompress(cls, data, uncompressed_bytes=None): + def __init__(self, level): + DecompressLZMA.__init__(self) + Compression.__init__(self, level) + + def compress(self, data): lzma = uproot.extras.lzma() - return lzma.decompress(data) + return lzma.compress(data, preset=self._level) -class LZ4(Compression): +class DecompressLZ4(object): + """ + FIXME: docstring + """ + + def decompress(self, data, uncompressed_bytes=None): + lz4_block = uproot.extras.lz4_block() + if uncompressed_bytes is None: + raise ValueError( + "lz4 block decompression requires the number of uncompressed bytes" + ) + return lz4_block.decompress(data, uncompressed_size=uncompressed_bytes) + + +class LZ4(Compression, DecompressLZ4): """ Args: level (int, 0-9): Compression level: 0 is uncompressed, 1 is minimally @@ -135,17 +176,35 @@ class LZ4(Compression): The ``zl4`` and ``xxhash`` libraries must be installed. """ - @classmethod - def decompress(cls, data, uncompressed_bytes=None): + def __init__(self, level): + DecompressLZ4.__init__(self) + Compression.__init__(self, level) + + def compress(self, data): lz4_block = uproot.extras.lz4_block() - if uncompressed_bytes is None: - raise ValueError( - "lz4 block decompression requires the number of uncompressed bytes" - ) - return lz4_block.decompress(data, uncompressed_size=uncompressed_bytes) + return lz4_block.compress(data, compression=self._level, store_size=False) + + +class DecompressZSTD(object): + """ + FIXME: docstring + """ + + def __init__(self): + self._decompressor = None + + @property + def decompressor(self): + if self._decompressor is None: + zstandard = uproot.extras.zstandard() + self._decompressor = zstandard.ZstdDecompressor() + return self._decompressor + + def decompress(self, data, uncompressed_bytes=None): + return self.decompressor.decompress(data) -class ZSTD(Compression): +class ZSTD(Compression, DecompressZSTD): """ Args: level (int, 0-9): Compression level: 0 is uncompressed, 1 is minimally @@ 
-156,11 +215,20 @@ class ZSTD(Compression): The ``zstandard`` library must be installed. """ - @classmethod - def decompress(cls, data, uncompressed_bytes=None): - zstandard = uproot.extras.zstandard() - dctx = zstandard.ZstdDecompressor() - return dctx.decompress(data) + def __init__(self, level): + DecompressZSTD.__init__(self) + Compression.__init__(self, level) + self._compressor = None + + @property + def compressor(self): + if self._compressor is None: + zstandard = uproot.extras.zstandard() + self._compressor = zstandard.ZstdCompressor(level=self._level) + return self._compressor + + def compress(self, data): + return self.compressor.compress(data) algorithm_codes = { @@ -170,6 +238,10 @@ def decompress(cls, data, uncompressed_bytes=None): uproot.const.kZSTD: ZSTD, } +_decompress_ZLIB = DecompressZLIB() +_decompress_LZMA = DecompressLZMA() +_decompress_LZ4 = DecompressLZ4() +_decompress_ZSTD = DecompressZSTD() _decompress_header_format = struct.Struct("2sBBBBBBB") _decompress_checksum_format = struct.Struct(">Q") @@ -228,15 +300,15 @@ def decompress( block_uncompressed_bytes = u1 + (u2 << 8) + (u3 << 16) if algo == b"ZL": - cls = ZLIB + decompressor = _decompress_ZLIB data = cursor.bytes(chunk, block_compressed_bytes, context) elif algo == b"XZ": - cls = LZMA + decompressor = _decompress_LZMA data = cursor.bytes(chunk, block_compressed_bytes, context) elif algo == b"L4": - cls = LZ4 + decompressor = _decompress_LZ4 block_compressed_bytes -= 8 expected_checksum = cursor.field( chunk, _decompress_checksum_format, context @@ -254,7 +326,7 @@ def decompress( ) elif algo == b"ZS": - cls = ZSTD + decompressor = _decompress_ZSTD data = cursor.bytes(chunk, block_compressed_bytes, context) elif algo == b"CS": @@ -275,9 +347,13 @@ def decompress( ) if block_info is not None: - block_info.append((cls, block_compressed_bytes, block_uncompressed_bytes)) + block_info.append( + (decompressor, block_compressed_bytes, block_uncompressed_bytes) + ) - uncompressed_bytestring = 
cls.decompress(data, block_uncompressed_bytes) + uncompressed_bytestring = decompressor.decompress( + data, block_uncompressed_bytes + ) if len(uncompressed_bytestring) != block_uncompressed_bytes: raise ValueError( diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index c1ca2291e..692faf810 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -905,7 +905,7 @@ def mktree( name, branch_types, title="", - counter_name=lambda counted: "N" + counted, + counter_name=lambda counted: "n" + counted, field_name=lambda outer, inner: inner if outer == "" else outer + "_" + inner, initial_basket_capacity=10, resize_factor=10.0, diff --git a/tests/test_0414-write-jagged-arrays.py b/tests/test_0414-write-jagged-arrays.py index efbd14c34..47633eaa3 100644 --- a/tests/test_0414-write-jagged-arrays.py +++ b/tests/test_0414-write-jagged-arrays.py @@ -192,7 +192,7 @@ def test_awkward_jagged_metadata(tmp_path): with uproot.open(newfile) as fin: assert fin["tree/b1"].typename == "int64_t" - assert fin["tree/Nb2"].typename == "int32_t" + assert fin["tree/nb2"].typename == "int32_t" assert fin["tree/b2"].typename == "double[]" f1 = ROOT.TFile(newfile) @@ -202,14 +202,14 @@ def test_awkward_jagged_metadata(tmp_path): assert b1.GetLeaf("b1").GetName() == "b1" assert b1.GetLeaf("b1").GetLeafCount() == None # noqa: E711 (ROOT null check) - Nb2 = t1.GetBranch("Nb2") - assert Nb2.GetLeaf("Nb2").GetName() == "Nb2" - assert Nb2.GetLeaf("Nb2").GetLeafCount() == None # noqa: E711 (ROOT null check) + nb2 = t1.GetBranch("nb2") + assert nb2.GetLeaf("nb2").GetName() == "nb2" + assert nb2.GetLeaf("nb2").GetLeafCount() == None # noqa: E711 (ROOT null check) b2 = t1.GetBranch("b2") assert b2.GetLeaf("b2").GetName() == "b2" assert b2.GetLeaf("b2").GetLeafCount() != None # noqa: E711 (ROOT null check) - assert b2.GetLeaf("b2").GetLeafCount().GetName() == "Nb2" + assert b2.GetLeaf("b2").GetLeafCount().GetName() == "nb2" f1.Close() @@ -228,7 +228,7 @@ 
def test_awkward_jagged_record_metadata(tmp_path): with uproot.open(newfile) as fin: assert fin["tree/b1"].typename == "int64_t" - assert fin["tree/Nb2"].typename == "int32_t" + assert fin["tree/nb2"].typename == "int32_t" assert fin["tree/b2_x"].typename == "double[]" assert fin["tree/b2_y"].typename == "int8_t[]" @@ -239,19 +239,19 @@ def test_awkward_jagged_record_metadata(tmp_path): assert b1.GetLeaf("b1").GetName() == "b1" assert b1.GetLeaf("b1").GetLeafCount() == None # noqa: E711 (ROOT null check) - Nb2 = t1.GetBranch("Nb2") - assert Nb2.GetLeaf("Nb2").GetName() == "Nb2" - assert Nb2.GetLeaf("Nb2").GetLeafCount() == None # noqa: E711 (ROOT null check) + nb2 = t1.GetBranch("nb2") + assert nb2.GetLeaf("nb2").GetName() == "nb2" + assert nb2.GetLeaf("nb2").GetLeafCount() == None # noqa: E711 (ROOT null check) b2_x = t1.GetBranch("b2_x") assert b2_x.GetLeaf("b2_x").GetName() == "b2_x" assert b2_x.GetLeaf("b2_x").GetLeafCount() != None # noqa: E711 (ROOT null check) - assert b2_x.GetLeaf("b2_x").GetLeafCount().GetName() == "Nb2" + assert b2_x.GetLeaf("b2_x").GetLeafCount().GetName() == "nb2" b2_y = t1.GetBranch("b2_y") assert b2_y.GetLeaf("b2_y").GetName() == "b2_y" assert b2_y.GetLeaf("b2_y").GetLeafCount() != None # noqa: E711 (ROOT null check) - assert b2_y.GetLeaf("b2_y").GetLeafCount().GetName() == "Nb2" + assert b2_y.GetLeaf("b2_y").GetLeafCount().GetName() == "nb2" f1.Close() @@ -268,10 +268,10 @@ def test_awkward_jagged_data_1(tmp_path): fout["tree"].extend({"b1": b1, "b2": b2}) with uproot.open(newfile) as fin: - assert fin["tree/Nb2"].member("fLeaves")[0].member("fMaximum") == 4 + assert fin["tree/nb2"].member("fLeaves")[0].member("fMaximum") == 4 assert fin["tree/b2"].member("fEntryOffsetLen") == 4 * 5 assert fin["tree/b1"].array().tolist() == [1, 2, 3, 4, 5] - assert fin["tree/Nb2"].array().tolist() == [3, 0, 2, 1, 4] + assert fin["tree/nb2"].array().tolist() == [3, 0, 2, 1, 4] assert fin["tree/b2"].array().tolist() == [ [0.0, 1.1, 2.2], [], @@ -283,7 
+283,7 @@ def test_awkward_jagged_data_1(tmp_path): f1 = ROOT.TFile(newfile) t1 = f1.Get("tree") assert [x.b1 for x in t1] == [1, 2, 3, 4, 5] - assert [x.Nb2 for x in t1] == [3, 0, 2, 1, 4] + assert [x.nb2 for x in t1] == [3, 0, 2, 1, 4] assert [list(x.b2) for x in t1] == [ [0.0, 1.1, 2.2], [], @@ -306,10 +306,10 @@ def test_awkward_jagged_data_2(tmp_path): fout["tree"].extend({"b1": b1[:3], "b2": b2[:3]}) with uproot.open(newfile) as fin: - assert fin["tree/Nb2"].member("fLeaves")[0].member("fMaximum") == 4 + assert fin["tree/nb2"].member("fLeaves")[0].member("fMaximum") == 4 assert fin["tree/b2"].member("fEntryOffsetLen") == 4 * 3 assert fin["tree/b1"].array().tolist() == [1, 2, 3, 4, 5, 1, 2, 3] - assert fin["tree/Nb2"].array().tolist() == [3, 0, 2, 1, 4, 3, 0, 2] + assert fin["tree/nb2"].array().tolist() == [3, 0, 2, 1, 4, 3, 0, 2] assert fin["tree/b2"].array().tolist() == [ [0.0, 1.1, 2.2], [], @@ -324,7 +324,7 @@ def test_awkward_jagged_data_2(tmp_path): f1 = ROOT.TFile(newfile) t1 = f1.Get("tree") assert [x.b1 for x in t1] == [1, 2, 3, 4, 5, 1, 2, 3] - assert [x.Nb2 for x in t1] == [3, 0, 2, 1, 4, 3, 0, 2] + assert [x.nb2 for x in t1] == [3, 0, 2, 1, 4, 3, 0, 2] assert [list(x.b2) for x in t1] == [ [0.0, 1.1, 2.2], [], @@ -349,9 +349,9 @@ def test_awkward_jagged_data_3(tmp_path): # more than 1000 entries, a special number for fNevBufSize and fEntryOffsetLen with uproot.open(newfile) as fin: - assert fin["tree/Nbig"].member("fLeaves")[0].member("fMaximum") == 4 + assert fin["tree/nbig"].member("fLeaves")[0].member("fMaximum") == 4 assert fin["tree/big"].member("fEntryOffsetLen") == 4 * 1500 - assert fin["tree/Nbig"].array().tolist() == [3, 0, 2, 1, 4] * 300 + assert fin["tree/nbig"].array().tolist() == [3, 0, 2, 1, 4] * 300 assert ( fin["tree/big"].array().tolist() == [ @@ -366,7 +366,7 @@ def test_awkward_jagged_data_3(tmp_path): f1 = ROOT.TFile(newfile) t1 = f1.Get("tree") - assert [x.Nbig for x in t1] == [3, 0, 2, 1, 4] * 300 + assert [x.nbig for x in t1] 
== [3, 0, 2, 1, 4] * 300 assert [list(x.big) for x in t1] == [ [0.0, 1.1, 2.2], [], @@ -392,7 +392,7 @@ def test_awkward_jagged_record_1(tmp_path): fout["tree"].extend({"array": array}) with uproot.open(newfile) as fin: - assert fin["tree/Narray"].array().tolist() == [3, 0, 2] * 2 + assert fin["tree/narray"].array().tolist() == [3, 0, 2] * 2 assert fin["tree/array_x"].array().tolist() == [[1, 2, 3], [], [4, 5]] * 2 assert ( fin["tree/array_y"].array().tolist() @@ -401,7 +401,7 @@ def test_awkward_jagged_record_1(tmp_path): f1 = ROOT.TFile(newfile) t1 = f1.Get("tree") - assert [x.Narray for x in t1] == [3, 0, 2] * 2 + assert [x.narray for x in t1] == [3, 0, 2] * 2 assert [list(x.array_x) for x in t1] == [[1, 2, 3], [], [4, 5]] * 2 assert [list(x.array_y) for x in t1] == [[1.1, 2.2, 3.3], [], [4.4, 5.5]] * 2 f1.Close() @@ -429,13 +429,13 @@ def test_awkward_jagged_record_2(tmp_path): ) with uproot.open(newfile) as fin: - assert fin["tree/N"].array().tolist() == [3, 0, 2] * 2 + assert fin["tree/n"].array().tolist() == [3, 0, 2] * 2 assert fin["tree/x"].array().tolist() == [[1, 2, 3], [], [4, 5]] * 2 assert fin["tree/y"].array().tolist() == [[1.1, 2.2, 3.3], [], [4.4, 5.5]] * 2 f1 = ROOT.TFile(newfile) t1 = f1.Get("tree") - assert [x.N for x in t1] == [3, 0, 2] * 2 + assert [x.n for x in t1] == [3, 0, 2] * 2 assert [list(x.x) for x in t1] == [[1, 2, 3], [], [4, 5]] * 2 assert [list(x.y) for x in t1] == [[1.1, 2.2, 3.3], [], [4.4, 5.5]] * 2 f1.Close() diff --git a/tests/test_0416-writing-compressed-data.py b/tests/test_0416-writing-compressed-data.py new file mode 100644 index 000000000..a573ed68c --- /dev/null +++ b/tests/test_0416-writing-compressed-data.py @@ -0,0 +1,71 @@ +# BSD 3-Clause License; see https://github.com/scikit-hep/uproot4/blob/main/LICENSE + +from __future__ import absolute_import + +import numpy +import pytest +import skhep_testdata + +import uproot + + +def test_ZLIB(): + for _ in range(2): + with 
uproot.open(skhep_testdata.data_path("uproot-Zmumu-zlib.root"))[ + "events" + ] as events: + assert events["px1"].array(entry_stop=5).tolist() == [ + -41.1952876442, + 35.1180497674, + 35.1180497674, + 34.1444372454, + 22.7835819537, + ] + + +def test_LZMA(): + pytest.importorskip("lzma") + + for _ in range(2): + with uproot.open(skhep_testdata.data_path("uproot-Zmumu-lzma.root"))[ + "events" + ] as events: + assert events["px1"].array(entry_stop=5).tolist() == [ + -41.1952876442, + 35.1180497674, + 35.1180497674, + 34.1444372454, + 22.7835819537, + ] + + +def test_LZ4(): + pytest.importorskip("lz4") + + for _ in range(2): + with uproot.open(skhep_testdata.data_path("uproot-Zmumu-lz4.root"))[ + "events" + ] as events: + assert events["px1"].array(entry_stop=5).tolist() == [ + -41.1952876442, + 35.1180497674, + 35.1180497674, + 34.1444372454, + 22.7835819537, + ] + + +def test_ZSTD(): + pytest.importorskip("zstandard") + + for _ in range(2): + with uproot.open(skhep_testdata.data_path("uproot-Zmumu-zstd.root"))[ + "events" + ] as events: + assert events["px1"].array(entry_stop=5).tolist() == [ + -41.1952876442, + 35.1180497674, + 35.1180497674, + 34.1444372454, + 22.7835819537, + ] From 5eac04200fe1626008b52b7bb6797fdc19c97808 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Fri, 20 Aug 2021 13:36:50 -0500 Subject: [PATCH 2/7] Correctly compressing histograms. No TBaskets yet. 
--- src/uproot/compression.py | 110 ++++++++++++++------- src/uproot/models/TBasket.py | 5 +- src/uproot/writing/identify.py | 10 +- tests/test_0416-writing-compressed-data.py | 104 ++++++++++++++++++- 4 files changed, 191 insertions(+), 38 deletions(-) diff --git a/src/uproot/compression.py b/src/uproot/compression.py index 3c3e4bc7d..5a72368c2 100644 --- a/src/uproot/compression.py +++ b/src/uproot/compression.py @@ -87,10 +87,10 @@ def level(self, value): self._level = int(value) -class DecompressZLIB(object): - """ - FIXME: docstring - """ +class _DecompressZLIB(object): + name = "ZLIB" + _2byte = b"ZL" + _method = b"\x08" def decompress(self, data, uncompressed_bytes=None): import zlib @@ -98,7 +98,7 @@ def decompress(self, data, uncompressed_bytes=None): return zlib.decompress(data) -class ZLIB(Compression, DecompressZLIB): +class ZLIB(Compression, _DecompressZLIB): """ Args: level (int, 0-9): Compression level: 0 is uncompressed, 1 is minimally @@ -110,7 +110,7 @@ class ZLIB(Compression, DecompressZLIB): """ def __init__(self, level): - DecompressZLIB.__init__(self) + _DecompressZLIB.__init__(self) Compression.__init__(self, level) def compress(self, data): @@ -119,17 +119,17 @@ def compress(self, data): return zlib.compress(data, level=self._level) -class DecompressLZMA(object): - """ - FIXME: docstring - """ +class _DecompressLZMA(object): + name = "LZMA" + _2byte = b"XZ" + _method = b"\x00" def decompress(self, data, uncompressed_bytes=None): lzma = uproot.extras.lzma() return lzma.decompress(data) -class LZMA(Compression, DecompressLZMA): +class LZMA(Compression, _DecompressLZMA): """ Args: level (int, 0-9): Compression level: 0 is uncompressed, 1 is minimally @@ -143,7 +143,7 @@ class LZMA(Compression, DecompressLZMA): """ def __init__(self, level): - DecompressLZMA.__init__(self) + _DecompressLZMA.__init__(self) Compression.__init__(self, level) def compress(self, data): @@ -151,10 +151,10 @@ def compress(self, data): return lzma.compress(data, 
preset=self._level) -class DecompressLZ4(object): - """ - FIXME: docstring - """ +class _DecompressLZ4(object): + name = "LZ4" + _2byte = b"L4" + _method = b"\x01" def decompress(self, data, uncompressed_bytes=None): lz4_block = uproot.extras.lz4_block() @@ -165,7 +165,7 @@ def decompress(self, data, uncompressed_bytes=None): return lz4_block.decompress(data, uncompressed_size=uncompressed_bytes) -class LZ4(Compression, DecompressLZ4): +class LZ4(Compression, _DecompressLZ4): """ Args: level (int, 0-9): Compression level: 0 is uncompressed, 1 is minimally @@ -177,7 +177,7 @@ class LZ4(Compression, DecompressLZ4): """ def __init__(self, level): - DecompressLZ4.__init__(self) + _DecompressLZ4.__init__(self) Compression.__init__(self, level) def compress(self, data): @@ -185,10 +185,10 @@ def compress(self, data): return lz4_block.compress(data, compression=self._level, store_size=False) -class DecompressZSTD(object): - """ - FIXME: docstring - """ +class _DecompressZSTD(object): + name = "ZSTD" + _2byte = b"ZS" + _method = b"\x01" def __init__(self): self._decompressor = None @@ -204,7 +204,7 @@ def decompress(self, data, uncompressed_bytes=None): return self.decompressor.decompress(data) -class ZSTD(Compression, DecompressZSTD): +class ZSTD(Compression, _DecompressZSTD): """ Args: level (int, 0-9): Compression level: 0 is uncompressed, 1 is minimally @@ -216,7 +216,7 @@ class ZSTD(Compression, DecompressZSTD): """ def __init__(self, level): - DecompressZSTD.__init__(self) + _DecompressZSTD.__init__(self) Compression.__init__(self, level) self._compressor = None @@ -238,10 +238,10 @@ def compress(self, data): uproot.const.kZSTD: ZSTD, } -_decompress_ZLIB = DecompressZLIB() -_decompress_LZMA = DecompressLZMA() -_decompress_LZ4 = DecompressLZ4() -_decompress_ZSTD = DecompressZSTD() +_decompress_ZLIB = _DecompressZLIB() +_decompress_LZMA = _DecompressLZMA() +_decompress_LZ4 = _DecompressLZ4() +_decompress_ZSTD = _DecompressZSTD() _decompress_header_format = 
struct.Struct("2sBBBBBBB") _decompress_checksum_format = struct.Struct(">Q") @@ -299,15 +299,15 @@ def decompress( block_compressed_bytes = c1 + (c2 << 8) + (c3 << 16) block_uncompressed_bytes = u1 + (u2 << 8) + (u3 << 16) - if algo == b"ZL": + if algo == _decompress_ZLIB._2byte: decompressor = _decompress_ZLIB data = cursor.bytes(chunk, block_compressed_bytes, context) - elif algo == b"XZ": + elif algo == _decompress_LZMA._2byte: decompressor = _decompress_LZMA data = cursor.bytes(chunk, block_compressed_bytes, context) - elif algo == b"L4": + elif algo == _decompress_LZ4._2byte: decompressor = _decompress_LZ4 block_compressed_bytes -= 8 expected_checksum = cursor.field( @@ -325,7 +325,7 @@ def decompress( ) ) - elif algo == b"ZS": + elif algo == _decompress_ZSTD._2byte: decompressor = _decompress_ZSTD data = cursor.bytes(chunk, block_compressed_bytes, context) @@ -348,7 +348,7 @@ def decompress( if block_info is not None: block_info.append( - (decompressor, block_compressed_bytes, block_uncompressed_bytes) + (decompressor.name, block_compressed_bytes, block_uncompressed_bytes) ) uncompressed_bytestring = decompressor.decompress( @@ -413,3 +413,47 @@ def hook_after_block(**kwargs): # noqa: D103 decompress.hook_before_block = hook_before_block decompress.hook_after_block = hook_after_block + +_3BYTE_MAX = 2 ** 24 - 1 +_4byte = struct.Struct(" 0: + block, next = next[:_3BYTE_MAX], next[_3BYTE_MAX:] + + compressed = compression.compress(block) + + uncompressed_size = _4byte.pack(len(block))[:-1] + compressed_size = _4byte.pack(len(compressed))[:-1] + + out.append( + compression._2byte + + compression._method + + compressed_size + + uncompressed_size + ) + + if isinstance(compression, LZ4): + xxhash = uproot.extras.xxhash() + out.append( + _decompress_checksum_format.pack(xxhash.xxh64(block).intdigest()) + ) + + out.append(compressed) + + out = b"".join(out) + + if len(out) < len(data): + return out + else: + return data diff --git a/src/uproot/models/TBasket.py 
b/src/uproot/models/TBasket.py index 92c12a48b..25e7b1afd 100644 --- a/src/uproot/models/TBasket.py +++ b/src/uproot/models/TBasket.py @@ -198,11 +198,14 @@ def block_compression_info(self): """ For compressed ``TBaskets``, a tuple of 3-tuples containing - ``(compression type class, num compressed bytes, num uncompressed bytes)`` + ``(name of algorithm, num compressed bytes, num uncompressed bytes)`` to describe the actual compression algorithms and sizes encountered in each block of data. + The name of the algorithm can be ``"ZLIB"``, ``"LZMA"``, ``"LZ4"``, or + ``"ZSTD"``. + For uncompressed ``TBaskets``, this is None. """ return self._block_compression_info diff --git a/src/uproot/writing/identify.py b/src/uproot/writing/identify.py index e501fabd4..9464919c0 100644 --- a/src/uproot/writing/identify.py +++ b/src/uproot/writing/identify.py @@ -13,6 +13,7 @@ import numpy +import uproot.compression import uproot.writing._cascadetree @@ -125,7 +126,10 @@ def add_to_directory(obj, name, directory, streamers): else: streamers.append(rawstreamer) - raw_data = writable.serialize(name=name) + uncompressed_data = writable.serialize(name=name) + compressed_data = uproot.compression.compress( + uncompressed_data, directory.file.compression + ) if hasattr(writable, "fTitle"): title = writable.fTitle @@ -139,8 +143,8 @@ def add_to_directory(obj, name, directory, streamers): writable.classname, name, title, - raw_data, - len(raw_data), + compressed_data, + len(uncompressed_data), ) diff --git a/tests/test_0416-writing-compressed-data.py b/tests/test_0416-writing-compressed-data.py index a573ed68c..669c22f0e 100644 --- a/tests/test_0416-writing-compressed-data.py +++ b/tests/test_0416-writing-compressed-data.py @@ -2,12 +2,16 @@ from __future__ import absolute_import -import numpy +import os + +import numpy as np import pytest import skhep_testdata import uproot +ROOT = pytest.importorskip("ROOT") + def test_ZLIB(): for _ in range(2): @@ -69,3 +73,101 @@ def test_ZSTD(): 
34.1444372454, 22.7835819537, ] + + +def test_histogram_ZLIB(tmp_path): + newfile = os.path.join(tmp_path, "newfile.root") + + SIZE = 2 ** 21 + histogram = (np.random.normal(0, 1, SIZE), np.linspace(0, 1, SIZE + 1)) + last = histogram[0][-1] + + with uproot.recreate(newfile, compression=uproot.ZLIB(1)) as fout: + fout["out"] = histogram + + with uproot.open(newfile) as fin: + content, edges = fin["out"].to_numpy() + assert len(content) == SIZE + assert len(edges) == SIZE + 1 + assert content[-1] == last + + f3 = ROOT.TFile(newfile) + h3 = f3.Get("out") + assert h3.GetNbinsX() == SIZE + assert h3.GetBinContent(SIZE) == last + f3.Close() + + +def test_histogram_LZMA(tmp_path): + pytest.importorskip("lzma") + + newfile = os.path.join(tmp_path, "newfile.root") + + SIZE = 2 ** 20 + histogram = (np.random.normal(0, 1, SIZE), np.linspace(0, 1, SIZE + 1)) + last = histogram[0][-1] + + with uproot.recreate(newfile, compression=uproot.LZMA(1)) as fout: + fout["out"] = histogram + + with uproot.open(newfile) as fin: + content, edges = fin["out"].to_numpy() + assert len(content) == SIZE + assert len(edges) == SIZE + 1 + assert content[-1] == last + + f3 = ROOT.TFile(newfile) + h3 = f3.Get("out") + assert h3.GetNbinsX() == SIZE + assert h3.GetBinContent(SIZE) == last + f3.Close() + + +def test_histogram_LZ4(tmp_path): + pytest.importorskip("lz4") + + newfile = os.path.join(tmp_path, "newfile.root") + + SIZE = 2 ** 21 + histogram = (np.random.normal(0, 1, SIZE), np.linspace(0, 1, SIZE + 1)) + last = histogram[0][-1] + + with uproot.recreate(newfile, compression=uproot.LZ4(1)) as fout: + fout["out"] = histogram + + with uproot.open(newfile) as fin: + content, edges = fin["out"].to_numpy() + assert len(content) == SIZE + assert len(edges) == SIZE + 1 + assert content[-1] == last + + f3 = ROOT.TFile(newfile) + h3 = f3.Get("out") + assert h3.GetNbinsX() == SIZE + assert h3.GetBinContent(SIZE) == last + f3.Close() + + +def test_histogram_ZSTD(tmp_path): + 
pytest.importorskip("zstandard") + + newfile = os.path.join(tmp_path, "newfile.root") + + SIZE = 2 ** 21 + histogram = (np.random.normal(0, 1, SIZE), np.linspace(0, 1, SIZE + 1)) + last = histogram[0][-1] + + with uproot.recreate(newfile, compression=uproot.ZSTD(1)) as fout: + fout["out"] = histogram + + with uproot.open(newfile) as fin: + content, edges = fin["out"].to_numpy() + assert len(content) == SIZE + assert len(edges) == SIZE + 1 + assert content[-1] == last + + f3 = ROOT.TFile(newfile) + h3 = f3.Get("out") + assert h3.GetNbinsX() == SIZE + assert h3.GetBinContent(SIZE) == last + f3.Close() From 66f20aee6a4d23e80eef9d5a8dc5b004f5950f7b Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 23 Aug 2021 16:12:05 -0500 Subject: [PATCH 3/7] TTree compression is entirely done, and TTree/TBranch.(un)compressed_bytes properties are now consistent: these numbers of bytes all include TKey headers now, the way ROOT does. --- src/uproot/behaviors/TBranch.py | 27 +-- src/uproot/behaviors/TTree.py | 34 +++ src/uproot/compression.py | 24 +- src/uproot/models/TBasket.py | 35 ++- src/uproot/writing/_cascadetree.py | 60 ++--- src/uproot/writing/writable.py | 102 ++++++++- tests/test_0406-write-a-ttree.py | 2 - tests/test_0416-writing-compressed-data.py | 250 ++++++++++++++++++++- 8 files changed, 460 insertions(+), 74 deletions(-) diff --git a/src/uproot/behaviors/TBranch.py b/src/uproot/behaviors/TBranch.py index c835e4a11..69127f506 100644 --- a/src/uproot/behaviors/TBranch.py +++ b/src/uproot/behaviors/TBranch.py @@ -2458,25 +2458,24 @@ def compression(self): @property def compressed_bytes(self): """ - The number of compressed bytes in all ``TBaskets`` of this ``TBranch``. + The number of compressed bytes in all ``TBaskets`` of this ``TBranch``, + including the TKey headers (which are always uncompressed). - The number of compressed bytes is specified in the ``TBranch`` metadata - and can be determined without reading any additional data. 
The - uncompressed bytes requires reading all of the ``TBasket`` ``TKeys`` at - least. + This information is specified in the ``TBranch`` metadata (``fZipBytes``) + and can be determined without reading any additional data. """ - return sum(self.basket_compressed_bytes(i) for i in range(self.num_baskets)) + return self.member("fZipBytes") @property def uncompressed_bytes(self): """ - The number of uncompressed bytes in all ``TBaskets`` of this ``TBranch``. + The number of uncompressed bytes in all ``TBaskets`` of this ``TBranch``, + including the TKey headers. - The number of uncompressed bytes cannot be determined without reading a - ``TKey``, which are small, but may be slow for remote connections because - of the latency of round-trip requests. + This information is specified in the ``TBranch`` metadata (``fTotBytes``) + and can be determined without reading any additional data. """ - return sum(self.basket_uncompressed_bytes(i) for i in range(self.num_baskets)) + return self.member("fTotBytes") @property def compression_ratio(self): @@ -2554,7 +2553,8 @@ def basket_chunk_cursor(self, basket_num): def basket_compressed_bytes(self, basket_num): """ - The number of compressed bytes for the ``TBasket`` at ``basket_num``. + The number of compressed bytes for the ``TBasket`` at ``basket_num``, + including the TKey header. The number of compressed bytes is specified in the ``TBranch`` metadata and can be determined without reading any additional data. The @@ -2576,7 +2576,8 @@ def basket_compressed_bytes(self, basket_num): def basket_uncompressed_bytes(self, basket_num): """ - The number of uncompressed bytes for the ``TBasket`` at ``basket_num``. + The number of uncompressed bytes for the ``TBasket`` at ``basket_num``, + including the TKey header. 
The number of uncompressed bytes cannot be determined without reading a ``TKey``, which are small, but may be slow for remote connections because diff --git a/src/uproot/behaviors/TTree.py b/src/uproot/behaviors/TTree.py index e92c9f6d5..60704d0ad 100644 --- a/src/uproot/behaviors/TTree.py +++ b/src/uproot/behaviors/TTree.py @@ -84,6 +84,40 @@ def tree(self): """ return self + @property + def compressed_bytes(self): + """ + The number of compressed bytes in all ``TBaskets`` of all ``TBranches`` + of this ``TTree``, including all the TKey headers (which are always + uncompressed). + + This information is specified in the ``TTree`` metadata (``fZipBytes``) + and can be determined without reading any additional data. + """ + return self.member("fZipBytes") + + @property + def uncompressed_bytes(self): + """ + The number of uncompressed bytes in all ``TBaskets`` of all ``TBranches`` + of this ``TTree``, including all the TKey headers. + + This information is specified in the ``TTree`` metadata (``fZipBytes``) + and can be determined without reading any additional data. + """ + return self.member("fTotBytes") + + @property + def compression_ratio(self): + """ + The number of uncompressed bytes divided by the number of compressed + bytes for this ``TBranch``. + + See :ref:`uproot.behaviors.TTree.TTree.compressed_bytes` and + :ref:`uproot.behaviors.TTree.TTree.uncompressed_bytes`. 
+ """ + return float(self.uncompressed_bytes) / float(self.compressed_bytes) + @property def aliases(self): u""" diff --git a/src/uproot/compression.py b/src/uproot/compression.py index 5a72368c2..4c3a21b88 100644 --- a/src/uproot/compression.py +++ b/src/uproot/compression.py @@ -86,6 +86,12 @@ def level(self, value): raise ValueError("Compression level must be between 0 and 9 (inclusive)") self._level = int(value) + def __eq__(self, other): + if isinstance(other, Compression): + return self.name == other.name and self.level == other.level + else: + return False + class _DecompressZLIB(object): name = "ZLIB" @@ -432,23 +438,27 @@ def compress(data, compression): block, next = next[:_3BYTE_MAX], next[_3BYTE_MAX:] compressed = compression.compress(block) + len_compressed = len(compressed) + + if isinstance(compression, LZ4): + xxhash = uproot.extras.xxhash() + computed_checksum = xxhash.xxh64(compressed).intdigest() + checksum = _decompress_checksum_format.pack(computed_checksum) + len_compressed += 8 + else: + checksum = b"" uncompressed_size = _4byte.pack(len(block))[:-1] - compressed_size = _4byte.pack(len(compressed))[:-1] + compressed_size = _4byte.pack(len_compressed)[:-1] out.append( compression._2byte + compression._method + compressed_size + uncompressed_size + + checksum ) - if isinstance(compression, LZ4): - xxhash = uproot.extras.xxhash() - out.append( - _decompress_checksum_format.pack(xxhash.xxh64(block).intdigest()) - ) - out.append(compressed) out = b"".join(out) diff --git a/src/uproot/models/TBasket.py b/src/uproot/models/TBasket.py index 25e7b1afd..df97ddb21 100644 --- a/src/uproot/models/TBasket.py +++ b/src/uproot/models/TBasket.py @@ -163,35 +163,23 @@ def is_embedded(self): @property def uncompressed_bytes(self): """ - The number of bytes for the uncompressed data, not including the header. + The number of bytes for the uncompressed data, including the TKey header. 
If the ``TBasket`` is uncompressed, this is equal to :ref:`uproot.models.TBasket.Model_TBasket.compressed_bytes`. """ - if self.is_embedded: - if self._byte_offsets is None: - return self._data.nbytes - else: - return self._data.nbytes + 4 + self.num_entries * 4 - else: - return self._members["fObjlen"] + return self._members["fKeylen"] + self._members["fObjlen"] @property def compressed_bytes(self): """ - The number of bytes for the compressed data, not including the header + The number of bytes for the compressed data, including the TKey header (which is always uncompressed). If the ``TBasket`` is uncompressed, this is equal to :ref:`uproot.models.TBasket.Model_TBasket.uncompressed_bytes`. """ - if self.is_embedded: - if self._byte_offsets is None: - return self._data.nbytes - else: - return self._data.nbytes + 4 + self.num_entries * 4 - else: - return self._members["fNbytes"] - self._members["fKeylen"] + return self._members["fNbytes"] @property def block_compression_info(self): @@ -285,27 +273,30 @@ def read_members(self, chunk, cursor, context, file): self._data = cursor.bytes(chunk, self.border, context) else: - if self.compressed_bytes != self.uncompressed_bytes: + compressed_bytes = self._members["fNbytes"] - self._members["fKeylen"] + uncompressed_bytes = self._members["fObjlen"] + + if compressed_bytes != uncompressed_bytes: self._block_compression_info = [] uncompressed = uproot.compression.decompress( chunk, cursor, {}, - self.compressed_bytes, - self.uncompressed_bytes, + compressed_bytes, + uncompressed_bytes, self._block_compression_info, ) self._block_compression_info = tuple(self._block_compression_info) self._raw_data = uncompressed.get( 0, - self.uncompressed_bytes, + uncompressed_bytes, uproot.source.cursor.Cursor(0), context, ) else: - self._raw_data = cursor.bytes(chunk, self.uncompressed_bytes, context) + self._raw_data = cursor.bytes(chunk, uncompressed_bytes, context) - if self.border != self.uncompressed_bytes: + if self.border != 
uncompressed_bytes: self._data = self._raw_data[: self.border] raw_byte_offsets = self._raw_data[self.border :].view( _tbasket_offsets_dtype diff --git a/src/uproot/writing/_cascadetree.py b/src/uproot/writing/_cascadetree.py index 63fbbae1c..60292304b 100644 --- a/src/uproot/writing/_cascadetree.py +++ b/src/uproot/writing/_cascadetree.py @@ -581,7 +581,7 @@ def extend(self, file, sink, data): big_endian.shape[1:], ) ) - tofill.append((branch_name, big_endian, None)) + tofill.append((branch_name, datum["compression"], big_endian, None)) if datum["kind"] == "counter": datum["tleaf_maximum_value"] = max( @@ -672,21 +672,28 @@ def extend(self, file, sink, data): ) big_endian_offsets = offsets.astype(">i4", copy=True) - tofill.append((branch_name, big_endian.reshape(-1), big_endian_offsets)) + tofill.append( + ( + branch_name, + datum["compression"], + big_endian.reshape(-1), + big_endian_offsets, + ) + ) # actually write baskets into the file uncompressed_bytes = 0 compressed_bytes = 0 - for branch_name, big_endian, big_endian_offsets in tofill: + for branch_name, compression, big_endian, big_endian_offsets in tofill: datum = self._branch_data[self._branch_lookup[branch_name]] if big_endian_offsets is None: totbytes, zipbytes, location = self.write_np_basket( - sink, branch_name, big_endian + sink, branch_name, compression, big_endian ) else: totbytes, zipbytes, location = self.write_jagged_basket( - sink, branch_name, big_endian, big_endian_offsets + sink, branch_name, compression, big_endian, big_endian_offsets ) datum["fEntryOffsetLen"] = 4 * (len(big_endian_offsets) - 1) uncompressed_bytes += totbytes @@ -1155,7 +1162,7 @@ def write_updates(self, sink): ), ) - def write_np_basket(self, sink, branch_name, array): + def write_np_basket(self, sink, branch_name, compression, array): fClassName = uproot.serialization.string("TBasket") fName = uproot.serialization.string(branch_name) fTitle = uproot.serialization.string(self._name) @@ -1169,14 +1176,15 @@ def 
write_np_basket(self, sink, branch_name, array): + 1 ) - raw_array = uproot._util.tobytes(array) itemsize = array.dtype.itemsize for item in array.shape[1:]: itemsize *= item - fObjlen = len(raw_array) + uncompressed_data = uproot._util.tobytes(array) + compressed_data = uproot.compression.compress(uncompressed_data, compression) - fNbytes = fKeylen + fObjlen # FIXME: no compression yet + fObjlen = len(uncompressed_data) + fNbytes = fKeylen + len(compressed_data) parent_location = self._directory.key.location # FIXME: is this correct? @@ -1204,20 +1212,20 @@ def write_np_basket(self, sink, branch_name, array): 32000, # fBufferSize itemsize, # fNevBufSize len(array), # fNevBuf - fKeylen + len(raw_array), # fLast + fKeylen + len(uncompressed_data), # fLast ) ) out.append(b"\x00") # part of the Key (included in fKeylen, at least) - out.append(raw_array) + out.append(compressed_data) sink.write(location, b"".join(out)) sink.set_file_length(self._freesegments.fileheader.end) sink.flush() - return fNbytes, fNbytes, location + return fKeylen + fObjlen, fNbytes, location - def write_jagged_basket(self, sink, branch_name, array, offsets): + def write_jagged_basket(self, sink, branch_name, compression, array, offsets): fClassName = uproot.serialization.string("TBasket") fName = uproot.serialization.string(branch_name) fTitle = uproot.serialization.string(self._name) @@ -1231,22 +1239,26 @@ def write_jagged_basket(self, sink, branch_name, array, offsets): + 1 ) - raw_array = uproot._util.tobytes(array) + # offsets became a *copy* of the Awkward Array's offsets + # when it was converted to big-endian (astype with copy=True) itemsize = array.dtype.itemsize for item in array.shape[1:]: itemsize *= item - - # offsets became a *copy* of the Awkward Array's offsets - # when it was converted to big-endian (astype with copy=True) offsets *= itemsize offsets += fKeylen - fLast = offsets[-1] - offsets[-1] = 0 + + raw_array = uproot._util.tobytes(array) raw_offsets = 
uproot._util.tobytes(offsets) + uncompressed_data = ( + raw_array + _tbasket_offsets_length.pack(len(offsets)) + raw_offsets + ) + compressed_data = uproot.compression.compress(uncompressed_data, compression) - fObjlen = len(raw_array) + 4 + len(raw_offsets) + fLast = offsets[-1] + offsets[-1] = 0 - fNbytes = fKeylen + fObjlen # FIXME: no compression yet + fObjlen = len(uncompressed_data) + fNbytes = fKeylen + len(compressed_data) parent_location = self._directory.key.location # FIXME: is this correct? @@ -1279,15 +1291,13 @@ def write_jagged_basket(self, sink, branch_name, array, offsets): ) out.append(b"\x00") # part of the Key (included in fKeylen, at least) - out.append(raw_array) - out.append(_tbasket_offsets_length.pack(len(offsets))) - out.append(raw_offsets) + out.append(compressed_data) sink.write(location, b"".join(out)) sink.set_file_length(self._freesegments.fileheader.end) sink.flush() - return fNbytes, fNbytes, location + return fKeylen + fObjlen, fNbytes, location _tbasket_offsets_length = struct.Struct(">I") diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 692faf810..5e12bb8d4 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -11,6 +11,10 @@ import os import uuid +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping try: import queue except ImportError: @@ -181,7 +185,12 @@ def compression(self): @compression.setter def compression(self, value): - self._cascading.fileheader.compression = value + if value is None or isinstance(value, uproot.compression.Compression): + self._cascading.fileheader.compression = value + else: + raise TypeError( + "compression must be None or a uproot.compression.Compression object, like uproot.ZLIB(4) or uproot.ZSTD(0)" + ) @property def fSeekFree(self): @@ -1121,5 +1130,96 @@ def __enter__(self): def __exit__(self, exception_type, exception_value, traceback): self._file.sink.__exit__(exception_type, 
exception_value, traceback) + @property + def compression(self): + """ + FIXME: docstring + """ + out = {} + last = None + for datum in self._cascading._branch_data: + if datum["kind"] != "record": + last = out[datum["fName"]] = datum["compression"] + if all(x == last for x in out.values()): + return last + else: + return out + + @compression.setter + def compression(self, value): + if value is None or isinstance(value, uproot.compression.Compression): + for datum in self._cascading._branch_data: + if datum["kind"] != "record": + datum["compression"] = value + + elif ( + isinstance(value, Mapping) + and all( + uproot._util.isstr(k) + and (v is None or isinstance(v, uproot.compression.Compression)) + for k, v in value.items() + ) + and all( + datum["fName"] in value + for datum in self._cascading._branch_data + if datum["kind"] != "record" + ) + and len(value) + == len( + [ + datum + for datum in self._cascading._branch_data + if datum["kind"] != "record" + ] + ) + ): + for datum in self._cascading._branch_data: + if datum["kind"] != "record": + datum["compression"] = value[datum["fName"]] + + else: + raise TypeError( + "compression must be None, a uproot.compression.Compression object, like uproot.ZLIB(4) or uproot.ZSTD(0), or a mapping of branch names to such objects" + ) + + def __getitem__(self, where): + for datum in self._cascading._branch_data: + if datum["kind"] != "record" and datum["fName"] == where: + return WritableBranch(self, datum) + else: + raise uproot.KeyInFileError( + where, + because="no such branch in writable tree", + file_path=self.file_path, + ) + def extend(self, data): + self._cascading.extend(self._file, self._file.sink, data) + + +class WritableBranch(object): + """ + FIXME: docstring + """ + + def __init__(self, tree, datum): + self._tree = tree + self._datum = datum + + def __repr__(self): + return "<WritableBranch {0} in {1} at 0x{2:012x}>".format( + repr(self._datum["fName"]), repr("/" + "/".join(self._tree.path)), id(self) + ) + + @property + def compression(self): + return 
self._datum["compression"] + + @compression.setter + def compression(self, value): + if value is None or isinstance(value, uproot.compression.Compression): + self._datum["compression"] = value + else: + raise TypeError( + "compression must be None or a uproot.compression.Compression object, like uproot.ZLIB(4) or uproot.ZSTD(0)" + ) diff --git a/tests/test_0406-write-a-ttree.py b/tests/test_0406-write-a-ttree.py index e1ba6a0ec..3417cfd05 100644 --- a/tests/test_0406-write-a-ttree.py +++ b/tests/test_0406-write-a-ttree.py @@ -379,10 +379,8 @@ def test_interface(tmp_path): (entries, edges) = np.histogram(branch2) with uproot.recreate(newfile) as fout: - fout["tree"] = {"branch1": branch1, "branch2": branch2} fout["tree"].extend({"branch1": branch1, "branch2": branch2}) - fout["hist"] = (entries, edges) with uproot.open(newfile) as fin: diff --git a/tests/test_0416-writing-compressed-data.py b/tests/test_0416-writing-compressed-data.py index 669c22f0e..279b3dbf2 100644 --- a/tests/test_0416-writing-compressed-data.py +++ b/tests/test_0416-writing-compressed-data.py @@ -79,7 +79,7 @@ def test_histogram_ZLIB(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") SIZE = 2 ** 21 - histogram = (np.random.normal(0, 1, SIZE), np.linspace(0, 1, SIZE + 1)) + histogram = (np.random.randint(0, 10, SIZE), np.linspace(0, 1, SIZE + 1)) last = histogram[0][-1] with uproot.recreate(newfile, compression=uproot.ZLIB(1)) as fout: @@ -104,7 +104,7 @@ def test_histogram_LZMA(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") SIZE = 2 ** 20 - histogram = (np.random.normal(0, 1, SIZE), np.linspace(0, 1, SIZE + 1)) + histogram = (np.random.randint(0, 10, SIZE), np.linspace(0, 1, SIZE + 1)) last = histogram[0][-1] with uproot.recreate(newfile, compression=uproot.LZMA(1)) as fout: @@ -129,7 +129,7 @@ def test_histogram_LZ4(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") SIZE = 2 ** 21 - histogram = (np.random.normal(0, 1, SIZE), np.linspace(0, 1, SIZE + 1)) + 
histogram = (np.random.randint(0, 10, SIZE), np.linspace(0, 1, SIZE + 1)) last = histogram[0][-1] with uproot.recreate(newfile, compression=uproot.LZ4(1)) as fout: @@ -154,7 +154,7 @@ def test_histogram_ZSTD(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") SIZE = 2 ** 21 - histogram = (np.random.normal(0, 1, SIZE), np.linspace(0, 1, SIZE + 1)) + histogram = (np.random.randint(0, 10, SIZE), np.linspace(0, 1, SIZE + 1)) last = histogram[0][-1] with uproot.recreate(newfile, compression=uproot.ZSTD(1)) as fout: @@ -171,3 +171,245 @@ def test_histogram_ZSTD(tmp_path): assert h3.GetNbinsX() == SIZE assert h3.GetBinContent(SIZE) == last f3.Close() + + +def test_flattree_ZLIB(tmp_path): + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(10) * 10 + branch2 = 1.1 * np.arange(10) * 10 + + with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout: + fout["tree"] = {"branch1": branch1, "branch2": branch2} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() * 2 + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() * 2 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() * 2 + assert [x.branch2 for x in t3] == branch2.tolist() * 2 + f3.Close() + + +def test_flattree_LZMA(tmp_path): + pytest.importorskip("lzma") + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(10) * 10 + branch2 = 1.1 * np.arange(10) * 10 + + with uproot.recreate(newfile, compression=uproot.LZMA(5)) as fout: + fout["tree"] = {"branch1": branch1, "branch2": branch2} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() * 2 + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() * 2 + + f3 = 
ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() * 2 + assert [x.branch2 for x in t3] == branch2.tolist() * 2 + f3.Close() + + +def test_flattree_LZ4(tmp_path): + pytest.importorskip("lz4") + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(10) + branch2 = 1.1 * np.arange(10) + + with uproot.recreate(newfile, compression=uproot.LZ4(5)) as fout: + fout["tree"] = {"branch1": branch1, "branch2": branch2} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() * 2 + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() * 2 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() * 2 + assert [x.branch2 for x in t3] == branch2.tolist() * 2 + f3.Close() + + +def test_flattree_ZSTD(tmp_path): + pytest.importorskip("zstandard") + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(10) + branch2 = 1.1 * np.arange(10) + + with uproot.recreate(newfile, compression=uproot.ZSTD(5)) as fout: + fout["tree"] = {"branch1": branch1, "branch2": branch2} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() * 2 + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() * 2 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() * 2 + assert [x.branch2 for x in t3] == branch2.tolist() * 2 + f3.Close() + + +def test_jaggedtree_ZLIB(tmp_path): + ak = pytest.importorskip("awkward") + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = ak.Array([[1, 2, 3], [], [4, 5]] * 10) + branch2 = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]] * 10) + + with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout: + 
fout["tree"] = {"branch1": branch1, "branch2": branch2} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array().tolist() == branch1.tolist() * 2 + assert fin["tree/branch2"].array().tolist() == branch2.tolist() * 2 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [list(x.branch1) for x in t3] == branch1.tolist() * 2 + assert [list(x.branch2) for x in t3] == branch2.tolist() * 2 + f3.Close() + + +def test_jaggedtree_LZMA(tmp_path): + pytest.importorskip("lzma") + ak = pytest.importorskip("awkward") + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = ak.Array([[1, 2, 3], [], [4, 5]] * 10) + branch2 = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]] * 10) + + with uproot.recreate(newfile, compression=uproot.LZMA(5)) as fout: + fout["tree"] = {"branch1": branch1, "branch2": branch2} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array().tolist() == branch1.tolist() * 2 + assert fin["tree/branch2"].array().tolist() == branch2.tolist() * 2 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [list(x.branch1) for x in t3] == branch1.tolist() * 2 + assert [list(x.branch2) for x in t3] == branch2.tolist() * 2 + f3.Close() + + +def test_jaggedtree_LZ4(tmp_path): + pytest.importorskip("lz4") + ak = pytest.importorskip("awkward") + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = ak.Array([[1, 2, 3], [], [4, 5]] * 10) + branch2 = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]] * 10) + + with uproot.recreate(newfile, compression=uproot.LZ4(5)) as fout: + fout["tree"] = {"branch1": branch1, "branch2": branch2} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array().tolist() == branch1.tolist() * 2 + assert fin["tree/branch2"].array().tolist() == branch2.tolist() * 2 + + f3 = ROOT.TFile(newfile) + 
t3 = f3.Get("tree") + assert [list(x.branch1) for x in t3] == branch1.tolist() * 2 + assert [list(x.branch2) for x in t3] == branch2.tolist() * 2 + f3.Close() + + +def test_jaggedtree_ZSTD(tmp_path): + pytest.importorskip("zstandard") + ak = pytest.importorskip("awkward") + + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = ak.Array([[1, 2, 3], [], [4, 5]] * 10) + branch2 = ak.Array([[1.1, 2.2, 3.3], [], [4.4, 5.5]] * 10) + + with uproot.recreate(newfile, compression=uproot.ZSTD(5)) as fout: + fout["tree"] = {"branch1": branch1, "branch2": branch2} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array().tolist() == branch1.tolist() * 2 + assert fin["tree/branch2"].array().tolist() == branch2.tolist() * 2 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [list(x.branch1) for x in t3] == branch1.tolist() * 2 + assert [list(x.branch2) for x in t3] == branch2.tolist() * 2 + f3.Close() + + +def test_multicompression_1(tmp_path): + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(10) * 10 + branch2 = 1.1 * np.arange(10) * 10 + + with uproot.recreate(newfile) as fout: + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"]["branch1"].compression = uproot.ZLIB(5) + fout["tree"]["branch2"].compression = None + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compression == uproot.ZLIB(5) + assert fin["tree/branch2"].compression is None + assert fin["tree/branch1"].compressed_bytes == 116 + assert fin["tree/branch2"].compressed_bytes == 154 + assert fin["tree/branch1"].uncompressed_bytes == 154 + assert fin["tree/branch2"].uncompressed_bytes == 154 + + f3 = ROOT.TFile(newfile) + 
t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() + + +def test_multicompression_2(tmp_path): + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(10) * 10 + branch2 = 1.1 * np.arange(10) * 10 + + with uproot.recreate(newfile) as fout: + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"].compression = {"branch1": uproot.ZLIB(5), "branch2": None} + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compression == uproot.ZLIB(5) + assert fin["tree/branch2"].compression is None + assert fin["tree/branch1"].compressed_bytes == 116 + assert fin["tree/branch2"].compressed_bytes == 154 + assert fin["tree/branch1"].uncompressed_bytes == 154 + assert fin["tree/branch2"].uncompressed_bytes == 154 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() From 7484cee95903454fff98698da97235ad2d52e358 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 23 Aug 2021 16:17:23 -0500 Subject: [PATCH 4/7] Fix documentation. --- src/uproot/behaviors/TTree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/behaviors/TTree.py b/src/uproot/behaviors/TTree.py index 60704d0ad..8031f4487 100644 --- a/src/uproot/behaviors/TTree.py +++ b/src/uproot/behaviors/TTree.py @@ -102,7 +102,7 @@ def uncompressed_bytes(self): The number of uncompressed bytes in all ``TBaskets`` of all ``TBranches`` of this ``TTree``, including all the TKey headers. 
- This information is specified in the ``TTree`` metadata (``fZipBytes``) + This information is specified in the ``TTree`` metadata (``fTotBytes``) and can be determined without reading any additional data. """ return self.member("fTotBytes") From 09f4d56c8138a78ad3bb713a76a6f10da067b16d Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 23 Aug 2021 16:18:00 -0500 Subject: [PATCH 5/7] Fix documentation again. --- src/uproot/behaviors/TTree.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uproot/behaviors/TTree.py b/src/uproot/behaviors/TTree.py index 8031f4487..9b62770e4 100644 --- a/src/uproot/behaviors/TTree.py +++ b/src/uproot/behaviors/TTree.py @@ -111,7 +111,7 @@ def uncompressed_bytes(self): def compression_ratio(self): """ The number of uncompressed bytes divided by the number of compressed - bytes for this ``TBranch``. + bytes for this ``TTree``. See :ref:`uproot.behaviors.TTree.TTree.compressed_bytes` and :ref:`uproot.behaviors.TTree.TTree.uncompressed_bytes`. From 5462c9e88819f4b72360c76603589c293b794ed1 Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 23 Aug 2021 16:25:21 -0500 Subject: [PATCH 6/7] Fix unsafe test comparison (can't be sure we'll have the same zlib everywhere). 
--- tests/test_0416-writing-compressed-data.py | 40 +++++++++++----------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/test_0416-writing-compressed-data.py b/tests/test_0416-writing-compressed-data.py index 279b3dbf2..bc40c8d21 100644 --- a/tests/test_0416-writing-compressed-data.py +++ b/tests/test_0416-writing-compressed-data.py @@ -176,8 +176,8 @@ def test_histogram_ZSTD(tmp_path): def test_flattree_ZLIB(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") - branch1 = np.arange(10) * 10 - branch2 = 1.1 * np.arange(10) * 10 + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout: fout["tree"] = {"branch1": branch1, "branch2": branch2} @@ -199,8 +199,8 @@ def test_flattree_LZMA(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") - branch1 = np.arange(10) * 10 - branch2 = 1.1 * np.arange(10) * 10 + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) with uproot.recreate(newfile, compression=uproot.LZMA(5)) as fout: fout["tree"] = {"branch1": branch1, "branch2": branch2} @@ -222,8 +222,8 @@ def test_flattree_LZ4(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") - branch1 = np.arange(10) - branch2 = 1.1 * np.arange(10) + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) with uproot.recreate(newfile, compression=uproot.LZ4(5)) as fout: fout["tree"] = {"branch1": branch1, "branch2": branch2} @@ -245,8 +245,8 @@ def test_flattree_ZSTD(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") - branch1 = np.arange(10) - branch2 = 1.1 * np.arange(10) + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) with uproot.recreate(newfile, compression=uproot.ZSTD(5)) as fout: fout["tree"] = {"branch1": branch1, "branch2": branch2} @@ -361,8 +361,8 @@ def test_jaggedtree_ZSTD(tmp_path): def test_multicompression_1(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") - branch1 = np.arange(10) * 10 - branch2 = 1.1 * np.arange(10) * 10 
+ branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) with uproot.recreate(newfile) as fout: fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) @@ -375,10 +375,10 @@ def test_multicompression_1(tmp_path): assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() assert fin["tree/branch1"].compression == uproot.ZLIB(5) assert fin["tree/branch2"].compression is None - assert fin["tree/branch1"].compressed_bytes == 116 - assert fin["tree/branch2"].compressed_bytes == 154 - assert fin["tree/branch1"].uncompressed_bytes == 154 - assert fin["tree/branch2"].uncompressed_bytes == 154 + assert fin["tree/branch1"].compressed_bytes < 874 + assert fin["tree/branch2"].compressed_bytes == 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 f3 = ROOT.TFile(newfile) t3 = f3.Get("tree") @@ -390,8 +390,8 @@ def test_multicompression_1(tmp_path): def test_multicompression_2(tmp_path): newfile = os.path.join(tmp_path, "newfile.root") - branch1 = np.arange(10) * 10 - branch2 = 1.1 * np.arange(10) * 10 + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) with uproot.recreate(newfile) as fout: fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) @@ -403,10 +403,10 @@ def test_multicompression_2(tmp_path): assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() assert fin["tree/branch1"].compression == uproot.ZLIB(5) assert fin["tree/branch2"].compression is None - assert fin["tree/branch1"].compressed_bytes == 116 - assert fin["tree/branch2"].compressed_bytes == 154 - assert fin["tree/branch1"].uncompressed_bytes == 154 - assert fin["tree/branch2"].uncompressed_bytes == 154 + assert fin["tree/branch1"].compressed_bytes < 874 + assert fin["tree/branch2"].compressed_bytes == 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 f3 = ROOT.TFile(newfile) t3 = f3.Get("tree") From 
51a82811aec35c28c33a91af241c735e0038364c Mon Sep 17 00:00:00 2001 From: Jim Pivarski Date: Mon, 23 Aug 2021 16:40:57 -0500 Subject: [PATCH 7/7] Interface for assigning compression level. --- src/uproot/writing/_cascade.py | 12 +--- src/uproot/writing/writable.py | 10 +++ tests/test_0416-writing-compressed-data.py | 84 ++++++++++++++++++++++ 3 files changed, 97 insertions(+), 9 deletions(-) diff --git a/src/uproot/writing/_cascade.py b/src/uproot/writing/_cascade.py index 3011a72af..a467543dc 100644 --- a/src/uproot/writing/_cascade.py +++ b/src/uproot/writing/_cascade.py @@ -1899,15 +1899,9 @@ def compression(self): @compression.setter def compression(self, value): - if self._compression is None and value is None: - pass - elif ( - self._compression is None - or self.value is None - or self._compression.code != value.code - ): + if self._compression != value: self._file_dirty = True - self._compression = value + self._compression = value @property def info_location(self): @@ -1937,7 +1931,7 @@ def uuid(self): def uuid(self, value): if self._uuid != value: self._file_dirty = True - self._uuid = value + self._uuid = value @property def version(self): diff --git a/src/uproot/writing/writable.py b/src/uproot/writing/writable.py index 5e12bb8d4..c0aff669b 100644 --- a/src/uproot/writing/writable.py +++ b/src/uproot/writing/writable.py @@ -187,6 +187,8 @@ def compression(self): def compression(self, value): if value is None or isinstance(value, uproot.compression.Compression): self._cascading.fileheader.compression = value + self._cascading.fileheader.write(self._sink) + self._sink.flush() else: raise TypeError( "compression must be None or a uproot.compression.Compression object, like uproot.ZLIB(4) or uproot.ZSTD(0)" @@ -309,6 +311,14 @@ def __enter__(self): def __exit__(self, exception_type, exception_value, traceback): self._file.sink.__exit__(exception_type, exception_value, traceback) + @property + def compression(self): + return self._file.compression + + 
@compression.setter + def compression(self, value): + self._file.compression = value + def __len__(self): return self._cascading.data.num_keys + sum( len(self._subdir(x)) for x in self._cascading.data.dir_names diff --git a/tests/test_0416-writing-compressed-data.py b/tests/test_0416-writing-compressed-data.py index bc40c8d21..481dffa70 100644 --- a/tests/test_0416-writing-compressed-data.py +++ b/tests/test_0416-writing-compressed-data.py @@ -413,3 +413,87 @@ def test_multicompression_2(tmp_path): assert [x.branch1 for x in t3] == branch1.tolist() assert [x.branch2 for x in t3] == branch2.tolist() f3.Close() + + +def test_multicompression_3(tmp_path): + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) + + with uproot.recreate(newfile) as fout: + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"].compression = {"branch1": uproot.ZLIB(5), "branch2": None} + fout["tree"].compression = uproot.ZLIB(5) + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compression == uproot.ZLIB(5) + assert fin["tree/branch2"].compression == uproot.ZLIB(5) + assert fin["tree/branch1"].compressed_bytes < 874 + assert fin["tree/branch2"].compressed_bytes < 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() + + +def test_multicompression_4(tmp_path): + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) + + with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout: 
+ fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compression == uproot.ZLIB(5) + assert fin["tree/branch2"].compression == uproot.ZLIB(5) + assert fin["tree/branch1"].compressed_bytes < 874 + assert fin["tree/branch2"].compressed_bytes < 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close() + + +def test_multicompression_5(tmp_path): + newfile = os.path.join(tmp_path, "newfile.root") + + branch1 = np.arange(100) + branch2 = 1.1 * np.arange(100) + + with uproot.recreate(newfile, compression=uproot.ZLIB(5)) as fout: + fout.compression = None + fout.mktree("tree", {"branch1": branch1.dtype, "branch2": branch2.dtype}) + fout["tree"].extend({"branch1": branch1, "branch2": branch2}) + + with uproot.open(newfile) as fin: + assert fin["tree/branch1"].array(library="np").tolist() == branch1.tolist() + assert fin["tree/branch2"].array(library="np").tolist() == branch2.tolist() + assert fin["tree/branch1"].compression is None + assert fin["tree/branch2"].compression is None + assert fin["tree/branch1"].compressed_bytes == 874 + assert fin["tree/branch2"].compressed_bytes == 874 + assert fin["tree/branch1"].uncompressed_bytes == 874 + assert fin["tree/branch2"].uncompressed_bytes == 874 + + f3 = ROOT.TFile(newfile) + t3 = f3.Get("tree") + assert [x.branch1 for x in t3] == branch1.tolist() + assert [x.branch2 for x in t3] == branch2.tolist() + f3.Close()