Skip to content

Commit

Permalink
clean up RNTuple and add precompile (#303)
Browse files Browse the repository at this point in the history
  • Loading branch information
Moelf authored Feb 14, 2024
1 parent 785b453 commit 98a84e0
Show file tree
Hide file tree
Showing 6 changed files with 43 additions and 77 deletions.
2 changes: 2 additions & 0 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ Memoization = "6fafb56a-5788-4b4e-91ca-c0cea6611c73"
Mixers = "2a8e4939-dab8-5edc-8f64-72a8776f13de"
Mmap = "a63ad114-7e13-5084-954f-fe012c677804"
Parameters = "d96e819e-fc66-5662-9728-84c9c7592b0a"
PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a"
PrettyTables = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d"
SentinelArrays = "91c51154-3ec4-41a3-a24f-3f23e20d615c"
StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
Expand Down Expand Up @@ -50,6 +51,7 @@ Mixers = "^0.1"
Mmap = "^1.0"
Parameters = "^0.12"
Pkg = "^1.0"
PrecompileTools = "^1.2.0"
PrettyTables = "^2.1"
SHA = "^1.0"
SentinelArrays = "^1.3"
Expand Down
34 changes: 11 additions & 23 deletions src/RNTuple/bootstrap.jl
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ macro SimpleStruct(ex)
end

struct RNTupleEnvelope{T}
id::UInt16
type_id::UInt16
envelope_length::UInt64
payload::T
checksum::UInt64
Expand All @@ -157,47 +157,35 @@ function _rntuple_read(io, ::Type{RNTupleEnvelope{T}}) where T
seek(io, 0)
id_length = read(io, UInt64)
# 16/48 split
id = UInt16(0xffff & id_length)
type_id = UInt16(0xffff & id_length)
payload_length = id_length >> 16
payload = _rntuple_read(io, T)
_checksum = xxh3_64(bytes[begin:end-8])
@assert _checksum == reinterpret(UInt64, @view bytes[end-7:end])[1]
return RNTupleEnvelope(id, payload_length, payload, _checksum)
@assert _checksum == reinterpret(UInt64, @view bytes[end-7:end])[1] "Envelope checksum doesn't match"
return RNTupleEnvelope(type_id, payload_length, payload, _checksum)
end

struct RNTupleFrame{T} end
# Wrapper for a decoded record frame: the `RNTupleFrame{T}` reader below wraps the value
# it decoded for `T` in this struct, and callers unwrap it through `.payload`.
struct RNTupleFrame{T}
payload::T
end
function _rntuple_read(io, ::Type{RNTupleFrame{T}}) where T
pos = position(io)
Size = read(io, Int64)
end_pos = pos + Size
@assert Size >= 0
res = _rntuple_read(io, T)
seek(io, end_pos)
return res
end

struct RNTupleListFrame{T} end
_rntuple_read(io, ::Type{Vector{T}}) where T = _rntuple_read(io, RNTupleListFrame{T})
function _rntuple_read(io, ::Type{RNTupleListFrame{T}}) where T
pos = position(io)
Size = read(io, Int64)
@assert Size < 0
NumItems = read(io, Int32)
end_pos = pos - Size
res = [_rntuple_read(io, RNTupleFrame{T}) for _=1:NumItems]
seek(io, end_pos)
return res
return RNTupleFrame(res)
end

# without the inner Frame for each item
struct RNTupleListNoFrame{T} end
function _rntuple_read(io, ::Type{RNTupleListNoFrame{T}}) where T
# const RNTupleListFrame{T} = Vector{T}
function _rntuple_read(io, ::Type{Vector{T}}) where T
pos = position(io)
Size = read(io, Int64)
@assert Size < 0
NumItems = read(io, Int32)
end_pos = pos - Size
res = [_rntuple_read(io, T) for _=1:NumItems]
res = T[_rntuple_read(io, RNTupleFrame{T}).payload for _=1:NumItems]
seek(io, end_pos)
return res
end
38 changes: 4 additions & 34 deletions src/RNTuple/constants.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,16 @@ const rntuple_col_type_dict = (
Switch, # Switch
UInt8, # byte in blob
UInt8, # char
Bool, # it's actually `Bit` in ROOT, there's no byte in RNTuple spec
Bool, # it's actually `Bit` in ROOT, there's no byte bool in RNTuple spec
Float64,
Float32,
Float16,
UInt64,
UInt32,
UInt16,
UInt8,
Index64, # split delta encoding
Index32, # split
Index64, # split delta
Index32, # split delta
Float64, # split
Float32, # split
Float16, # split
Expand All @@ -35,37 +35,7 @@ const rntuple_col_type_dict = (
Int32, # split + Zig-Zag encoding
Int16, # split + Zig-Zag encoding
)
const rntuple_col_nbits_dict = (
64,
32,
96, # Switch
8,
8, # char
1, # it's actually `Bit` in ROOT, there's no byte in RNTuple spec
64,
32,
16,
64,
32,
16,
8,
64, # SplitIndex64 delta encoding
32, # SplitIndex32 delta encoding
64, # split
32, # split
16, # split
64, # split
32, # split
16, # split

64,
32,
16,
8,
64, # split + Zig-Zag encoding
32, # split + Zig-Zag encoding
16, # split + Zig-Zag encoding
)
# Bit width of every column type, index-aligned with `rntuple_col_type_dict`.
# Entry 6 is pinned to 1 bit instead of using `sizeof`; every other entry is
# `8 * sizeof(type)`.
const rntuple_col_nbits_dict = Tuple(
    idx == 6 ? 1 : 8 * sizeof(coltype)
    for (idx, coltype) in enumerate(rntuple_col_type_dict)
)

const rntuple_role_leaf = 0x0000
const rntuple_role_vector = 0x0001
Expand Down
31 changes: 21 additions & 10 deletions src/RNTuple/footer.jl
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ column since `pagedesc` only contains `num_elements` information.
Boolean values are always stored as single bits in RNTuple, so `nbits = 1`.
"""
function read_pagedesc(io, pagedescs::Vector{PageDescription}, nbits::Integer; split=false)
function read_pagedesc(io, pagedescs::AbstractVector{PageDescription}, nbits::Integer; split=false)
output_L = div(sum((p.num_elements for p in pagedescs))*nbits, 8, RoundUp)
res = Vector{UInt8}(undef, output_L)

Expand Down Expand Up @@ -143,15 +143,26 @@ end
number_of_entries::Int64
end

struct PageLink
header_checksum::UInt64
cluster_summaries::Vector{ClusterSummary}
nested_page_locations::Vector{Vector{Vector{PageDescription}}}
# List payload whose items carry no inner frame of their own. It wraps a plain
# `Vector{T}` and forwards the minimal `AbstractVector` interface to it.
struct RNTupleListNoFrame{T} <: AbstractVector{T}
    payload::Vector{T}
end

# Shape of the wrapped vector.
function Base.size(list::RNTupleListNoFrame)
    return size(list.payload)
end

# Indexing is delegated verbatim to the wrapped vector.
function Base.getindex(list::RNTupleListNoFrame, idx)
    return list.payload[idx]
end

# Store `value` at `idx` in the wrapped vector and return `value`.
function Base.setindex!(list::RNTupleListNoFrame, value, idx)
    list.payload[idx] = value
    return value
end
# Read a list frame whose items are NOT wrapped in an inner frame of their own.
# The leading Int64 size must be negative (asserted below) and is followed by an Int32
# item count; the frame ends at `start - size` bytes.
function _rntuple_read(io, ::Type{RNTupleListNoFrame{T}}) where T
    frame_start = position(io)
    frame_size = read(io, Int64)
    @assert frame_size < 0
    item_count = read(io, Int32)
    items = T[_rntuple_read(io, T) for _ in 1:item_count]
    # Jump to the end of the frame regardless of how many bytes the items consumed.
    seek(io, frame_start - frame_size)
    return RNTupleListNoFrame(items)
end

function _rntuple_read(io, ::Type{PageLink})
header_checksum = read(io, UInt64)
cluster_summaries = _rntuple_read(io, Vector{ClusterSummary})
nested_page_locations = _rntuple_read(io, RNTupleListNoFrame{RNTupleListNoFrame{RNTupleListNoFrame{PageDescription}}})
return PageLink(header_checksum, cluster_summaries, nested_page_locations)
# Page-link payload: a header checksum, the list of cluster summaries, and a
# three-level nest of frameless lists of page descriptions.
# NOTE(review): `@SimpleStruct` (defined in bootstrap.jl) presumably generates the
# `_rntuple_read` method for this struct — confirm against the macro.
# NOTE(review): the nesting is presumably cluster -> column -> page — confirm against
# the RNTuple spec.
@SimpleStruct struct PageLink
header_checksum::UInt64
cluster_summaries::Vector{ClusterSummary}
nested_page_locations::RNTupleListNoFrame{RNTupleListNoFrame{RNTupleListNoFrame{PageDescription}}}
end
1 change: 1 addition & 0 deletions src/RNTuple/header.jl
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ end
type_ver_from::UInt32
type_ver_to::UInt32
content_identifier::UInt32
type_name::String
end

@SimpleStruct struct RNTupleHeader
Expand Down
14 changes: 4 additions & 10 deletions src/UnROOT.jl
Original file line number Diff line number Diff line change
Expand Up @@ -59,26 +59,20 @@ include("RNTuple/highlevel.jl")
include("RNTuple/fieldcolumn_reading.jl")
include("RNTuple/displays.jl")

# let f1 = UnROOT.samplefile("RNTuple/test_ntuple_stl_containers.root")
# show(devnull, f1["ntuple"])
# df = LazyTree(f1, "ntuple")
# collect(df[1])
# show(devnull, df)
# show(devnull, df[1])
# end
#

# Return `Threads.maxthreadid()` on Julia >= 1.9 and `Threads.nthreads()` on older
# versions. `@static` picks the branch at macro-expansion time, so only the method
# valid for the running Julia version is ever defined.
@static if VERSION < v"1.9"
    _maxthreadid() = Threads.nthreads()
else
    _maxthreadid() = Threads.maxthreadid()
end

using PrecompileTools: @compile_workload

if VERSION >= v"1.9"
let
@compile_workload begin
t = LazyTree(UnROOT.samplefile("tree_with_jagged_array.root"), "t1")
show(devnull, t)
show(devnull, t[1])
UnROOT.samplefile("RNTuple/test_ntuple_stl_containers.root")["ntuple"]
end
end

Expand Down

0 comments on commit 98a84e0

Please sign in to comment.