expose some utilities for working with recordings.msgpack.zst directly (#39)
beacon-biosignals · Apr 13, 2020 · fbc243a · fbc243a · jrevels · Apr 13, 2020
1 parent 07b4e82
commit fbc243a
Show file tree

Hide file tree

Showing 6 changed files with 58 additions and 22 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "Onda"
 uuid = "e853f5be-6863-11e9-128d-476edb89bfb5"
 authors = ["Beacon Biosignals, Inc."]
-version = "0.8.2"
+version = "0.8.3"
 
 [deps]
 CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"

diff --git a/docs/src/index.md b/docs/src/index.md
@@ -33,6 +33,8 @@ Annotation
 Recording
 set_span!
 annotate!
+read_recordings_msgpack_zst
+write_recordings_msgpack_zst
 ```
 
 ## `Samples`

diff --git a/src/Onda.jl b/src/Onda.jl
@@ -86,7 +86,7 @@ export AbstractTimeSpan, TimeSpan, contains, overlaps, shortest_timespan_contain
 
 include("recordings.jl")
 export Recording, Signal, validate_signal, signal_from_template, Annotation, annotate!,
-       span, sizeof_samples
+       span, sizeof_samples, read_recordings_msgpack_zst, write_recordings_msgpack_zst
 
 include("serialization.jl")
 export AbstractLPCMSerializer, serializer, deserialize_lpcm, serialize_lpcm,

diff --git a/src/dataset.jl b/src/dataset.jl
@@ -2,6 +2,8 @@
 ##### `Dataset`
 #####
 
+const RECORDINGS_FILE_NAME = "recordings.msgpack.zst"
+
 struct Dataset
     path::String
     header::Header
@@ -28,12 +30,12 @@ function Dataset(path; create::Bool=false)
         end
         initial_header = Header(ONDA_FORMAT_VERSION, true)
         initial_recordings = Dict{UUID,Recording}()
-        write_recordings_file(path, initial_header, initial_recordings)
+        write_recordings_msgpack_zst(joinpath(path, RECORDINGS_FILE_NAME), initial_header, initial_recordings)
     elseif !isdir(path)
         throw(ArgumentError("$path is not a valid Onda dataset"))
     end
     !isdir(samples_path) && mkdir(samples_path)
-    header, recordings = read_recordings_file(path)
+    header, recordings = read_recordings_msgpack_zst(joinpath(path, RECORDINGS_FILE_NAME))
     return Dataset(path, header, recordings)
 end
 
@@ -44,7 +46,8 @@ Overwrite `joinpath(dataset.path, "recordings.msgpack.zst")` with the contents
 of `dataset.recordings`.
 """
 function save_recordings_file(dataset::Dataset)
-    return write_recordings_file(dataset.path, dataset.header, dataset.recordings)
+    file_path = joinpath(dataset.path, RECORDINGS_FILE_NAME)
+    return write_recordings_msgpack_zst(file_path, dataset.header, dataset.recordings)
 end
 
 #####

diff --git a/src/recordings.jl b/src/recordings.jl
@@ -325,15 +325,26 @@ end
 
 MsgPack.msgpack_type(::Type{Header}) = MsgPack.StructType()
 
-function read_recordings_file(path)
-    file_path = joinpath(path, "recordings.msgpack.zst")
-    bytes = zstd_decompress(read(file_path))
-    io = IOBuffer(bytes)
-    # `0x92` is the MessagePack byte prefix for 2-element array
-    read(io, UInt8) == 0x92 || error("recordings.msgpack.zst has bad byte prefix")
+"""
+    read_recordings_msgpack_zst(file_path::AbstractString)
+
+Return `read_recordings_msgpack_zst(read(file_path))`.
+"""
+read_recordings_msgpack_zst(file_path::AbstractString) = read_recordings_msgpack_zst(read(file_path))
+
+"""
+    read_recordings_msgpack_zst(compressed_bytes::Vector{UInt8})
+
+Return the `(header::Header, recordings::Dict{UUID,Recording})` yielded from deserializing `compressed_bytes`,
+which is assumed to be in zstd-compressed MsgPack format and comply with the Onda format's specification of
+the contents of `recordings.msgpack.zst`.
+"""
+function read_recordings_msgpack_zst(compressed_bytes::Vector{UInt8})
+    io = IOBuffer(zstd_decompress(compressed_bytes))
+    read(io, UInt8) == 0x92 || error("Onda recordings file has unexpected first byte; expected 0x92 for a 2-element MsgPack array")
     header = MsgPack.unpack(io, Header)
     if !is_supported_onda_format_version(header.onda_format_version)
-        @warn("attempting to load `Dataset` with unsupported Onda version",
+        @warn("attempting to load `Dataset` recordings file with unsupported Onda version",
               supported=ONDA_FORMAT_VERSION, attempting=header.onda_format_version)
         @warn("consider upgrading old datasets via `Onda.upgrade_onda_format_from_v0_2_to_v0_3!`")
     end
@@ -342,14 +353,31 @@ function read_recordings_file(path)
     return header, recordings
 end
 
-function write_recordings_file(path, header::Header, recordings::Dict{UUID,Recording})
-    file_path = joinpath(path, "recordings.msgpack.zst")
-    backup_file_path = joinpath(path, "_recordings.msgpack.zst.backup")
+"""
+    write_recordings_msgpack_zst(file_path::AbstractString, header::Header, recordings::Dict{UUID,Recording})
+
+Overwrite `file_path` with `write_recordings_msgpack_zst(header, recordings)`.
+
+If `file_path` already exists, this function creates a backup at `\$file_path.backup` before overwriting `file_path`;
+this backup is automatically deleted after the overwrite succeeds.
+"""
+function write_recordings_msgpack_zst(file_path::AbstractString, header::Header, recordings::Dict{UUID,Recording})
+    backup_file_path = string(file_path, ".backup")
     isfile(file_path) && mv(file_path, backup_file_path)
-    io = IOBuffer()
-    MsgPack.pack(io, [header, recordings])
-    bytes = zstd_compress(resize!(io.data, io.size))
-    write(file_path, bytes)
+    write(file_path, write_recordings_msgpack_zst(header, recordings))
     rm(backup_file_path; force=true)
     return nothing
 end
+
+"""
+    write_recordings_msgpack_zst(header::Header, recordings::Dict{UUID,Recording})
+
+Return the `Vector{UInt8}` that results from serializing `(header::Header, recordings::Dict{UUID,Recording})` to zstd-compressed MsgPack format.
+"""
+function write_recordings_msgpack_zst(header::Header, recordings::Dict{UUID,Recording})
+    # we do this `resize!` maneuver instead of `MsgPack.pack([header, recordings])` (which
+    # calls `take!`) so that we sidestep https://github.com/JuliaLang/julia/issues/27741
+    io = IOBuffer()
+    MsgPack.pack(io, [header, recordings])
+    return zstd_compress(resize!(io.data, io.size))
+end
diff --git a/test/dataset.jl b/test/dataset.jl
@@ -84,6 +84,9 @@ using Test, Onda, Dates, MsgPack
             store!(dataset, uuid, name, s)
         end
         save_recordings_file(dataset)
+        @test read_recordings_msgpack_zst(joinpath(dataset.path, "recordings.msgpack.zst")) ==
+              read_recordings_msgpack_zst(read(joinpath(dataset.path, "recordings.msgpack.zst")))
+        @test write_recordings_msgpack_zst(dataset.header, dataset.recordings) == read(joinpath(dataset.path, "recordings.msgpack.zst"))
 
         # read back in the test dataset, add some annotations
         old_dataset = dataset
@@ -152,9 +155,9 @@ using Test, Onda, Dates, MsgPack
 
         # read back everything, but without assuming an order on the metadata
         dataset = Dataset(joinpath(root, "test"))
-        Onda.write_recordings_file(dataset.path,
-                                   Onda.Header(dataset.header.onda_format_version, false),
-                                   dataset.recordings)
+        Onda.write_recordings_msgpack_zst(joinpath(dataset.path, "recordings.msgpack.zst"),
+                                          Onda.Header(dataset.header.onda_format_version, false),
+                                          dataset.recordings)
         dataset = Dataset(joinpath(root, "test"))
         @test Dict(old_uuid => old_recording) == dataset.recordings
         delete!(dataset, old_uuid)