From 2f235f4aedad45ac5accddd2ef84d2df4eec55f7 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Mon, 1 Mar 2021 04:30:59 -0600 Subject: [PATCH 1/7] Major rewrite for correctness, performance This package was started in 2015 (back in the Julia 0.3 or 0.4 days), and a lot has changed since then. It's never really gotten a serious freshening. This rewrite has several goals. Improving the robustness of package/Module identification --------------------------------------------------------- In modern versions of Julia, the package manager uses name/UUID combinations to identify packages. This is far more robust and flexible than older strategies for specifying packages. FileIO doesn't do this: it identifies modules by their name only. We should adopt the new approach here: going forward (once the deprecation period has passed and we release FileIO v2), all not-yet-loaded modules must be specified by name/UUID. There are some cases--often used in tests or transiently during development of a new I/O package--where the handler *isn't* a registered package, and so there's no UUID available. Currently we try to look up the module based on a `name::Symbol`. It used to be that most modules were loaded into `Main`, then Julia switched to `Base.__toplevel__`; currently we search both, since modules defined in the REPL or tests might still live in `Main`. Of course, even back in the old days, sub-modules could not be found in `Main`, so the current system can't handle submodules. To address the need for specifying modules that aren't packages, while improving both correctness and flexibility, this PR allows you to specify it by (duh) the module itself rather than the name of the module. The combination of using either the module itself or a name/UUID combination means that we can replace a lot of brittle & slow code. When we have the module, we're done; when we have a name/UUID combination, we just call `Base.require` to get the module. 
It even checks for us whether the module is already loaded. End of story. To help transition existing users to the new system, this has "depwarn"-code to look for the module based on its name. It searches: 1. the currently-loaded modules 2. `Main` 3. The user's current `Pkg` environment One key (breaking) difference is that this lookup is now done during `add_format` rather than when the user tries to `load` or `save` a file. This is obviously better for runtime efficiency, but it does change the point in the code where an error occurs. One of the relatively few changes to the tests addresses this change. **Summary**: the new system is strictly more flexible than the old one, since we could never previously support sub-modules. It is also strictly more correct since the registry now specifies precisely what it means by `ImageIO`. There is depwarn-code to help existing users transition, and the only known breakages concern the specific point in the code from which an error would be thrown. Improving performance and reducing latency with better inferrability -------------------------------------------------------------------- In the original design of this package, `load` and `save` were designed to be specialized by packages. To allow format-specific dispatch, we encoded the file format into the type system using types like `DataFormat{:PNG}`. However, at a certain point we switched to calling module-specific unexported `load` and `save` methods. As a consequence, we don't really need to encode the format in the type system, we can just use a runtime value. Indeed, the downside of using the type system is that having each format be a separate type makes it impossible to infer types. This hurts the runtime performance, increases latency due to unnecessary method specialization by the compiler, and increases the risk of invalidation. However, one way in which we may *under*-specialize is for the filename. An earlier change added support for filename types other than `String`, such as those defined in `FilePathsBase`.
That's a nice change, but this package does quite a lot of manipulation based on file name, and having the type be non-inferrable has some downsides. Finally, several of the container types have historically been poorly-specified, e.g., `const magic_list = Vector{Pair}()`. This rewrite tries to straddle two goals: improving internal inferrability while maintaining backwards compatibility. The strategy taken is to try to wait until the last possible moment to construct non-inferrable objects---to wait until the results are reported back to the caller. In this rewrite, the data format is encoded internally just as a `Symbol`, and the file is passed around as a separate object. This prevents one from needing to specialize on the data format while preserving inferrability for the file. There are a couple of minor changes to internal types, and this forced a couple of changes to the tests. Most significantly, `File{fmt}` is no longer a concrete type, because `File` got a second type-parameter to encode the filename type. To prevent inference failures due to varying-length tuples, this also transitions all magic bytes from `NTuple{N,UInt8}` to `Vector{UInt8}`. As a case study, with the existing FileIO release, I get ~50us to load a 10x10 RGB png file. With this version, it's ~25us. It's remarkable that inference can compete with I/O as a source of slowness, but there you have it. 
--- Project.toml | 3 +- src/FileIO.jl | 13 +- src/deprecated.jl | 78 ++++++++++++ src/error_handling.jl | 6 + src/loadsave.jl | 267 +++++++++++++++++------------------------ src/query.jl | 247 ++++++++++++++++++++++---------------- src/registry.jl | 216 +++++++++++++++++---------------- src/registry_setup.jl | 153 ++++++++++++----------- src/types.jl | 43 ++++--- test/error_handling.jl | 8 +- test/loadsave.jl | 25 +++- test/query.jl | 48 ++++---- test/runtests.jl | 1 + 13 files changed, 626 insertions(+), 482 deletions(-) create mode 100644 src/deprecated.jl diff --git a/Project.toml b/Project.toml index def75096..26e1a0d4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,9 +1,10 @@ name = "FileIO" uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -version = "1.5.0" +version = "1.6.0" [deps] Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] julia = "0.7, 1" diff --git a/src/FileIO.jl b/src/FileIO.jl index 4837d1bd..6e726e5d 100644 --- a/src/FileIO.jl +++ b/src/FileIO.jl @@ -23,8 +23,9 @@ export DataFormat, metadata import Base.showerror -using Base: RefValue +using Base: RefValue, PkgId using Pkg +using UUIDs include("types.jl") include("registry_setup.jl") @@ -62,9 +63,11 @@ include("registry.jl") """ FileIO -if VERSION >= v"1.4.2" # https://github.com/JuliaLang/julia/pull/35378 - include("precompile.jl") - _precompile_() -end +# if VERSION >= v"1.4.2" # https://github.com/JuliaLang/julia/pull/35378 +# include("precompile.jl") +# _precompile_() +# end + +include("deprecated.jl") end diff --git a/src/deprecated.jl b/src/deprecated.jl new file mode 100644 index 00000000..06937dc5 --- /dev/null +++ b/src/deprecated.jl @@ -0,0 +1,78 @@ +# Deprecations added in 1.5.0, March 2021 + +function File(fmt::Type{DataFormat{sym}}, filename) where {sym} + Base.depwarn("`File(format\"$sym\", filename)` is deprecated, please use `File{format\"$sym\"}(filename)` instead.", :File) + return File{fmt}(filename) +end +function 
Stream(fmt::Type{DataFormat{sym}}, args...) where {sym} + Base.depwarn("`Stream(format\"$sym\", filename)` is deprecated, please use `Stream{format\"$sym\"}(filename)` instead.", :Stream) + return Stream{fmt}(args...) +end + +# These aren't used here, but old versions of ImageIO expect them + +function _findmod(f::Symbol) + Base.depwarn("_findmod is deprecated and will be removed. Use `Base.require(::Base.PkgId)` instead.", :_findmod) + for (u,v) in Base.loaded_modules + (Symbol(v) == f) && return u + end + nothing +end +function topimport(modname) + Base.depwarn("topimport is deprecated and will be removed. Use `Base.require(::Base.PkgId)` instead.", :topimport) + @eval Base.__toplevel__ import $modname + u = _findmod(modname) + @eval $modname = Base.loaded_modules[$u] +end + +# Legacy add_loader/add_saver +for add_ in (:add_loader, :add_saver) + @eval begin + function $add_(fmt, pkg) + # TODO: delete this method in FileIO v2 + sym = isa(fmt, Symbol) ? fmt : formatname(fmt)::Symbol + Base.depwarn(string($add_) * "(fmt, pkg::$(typeof(pkg))) is deprecated, supply `pkg` as a Module or `name=>uuid`", Symbol($add_)) + pkg === :MimeWriter && return $add_(sym, MimeWriter) + # Try to look it up in the caller's environment + pkgname = string(pkg) + id = Base.identify_package(pkgname) + if id === nothing + # See if it's in Main + pkgsym = Symbol(pkg) + if isdefined(Main, pkgsym) + id = getfield(Main, pkgsym) + if !isa(id, Module) + id = nothing + end + end + if id === nothing + # Look it up in the registries. The tricky part here is supporting different Julia versions + ctx = Pkg.API.Context() + uuids = UUID[] + @static if Base.VERSION >= v"1.2" + if hasfield(typeof(ctx), :registries) + for reg in ctx.registries + append!(uuids, Pkg.Registry.uuids_from_name(reg, pkgname)) + end + else + ctx = Pkg.API.Context!(ctx) + if isdefined(Pkg.Types, :find_registered!) 
&& hasmethod(Pkg.Types.find_registered!, (typeof(ctx.env), Vector{String})) + Pkg.Types.find_registered!(ctx.env, [pkgname]) + elseif isdefined(Pkg.Types, :find_registered!) && hasmethod(Pkg.Types.find_registered!, (typeof(ctx), Vector{String})) + Pkg.Types.find_registered!(ctx, [pkgname]) + end + append!(uuids, get(ctx.env.uuids, pkgname, UUID[])) + end + else + Pkg.Types.find_registered!(ctx.env) + append!(uuids, get(ctx.env.uuids, pkgname, UUID[])) + end + isempty(uuids) && throw(ArgumentError("no UUID found for $pkg")) + length(uuids) == 1 || throw(ArgumentError("multiple UUIDs found for $pkg")) + id = PkgId(uuids[1], pkgname) + end + end + $add_(sym, id) + end + end +end \ No newline at end of file diff --git a/src/error_handling.jl b/src/error_handling.jl index 572e8676..4b83aa81 100644 --- a/src/error_handling.jl +++ b/src/error_handling.jl @@ -54,6 +54,12 @@ end handle_current_error(e::NotInstalledError) = @warn string("lib ", e.library, " not installed, trying next library") +struct SpecError <: Exception + mod::Module + call::Symbol +end +Base.showerror(io::IO, e::SpecError) = print(io, e.mod, " is missing $(e.call) and fileio_$(e.call)") + """ Handles a list of thrown errors after no IO library was found working """ diff --git a/src/loadsave.jl b/src/loadsave.jl index 2e480107..2c0e3425 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -1,64 +1,29 @@ -const sym2loader = Dict{Symbol,Vector{Symbol}}() -const sym2saver = Dict{Symbol,Vector{Symbol}}() -const load_locker = Base.ReentrantLock() - -is_installed(pkg::Symbol) = get(Pkg.installed(), string(pkg), nothing) != nothing - -function _findmod(f::Symbol) - for (u,v) in Base.loaded_modules - (Symbol(v) == f) && return u - end - nothing -end -function topimport(modname) - @eval Base.__toplevel__ import $modname - u = _findmod(modname) - @eval $modname = Base.loaded_modules[$u] -end - -function checked_import(pkg::Symbol) - lock(load_locker) do - # kludge for test suite - if isdefined(Main, pkg) - m1 = 
getfield(Main, pkg) - isa(m1, Module) && return m1 - end - if isdefined(FileIO, pkg) - m1 = getfield(FileIO, pkg) - isa(m1, Module) && return m1 - end - m = _findmod(pkg) - m == nothing || return Base.loaded_modules[m] - topimport(pkg) - return Base.loaded_modules[_findmod(pkg)] - end -end - -applicable_error(applicable, sym) = error("No $applicable found for $sym") +const ActionSource = Union{PkgId,Module} +const sym2loader = Dict{Symbol,Vector{ActionSource}}() +const sym2saver = Dict{Symbol,Vector{ActionSource}}() for (applicable_, add_, dict_) in ( (:applicable_loaders, :add_loader, :sym2loader), (:applicable_savers, :add_saver, :sym2saver)) @eval begin - function $applicable_(@nospecialize(fmt::Union{Type{<:DataFormat}, Formatted})) - sym = formatname(fmt) - if haskey($dict_, sym) - return $dict_[sym] - end - Base.invokelatest(applicable_error, $applicable_, sym) + function $applicable_(sym::Symbol) + ret = get($dict_, sym, nothing) + ret === nothing && error(string("No ", $applicable_, " found for ", sym)) + return ret end - function $add_(@nospecialize(fmt::Type{<:DataFormat}), pkg::Symbol) - sym = formatname(fmt) - list = get($dict_, sym, Symbol[]) - $dict_[sym] = push!(list, pkg) + $add_(@nospecialize(fmt::Type), id::Union{ActionSource,Pair}) = $add_(formatname(fmt)::Symbol, id) + function $add_(sym::Symbol, id::ActionSource) + list = get!(Vector{ActionSource}, $dict_, sym) + push!(list, id) end + $add_(sym::Symbol, pkg::Pair{<:Union{String,Symbol}, UUID}) = $add_(sym, Base.PkgId(pkg.second, String(pkg.first))) end end """ - add_loader(fmt, :Package) - add_loader(fmt, [:Package, specifiers...]) + add_loader(fmt, :Package=>uuid) + add_loader(fmt, [:Package=>uuid, specifiers...]) Declare that format `fmt` can be loaded with package `:Package`. Specifiers include `OSX`, `Unix`, `Windows` and `Linux` to restrict usage to particular operating systems. 
@@ -66,8 +31,8 @@ Specifiers include `OSX`, `Unix`, `Windows` and `Linux` to restrict usage to par add_loader """ - add_saver(fmt, :Package) - add_saver(fmt, [:Package, specifiers...]) + add_saver(fmt, :Package=>uuid) + add_saver(fmt, [:Package=>uuid, specifiers...]) Declare that format `fmt` can be saved with package `:Package`. Specifiers include `OSX`, `Unix`, `Windows` and `Linux` to restrict usage to particular operating systems. @@ -80,8 +45,8 @@ the format from `filename` and/or magic bytes in the file. - `load(strm)` loads from an `IOStream` or similar object. In this case, there is no filename extension, so we rely on the magic bytes for format identification. -- `load(File(format"PNG", filename))` specifies the format directly, and bypasses inference. -- `load(Stream(format"PNG", io))` specifies the format directly, and bypasses inference. +- `load(File{format"PNG"}(filename))` specifies the format directly, and bypasses inference. +- `load(Stream{format"PNG"}(io))` specifies the format directly, and bypasses inference. - `load(f; options...)` passes keyword arguments on to the loader. """ load @@ -99,9 +64,9 @@ at once - `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. In this case, there is no filename extension, so we rely on the magic bytes for format identification. -- `loadstreaming(File(format"WAV",filename))` specifies the format directly, and +- `loadstreaming(File{format"WAV"}(filename))` specifies the format directly, and bypasses inference. -- `loadstreaming(Stream(format"WAV", io))` specifies the format directly, and +- `loadstreaming(Stream{format"WAV"}(io))` specifies the format directly, and bypasses inference. - `loadstreaming(f; options...)` passes keyword arguments on to the loader. """ @@ -110,8 +75,8 @@ loadstreaming """ - `save(filename, data...)` saves the contents of a formatted file, trying to infer the format from `filename`. 
-- `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. -- `save(File(format"PNG",filename), data...)` specifies the format directly, and bypasses inference. +- `save(Stream{format"PNG"}(io), data...)` specifies the format directly, and bypasses inference. +- `save(File{format"PNG"}(filename), data...)` specifies the format directly, and bypasses inference. - `save(f, data...; options...)` passes keyword arguments on to the saver. """ save @@ -123,9 +88,9 @@ accept formatted objects, like an image or chunk of video or audio. - `savestreaming(filename, data...)` saves the contents of a formatted file, trying to infer the format from `filename`. -- `savestreaming(File(format"WAV",filename))` specifies the format directly, and +- `savestreaming(File{format"WAV"}(filename))` specifies the format directly, and bypasses inference. -- `savestreaming(Stream(format"WAV", io))` specifies the format directly, and +- `savestreaming(Stream{format"WAV"}(io))` specifies the format directly, and bypasses inference. - `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. """ @@ -134,38 +99,48 @@ savestreaming # if a bare filename or IO stream are given, query for the format and dispatch # to the formatted handlers below for fn in (:load, :loadstreaming, :metadata) - @eval $fn(file, args...; options...) = $fn(query(file), args...; options...) + fnq = QuoteNode(fn) + @eval function $fn(file, args...; options...) + checkpath_load(file) + sym = querysym(file) + libraries = applicable_loaders(sym) + return action($fnq, libraries, sym, file, args...; options...) + end + # Version that bypasses format-inference + @eval function $fn(@nospecialize(file::Formatted), args...; options...) + checkpath_load(filename(file)) + sym = formatname(file)::Symbol + libraries = applicable_loaders(sym) + return action($fnq, libraries, file, args...; options...) 
+ end end for fn in (:save, :savestreaming) - @eval $fn(file, args...; options...) = $fn(query(file; checkfile=false), args...; options...) + fnq = QuoteNode(fn) + @eval function $fn(file, args...; options...) + checkpath_save(file) + sym = querysym(file; checkfile=false) + libraries = applicable_savers(sym) + return action($fnq, libraries, sym, file, args...; options...) + end + @eval function $fn(@nospecialize(file::Formatted), args...; options...) + checkpath_save(filename(file)) + sym = formatname(file)::Symbol + libraries = applicable_savers(sym) + return action($fnq, libraries, file, args...; options...) + end + @eval function $fn(@nospecialize(fmt::Type), file, args...; options...) + checkpath_save(file) + sym = formatname(fmt)::Symbol + libraries = applicable_savers(sym) + return action($fnq, libraries, sym, file, args...; options...) + end end # return a save function, so you can do `thing_to_save |> save("filename.ext")` -save(file; options...) = data -> save(file, data; options...) - -# Allow format to be overridden with first argument -function save(df::Type{DataFormat{sym}}, filename, data...; options...) where sym - libraries = applicable_savers(df) - checked_import(libraries[1]) - return Base.invokelatest(save, File(DataFormat{sym}, filename), data...; options...) -end - -function savestreaming(df::Type{DataFormat{sym}}, s::IO, data...; options...) where sym - libraries = applicable_savers(df) - checked_import(libraries[1]) - return Base.invokelatest(savestreaming, Stream(DataFormat{sym}, s), data...; options...) -end - -function save(df::Type{DataFormat{sym}}, s::IO, data...; options...) where sym - libraries = applicable_savers(df) - checked_import(libraries[1]) - return Base.invokelatest(save, Stream(DataFormat{sym}, s), data...; options...) -end - -function savestreaming(df::Type{DataFormat{sym}}, filename, data...; options...) 
where sym - libraries = applicable_savers(df) - checked_import(libraries[1]) - return Base.invokelatest(savestreaming, File(DataFormat{sym}, filename), data...; options...) +function save(file; options...) + sym = querysym(file; checkfile=false) + libraries = applicable_savers(sym) + return data -> action(:save, libraries, sym, file, data; options...) end # do-syntax for streaming IO @@ -180,79 +155,61 @@ for fn in (:loadstreaming, :savestreaming) end end -# Handlers for formatted files/streams - -for fn in (:load, :loadstreaming, :metadata) - fn_func_name = Symbol(fn, "_filename") - gen2_func_name = Symbol("fileio_", fn) - @eval function $fn(@nospecialize(q::Formatted), @nospecialize(args...); @nospecialize(options...)) - Base.invokelatest($fn_func_name, q, filename(q), args...; options...) - end - @eval function $fn_func_name(@nospecialize(q::Formatted), filename, @nospecialize(args...); @nospecialize(options...)) - if unknown(q) - isfile(filename) || open(filename) # force systemerror - throw(UnknownFormat(q)) - end - if q isa File - !isfile(filename) && throw(ArgumentError("No file exists at given path: $(filename)")) - end - libraries = applicable_loaders(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if isdefined(Library, $(QuoteNode(gen2_func_name))) - return Base.invokelatest(Library.$gen2_func_name, q, args...; options...) - end - if !has_method_from(methods(Library.$fn), Library) - throw(LoaderError(string(library), "$($fn) not defined")) - end - return Base.invokelatest(Library.$fn, q, args...; options...) 
- catch e - push!(failures, (e, q)) - end - end - handle_exceptions(failures, "loading $(repr(filename))") - end +function checkpath_load(file) + file === nothing && return nothing # likely stream io + !isfile(file) && throw(ArgumentError("No file exists at given path: $file")) + return nothing +end +function checkpath_save(file) + file === nothing && return nothing + isa(file, IO) && return nothing + isdir(file) && throw(ArgumentError("Given file path is a directory: $file")) + dn = dirname(file) + !isdir(dn) && mkpath(dn) + return nothing end -for fn in (:save, :savestreaming) - gen2_func_name = Symbol("fileio_", fn) - @eval function $fn(@nospecialize(q::Formatted), @nospecialize(data...); @nospecialize(options...)) - unknown(q) && throw(UnknownFormat(q)) - if q isa File - isdir(filename(q)) && throw(ArgumentError("Given file path is a directory: $(filename(q))")) - !isdir(dirname(filename(q))) && mkpath(dirname(filename(q))) - end - libraries = applicable_savers(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if isdefined(Library, $(QuoteNode(gen2_func_name))) - return Base.invokelatest(Library.$gen2_func_name, q, data...; options...) - end - if !has_method_from(methods(Library.$fn), Library) - throw(WriterError(string(library), "$($fn) not defined")) +action(call::Symbol, libraries::Vector{ActionSource}, sym::Symbol, io::IO, args...; options...) = + action(call, libraries, Stream{DataFormat{sym}}(io), args...; options...) +action(call::Symbol, libraries::Vector{ActionSource}, sym::Symbol, file, args...; options...) = + action(call, libraries, File{DataFormat{sym}}(file), args...; options...) 
+ +# To test for broken packages which extend FileIO functions +const fileiofuncs = Dict{Symbol,Function}(:load => load, + :loadstreaming => loadstreaming, + :metadata => metadata, + :save => save, + :savestreaming => savestreaming) + +function action(call::Symbol, libraries::Vector{ActionSource}, @nospecialize(file::Formatted), args...; options...) + issave = call ∈ (:save, :savestreaming) + failures = Tuple{Any,ActionSource}[] + pkgfuncname = Symbol("fileio_", call) + local mod + for library in libraries + try + mod = isa(library, Module) ? library : Base.require(library) + f = if isdefined(mod, pkgfuncname) + getfield(mod, pkgfuncname) + else + getfield(mod, call) + end + if f === get(fileiofuncs, call, nothing) + argtyps = map(Core.Typeof, args) + m = which(f, (typeof(file), argtyps...)) + if m == which(f, (Formatted, argtyps...)) + throw(SpecError(mod, call)) end - return Base.invokelatest(Library.$fn, q, data...; options...) - catch e - push!(failures, (e, q)) + @warn "$mod incorrectly extends FileIO functions (see FileIO documentation)" end - end - handle_exceptions(failures, "saving $(repr(filename(q)))") - end -end - -# returns true if the given method table includes a method defined by the given -# module, false otherwise -function has_method_from(mt, Library) - for m in mt - if getmodule(m) == Library - return true + return Base.invokelatest(f, file, args...; options...) + catch e + if isa(e, MethodError) || isa(e, SpecError) + str = "neither $call nor $pkgfuncname is defined" + e = issave ? WriterError(string(mod), str) : LoaderError(string(mod), str) + end + push!(failures, (e, library)) end end - false + handle_exceptions(failures, "$call $(repr(file))") end - -getmodule(m) = m.module diff --git a/src/query.jl b/src/query.jl index 3b0c9847..a4a7c636 100644 --- a/src/query.jl +++ b/src/query.jl @@ -3,48 +3,43 @@ """ `unknown(f)` returns true if the format of `f` is unknown. 
""" -unknown(::Type{format"UNKNOWN"}) = true -unknown(::Type{DataFormat{sym}}) where {sym} = false - -unknown(::File{F}) where {F} = unknown(F) -unknown(::Stream{F}) where {F} = unknown(F) +unknown(@nospecialize(f::Union{Formatted,Type})) = unknown(formatname(f)::Symbol) +unknown(name::Symbol) = name === :UNKNOWN const unknown_df = DataFormat{:UNKNOWN} """ `info(fmt)` returns the magic bytes/extension information for -`DataFormat` `fmt`. +`fmt`. """ -info(::Type{DataFormat{sym}}) where {sym} = sym2info[sym] +info(@nospecialize(f::Union{Formatted,Type})) = info(formatname(f)::Symbol) +info(sym::Symbol) = sym2info[sym] "`magic(fmt)` returns the magic bytes of format `fmt`" -magic(fmt::Type{<:DataFormat})= UInt8[info(fmt)[1]...] - +magic(@nospecialize(fmt::Type)) = magic(formatname(fmt)::Symbol) +magic(sym::Symbol) = info(sym)[1] """ -`skipmagic(s)` sets the position of `Stream` `s` to be just after the magic bytes. +`skipmagic(s::Stream)` sets the position of `s` to be just after the magic bytes. For a plain IO object, you can use `skipmagic(io, fmt)`. """ -skipmagic(s::Stream{F}) where {F} = (skipmagic(stream(s), F); s) -function skipmagic(io, fmt::Type{DataFormat{sym}}) where sym +skipmagic(@nospecialize(s::Stream)) = (skipmagic(stream(s), formatname(s)::Symbol); s) +skipmagic(io, @nospecialize(fmt::Type)) = skipmagic(io, formatname(fmt)::Symbol) +function skipmagic(io, sym::Symbol) magic, _ = sym2info[sym] skipmagic(io, magic) nothing end -skipmagic(io, magic::Function) = nothing -skipmagic(io, magic::NTuple{N,UInt8}) where {N} = seek(io, length(magic)) -function skipmagic(io, magic::Tuple) - lengths = map(length, magic) - all(x-> lengths[1] == x, lengths) && return seek(io, lengths[1]) # it doesn't matter what magic bytes get skipped as they all have the same length - magic = [magic...] 
- sort!(magic, lt = (a,b)-> length(a) >= length(b)) # start with longest first, to avoid overlapping magic bytes - seekend(io) - len = position(io) - seekstart(io) - filter!(x-> length(x) <= len, magic) # throw out magic bytes that are longer than IO - tmp = read(io, length(first(magic))) # now, first is both the longest and guaranteed to fit into io, so we can just read the bytes - for m in magic +skipmagic(io, @nospecialize(magic::Function)) = nothing +skipmagic(io, magic::Vector{UInt8}) = seek(io, length(magic)) +function skipmagic(io, magics::Vector{Vector{UInt8}}) + lengths = map(length, magics) + l1 = lengths[1] + all(isequal(l1), lengths) && return seek(io, l1) # it doesn't matter what magic bytes get skipped as they all have the same length + len = getlength(io) + tmp = read(io, min(len, maximum(lengths))) + for m in reverse(magics) # start with the longest since they are most specific if magic_equal(m, tmp) seek(io, length(m)) return nothing @@ -52,14 +47,21 @@ function skipmagic(io, magic::Tuple) end error("tried to skip magic bytes of an IO that does not contain the magic bytes of the format. IO: $io") end + function magic_equal(magic, buffer) + length(magic) > length(buffer) && return false for (i,elem) in enumerate(magic) buffer[i] != elem && return false end true end - +function getlength(io, pos=position(io)) + seekend(io) + len = position(io) + seek(io, pos) + return len +end """ query(filename; checkfile=true) @@ -70,87 +72,143 @@ If `filename` already exists, the file's magic bytes will take priority unless `checkfile` is false. 
""" function query(filename; checkfile::Bool=true) + filename = abspath(filename) + sym = querysym(filename; checkfile=checkfile) + return File{DataFormat{sym}}(filename) +end +query(@nospecialize(f::Formatted); checkfile::Bool=true) = f + +# This is recommended for internal use because it returns Symbol (or errors) +function querysym(filename; checkfile::Bool=true) + hasmagic(@nospecialize(magic)) = !(isa(magic, Vector{UInt8}) && isempty(magic)) + checkfile &= isfile(filename) _, ext = splitext(filename) if haskey(ext2sym, ext) sym = ext2sym[ext] - no_magic = !hasmagic(sym) - if lensym(sym) == 1 && (no_magic || !checkfile) # we only found one candidate and there is no magic bytes, or no file, trust the extension - return File{DataFormat{sym}}(filename) - elseif !checkfile && lensym(sym) > 1 - return File{DataFormat{sym[1]}}(filename) + if isa(sym, Symbol) # there's only one format with this extension + checkfile || return sym # since we're not checking, we can return it immediately + magic = sym2info[sym][1] + hasmagic(magic) || return sym + return open(filename) do io + match(io, magic) && return sym + # if it doesn't match, we prioritize the magic bytes over the guess based on extension + return querysym_all(io)[1] + end end - no_function = !hasfunction(sym) - if no_magic && no_function - error("Some formats with extension ", ext, " have no magic bytes; use `File{format\"FMT\"}(filename)` to resolve the ambiguity.") + # There are multiple formats consistent with this extension + syms = sym::Vector{Symbol} + checkfile || return syms[1] # with !checkfile we default to the first. TODO?: change to an error? 
+ return open(filename) do io + badmagic = false + for sym in syms + magic = sym2info[sym][1] + if !hasmagic(magic) + badmagic = true + continue + end + match(io, magic) && return sym + end + badmagic && error("Some formats with extension ", ext, " have no magic bytes; use `File{format\"FMT\"}(filename)` to resolve the ambiguity.") + return querysym_all(io)[1] end - if no_magic && !no_function - # try specific function first, if available - ret = query(open(filename), abspath(filename), sym) - ret !== nothing && return file!(ret) + end + !checkfile && return :UNKNOWN + return open(filename) do io + return querysym_all(io)[1] + end +end + +function match(io, magic::Vector{UInt8}) + len = getlength(seekstart(io)) + len < length(magic) && return false + return magic_equal(magic, read(io, length(magic))) +end + +function match(io, magics::Vector{Vector{UInt8}}) + lengths = map(length, magics) + len = getlength(seekstart(io)) + tmp = read(io, min(len, maximum(lengths))) + for m in reverse(magics) # start with the longest since they are most specific + if magic_equal(m, tmp) + return true end end - !checkfile && return File{unknown_df}(filename) # (no extension || no magic byte || no function) && no file - # Otherwise, check against all magic bytes, then functions - file!(query(open(filename), abspath(filename))) + return false end -lensym(s::Symbol) = 1 -lensym(v::Vector) = length(v) +function match(io, @nospecialize(magic::Function)) + seekstart(io) + try + magic(io) + catch e + println("There was an error in magic function $magic") + println("Please open an issue at FileIO.jl. Error:") + println(e) + false + end +end -hasmagic(s::Symbol) = hasmagic(sym2info[s][1]) -hasmagic(v::Vector) = any(hasmagic, v) +# Returns sym, magic (the latter may be empty if a magic-function matched) +# Upon return the stream position is set to the end of magic. 
+function querysym_all(io) + seekstart(io) + len = getlength(io) + lengths = map(magic_list) do p + length(p.first) + end + tmp = read(io, min(len, maximum(lengths))) + for (magic, sym) in reverse(magic_list) + isempty(magic) && break + if magic_equal(magic, tmp) + seek(io, length(magic)) + return sym, magic + end + end + for (magic, sym) in magic_func + seekstart(io) + match(io, magic) && return sym, empty_magic + end + seekstart(io) + return :UNKNOWN, empty_magic +end -hasmagic(t::Tuple) = !isempty(t) -hasmagic(::Any) = false # for when magic is a function +function querysym(io::IO) + if seekable(io) + sym, _ = querysym_all(io) + seekstart(io) + return sym + end + # When it's not seekable, we can only work our way upwards in length of magic bytes + # We're essentially counting on the fact that one of them will match, otherwise the stream + # is corrupted. + buffer = UInt8[] + for (magic, sym) in magic_list + isempty(magic) && continue + while length(buffer) < length(magic) && !eof(io) + push!(buffer, read(io, UInt8)) + end + if magic_equal(magic, buffer) + return sym + end + eof(io) && break + end + return :UNKNOWN +end -hasfunction(s::Symbol) = hasfunction(sym2info[s][1]) -hasfunction(v::Vector) = any(hasfunction, v) -hasfunction(s::Any) = true #has function -hasfunction(s::Tuple) = false #has magic """ `query(io, [filename])` returns a `Stream` object with information about the format inferred from the magic bytes. 
""" function query(io::IO, filename = nothing) - magic = Vector{UInt8}() - pos = position(io) - for p in magic_list - m = first(p) - length(m) == 0 && continue - while length(m) > length(magic) - if eof(io) - seek(io, pos) - return Stream{unknown_df, typeof(io)}(io, filename) - end - push!(magic, read(io, UInt8)) - end - if iter_eq(magic, m) - seek(io, pos) - return Stream{DataFormat{last(p)},typeof(io)}(io, filename) - end - end - if seekable(io) - for p in magic_func - seek(io, pos) - f = first(p) - try - if f(io) - return Stream{DataFormat{last(p)},typeof(io)}(seek(io, pos), filename) - end - catch e - println("There was an error in magick function $f") - println("Please open an issue at FileIO.jl. Error:") - println(e) - end - end - seek(io, pos) - end - Stream{unknown_df,typeof(io)}(io, filename) + sym = querysym(io) + return Stream{DataFormat{sym}}(io, filename) end +query(io::IO, @nospecialize(filename::Formatted)) = error("no need to query when format is known") + +# TODO?: update to querysym? function query(io::IO, filename::String, sym::Vector{Symbol}) - magic = Vector{UInt8}() pos = position(io) if seekable(io) for (f, fmtsym) in magic_func @@ -161,30 +219,17 @@ function query(io::IO, filename::String, sym::Vector{Symbol}) return Stream{DataFormat{fmtsym},typeof(io)}(seek(io, pos), filename) end catch e - println("There was an error in magick function $f") + println("There was an error in magic function $f") println("Please open an issue at FileIO.jl. Error:") println(e) end end seek(io, pos) end - close(io) + close(io) # FIXME? 
nothing end seekable(io::IOBuffer) = io.seekable seekable(::IOStream) = true seekable(::Any) = false - -function iter_eq(A, B) - length(A) == length(B) || return false - i,j = 1,1 - for _=1:length(A) - a=A[i]; b=B[j] - a == b && (i+=1; j+=1; continue) - a == UInt32('\r') && (i+=1; continue) # this seems like the shadiest solution to deal with windows \r\n - b == UInt32('\r') && (j+=1; continue) - return false #now both must be unequal, and no \r windows excemption any more - end - true -end diff --git a/src/registry.jl b/src/registry.jl index 16f05299..12c4dc16 100644 --- a/src/registry.jl +++ b/src/registry.jl @@ -1,13 +1,26 @@ +### "Package registry" +# Useful for packages that get used more than once below +# Please alphabetize +const idCSVFiles = :CSVFiles => UUID("5d742f6a-9f54-50ce-8119-2520741973ca") +const idImageIO = :ImageIO => UUID("82e4d734-157c-48bb-816b-45c225c6df19") +const idImageMagick = :ImageMagick => UUID("6218d12a-5da1-5696-b52f-db25d2ecc6d1") +const idMeshIO = :MeshIO => UUID("7269a6da-0436-5bbc-96c2-40638cbb6118") +const idNetpbm = :Netpbm => UUID("f09324ee-3d7c-5217-9330-fc30815ba969") +const idQuartzImageIO = :QuartzImageIO => UUID("dca85d43-d64c-5e67-8c65-017450d5d020") +const idRData = :RData => UUID("df47a6cb-8c03-5eed-afd8-b6050d6c41da") +const idStatFiles = :StatFiles => UUID("1463e38c-9381-5320-bcd4-4134955f093a") +const idVegaLite = :VegaLite => UUID("112f6efa-9a02-5b7d-90c0-432ed331239a") + ### Simple cases # data formats add_format(format"JLD", (unsafe_wrap(Vector{UInt8}, "Julia data file (HDF5), version 0.0"), - unsafe_wrap(Vector{UInt8}, "Julia data file (HDF5), version 0.1")), ".jld", [:JLD]) + unsafe_wrap(Vector{UInt8}, "Julia data file (HDF5), version 0.1")), ".jld", [:JLD => UUID("4138dd39-2aa7-5051-a626-17a0bb65d9c8")]) add_format(format"JLD2", (unsafe_wrap(Vector{UInt8},"Julia data file (HDF5), version 0.2"), - unsafe_wrap(Vector{UInt8}, "HDF5-based Julia Data Format, version ")), ".jld2", [:JLD2]) -add_format(format"GZIP", 
[0x1f, 0x8b], ".gz", [:Libz]) -add_format(format"BSON",(),".bson", [:BSON]) -add_format(format"JLSO", (), ".jlso", [:JLSO]) + unsafe_wrap(Vector{UInt8}, "HDF5-based Julia Data Format, version ")), ".jld2", [:JLD2 => UUID("033835bb-8acc-5ee8-8aae-3f567f8a3819")]) +add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz => UUID("2ec943e9-cfe8-584d-b93d-64dcb6d567b7")]) +add_format(format"BSON",(),".bson", [:BSON => UUID("fbb218c0-5317-5bc6-957e-2ee96dd4b1f0")]) +add_format(format"JLSO", (), ".jlso", [:JLSO => UUID("9da8a3cd-07a3-59c0-a743-3fdc52c30d11")]) # test for RD?n magic sequence at the beginning of R data input stream function detect_rdata(io) @@ -19,7 +32,7 @@ function detect_rdata(io) (c = read(io, UInt8); c == UInt8('\n') || (c == UInt8('\r') && read(io, UInt8) == UInt8('\n'))) end -add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [:RData, LOAD]) +add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [idRData, LOAD]) function detect_rdata_single(io) seekstart(io) @@ -29,100 +42,100 @@ function detect_rdata_single(io) return res end -add_format(format"RDataSingle", detect_rdata_single, [".rds"], [:RData, LOAD]) +add_format(format"RDataSingle", detect_rdata_single, [".rds"], [idRData, LOAD]) -add_format(format"CSV", (), [".csv"], [:CSVFiles]) -add_format(format"TSV", (), [".tsv"], [:CSVFiles]) -add_format(format"Feather", "FEA1", [".feather"], [:FeatherFiles]) -add_format(format"Excel", (), [".xls", ".xlsx"], [:ExcelFiles]) -add_format(format"Stata", (), [".dta"], [:StatFiles, LOAD]) -add_format(format"SPSS", "\$FL2", [".sav"], [:StatFiles, LOAD]) +add_format(format"CSV", (), [".csv"], [idCSVFiles]) +add_format(format"TSV", (), [".tsv"], [idCSVFiles]) +add_format(format"Feather", "FEA1", [".feather"], [:FeatherFiles => UUID("b675d258-116a-5741-b937-b79f054b0542")]) +add_format(format"Excel", (), [".xls", ".xlsx"], [:ExcelFiles => UUID("89b67f3b-d1aa-5f6f-9ca4-282e8d98620d")]) +add_format(format"Stata", (), [".dta"], 
[idStatFiles, LOAD]) +add_format(format"SPSS", "\$FL2", [".sav"], [idStatFiles, LOAD]) add_format(format"SAS", UInt8[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xea, 0x81, 0x60,0xb3, 0x14, 0x11, 0xcf, 0xbd, 0x92, 0x08, 0x00, 0x09, 0xc7, 0x31, 0x8c, 0x18, 0x1f, - 0x10, 0x11], [".sas7bdat"], [:StatFiles, LOAD]) -add_format(format"Parquet", "PAR1", [".parquet"], [:ParquetFiles, LOAD]) + 0x10, 0x11], [".sas7bdat"], [idStatFiles, LOAD]) +add_format(format"Parquet", "PAR1", [".parquet"], [:ParquetFiles => UUID("46a55296-af5a-53b0-aaa0-97023b66127f"), LOAD]) # Image formats -add_format(format"PBMBinary", b"P4", ".pbm", [:ImageIO], [:Netpbm], [:ImageMagick]) -add_format(format"PGMBinary", b"P5", ".pgm", [:ImageIO], [:Netpbm]) -add_format(format"PPMBinary", b"P6", ".ppm", [:ImageIO], [:Netpbm]) -add_format(format"PBMText", b"P1", ".pbm", [:ImageIO], [:Netpbm], [:ImageMagick, LOAD]) -add_format(format"PGMText", b"P2", ".pgm", [:ImageIO], [:Netpbm], [:ImageMagick, LOAD]) -add_format(format"PPMText", b"P3", ".ppm", [:ImageIO], [:Netpbm], [:ImageMagick, LOAD]) - -add_format(format"NRRD", "NRRD", [".nrrd", ".nhdr"], [:NRRD]) - -add_format(format"AndorSIF", "Andor Technology Multi-Channel File", ".sif", [:AndorSIF, LOAD]) - -add_format(format"FLO", b"PIEH", ".flo", [:OpticalFlowUtils]) - -add_format(format"CRW", UInt8[0x49,0x49,0x1a,0x00,0x00,0x00,0x48,0x45], ".crw", [:ImageMagick]) -add_format(format"CUR", UInt8[0x00,0x00,0x02,0x00], ".cur", [:ImageMagick]) -add_format(format"DCX", UInt8[0xb1,0x68,0xde,0x3a], ".dcx", [:ImageMagick]) -add_format(format"DOT", UInt8[0xd0,0xcf,0x11,0xe0,0xa1,0xb1,0x1a,0xe1], ".dot", [:ImageMagick]) -add_format(format"EPS", UInt8[0x25,0x21,0x50,0x53,0x2d,0x41,0x64,0x6f], ".eps", [:ImageMagick], [:MimeWriter, SAVE]) -add_format(format"HDR", UInt8[0x23,0x3f,0x52,0x41,0x44,0x49,0x41,0x4e], ".hdr", [:ImageMagick]) -add_format(format"ICO", UInt8[0x00,0x00,0x01,0x00], ".ico", [:ImageMagick]) -add_format(format"INFO", 
UInt8[0x7a,0x62,0x65,0x78], ".info",[:ImageMagick]) -add_format(format"JP2", UInt8[0x00,0x00,0x00,0x0c,0x6a,0x50,0x20,0x20], ".jp2", [:ImageMagick]) -add_format(format"PDB", UInt8[0x73,0x7a,0x65,0x7a], ".pdb", [:ImageMagick]) -add_format(format"PDF", UInt8[0x25,0x50,0x44,0x46], ".pdf", [:ImageMagick], [:MimeWriter, SAVE]) -add_format(format"PGM", UInt8[0x50,0x35,0x0a], ".pgm", [:ImageMagick]) -add_format(format"PSD", UInt8[0x38,0x42,0x50,0x53], ".psd", [:ImageMagick]) -add_format(format"RGB", UInt8[0x01,0xda,0x01,0x01,0x00,0x03], ".rgb", [:ImageMagick]) -add_format(format"WMF", UInt8[0xd7,0xcd,0xc6,0x9a], ".wmf", [:ImageMagick]) -add_format(format"WPG", UInt8[0xff,0x57,0x50,0x43], ".wpg", [:ImageMagick]) -add_format(format"Imagine", "IMAGINE", ".imagine", [:ImagineFormat]) +add_format(format"PBMBinary", b"P4", ".pbm", [idImageIO], [idNetpbm], [idImageMagick]) +add_format(format"PGMBinary", b"P5", ".pgm", [idImageIO], [idNetpbm]) +add_format(format"PPMBinary", b"P6", ".ppm", [idImageIO], [idNetpbm]) +add_format(format"PBMText", b"P1", ".pbm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) +add_format(format"PGMText", b"P2", ".pgm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) +add_format(format"PPMText", b"P3", ".ppm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) + +add_format(format"NRRD", "NRRD", [".nrrd", ".nhdr"], [:NRRD => UUID("9bb6cfbd-7763-5393-b1b5-1c8e09872146")]) + +add_format(format"AndorSIF", "Andor Technology Multi-Channel File", ".sif", [:AndorSIF => UUID("d04cd5f8-5917-4006-ac6f-d139328806a7"), LOAD]) + +add_format(format"FLO", b"PIEH", ".flo", [:OpticalFlowUtils => UUID("ab0dad50-ab19-448c-b796-13553ec8b2d3")]) + +add_format(format"CRW", UInt8[0x49,0x49,0x1a,0x00,0x00,0x00,0x48,0x45], ".crw", [idImageMagick]) +add_format(format"CUR", UInt8[0x00,0x00,0x02,0x00], ".cur", [idImageMagick]) +add_format(format"DCX", UInt8[0xb1,0x68,0xde,0x3a], ".dcx", [idImageMagick]) +add_format(format"DOT", UInt8[0xd0,0xcf,0x11,0xe0,0xa1,0xb1,0x1a,0xe1], ".dot", 
[idImageMagick]) +add_format(format"EPS", UInt8[0x25,0x21,0x50,0x53,0x2d,0x41,0x64,0x6f], ".eps", [idImageMagick], [MimeWriter, SAVE]) +add_format(format"HDR", UInt8[0x23,0x3f,0x52,0x41,0x44,0x49,0x41,0x4e], ".hdr", [idImageMagick]) +add_format(format"ICO", UInt8[0x00,0x00,0x01,0x00], ".ico", [idImageMagick]) +add_format(format"INFO", UInt8[0x7a,0x62,0x65,0x78], ".info",[idImageMagick]) +add_format(format"JP2", UInt8[0x00,0x00,0x00,0x0c,0x6a,0x50,0x20,0x20], ".jp2", [idImageMagick]) +add_format(format"PDB", UInt8[0x73,0x7a,0x65,0x7a], ".pdb", [idImageMagick]) +add_format(format"PDF", UInt8[0x25,0x50,0x44,0x46], ".pdf", [idImageMagick], [MimeWriter, SAVE]) +add_format(format"PGM", UInt8[0x50,0x35,0x0a], ".pgm", [idImageMagick]) +add_format(format"PSD", UInt8[0x38,0x42,0x50,0x53], ".psd", [idImageMagick]) +add_format(format"RGB", UInt8[0x01,0xda,0x01,0x01,0x00,0x03], ".rgb", [idImageMagick]) +add_format(format"WMF", UInt8[0xd7,0xcd,0xc6,0x9a], ".wmf", [idImageMagick]) +add_format(format"WPG", UInt8[0xff,0x57,0x50,0x43], ".wpg", [idImageMagick]) +add_format(format"Imagine", "IMAGINE", ".imagine", [:ImagineFormat => UUID("4bab44a2-5ff2-5a6b-8e10-825fb9ac126a")]) add_format( format"TGA", (), ".tga", - [:QuartzImageIO, OSX], - [:ImageMagick] + [idQuartzImageIO, OSX], + [idImageMagick] ) add_format( format"GIF", UInt8[0x47,0x49,0x46,0x38], ".gif", - [:QuartzImageIO, OSX], - [:ImageMagick] + [idQuartzImageIO, OSX], + [idImageMagick] ) add_format( format"PNG", UInt8[0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png", - [:ImageIO], - [:QuartzImageIO, OSX], - [:ImageMagick], - [:MimeWriter, SAVE] + [idImageIO], + [idQuartzImageIO, OSX], + [idImageMagick], + [MimeWriter, SAVE] ) add_format( format"JPEG", UInt8[0xff,0xd8,0xff], [".jpeg", ".jpg", ".JPG"], - [:QuartzImageIO, OSX], - [:ImageMagick] + [idQuartzImageIO, OSX], + [idImageMagick] ) # 0xe1 add_format( format"BMP", UInt8[0x42,0x4d], ".bmp", - [:QuartzImageIO, OSX], - [:ImageMagick] + [idQuartzImageIO, OSX], + 
[idImageMagick] ) add_format( format"PCX", (UInt8[0x0a,0x02], UInt8[0x0a,0x05]), ".pcx", - [:ImageMagick] + [idImageMagick] ) add_format( format"SVG", (), ".svg", - [:MimeWriter, SAVE] + [MimeWriter, SAVE] ) #= @@ -136,37 +149,37 @@ add_saver(format"ZIP", :ZipeFile) =# #Shader files -add_format(format"GLSLShader", (), [".frag", ".vert", ".geom", ".comp"], [:GLAbstraction]) +# add_format(format"GLSLShader", (), [".frag", ".vert", ".geom", ".comp"], [:GLAbstraction]) # Mesh formats -add_format(format"OBJ", (), ".obj", [:MeshIO]) -add_format(format"PLY_ASCII", "ply\nformat ascii 1.0", ".ply", [:MeshIO]) -add_format(format"PLY_BINARY", "ply\nformat binary_little_endian 1.0", ".ply", [:MeshIO]) -add_format(format"2DM", "MESH2D", ".2dm", [:MeshIO]) -add_format(format"OFF", "OFF", ".off", [:MeshIO]) -add_format(format"MSH", (), ".msh", [:MeshIO]) +add_format(format"OBJ", (), ".obj", [idMeshIO]) +add_format(format"PLY_ASCII", "ply\nformat ascii 1.0", ".ply", [idMeshIO]) +add_format(format"PLY_BINARY", "ply\nformat binary_little_endian 1.0", ".ply", [idMeshIO]) +add_format(format"2DM", "MESH2D", ".2dm", [idMeshIO]) +add_format(format"OFF", "OFF", ".off", [idMeshIO]) +add_format(format"MSH", (), ".msh", [idMeshIO]) # Bundler SfM format -add_format(format"OUT", "# Bundle file v0.3\n", ".out", [:BundlerIO]) +add_format(format"OUT", "# Bundle file v0.3\n", ".out", [:BundlerIO => UUID("654bb1e1-1cb7-4447-b770-09a16346af94")]) # GSLIB/SGeMS format (http://gslib.com) -add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO]) +add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO => UUID("4610876b-9b01-57c8-9ad9-06315f1a66a5")]) ### Audio formats function detectwav(io) - seekstart(io) - magic = read!(io, Vector{UInt8}(undef, 4)) - magic == b"RIFF" || return false + getlength(io) >= 12 || return false + buf = Vector{UInt8}(undef, 4) + read!(io, buf) + buf == b"RIFF" || return false seek(io, 8) - submagic = read!(io, Vector{UInt8}(undef, 4)) - + submagic = read!(io, buf) submagic == 
b"WAVE" end -add_format(format"WAV", detectwav, ".wav", [:WAV]) -add_format(format"FLAC","fLaC",".flac",[:FLAC]) +add_format(format"WAV", detectwav, ".wav", [:WAV => UUID("8149f6b0-98f6-5db9-b78f-408fbbb8ef88")]) +add_format(format"FLAC","fLaC",".flac",[:FLAC => UUID("abae9e3b-a9a0-4778-b5c6-ca109b507d99")]) ## Profile data -add_format(format"JLPROF", [0x4a, 0x4c, 0x50, 0x52, 0x4f, 0x46, 0x01, 0x00], ".jlprof", [:FlameGraphs]) # magic is "JLPROF" followed by [0x01, 0x00] +add_format(format"JLPROF", [0x4a, 0x4c, 0x50, 0x52, 0x4f, 0x46, 0x01, 0x00], ".jlprof", [:FlameGraphs => UUID("08572546-2f56-4bcf-ba4e-bab62c3a3f89")]) # magic is "JLPROF" followed by [0x01, 0x00] ### Complex cases @@ -249,22 +262,22 @@ function detect_bedgraph(io) return false end -add_format(format"bedGraph", detect_bedgraph, [".bedgraph"], [:BedgraphFiles]) +add_format(format"bedGraph", detect_bedgraph, [".bedgraph"], [:BedgraphFiles => UUID("85eb9095-274b-55ce-be28-9e90f41ac741")]) # Handle OME-TIFFs, which are identical to normal TIFFs with the primary difference being the filename and embedded XML metadata const tiff_magic = (UInt8[0x4d,0x4d,0x00,0x2a], UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00],UInt8[0x49,0x49,0x2b,0x00]) function detecttiff(io) - seekstart(io) + getlength(io) >= 4 || return false magic = read!(io, Vector{UInt8}(undef, 4)) # do any of the first 4 bytes match any of the 4 possible combinations of tiff magics return any(map(x->all(magic .== x), tiff_magic)) end # normal TIFF detect_noometiff(io) = detecttiff(io) && ((:name ∉ propertynames(io)) || !(endswith(io.name, ".ome.tif>") || endswith(io.name, ".ome.tiff>"))) -add_format(format"TIFF", detect_noometiff, [".tiff", ".tif"], [:QuartzImageIO, OSX], [:ImageMagick]) +add_format(format"TIFF", detect_noometiff, [".tiff", ".tif"], [idQuartzImageIO, OSX], [idImageMagick]) # OME-TIFF detect_ometiff(io) = detecttiff(io) && (:name ∈ propertynames(io)) && (endswith(io.name, ".ome.tif>") || endswith(io.name, ".ome.tiff>")) 
-add_format(format"OMETIFF", detect_ometiff, [".tif", ".tiff"], [:OMETIFF]) +add_format(format"OMETIFF", detect_ometiff, [".tif", ".tiff"], [:OMETIFF => UUID("2d0ec36b-e807-5756-994b-45af29551fcf")]) # custom skipmagic functions for function-based tiff magic detection skipmagic(io, ::typeof(detect_ometiff)) = seek(io, 4) @@ -272,7 +285,7 @@ skipmagic(io, ::typeof(detect_noometiff)) = seek(io, 4) # AVI is a subtype of RIFF, as is WAV function detectavi(io) - seekstart(io) + getlength(io) >= 12 || return false magic = read!(io, Vector{UInt8}(undef, 4)) magic == b"RIFF" || return false seek(io, 8) @@ -280,21 +293,19 @@ function detectavi(io) submagic == b"AVI " end -add_format(format"AVI", detectavi, ".avi", [:ImageMagick]) +add_format(format"AVI", detectavi, ".avi", [idImageMagick]) # HDF5: the complication is that the magic bytes may start at # 0, 512, 1024, 2048, or any multiple of 2 thereafter -h5magic = (0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a) +const h5magic = [0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a] function detecthdf5(io) position(io) == 0 || return false - seekend(io) - len = position(io) - seekstart(io) + len = getlength(io) magic = Vector{UInt8}(undef, length(h5magic)) pos = position(io) while pos+length(h5magic) <= len read!(io, magic) - if iter_eq(magic, h5magic) + if magic == h5magic return true end pos = pos == 0 ? 
512 : 2*pos @@ -304,14 +315,12 @@ function detecthdf5(io) end false end -add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5]) +add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5 => UUID("f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f")]) function detect_stlascii(io) pos = position(io) try - seekend(io) - len = position(io) - seek(io, pos) + len = getlength(io, pos) len < 80 && return false header = read(io, 80) # skip header seek(io, pos) @@ -325,9 +334,7 @@ function detect_stlbinary(io) size_header = 80 + sizeof(UInt32) size_triangleblock = (4 * 3 * sizeof(Float32)) + sizeof(UInt16) pos = position(io) - seekend(io) - len = position(io) - seek(io, pos) + len = getlength(io, pos) len < size_header && return false skip(io, 80) # skip header @@ -340,14 +347,14 @@ function detect_stlbinary(io) result = eof(io) # if end of file, we have a stl! return result end -add_format(format"STL_ASCII", detect_stlascii, [".stl", ".STL"], [:MeshIO]) -add_format(format"STL_BINARY", detect_stlbinary, [".stl", ".STL"], [:MeshIO]) +add_format(format"STL_ASCII", detect_stlascii, [".stl", ".STL"], [idMeshIO]) +add_format(format"STL_BINARY", detect_stlbinary, [".stl", ".STL"], [idMeshIO]) # Astro Data add_format(format"FITS", # See https://www.loc.gov/preservation/digital/formats/fdd/fdd000317.shtml#sign [0x53,0x49,0x4d,0x50,0x4c,0x45,0x20,0x20,0x3d,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x54], - [".fit", ".fits", ".fts", ".FIT", ".FITS", ".FTS"], [:FITSIO]) + [".fit", ".fits", ".fts", ".FIT", ".FITS", ".FTS"], [:FITSIO => UUID("525bcba6-941b-5504-bd06-fd0dc1a4d2eb")]) function detect_gadget2(io) pos = position(io) @@ -361,16 +368,15 @@ function detect_gadget2(io) seek(io, pos) return temp1 == temp2 end -add_format(format"Gadget2", detect_gadget2, [".gadget2", ".Gadget2", ".GADGET2"], [:AstroIO]) - +add_format(format"Gadget2", detect_gadget2, [".gadget2", ".Gadget2", ".GADGET2"], [:AstroIO => 
UUID("c85a633c-0c3f-44a2-bffe-7f9d0681b3e7")]) -add_format(format"RawArray", [0x61,0x72,0x61,0x77,0x72,0x72,0x79,0x61], ".ra", [:RawArray]) +add_format(format"RawArray", [0x61,0x72,0x61,0x77,0x72,0x72,0x79,0x61], ".ra", [:RawArray => UUID("d3d335b2-f152-507c-820e-958e337efb65")]) -add_format(format"MetaImage", "ObjectType", ".mhd", [:MetaImageFormat]) +add_format(format"MetaImage", "ObjectType", ".mhd", [:MetaImageFormat => UUID("1950589f-4d68-56f0-9b94-9d8646217309")]) -add_format(format"vegalite", (), [".vegalite"], [:VegaLite]) -add_format(format"vega", (), [".vega"], [:Vega], [:VegaLite, SAVE]) +add_format(format"vegalite", (), [".vegalite"], [idVegaLite]) +add_format(format"vega", (), [".vega"], [:Vega => UUID("239c3e63-733f-47ad-beb7-a12fde22c578")], [idVegaLite, SAVE]) -add_format(format"FCS", "FCS", [".fcs"], [:FCSFiles]) +add_format(format"FCS", "FCS", [".fcs"], [:FCSFiles => UUID("d76558cf-badf-52d4-a17e-381ab0b0d937")]) -add_format(format"HTML", (), [".html", ".htm"], [:MimeWriter, SAVE]) +add_format(format"HTML", (), [".html", ".htm"], [MimeWriter, SAVE]) diff --git a/src/registry_setup.jl b/src/registry_setup.jl index 337162cc..4a1351d1 100644 --- a/src/registry_setup.jl +++ b/src/registry_setup.jl @@ -1,58 +1,66 @@ # This file contains the code that allows things to be added to the registry const ext2sym = Dict{String, Union{Symbol,Vector{Symbol}}}() -const magic_list = Vector{Pair}() # sorted, see magic_cmp below -const sym2info = Dict{Symbol,Any}() # Symbol=>(magic, extension) -const magic_func = Vector{Pair{Function,Symbol}}() # for formats with complex magic #s +const magic_list = Vector{Pair{Vector{UInt8},Symbol}}() # sorted, see magic_cmp below +const sym2info = Dict{Symbol,Tuple{Any,Any}}() # Symbol=>(magic, extension) +const magic_func = Vector{Pair{Function,Symbol}}() # for formats with complex magic detection +const empty_magic = UInt8[] ## OS: -abstract type OS end -abstract type Unix <: OS end -struct Windows <: OS end -struct OSX <: Unix 
end -struct Linux <: Unix end - -split_predicates(list) = filter(x-> x <: OS, list), filter(x-> !(x <: OS), list) -applies_to_os(os::Vector) = isempty(os) || any(applies_to_os, os) -applies_to_os(os::Type{<:OS}) = false - -applies_to_os(os::Type{<:Unix}) = Sys.isunix() -applies_to_os(os::Type{Windows}) = Sys.iswindows() -applies_to_os(os::Type{OSX}) = Sys.isapple() -applies_to_os(os::Type{Linux}) = Sys.islinux() +@enum OS Unix Windows OSX Linux + +applies_to_os(oslist) = isempty(oslist) || any(applies_to_os, oslist) +function applies_to_os(os::OS) + os == Unix && return Sys.isunix() + os == Windows && return Sys.iswindows() + os == OSX && return Sys.isapple() + os == Linux && return Sys.islinux() + return false +end ## Magic bytes: # magic_cmp results in magic_list being sorted in order of increasing -# length(magic), then (among tuples with the same length) in -# dictionary order. This ordering has the advantage that you can +# length(magic), then (among sequences with the same length) in +# lexicographic order. This ordering has the advantage that you can # incrementally read bytes from the stream without worrying that # you'll encounter an EOF yet still have potential matches later in # the list. -function magic_cmp(p::Pair, t::Tuple) - pt = first(p) - lp, lt = length(pt), length(t) - lp < lt && return true - lp > lt && return false - pt < t -end -function magic_cmp(t::Tuple, p::Pair) - pt = first(p) - lp, lt = length(pt), length(t) - lt < lp && return true - lt > lp && return false - t < pt -end - -canonicalize_magic(m::NTuple{N,UInt8}) where {N} = m -canonicalize_magic(m::AbstractVector{UInt8}) = tuple(m...) 
+function magic_cmp(a::Vector{UInt8}, b::Vector{UInt8}) + la, lb = length(a), length(b) + la < lb && return true + la > lb && return false + for (ia, ib) in zip(a, b) + ia < ib && return true + ia > ib && return false + end + return false +end +magic_cmp(p::Pair, m::Vector{UInt8}) = magic_cmp(p.first, m) +magic_cmp(m::Vector{UInt8}, p::Pair) = magic_cmp(m, p.first) + +canonicalize_magic(@nospecialize(m::Tuple{Vararg{UInt8}})) = UInt8[m...] +canonicalize_magic(m::AbstractVector{UInt8}) = convert(Vector{UInt8}, m) canonicalize_magic(m::String) = canonicalize_magic(codeunits(m)) ## Load/Save -struct LOAD end -struct SAVE end +@enum IOSupport LOAD SAVE + +function split_predicates(list) + os = OS[] + ls = IOSupport[] + for item in list + if isa(item, OS) + push!(os, item) + else + push!(ls, item) + end + end + return os, ls +end + function add_loadsave(format, predicates) library = popfirst!(predicates) @@ -70,10 +78,11 @@ end ## Add Format: function add_format(fmt, magic, extension, load_save_libraries...) - add_format(fmt, magic, extension) for library in load_save_libraries add_loadsave(fmt, library) end + # Add the format after we've validated the packages (to prevent a partially-registered format) + add_format(fmt, magic, extension) fmt end @@ -97,50 +106,56 @@ where example `pkgspecifiers` are: You can combine `LOAD`, `SAVE`, `OSX`, `Unix`, `Windows` and `Linux` arbitrarily to narrow `pkgspecifiers`. """ -function add_format(fmt::Type{DataFormat{sym}}, magic::Union{Tuple,AbstractVector,String}, extension) where sym - haskey(sym2info, sym) && error("format ", fmt, " is already registered") - m = canonicalize_magic(magic) - rng = searchsorted(magic_list, m, lt=magic_cmp) - if !isempty(m) && !isempty(rng) - error("magic bytes ", m, " are already registered") +add_format(@nospecialize(fmt::Type), args...) = add_format(formatname(fmt)::Symbol, args...) 
+add_format(sym::Symbol, magic::Union{Tuple,AbstractVector{UInt8},String}, extension) = + add_format(sym, canonicalize_magic(magic), extension) +function add_format(sym::Symbol, + @nospecialize(magics::Tuple{Vector{UInt8},Vararg{Vector{UInt8}}}), extension) + add_format(sym, [magics...], extension) +end + +function add_format(sym::Symbol, magic::Vector{UInt8}, extension) + haskey(sym2info, sym) && error("format ", sym, " is already registered") + rng = searchsorted(magic_list, magic, lt=magic_cmp) + if !isempty(magic) && !isempty(rng) + error("magic bytes ", magic, " are already registered") end - insert!(magic_list, first(rng), Pair(m, sym)) # m=>sym in 0.4 - sym2info[sym] = (m, extension) + insert!(magic_list, first(rng), magic=>sym) + sym2info[sym] = (magic, extension) add_extension(extension, sym) - fmt + nothing end # for multiple magic bytes -function add_format(fmt::Type{DataFormat{sym}}, - magics::Tuple{T,Vararg{T}}, extension) where {sym, T <: Vector{UInt8}} - haskey(sym2info, sym) && error("format ", fmt, " is already registered") - magics = map(canonicalize_magic, magics) +function add_format(sym::Symbol, magics::Vector{Vector{UInt8}}, extension) + haskey(sym2info, sym) && error("format ", sym, " is already registered") for magic in magics rng = searchsorted(magic_list, magic, lt=magic_cmp) if !isempty(magic) && !isempty(rng) error("magic bytes ", magic, " are already registered") end - insert!(magic_list, first(rng), Pair(magic, sym)) # m=>sym in 0.4 + insert!(magic_list, first(rng), magic=>sym) end - sym2info[sym] = (magics, extension) + sym2info[sym] = (sort(magics; lt=magic_cmp), extension) add_extension(extension, sym) - fmt + nothing end # For when "magic" is supplied as a function (see the HDF5 example in # registry.jl) -function add_format(fmt::Type{DataFormat{sym}}, magic, extension) where sym - haskey(sym2info, sym) && error("format ", fmt, " is already registered") +function add_format(sym::Symbol, @nospecialize(magic::Function), extension) + 
haskey(sym2info, sym) && error("format ", sym, " is already registered") push!(magic_func, Pair(magic,sym)) # magic=>sym in 0.4 sym2info[sym] = (magic, extension) add_extension(extension, sym) - fmt + nothing end """ `del_format(fmt::DataFormat)` deletes `fmt` from the format registry. """ -function del_format(fmt::Type{DataFormat{sym}}) where sym +del_format(@nospecialize(fmt::Type)) = del_format(formatname(fmt)::Symbol) +function del_format(sym::Symbol) magic, extension = sym2info[sym] del_magic(magic, sym) delete!(sym2info, sym) @@ -148,12 +163,13 @@ function del_format(fmt::Type{DataFormat{sym}}) where sym nothing end -# Deletes multiple magic bytes -del_magic(magic::Tuple, sym) = for m in magic - del_magic(m, sym) -end +# # Deletes multiple magic bytes +# del_magic(magic::Tuple, sym) = for m in magic +# del_magic(m, sym) +# end # Deletes single magic bytes -function del_magic(magic::NTuple{N, UInt8}, sym) where N +del_magic(@nospecialize(magic), sym::Symbol) = del_magic(canonicalize_magic(magic), sym) +function del_magic(magic::Vector{UInt8}, sym::Symbol) rng = searchsorted(magic_list, magic, lt=magic_cmp) if length(magic) == 0 fullrng = rng @@ -171,15 +187,18 @@ function del_magic(magic::NTuple{N, UInt8}, sym) where N deleteat!(magic_list, first(rng)) nothing end +del_magic(magics::Vector{Vector{UInt8}}, sym::Symbol) = foreach(magics) do magic + del_magic(magic, sym) +end -function del_magic(magic::Function, sym) - deleteat!(magic_func, something(findfirst(isequal(Pair(magic,sym)), magic_func), 0)) +function del_magic(@nospecialize(magic::Function), sym::Symbol) + deleteat!(magic_func, something(findfirst(isequal(Pair{Function,Symbol}(magic,sym)), magic_func), 0)) nothing end ## File Extensions: -function add_extension(ext::String, sym) +function add_extension(ext::String, sym::Symbol) if haskey(ext2sym, ext) v = ext2sym[ext] if isa(v, Symbol) diff --git a/src/types.jl b/src/types.jl index 3f952da2..c9bbdb8c 100644 --- a/src/types.jl +++ b/src/types.jl @@ 
-1,4 +1,7 @@ -# The core types that represent the file formats +# "Public" types that represent the file formats. These are used +# to communicate results externally, but are generally avoided for +# internal operations because they trigger excessive specialization +# and inference failures. ## DataFormat: """ @@ -24,14 +27,17 @@ formatname(::Formatted{F}) where F<:DataFormat = formatname(F) ## File: """ -`File(fmt, filename)` indicates that `filename` is a file of known -DataFormat `fmt`. For example, `File{fmtpng}(filename)` would indicate a PNG +`File{fmt}(filename)` indicates that `filename` is a file of known +DataFormat `fmt`. For example, `File{format"PNG"}(filename)` would indicate a PNG file. """ -struct File{F<:DataFormat} <: Formatted{F} - filename +struct File{F<:DataFormat, Name} <: Formatted{F} + filename::Name end -File(fmt::Type{DataFormat{sym}}, filename) where {sym} = File{fmt}(filename) +File{F}(file::File{F}) where F<:DataFormat = file +File{DataFormat{sym}}(@nospecialize(file::Formatted)) where sym = error("cannot change the format of $file to $sym") +File{F}(file::AbstractString) where F<:DataFormat = File{F,String}(String(file)) # canonicalize to limit type-diversity +File{F}(file) where F<:DataFormat = File{F,typeof(file)}(file) # The docs are separated from the definition because of https://github.com/JuliaLang/julia/issues/34122 filename(@nospecialize(f::File)) = f.filename @@ -49,20 +55,25 @@ file_extension(::File) ## Stream: """ -`Stream(fmt, io, [filename])` indicates that the stream `io` is -written in known `Format`. For example, `Stream{PNG}(io)` would +`Stream{fmt}(io, [filename])` indicates that the stream `io` is +written in known format `fmt`. For example, `Stream{format"PNG"}(io)` would indicate PNG format. If known, the optional `filename` argument can be used to improve error messages, etc. 
""" -struct Stream{F <: DataFormat, IOtype <: IO} <: Formatted{F} +struct Stream{F <: DataFormat, IOtype <: IO, Name} <: Formatted{F} io::IOtype - filename + filename::Name end -Stream(::Type{F}, io::IO) where {F<:DataFormat} = Stream{F,typeof(io)}(io, nothing) -Stream(::Type{F}, io::IO, filename::AbstractString) where {F<:DataFormat} = Stream{F, typeof(io)}(io, String(filename)) -Stream(::Type{F}, io::IO, filename) where {F<:DataFormat} = Stream{F, typeof(io)}(io, filename) -Stream(file::File{F}, io::IO) where {F} = Stream{F, typeof(io)}(io, filename(file)) +Stream{F,IOtype}(io::IO, filename::AbstractString) where {F<:DataFormat,IOtype} = Stream{F, IOtype, String}(io, String(filename)) +Stream{F,IOtype}(io::IO, filename) where {F<:DataFormat,IOtype} = Stream{F, IOtype, typeof(filename)}(io, filename) +Stream{F,IOtype}(io::IO) where {F<:DataFormat,IOtype} = Stream{F, IOtype}(io, nothing) + +Stream{F,IOtype}(file::Formatted{F}, io::IO) where {F<:DataFormat,IOtype} = Stream{F,IOtype}(io, filename(file)) +Stream{F,IOtype}(@nospecialize(file::Formatted), io::IO) where {F<:DataFormat,IOtype} = error("cannot change the format of $file to $(formatname(F)::Symbol)") + +Stream{F}(io::IO, args...) where {F<:DataFormat} = Stream{F, typeof(io)}(io, args...) 
+Stream(file::File{F}, io::IO) where {F<:DataFormat} = Stream{F}(io, filename(file)) stream(@nospecialize(s::Stream)) = s.io "`stream(s)` returns the stream associated with `Stream` `s`" @@ -77,7 +88,7 @@ filename(::Stream) function file_extension(@nospecialize(f::Stream)) fname = filename(f) - (fname == nothing) && return nothing + (fname === nothing) && return nothing splitext(fname)[2] end """ @@ -98,7 +109,7 @@ end # Implement standard I/O operations for File and Stream @inline function Base.open(@nospecialize(file::File{F}), @nospecialize(args...)) where F<:DataFormat fn = filename(file) - Stream(F, open(fn, args...), abspath(fn)) + Stream{F}(open(fn, args...), abspath(fn)) end Base.close(@nospecialize(s::Stream)) = close(stream(s)) diff --git a/test/error_handling.jl b/test/error_handling.jl index 652fd81b..b5b9341c 100644 --- a/test/error_handling.jl +++ b/test/error_handling.jl @@ -14,16 +14,18 @@ add_format(format"PATHERROR", (), ".patherror", [:PathError]) fn = joinpath(temp_dir, "file.patherror") save(fn, "test content") @test isdir(temp_dir) - + # handling a filepath that's an existing directory, during save @test_throws ArgumentError save(format"PATHERROR", mktempdir(), "test content") - + # handling a nonexistent filepath, during load @test_throws ArgumentError load(joinpath(mktempdir(), "dummy.patherror")) end @testset "Not installed" begin - add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled]) + @test_throws ArgumentError add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled]) + # Give it a fake UUID + add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled=>UUID("79e393ae-7a7b-11eb-1530-bf4d98024096")]) @test_throws ArgumentError save("test.not_installed", nothing) # Core.eval(Base, :(is_interactive = true)) # for interactive error handling diff --git a/test/loadsave.jl b/test/loadsave.jl index aa1478ad..e4ba4bb4 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -8,7 +8,7 @@ import FileIO: 
File, @format_str load(file::File{format"PBMText"}) = "PBMText" load(file::File{format"PBMBinary"}) = "PBMBinary" load(file::File{format"JLD"}) = "JLD" -load(file::File{format"GZIP"}) = "GZIP" +load(file::File{format"GZIP"}) = "GZIP" end module TestLoadSave2 import FileIO: File, @format_str @@ -318,7 +318,7 @@ del_format(format"DUMMY") # PPM/PBM can be either binary or text. Test that the defaults work, # and that we can force a choice. module AmbigExt -import FileIO: File, @format_str, Stream, stream, skipmagic +using FileIO: File, @format_str, Stream, stream, skipmagic load(f::File{format"AmbigExt1"}) = open(f) do io skipmagic(io) @@ -354,13 +354,13 @@ end B = load(fn) @test B == A - @test typeof(query(fn)) == File{format"AmbigExt2"} + @test typeof(query(fn)) <: File{format"AmbigExt2"} rm(fn) save(fn, A) B = load(fn) @test B == A - @test typeof(query(fn)) == File{format"AmbigExt1"} + @test typeof(query(fn)) <: File{format"AmbigExt1"} rm(fn) del_format(format"AmbigExt1") @@ -368,5 +368,20 @@ end end @testset "Absent file" begin - @test_throws SystemError load("nonexistent.oops") + @test_throws Union{ArgumentError,SystemError} load("nonexistent.oops") +end + +module BadOverride +using FileIO +FileIO.load(::File{format"OVERRIDE"}) = 22 +add_format(format"OVERRIDE", "OVRD0101", ".ovr", [BadOverride]) +end + +@testset "Warn FileIO overrides" begin + fn = string(tempname(), ".ovr") + open(fn, "w") do io + write(io, magic(:OVERRIDE)) + print(io, "\nDone") + end + @test (@test_logs (:warn, r"incorrectly extends FileIO functions \(see FileIO documentation\)") load(fn)) == 22 end diff --git a/test/query.jl b/test/query.jl index 24de6eaf..70b01130 100644 --- a/test/query.jl +++ b/test/query.jl @@ -67,12 +67,12 @@ try @test unknown(format"UNKNOWN") add_format(format"CSV", UInt8[], ".csv") - @test FileIO.info(format"CSV") == ((),".csv") + @test FileIO.info(format"CSV") == ([],".csv") add_format(format"FOO", (), ".foo") # issue #17 @test_throws Exception 
FileIO.info(format"OOPS") @test FileIO.ext2sym[".csv"] == :CSV del_format(format"FOO") - @test FileIO.magic_list == [Pair((),:CSV)] + @test FileIO.magic_list == [Pair([],:CSV)] del_format(format"CSV") @test isempty(FileIO.ext2sym) @test isempty(FileIO.magic_list) @@ -81,18 +81,18 @@ try add_format(format"JUNK", "JUNK", [".jnk",".junk",".JNK"]) - @test FileIO.info(format"JUNK") == (tuple(b"JUNK"...),[".jnk",".junk",".JNK"]) + @test FileIO.info(format"JUNK") == (b"JUNK",[".jnk",".junk",".JNK"]) @test FileIO.ext2sym[".jnk"] == :JUNK @test FileIO.ext2sym[".junk"] == :JUNK @test FileIO.ext2sym[".JNK"] == :JUNK - @test FileIO.magic_list == [Pair((0x4a,0x55,0x4e,0x4b),:JUNK)] + @test FileIO.magic_list == [Pair([0x4a,0x55,0x4e,0x4b],:JUNK)] end @testset "streams" begin io = IOBuffer() s = Stream(format"JUNK", io) - @test typeof(s) == Stream{DataFormat{:JUNK},IOBuffer} + @test typeof(s) <: Stream{DataFormat{:JUNK},IOBuffer} @test filename(s) == nothing @test_throws Exception FileIO.file!(s) s = Stream(format"JUNK", io, "junk.jnk") @@ -120,7 +120,7 @@ try write(io, "JUNK and some more stuff") seek(io, 0) q = query(io) - @test typeof(q) == Stream{format"JUNK",typeof(io)} + @test typeof(q) <: Stream{format"JUNK",typeof(io)} @test !(unknown(q)) @test file_extension(q) == nothing @@ -131,7 +131,7 @@ try write(file, str) end q = query(fn) - @test typeof(q) == File{format"JUNK"} + @test typeof(q) <: File{format"JUNK"} @test file_extension(q) == ".jnk" rm(fn) @@ -142,7 +142,7 @@ try write(file, str) end q = query(fn) - @test typeof(q) == File{format"JUNK"} + @test typeof(q) <: File{format"JUNK"} @test file_extension(q) == ".csv" rm(fn) @@ -153,12 +153,12 @@ try write(file, "Here's some data") end q = query(fn) - @test typeof(q) == File{format"BAD"} + @test typeof(q) <: File{format"BAD"} @test file_extension(q) == ".bad" rm(fn) q = query( "some_non_existant_file.bad") - @test typeof(q) == File{format"BAD"} + @test typeof(q) <: File{format"BAD"} # Unknown extension fn = 
string("tempname", ".wrd") @@ -176,13 +176,13 @@ try write(file, "test1") end q = query(fn) - @test typeof(q) == File{format"DOUBLE_1"} + @test typeof(q) <: File{format"DOUBLE_1"} rm(fn) add_format(format"MAGIC", "this so magic", ".mmm") q = query( "some_non_existant_file.mmm") - @test typeof(q) == File{format"MAGIC"} + @test typeof(q) <: File{format"MAGIC"} add_format(format"DOUBLE_MAGIC", (UInt8[0x4d,0x4d,0x00,0x2a], UInt8[0x4d,0x4d,0x00]), ".dd2") @@ -192,7 +192,7 @@ try write(file, randstring(19)) end q = query(fn) - @test typeof(q) == File{format"DOUBLE_MAGIC"} + @test typeof(q) <: File{format"DOUBLE_MAGIC"} io = open(q) skipmagic(io) @test position(io) == 4 @@ -204,7 +204,7 @@ try write(file, randstring(19)) end q = query(fn) - @test typeof(q) == File{format"DOUBLE_MAGIC"} + @test typeof(q) <: File{format"DOUBLE_MAGIC"} io = open(q) @test file_extension(q) == ".dd2" skipmagic(io) @@ -272,7 +272,7 @@ file_path = Path(file_dir) @testset "Querying with $(typeof(fp))" for fp in (file_dir, file_path) @testset "bedGraph" begin q = query(joinpath(file_dir, "file.bedgraph")) - @test typeof(q) == File{format"bedGraph"} + @test typeof(q) <: File{format"bedGraph"} open(q) do io @test position(io) == 0 skipmagic(io) @@ -285,9 +285,9 @@ file_path = Path(file_dir) end @testset "STL detection" begin q = query(joinpath(file_dir, "ascii.stl")) - @test typeof(q) == File{format"STL_ASCII"} + @test typeof(q) <: File{format"STL_ASCII"} q = query(joinpath(file_dir, "binary_stl_from_solidworks.STL")) - @test typeof(q) == File{format"STL_BINARY"} + @test typeof(q) <: File{format"STL_BINARY"} open(q) do io @test position(io) == 0 skipmagic(io) @@ -296,16 +296,16 @@ file_path = Path(file_dir) end @testset "PLY detection" begin q = query(joinpath(file_dir, "ascii.ply")) - @test typeof(q) == File{format"PLY_ASCII"} + @test typeof(q) <: File{format"PLY_ASCII"} q = query(joinpath(file_dir, "binary.ply")) - @test typeof(q) == File{format"PLY_BINARY"} + @test typeof(q) <: 
File{format"PLY_BINARY"} end @testset "Multiple Magic bytes" begin q = query(joinpath(file_dir, "magic1.tiff")) - @test typeof(q) == File{format"TIFF"} + @test typeof(q) <: File{format"TIFF"} q = query(joinpath(file_dir, "magic2.tiff")) - @test typeof(q) == File{format"TIFF"} + @test typeof(q) <: File{format"TIFF"} open(q) do io @test position(io) == 0 skipmagic(io) @@ -323,11 +323,11 @@ file_path = Path(file_dir) @test !(FileIO.detectavi(s)) end q = query(joinpath(file_dir, "bees.avi")) - @test typeof(q) == File{format"AVI"} + @test typeof(q) <: File{format"AVI"} end @testset "RDA detection" begin q = query(joinpath(file_dir, "minimal_ascii.rda")) - @test typeof(q) == File{format"RData"} + @test typeof(q) <: File{format"RData"} open(q) do io @test position(io) == 0 @test FileIO.detect_rdata(io) @@ -337,7 +337,7 @@ file_path = Path(file_dir) end @testset "RDS detection" begin q = query(joinpath(file_dir, "minimal_ascii.rds")) - @test typeof(q) == File{format"RDataSingle"} + @test typeof(q) <: File{format"RDataSingle"} open(q) do io @test position(io) == 0 @test FileIO.detect_rdata_single(io) diff --git a/test/runtests.jl b/test/runtests.jl index b8adb3bc..5d2c2a2b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using FileIO using FilePathsBase using Test +using UUIDs Threads.nthreads() <= 1 && @info "Threads.nthreads() = $(Threads.nthreads()), multithread tests will be disabled" From ea3e9afc6dbfdb1a36cae6a8d93a4595e8b782c4 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 2 Mar 2021 08:07:09 -0600 Subject: [PATCH 2/7] Improve docstrings This is in preparation for adding Documenter docs, but it's useful on its own. 
--- src/FileIO.jl | 4 ++-- src/loadsave.jl | 42 ++++++++++++++++++++++++------------------ src/registry_setup.jl | 12 +++++++----- src/types.jl | 19 ++++++++++--------- 4 files changed, 43 insertions(+), 34 deletions(-) diff --git a/src/FileIO.jl b/src/FileIO.jl index 6e726e5d..97e65abf 100644 --- a/src/FileIO.jl +++ b/src/FileIO.jl @@ -43,7 +43,7 @@ include("registry.jl") - `File{fmt}` and `Stream{fmt}`: types of objects that declare that a resource has a particular format `fmt` - `load([filename|stream])`: read data in formatted file, inferring the format -- `load(File(format"PNG",filename))`: specify the format manually +- `load(File{format"PNG"}(filename))`: specify the format manually - `loadstreaming([filename|stream])`: similar to `load`, except that it returns an object that can be read from - `save(filename, data...)` for similar operations involving saving data - `savestreaming([filename|stream])`: similar to `save`, except that it returns an object that can be written to @@ -57,7 +57,7 @@ include("registry.jl") - `magic(fmt)` returns the magic bytes for format `fmt` - `info(fmt)` returns `(magic, extensions)` for format `fmt` -- `add_format(fmt, magic, extension)`: register a new format +- `add_format(fmt, magic, extension, libraries...)`: register a new format - `add_loader(fmt, :Package)`: indicate that `Package` supports loading files of type `fmt` - `add_saver(fmt, :Package)`: indicate that `Package` supports saving files of type `fmt` """ diff --git a/src/loadsave.jl b/src/loadsave.jl index 2c0e3425..8e1db6ac 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -27,6 +27,8 @@ end Declare that format `fmt` can be loaded with package `:Package`. Specifiers include `OSX`, `Unix`, `Windows` and `Linux` to restrict usage to particular operating systems. + +See also [`add_format`](@ref) which can combine package support with the format declaration. 
""" add_loader @@ -36,17 +38,19 @@ add_loader Declare that format `fmt` can be saved with package `:Package`. Specifiers include `OSX`, `Unix`, `Windows` and `Linux` to restrict usage to particular operating systems. + +See also [`add_format`](@ref) which can combine package support with the format declaration. """ add_saver """ - `load(filename)` loads the contents of a formatted file, trying to infer -the format from `filename` and/or magic bytes in the file. + the format from `filename` and/or magic bytes in the file (see [`query`](@ref)). - `load(strm)` loads from an `IOStream` or similar object. In this case, -there is no filename extension, so we rely on the magic bytes for format -identification. -- `load(File{format"PNG"}(filename))` specifies the format directly, and bypasses inference. -- `load(Stream{format"PNG"}(io))` specifies the format directly, and bypasses inference. + there is no filename extension, so we rely on the magic bytes for format + identification. +- `load(File{format"PNG"}(filename))` specifies the format directly, and bypasses the format [`query`](@ref). +- `load(Stream{format"PNG"}(io))` specifies the format directly, and bypasses the format [`query`](@ref). - `load(f; options...)` passes keyword arguments on to the loader. """ load @@ -58,25 +62,27 @@ higher-level streams should return a formatted object, like an image or chunk of video or audio. - `loadstreaming(filename)` loads the contents of a formatted file, trying to infer -the format from `filename` and/or magic bytes in the file. It returns a streaming -type that can be read from in chunks, rather than loading the whole contents all -at once + the format from `filename` and/or magic bytes in the file. It returns a streaming + type that can be read from in chunks, rather than loading the whole contents all + at once. - `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. 
-In this case, there is no filename extension, so we rely on the magic bytes -for format identification. + In this case, there is no filename extension, so we rely on the magic bytes + for format identification. - `loadstreaming(File{format"WAV"}(filename))` specifies the format directly, and -bypasses inference. + bypasses the format [`query`](@ref). - `loadstreaming(Stream{format"WAV"}(io))` specifies the format directly, and -bypasses inference. + bypasses the format [`query`](@ref). - `loadstreaming(f; options...)` passes keyword arguments on to the loader. """ loadstreaming """ - `save(filename, data...)` saves the contents of a formatted file, -trying to infer the format from `filename`. -- `save(Stream{format"PNG"}(io), data...)` specifies the format directly, and bypasses inference. -- `save(File{format"PNG"}(filename), data...)` specifies the format directly, and bypasses inference. + trying to infer the format from `filename`. +- `save(Stream{format"PNG"}(io), data...)` specifies the format directly, and + bypasses the format [`query`](@ref). +- `save(File{format"PNG"}(filename), data...)` specifies the format directly, and + bypasses the format [`query`](@ref). - `save(f, data...; options...)` passes keyword arguments on to the saver. """ save @@ -87,11 +93,11 @@ be written in chunks, rather than all at once. These higher-level streams should accept formatted objects, like an image or chunk of video or audio. - `savestreaming(filename, data...)` saves the contents of a formatted file, -trying to infer the format from `filename`. + trying to infer the format from `filename`. - `savestreaming(File{format"WAV"}(filename))` specifies the format directly, and -bypasses inference. + bypasses the format [`query`](@ref). - `savestreaming(Stream{format"WAV"}(io))` specifies the format directly, and -bypasses inference. + bypasses the format [`query`](@ref). - `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. 
""" savestreaming diff --git a/src/registry_setup.jl b/src/registry_setup.jl index 4a1351d1..0adca9ff 100644 --- a/src/registry_setup.jl +++ b/src/registry_setup.jl @@ -87,7 +87,7 @@ function add_format(fmt, magic, extension, load_save_libraries...) end """ -`add_format(fmt, magic, extension)` registers a new `DataFormat`. +`add_format(fmt, magic, extension)` registers a new [`DataFormat`](@ref). For example: add_format(format"TIFF", (UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"]) @@ -99,10 +99,12 @@ Note that extensions, magic numbers, and format-identifiers are case-sensitive. You can also specify particular packages that support the format with `add_format(fmt, magic, extension, pkgspecifiers...)`, where example `pkgspecifiers` are: - add_format(fmt, magic, extension, [:PkgA]) # only PkgA supports the format (load & save) - add_format(fmt, magic, extension, [:PkgA], [:PkgB]) # try PkgA first, but if it fails try PkgB - add_format(fmt, magic, extension, [:PkgA, LOAD], [:PkgB]) # try PkgA first for `load`, otherwise use PkgB - add_format(fmt, magic, extension, [:PkgA, OSX], [:PkgB]) # use PkgA on OSX, and PkgB otherwise + add_format(fmt, magic, extension, [:PkgA=>UUID(...)]) # only PkgA supports the format (load & save) + add_format(fmt, magic, extension, [:PkgA=>uuidA], [:PkgB=>uuidB]) # try PkgA first, but if it fails try PkgB + add_format(fmt, magic, extension, [:PkgA=>uuidA, LOAD], [:PkgB=>uuidB]) # try PkgA first for `load`, otherwise use PkgB + add_format(fmt, magic, extension, [:PkgA=>uuidA, OSX], [:PkgB=>uuidB]) # use PkgA on OSX, and PkgB otherwise + +The `uuid`s are all of type `UUID` and can be obtained from the package's `Project.toml` file. You can combine `LOAD`, `SAVE`, `OSX`, `Unix`, `Windows` and `Linux` arbitrarily to narrow `pkgspecifiers`. 
""" diff --git a/src/types.jl b/src/types.jl index c9bbdb8c..4bfc92a5 100644 --- a/src/types.jl +++ b/src/types.jl @@ -28,7 +28,7 @@ formatname(::Formatted{F}) where F<:DataFormat = formatname(F) """ `File{fmt}(filename)` indicates that `filename` is a file of known -DataFormat `fmt`. For example, `File{format"PNG"}(filename)` would indicate a PNG +[`DataFormat`](@ref) `fmt`. For example, `File{format"PNG"}(filename)` would indicate a PNG file. """ struct File{F<:DataFormat, Name} <: Formatted{F} @@ -42,22 +42,23 @@ File{F}(file) where F<:DataFormat = File{F,typeof(file)}(file) # The docs are separated from the definition because of https://github.com/JuliaLang/julia/issues/34122 filename(@nospecialize(f::File)) = f.filename """ -`filename(file)` returns the filename associated with `File` `file`. +`filename(file)` returns the filename associated with [`File`](@ref) `file`. """ filename(::File) file_extension(@nospecialize(f::File)) = splitext(filename(f))[2] """ -`file_extension(file)` returns the file extension associated with `File` `file`. +`file_extension(file)` returns the file extension associated with [`File`](@ref) `file`. """ file_extension(::File) ## Stream: """ -`Stream{fmt}(io, [filename])` indicates that the stream `io` is -written in known format `fmt`. For example, `Stream{format"PNG"}(io)` would -indicate PNG format. If known, the optional `filename` argument can +`Stream{fmt}(io, filename=nothing)` indicates that the stream `io` is +written in known format [`DataFormat`](@ref) `fmt`. +For example, `Stream{format"PNG"}(io)` would indicate PNG format. +If known, the optional `filename` argument can be used to improve error messages, etc. """ struct Stream{F <: DataFormat, IOtype <: IO, Name} <: Formatted{F} @@ -76,13 +77,13 @@ Stream{F}(io::IO, args...) 
where {F<:DataFormat} = Stream{F, typeof(io)}(io, arg Stream(file::File{F}, io::IO) where {F<:DataFormat} = Stream{F}(io, filename(file)) stream(@nospecialize(s::Stream)) = s.io -"`stream(s)` returns the stream associated with `Stream` `s`" +"`stream(s)` returns the stream associated with [`Stream`](@ref) `s`" stream(::Stream) filename(@nospecialize(s::Stream)) = s.filename """ `filename(stream)` returns a string of the filename -associated with `Stream` `stream`, or nothing if there is no file associated. +associated with [`Stream`](@ref) `stream`, or nothing if there is no file associated. """ filename(::Stream) @@ -92,7 +93,7 @@ function file_extension(@nospecialize(f::Stream)) splitext(fname)[2] end """ -`file_extension(file)` returns a nullable-string for the file extension associated with `Stream` `stream`. +`file_extension(file)` returns a nullable-string for the file extension associated with [`Stream`](@ref) `stream`. """ file_extension(::Stream) From 6a5b688c011ee6c2867fd5065554aeff3ff0f52d Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 2 Mar 2021 08:07:37 -0600 Subject: [PATCH 3/7] Use strings for magic bytes where possible This will improve printing in documentation tables --- src/registry.jl | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/registry.jl b/src/registry.jl index 12c4dc16..d18fe1e3 100644 --- a/src/registry.jl +++ b/src/registry.jl @@ -57,18 +57,18 @@ add_format(format"SAS", UInt8[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, add_format(format"Parquet", "PAR1", [".parquet"], [:ParquetFiles => UUID("46a55296-af5a-53b0-aaa0-97023b66127f"), LOAD]) # Image formats -add_format(format"PBMBinary", b"P4", ".pbm", [idImageIO], [idNetpbm], [idImageMagick]) -add_format(format"PGMBinary", b"P5", ".pgm", [idImageIO], [idNetpbm]) -add_format(format"PPMBinary", b"P6", ".ppm", [idImageIO], [idNetpbm]) -add_format(format"PBMText", b"P1", ".pbm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) -add_format(format"PGMText", 
b"P2", ".pgm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) -add_format(format"PPMText", b"P3", ".ppm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) +add_format(format"PBMBinary", "P4", ".pbm", [idImageIO], [idNetpbm], [idImageMagick]) +add_format(format"PGMBinary", "P5", ".pgm", [idImageIO], [idNetpbm]) +add_format(format"PPMBinary", "P6", ".ppm", [idImageIO], [idNetpbm]) +add_format(format"PBMText", "P1", ".pbm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) +add_format(format"PGMText", "P2", ".pgm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) +add_format(format"PPMText", "P3", ".ppm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) add_format(format"NRRD", "NRRD", [".nrrd", ".nhdr"], [:NRRD => UUID("9bb6cfbd-7763-5393-b1b5-1c8e09872146")]) add_format(format"AndorSIF", "Andor Technology Multi-Channel File", ".sif", [:AndorSIF => UUID("d04cd5f8-5917-4006-ac6f-d139328806a7"), LOAD]) -add_format(format"FLO", b"PIEH", ".flo", [:OpticalFlowUtils => UUID("ab0dad50-ab19-448c-b796-13553ec8b2d3")]) +add_format(format"FLO", "PIEH", ".flo", [:OpticalFlowUtils => UUID("ab0dad50-ab19-448c-b796-13553ec8b2d3")]) add_format(format"CRW", UInt8[0x49,0x49,0x1a,0x00,0x00,0x00,0x48,0x45], ".crw", [idImageMagick]) add_format(format"CUR", UInt8[0x00,0x00,0x02,0x00], ".cur", [idImageMagick]) From c25f5cd7a7c691c22e4b9e2ed854424cc004ca28 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Tue, 2 Mar 2021 20:15:54 -0600 Subject: [PATCH 4/7] Add precompiles --- src/FileIO.jl | 8 +++--- src/precompile.jl | 69 +++++++++++------------------------------------ 2 files changed, 20 insertions(+), 57 deletions(-) diff --git a/src/FileIO.jl b/src/FileIO.jl index 97e65abf..a1950247 100644 --- a/src/FileIO.jl +++ b/src/FileIO.jl @@ -63,10 +63,10 @@ include("registry.jl") """ FileIO -# if VERSION >= v"1.4.2" # https://github.com/JuliaLang/julia/pull/35378 -# include("precompile.jl") -# _precompile_() -# end +if VERSION >= v"1.4.2" # https://github.com/JuliaLang/julia/pull/35378 + 
include("precompile.jl") + _precompile_() +end include("deprecated.jl") diff --git a/src/precompile.jl b/src/precompile.jl index 39392b62..54257271 100644 --- a/src/precompile.jl +++ b/src/precompile.jl @@ -1,59 +1,22 @@ function _precompile_() ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - @assert precompile(Tuple{typeof(detect_bedgraph),IOStream}) - @assert precompile(Tuple{typeof(detect_noometiff),IOStream}) - @assert precompile(Tuple{typeof(detect_rdata),IOStream}) - @assert precompile(Tuple{typeof(detect_rdata_single),IOStream}) - @assert precompile(Tuple{typeof(detectwav),IOStream}) - - @assert precompile(Tuple{typeof(load),File}) - @assert precompile(Tuple{typeof(load),Formatted}) - @assert precompile(Tuple{typeof(load),String}) - @assert precompile(Tuple{typeof(FileIO.load_filename),Formatted,String}) - if isdefined(Base, :bodyfunction) - fbody = Base.bodyfunction(which(FileIO.load_filename, (Formatted, String))) - @assert precompile(fbody, (Any, typeof(FileIO.load_filename), Formatted, String)) - @assert precompile(fbody, (Any, typeof(FileIO.load_filename), Formatted, String, Vararg{Any,100})) + for f in (detect_rdata, detect_rdata_single, detectwav, detect_bedgraph, + detecttiff, detect_noometiff, detect_ometiff, detectavi, + detecthdf5, detect_stlascii, detect_stlbinary, detect_gadget2) + @assert precompile(f, (IOStream,)) end - @assert precompile(Tuple{typeof(query),String}) - @assert precompile(Tuple{typeof(query),IOStream}) - @assert precompile(Tuple{typeof(query),IOStream,String}) - @assert precompile(Tuple{typeof(query),IOStream,Nothing}) - - @assert precompile(Tuple{typeof(hasfunction),Function}) - @assert precompile(Tuple{typeof(hasmagic),Function}) - - @assert precompile(Tuple{typeof(applicable_loaders),Type{<:DataFormat}}) - @assert precompile(Tuple{typeof(applicable_loaders),Formatted}) - @assert precompile(Tuple{typeof(applicable_savers),Type{<:DataFormat}}) - @assert precompile(Tuple{typeof(applicable_savers),Formatted}) - 
@assert precompile(Tuple{typeof(add_loader),Type{<:DataFormat},Symbol}) - @assert precompile(Tuple{typeof(add_saver),Type{<:DataFormat},Symbol}) - - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{10,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{20,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{30,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{32,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{35,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{4,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{6,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{7,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{8,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},Tuple{UInt8,UInt8,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},Tuple{UInt8,UInt8}}) - - if isdefined(Base, :bodyfunction) - m = which(query, (String,)) - f = Base.bodyfunction(m) - @assert precompile(f, (Bool, typeof(query), String)) - m = which(load, (String,)) - f = Base.bodyfunction(m) - @assert precompile(f, (Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, typeof(load), String)) - m = which(load, (Formatted,)) - f = Base.bodyfunction(m) - @assert precompile(f, (Any, typeof(load), Formatted)) - @assert precompile(f, (Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, typeof(load), File)) + for F in (String, IOStream, Formatted) + @assert precompile(query, (F,)) + @assert precompile(load, (F,)) + @assert precompile(save, (F,Nothing,)) + @assert precompile(loadstreaming, (F,)) + @assert precompile(savestreaming, (F,)) end - + @assert precompile(action, (Symbol,Vector{Union{PkgId, Module}},Symbol,String)) + @assert precompile(action, (Symbol,Vector{Union{PkgId, Module}},Symbol,IOStream)) + @assert precompile(action, 
(Symbol,Vector{Union{PkgId, Module}},Formatted)) + @assert precompile(loadstreaming, (Function, Any)) + @assert precompile(savestreaming, (Function, Any)) + @assert precompile(skipmagic, (IOStream,Vector{Vector{UInt8}},)) end From 50cc444d77970dbad9e42ff468acb4e62d7ac70c Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 3 Mar 2021 04:35:12 -0600 Subject: [PATCH 5/7] Delete cruft & improve test coverage --- src/error_handling.jl | 34 --------------------------- src/query.jl | 32 ++++---------------------- src/types.jl | 6 +++-- test/query.jl | 53 +++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 59 insertions(+), 66 deletions(-) diff --git a/src/error_handling.jl b/src/error_handling.jl index 4b83aa81..ed3605d9 100644 --- a/src/error_handling.jl +++ b/src/error_handling.jl @@ -24,35 +24,6 @@ Base.showerror(io::IO, e::WriterError) = println( e.msg, "\n Will try next writer." ) -""" -`NotInstalledError` should be thrown when a library is currently not installed. -""" -struct NotInstalledError <: Exception - library::Symbol - message::String -end -Base.showerror(io::IO, e::NotInstalledError) = println(io, e.library, " is not installed.") - -""" -`UnknownFormat` gets thrown when FileIO can't recognize the format of a file. -""" -struct UnknownFormat{T <: Formatted} <: Exception - format::T -end -Base.showerror(io::IO, e::UnknownFormat) = println(io, e.format, " couldn't be recognized by FileIO.") - - -""" -Handles error as soon as they get thrown while doing IO -""" -function handle_current_error(e, library, islast::Bool) - bt = catch_backtrace() - bts = sprint(io->Base.show_backtrace(io, bt)) - message = islast ? "" : "\nTrying next loading library! 
Please report this issue on the Github page for $library" - @warn string(e, bts, message) -end -handle_current_error(e::NotInstalledError) = @warn string("lib ", e.library, " not installed, trying next library") - struct SpecError <: Exception mod::Module @@ -86,8 +57,3 @@ function handle_exceptions(exceptions::Vector, action) end handle_error(e, q) = throw(e) - -function handle_error(e::NotInstalledError, q) - println("Library \"", e.library, "\" is not installed but is recommended as a library to load format: \"", file_extension(q), "\"") - rethrow(e) -end diff --git a/src/query.jl b/src/query.jl index a4a7c636..73de294c 100644 --- a/src/query.jl +++ b/src/query.jl @@ -76,7 +76,7 @@ function query(filename; checkfile::Bool=true) sym = querysym(filename; checkfile=checkfile) return File{DataFormat{sym}}(filename) end -query(@nospecialize(f::Formatted); checkfile::Bool=true) = f +query(@nospecialize(f::Formatted)) = f # This is recommended for internal use because it returns Symbol (or errors) function querysym(filename; checkfile::Bool=true) @@ -142,9 +142,8 @@ function match(io, @nospecialize(magic::Function)) try magic(io) catch e - println("There was an error in magic function $magic") - println("Please open an issue at FileIO.jl. Error:") - println(e) + @error("""There was an error in magic function $magic. + Please open an issue at FileIO.jl.""", exception=(e, catch_backtrace())) false end end @@ -205,30 +204,7 @@ function query(io::IO, filename = nothing) sym = querysym(io) return Stream{DataFormat{sym}}(io, filename) end -query(io::IO, @nospecialize(filename::Formatted)) = error("no need to query when format is known") - -# TODO?: update to querysym? 
-function query(io::IO, filename::String, sym::Vector{Symbol}) - pos = position(io) - if seekable(io) - for (f, fmtsym) in magic_func - fmtsym in sym || continue - seek(io, pos) - try - if f(io) - return Stream{DataFormat{fmtsym},typeof(io)}(seek(io, pos), filename) - end - catch e - println("There was an error in magic function $f") - println("Please open an issue at FileIO.jl. Error:") - println(e) - end - end - seek(io, pos) - end - close(io) # FIXME? - nothing -end +query(io::IO, @nospecialize(file::Formatted)) = Stream{DataFormat{formatname(file)::Symbol}}(io, filename(file)) seekable(io::IOBuffer) = io.seekable seekable(::IOStream) = true diff --git a/src/types.jl b/src/types.jl index 4bfc92a5..5c6f5a93 100644 --- a/src/types.jl +++ b/src/types.jl @@ -35,7 +35,7 @@ struct File{F<:DataFormat, Name} <: Formatted{F} filename::Name end File{F}(file::File{F}) where F<:DataFormat = file -File{DataFormat{sym}}(@nospecialize(file::Formatted)) where sym = error("cannot change the format of $file to $sym") +File{DataFormat{sym}}(@nospecialize(file::Formatted)) where sym = throw(ArgumentError("cannot change the format of $file to $sym")) File{F}(file::AbstractString) where F<:DataFormat = File{F,String}(String(file)) # canonicalize to limit type-diversity File{F}(file) where F<:DataFormat = File{F,typeof(file)}(file) @@ -71,9 +71,11 @@ Stream{F,IOtype}(io::IO, filename) where {F<:DataFormat,IOtype} Stream{F,IOtype}(io::IO) where {F<:DataFormat,IOtype} = Stream{F, IOtype}(io, nothing) Stream{F,IOtype}(file::Formatted{F}, io::IO) where {F<:DataFormat,IOtype} = Stream{F,IOtype}(io, filename(file)) -Stream{F,IOtype}(@nospecialize(file::Formatted), io::IO) where {F<:DataFormat,IOtype} = error("cannot change the format of $file to $(formatname(F)::Symbol)") +Stream{F,IOtype}(@nospecialize(file::Formatted), io::IO) where {F<:DataFormat,IOtype} = + throw(ArgumentError("cannot change the format of $file to $(formatname(F)::Symbol)")) Stream{F}(io::IO, args...) 
where {F<:DataFormat} = Stream{F, typeof(io)}(io, args...) +Stream{F}(file::File, io::IO) where {F<:DataFormat} = Stream{F, typeof(io)}(file, io) Stream(file::File{F}, io::IO) where {F<:DataFormat} = Stream{F}(io, filename(file)) stream(@nospecialize(s::Stream)) = s.io diff --git a/test/query.jl b/test/query.jl index 70b01130..6503c00e 100644 --- a/test/query.jl +++ b/test/query.jl @@ -87,6 +87,7 @@ try @test FileIO.ext2sym[".JNK"] == :JUNK @test FileIO.magic_list == [Pair([0x4a,0x55,0x4e,0x4b],:JUNK)] + add_format(format"OTHER", [0x01, 0x02], ".othr") end @testset "streams" begin @@ -94,11 +95,13 @@ try s = Stream(format"JUNK", io) @test typeof(s) <: Stream{DataFormat{:JUNK},IOBuffer} @test filename(s) == nothing - @test_throws Exception FileIO.file!(s) + @test_throws ErrorException("filename unknown") FileIO.file!(s) s = Stream(format"JUNK", io, "junk.jnk") @test filename(s) == "junk.jnk" s = Stream(format"JUNK", io, "junk2.jnk") @test filename(s) == "junk2.jnk" + s = Stream(format"JUNK", io, "somefile.jnk") + @test FileIO.file!(s) isa File{format"JUNK"} end @testset "query" begin @@ -123,6 +126,20 @@ try @test typeof(q) <: Stream{format"JUNK",typeof(io)} @test !(unknown(q)) @test file_extension(q) == nothing + # unseekable IO + seek(io, 0) + io.seekable = false + @test !FileIO.seekable(io) + q = query(io) + @test typeof(q) <: Stream{format"JUNK",typeof(io)} + io.seekable = true + # too short to match + io2 = IOBuffer() + write(io2, "JU") + seek(io2, 0) + io2.seekable = false + q = query(io2) + @test unknown(q) # File with correct extension str = String(take!(io)) @@ -133,6 +150,17 @@ try q = query(fn) @test typeof(q) <: File{format"JUNK"} @test file_extension(q) == ".jnk" + # for good measure, test some constructors & other query calls + @test query(q) == q + @test File{format"JUNK"}(q) == q + @test_throws ArgumentError("cannot change the format of $q to OTHER") File{format"OTHER"}(q) + open(fn) do io + @test query(io) isa Stream{format"JUNK", typeof(io)} + 
@test query(io, q) isa Stream{format"JUNK", typeof(io)} + @test Stream(q, io) isa Stream{format"JUNK", typeof(io)} + @test Stream{format"JUNK"}(q, io) isa Stream{format"JUNK", typeof(io)} + @test_throws ArgumentError Stream{format"OTHER"}(q, io) + end rm(fn) @@ -145,6 +173,15 @@ try @test typeof(q) <: File{format"JUNK"} @test file_extension(q) == ".csv" rm(fn) + # erroneous extension with a file that has magic bytes + fn = string(tempname(), ".othr") + open(fn, "w") do file + write(file, str) + end + q = query(fn) + @test typeof(q) <: File{format"JUNK"} + @test query(fn; checkfile=false) isa File{format"OTHER"} + rm(fn) # Format with no magic bytes add_format(format"BAD", (), ".bad") @@ -179,6 +216,15 @@ try @test typeof(q) <: File{format"DOUBLE_1"} rm(fn) + # Busted detection function + busted(io) = error("whoops") + add_format(format"BUSTED", busted, ".bstd") + fn = string(tempname(), ".bstd") + open(fn, "w") do file + write(file, "JUNK stuff") + end + @test (@test_logs (:error,r"There was an error in magic function .*busted") query(fn)) isa File{format"JUNK"} + del_format(format"BUSTED") add_format(format"MAGIC", "this so magic", ".mmm") q = query( "some_non_existant_file.mmm") @@ -214,7 +260,10 @@ try write(file, randstring(19)) # corrupt magic bytes end open(fn, "r") do file - @test_throws Exception skipmagic(file) + @test_throws ErrorException("tried to skip magic bytes of an IO that does not contain the magic bytes of the format. IO: $file") skipmagic(Stream{format"DOUBLE_MAGIC"}(file, fn)) + end + open(fn, "r") do file + @test_throws ErrorException("tried to skip magic bytes of an IO that does not contain the magic bytes of the format. 
IO: $file") skipmagic(file, format"DOUBLE_MAGIC") end rm(fn) lene0 = length(FileIO.ext2sym) From 714b915dc900257f24b7dd8353f8b7b78d919324 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 3 Mar 2021 05:10:51 -0600 Subject: [PATCH 6/7] Switch badge to codecov --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 26141ae1..afd627ed 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # FileIO [![Build status](https://github.com/JuliaIO/FileIO.jl/actions/workflows/test.yml/badge.svg)](https://github.com/JuliaIO/FileIO.jl/actions/workflows/test.yml) -[![Coverage Status](https://coveralls.io/repos/JuliaIO/FileIO.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/JuliaIO/FileIO.jl?branch=master) +[![codecov](https://codecov.io/gh/JuliaIO/FileIO.jl/branch/master/graph/badge.svg?token=I0NjrZpJKh)](https://codecov.io/gh/JuliaIO/FileIO.jl) FileIO aims to provide a common framework for detecting file formats and dispatching to appropriate readers/writers. 
The two core From 412fdc642b2d08837ad90f41cb312bc25a6a2d66 Mon Sep 17 00:00:00 2001 From: Tim Holy Date: Wed, 3 Mar 2021 06:19:04 -0600 Subject: [PATCH 7/7] Eliminate test depwarns --- test/error_handling.jl | 14 +++++++------- test/loadsave.jl | 22 +++++++++++----------- test/query.jl | 14 +++++++------- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/test/error_handling.jl b/test/error_handling.jl index b5b9341c..e699e754 100644 --- a/test/error_handling.jl +++ b/test/error_handling.jl @@ -5,7 +5,7 @@ import FileIO: File, @format_str save(file::File{format"PATHERROR"}, data) = nothing load(file::File{format"PATHERROR"}) = nothing end -add_format(format"PATHERROR", (), ".patherror", [:PathError]) +add_format(format"PATHERROR", (), ".patherror", [PathError]) @testset "Path errors" begin # handling a nonexistent parent directory, during save @@ -23,7 +23,7 @@ add_format(format"PATHERROR", (), ".patherror", [:PathError]) end @testset "Not installed" begin - @test_throws ArgumentError add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled]) + @test_logs (:warn, r"supply `pkg` as a Module or `name=>uuid`") @test_throws ArgumentError add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled]) # Give it a fake UUID add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled=>UUID("79e393ae-7a7b-11eb-1530-bf4d98024096")]) @test_throws ArgumentError save("test.not_installed", nothing) @@ -56,13 +56,13 @@ end module BrokenIO using FileIO end -add_format(format"BROKEN", (), ".brok", [:BrokenIO]) +add_format(format"BROKEN", (), ".brok", [BrokenIO]) @testset "Absent implementation" begin stderr_copy = stderr rserr, wrerr = redirect_stderr() - @test_throws FileIO.LoaderError load(Stream(format"BROKEN",stdin)) - @test_throws FileIO.WriterError save(Stream(format"BROKEN",stdout), nothing) + @test_throws FileIO.LoaderError load(Stream{format"BROKEN"}(stdin)) + @test_throws FileIO.WriterError save(Stream{format"BROKEN"}(stdout), 
nothing) redirect_stderr(stderr_copy) close(rserr);close(wrerr) end @@ -81,8 +81,8 @@ end format"MultiError", (), ".multierr", - [:MultiError1], - [:MultiError2] + [MultiError1], + [MultiError2] ) tmpfile = joinpath(mktempdir(), "test.multierr") open(tmpfile, "w") do io diff --git a/test/loadsave.jl b/test/loadsave.jl index e4ba4bb4..3eb55bc3 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -27,11 +27,11 @@ try @testset "Load $(typeof(fp))" for fp in (file_dir, file_path) - add_loader(format"PBMText", :TestLoadSave) - add_loader(format"PBMBinary", :TestLoadSave) - add_loader(format"HDF5", :TestLoadSave2) - add_loader(format"JLD", :TestLoadSave) - add_loader(format"GZIP", :TestLoadSave) + add_loader(format"PBMText", TestLoadSave) + add_loader(format"PBMBinary", TestLoadSave) + add_loader(format"HDF5", TestLoadSave2) + add_loader(format"JLD", TestLoadSave) + add_loader(format"GZIP", TestLoadSave) @test load(joinpath(fp,"file1.pbm")) == "PBMText" @test load(joinpath(fp,"file2.pbm")) == "PBMBinary" @@ -172,8 +172,8 @@ function save(s::Stream{format"DUMMY"}, data; extra=UInt8[]) write(s, extra) end -add_loader(format"DUMMY", :Dummy) -add_saver(format"DUMMY", :Dummy) +add_loader(format"DUMMY", Dummy) +add_saver(format"DUMMY", Dummy) end # module Dummy @@ -189,7 +189,7 @@ end # module Dummy f = query(fnrel) @test isabspath(filename(f)) @test endswith(filename(f),fn) # TravisOSX prepends "/private" - f = File(format"DUMMY", fnrel) + f = File{format"DUMMY"}(fnrel) @test !(isabspath(filename(f))) open(f) do s @test isabspath(filename(s)) @@ -206,7 +206,7 @@ end # module Dummy f = query(fnrel) @test isabspath(filename(f)) @test endswith(filename(f),fn2) # TravisOSX prepends "/private" - f = File(format"DUMMY", fnrel) + f = File{format"DUMMY"}(fnrel) @test !(isabspath(filename(f))) open(f) do s @test isabspath(filename(s)) @@ -342,8 +342,8 @@ end end @testset "Ambiguous extension" begin - add_format(format"AmbigExt1", "ambigext1", ".aext", [:AmbigExt]) - 
add_format(format"AmbigExt2", "ambigext2", ".aext", [:AmbigExt]) + add_format(format"AmbigExt1", "ambigext1", ".aext", [AmbigExt]) + add_format(format"AmbigExt2", "ambigext2", ".aext", [AmbigExt]) A = "this is a test" fn = string(tempname(), ".aext") # Test the forced version first: we wouldn't want some method in Netpbm diff --git a/test/query.jl b/test/query.jl index 6503c00e..e26f40bd 100644 --- a/test/query.jl +++ b/test/query.jl @@ -92,15 +92,15 @@ try @testset "streams" begin io = IOBuffer() - s = Stream(format"JUNK", io) + s = Stream{format"JUNK"}(io) @test typeof(s) <: Stream{DataFormat{:JUNK},IOBuffer} @test filename(s) == nothing @test_throws ErrorException("filename unknown") FileIO.file!(s) - s = Stream(format"JUNK", io, "junk.jnk") + s = Stream{format"JUNK"}(io, "junk.jnk") @test filename(s) == "junk.jnk" - s = Stream(format"JUNK", io, "junk2.jnk") + s = Stream{format"JUNK"}(io, "junk2.jnk") @test filename(s) == "junk2.jnk" - s = Stream(format"JUNK", io, "somefile.jnk") + s = Stream{format"JUNK"}(io, "somefile.jnk") @test FileIO.file!(s) isa File{format"JUNK"} end @@ -284,8 +284,8 @@ try format"MultiLib", UInt8[0x42,0x4d], ".mlb", - [:LoadTest1, FileIO.LOAD, OSKey], - [:LoadTest2] + [LoadTest1, FileIO.LOAD, OSKey], + [LoadTest2] ) @test lensave0 + 1 == length(FileIO.sym2saver) @test lenload0 + 1 == length(FileIO.sym2loader) @@ -408,6 +408,6 @@ end end @testset "Format with function for magic bytes" begin - add_format(format"FUNCTION_FOR_MAGIC_BYTES", x -> 0x00, ".wav", [:WAV]) + add_format(format"FUNCTION_FOR_MAGIC_BYTES", io -> true, ".wav", [LoadTest1]) del_format(format"FUNCTION_FOR_MAGIC_BYTES") end