diff --git a/Project.toml b/Project.toml index def75096..26e1a0d4 100644 --- a/Project.toml +++ b/Project.toml @@ -1,9 +1,10 @@ name = "FileIO" uuid = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549" -version = "1.5.0" +version = "1.6.0" [deps] Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" +UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] julia = "0.7, 1" diff --git a/README.md b/README.md index 26141ae1..afd627ed 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # FileIO [![Build status](https://github.com/JuliaIO/FileIO.jl/actions/workflows/test.yml/badge.svg)](https://github.com/JuliaIO/FileIO.jl/actions/workflows/test.yml) -[![Coverage Status](https://coveralls.io/repos/JuliaIO/FileIO.jl/badge.svg?branch=master&service=github)](https://coveralls.io/github/JuliaIO/FileIO.jl?branch=master) +[![codecov](https://codecov.io/gh/JuliaIO/FileIO.jl/branch/master/graph/badge.svg?token=I0NjrZpJKh)](https://codecov.io/gh/JuliaIO/FileIO.jl) FileIO aims to provide a common framework for detecting file formats and dispatching to appropriate readers/writers. 
The two core diff --git a/src/FileIO.jl b/src/FileIO.jl index 4837d1bd..a1950247 100644 --- a/src/FileIO.jl +++ b/src/FileIO.jl @@ -23,8 +23,9 @@ export DataFormat, metadata import Base.showerror -using Base: RefValue +using Base: RefValue, PkgId using Pkg +using UUIDs include("types.jl") include("registry_setup.jl") @@ -42,7 +43,7 @@ include("registry.jl") - `File{fmt}` and `Stream{fmt}`: types of objects that declare that a resource has a particular format `fmt` - `load([filename|stream])`: read data in formatted file, inferring the format -- `load(File(format"PNG",filename))`: specify the format manually +- `load(File{format"PNG"}(filename))`: specify the format manually - `loadstreaming([filename|stream])`: similar to `load`, except that it returns an object that can be read from - `save(filename, data...)` for similar operations involving saving data - `savestreaming([filename|stream])`: similar to `save`, except that it returns an object that can be written to @@ -56,7 +57,7 @@ include("registry.jl") - `magic(fmt)` returns the magic bytes for format `fmt` - `info(fmt)` returns `(magic, extensions)` for format `fmt` -- `add_format(fmt, magic, extension)`: register a new format +- `add_format(fmt, magic, extension, libraries...)`: register a new format - `add_loader(fmt, :Package)`: indicate that `Package` supports loading files of type `fmt` - `add_saver(fmt, :Package)`: indicate that `Package` supports saving files of type `fmt` """ @@ -67,4 +68,6 @@ if VERSION >= v"1.4.2" # https://github.com/JuliaLang/julia/pull/35378 _precompile_() end +include("deprecated.jl") + end diff --git a/src/deprecated.jl b/src/deprecated.jl new file mode 100644 index 00000000..06937dc5 --- /dev/null +++ b/src/deprecated.jl @@ -0,0 +1,78 @@ +# Deprecations added in 1.5.0, March 2021 + +function File(fmt::Type{DataFormat{sym}}, filename) where {sym} + Base.depwarn("`File(format\"$sym\", filename)` is deprecated, please use `File{format\"$sym\"}(filename)` instead.", :File) + return 
File{fmt}(filename) +end +function Stream(fmt::Type{DataFormat{sym}}, args...) where {sym} + Base.depwarn("`Stream(format\"$sym\", filename)` is deprecated, please use `Stream{format\"$sym\"}(filename)` instead.", :Stream) + return Stream{fmt}(args...) +end + +# These aren't used here, but old versions of ImageIO expect them + +function _findmod(f::Symbol) + Base.depwarn("_findmod is deprecated and will be removed. Use `Base.require(::Base.PkgId)` instead.", :_findmod) + for (u,v) in Base.loaded_modules + (Symbol(v) == f) && return u + end + nothing +end +function topimport(modname) + Base.depwarn("topimport is deprecated and will be removed. Use `Base.require(::Base.PkgId)` instead.", :topimport) + @eval Base.__toplevel__ import $modname + u = _findmod(modname) + @eval $modname = Base.loaded_modules[$u] +end + +# Legacy add_loader/add_saver +for add_ in (:add_loader, :add_saver) + @eval begin + function $add_(fmt, pkg) + # TODO: delete this method in FileIO v2 + sym = isa(fmt, Symbol) ? fmt : formatname(fmt)::Symbol + Base.depwarn(string($add_) * "(fmt, pkg::$(typeof(pkg))) is deprecated, supply `pkg` as a Module or `name=>uuid`", Symbol($add_)) + pkg === :MimeWriter && return $add_(sym, MimeWriter) + # Try to look it up in the caller's environment + pkgname = string(pkg) + id = Base.identify_package(pkgname) + if id === nothing + # See if it's in Main + pkgsym = Symbol(pkg) + if isdefined(Main, pkgsym) + id = getfield(Main, pkgsym) + if !isa(id, Module) + id = nothing + end + end + if id === nothing + # Look it up in the registries. The tricky part here is supporting different Julia versions + ctx = Pkg.API.Context() + uuids = UUID[] + @static if Base.VERSION >= v"1.2" + if hasfield(typeof(ctx), :registries) + for reg in ctx.registries + append!(uuids, Pkg.Registry.uuids_from_name(reg, pkgname)) + end + else + ctx = Pkg.API.Context!(ctx) + if isdefined(Pkg.Types, :find_registered!) 
&& hasmethod(Pkg.Types.find_registered!, (typeof(ctx.env), Vector{String})) + Pkg.Types.find_registered!(ctx.env, [pkgname]) + elseif isdefined(Pkg.Types, :find_registered!) && hasmethod(Pkg.Types.find_registered!, (typeof(ctx), Vector{String})) + Pkg.Types.find_registered!(ctx, [pkgname]) + end + append!(uuids, get(ctx.env.uuids, pkgname, UUID[])) + end + else + Pkg.Types.find_registered!(ctx.env) + append!(uuids, get(ctx.env.uuids, pkgname, UUID[])) + end + isempty(uuids) && throw(ArgumentError("no UUID found for $pkg")) + length(uuids) == 1 || throw(ArgumentError("multiple UUIDs found for $pkg")) + id = PkgId(uuids[1], pkgname) + end + end + $add_(sym, id) + end + end +end \ No newline at end of file diff --git a/src/error_handling.jl b/src/error_handling.jl index 572e8676..ed3605d9 100644 --- a/src/error_handling.jl +++ b/src/error_handling.jl @@ -24,35 +24,12 @@ Base.showerror(io::IO, e::WriterError) = println( e.msg, "\n Will try next writer." ) -""" -`NotInstalledError` should be thrown when a library is currently not installed. -""" -struct NotInstalledError <: Exception - library::Symbol - message::String -end -Base.showerror(io::IO, e::NotInstalledError) = println(io, e.library, " is not installed.") -""" -`UnknownFormat` gets thrown when FileIO can't recognize the format of a file. -""" -struct UnknownFormat{T <: Formatted} <: Exception - format::T +struct SpecError <: Exception + mod::Module + call::Symbol end -Base.showerror(io::IO, e::UnknownFormat) = println(io, e.format, " couldn't be recognized by FileIO.") - - -""" -Handles error as soon as they get thrown while doing IO -""" -function handle_current_error(e, library, islast::Bool) - bt = catch_backtrace() - bts = sprint(io->Base.show_backtrace(io, bt)) - message = islast ? "" : "\nTrying next loading library! 
Please report this issue on the Github page for $library" - @warn string(e, bts, message) -end -handle_current_error(e::NotInstalledError) = @warn string("lib ", e.library, " not installed, trying next library") - +Base.showerror(io::IO, e::SpecError) = print(io, e.mod, " is missing $(e.call) and fileio_$(e.call)") """ Handles a list of thrown errors after no IO library was found working @@ -80,8 +57,3 @@ function handle_exceptions(exceptions::Vector, action) end handle_error(e, q) = throw(e) - -function handle_error(e::NotInstalledError, q) - println("Library \"", e.library, "\" is not installed but is recommended as a library to load format: \"", file_extension(q), "\"") - rethrow(e) -end diff --git a/src/loadsave.jl b/src/loadsave.jl index 2e480107..8e1db6ac 100644 --- a/src/loadsave.jl +++ b/src/loadsave.jl @@ -1,87 +1,56 @@ -const sym2loader = Dict{Symbol,Vector{Symbol}}() -const sym2saver = Dict{Symbol,Vector{Symbol}}() -const load_locker = Base.ReentrantLock() - -is_installed(pkg::Symbol) = get(Pkg.installed(), string(pkg), nothing) != nothing - -function _findmod(f::Symbol) - for (u,v) in Base.loaded_modules - (Symbol(v) == f) && return u - end - nothing -end -function topimport(modname) - @eval Base.__toplevel__ import $modname - u = _findmod(modname) - @eval $modname = Base.loaded_modules[$u] -end - -function checked_import(pkg::Symbol) - lock(load_locker) do - # kludge for test suite - if isdefined(Main, pkg) - m1 = getfield(Main, pkg) - isa(m1, Module) && return m1 - end - if isdefined(FileIO, pkg) - m1 = getfield(FileIO, pkg) - isa(m1, Module) && return m1 - end - m = _findmod(pkg) - m == nothing || return Base.loaded_modules[m] - topimport(pkg) - return Base.loaded_modules[_findmod(pkg)] - end -end - -applicable_error(applicable, sym) = error("No $applicable found for $sym") +const ActionSource = Union{PkgId,Module} +const sym2loader = Dict{Symbol,Vector{ActionSource}}() +const sym2saver = Dict{Symbol,Vector{ActionSource}}() for (applicable_, add_, 
dict_) in ( (:applicable_loaders, :add_loader, :sym2loader), (:applicable_savers, :add_saver, :sym2saver)) @eval begin - function $applicable_(@nospecialize(fmt::Union{Type{<:DataFormat}, Formatted})) - sym = formatname(fmt) - if haskey($dict_, sym) - return $dict_[sym] - end - Base.invokelatest(applicable_error, $applicable_, sym) + function $applicable_(sym::Symbol) + ret = get($dict_, sym, nothing) + ret === nothing && error(string("No ", $applicable_, " found for ", sym)) + return ret end - function $add_(@nospecialize(fmt::Type{<:DataFormat}), pkg::Symbol) - sym = formatname(fmt) - list = get($dict_, sym, Symbol[]) - $dict_[sym] = push!(list, pkg) + $add_(@nospecialize(fmt::Type), id::Union{ActionSource,Pair}) = $add_(formatname(fmt)::Symbol, id) + function $add_(sym::Symbol, id::ActionSource) + list = get!(Vector{ActionSource}, $dict_, sym) + push!(list, id) end + $add_(sym::Symbol, pkg::Pair{<:Union{String,Symbol}, UUID}) = $add_(sym, Base.PkgId(pkg.second, String(pkg.first))) end end """ - add_loader(fmt, :Package) - add_loader(fmt, [:Package, specifiers...]) + add_loader(fmt, :Package=>uuid) + add_loader(fmt, [:Package=>uuid, specifiers...]) Declare that format `fmt` can be loaded with package `:Package`. Specifiers include `OSX`, `Unix`, `Windows` and `Linux` to restrict usage to particular operating systems. + +See also [`add_format`](@ref) which can combine package support with the format declaration. """ add_loader """ - add_saver(fmt, :Package) - add_saver(fmt, [:Package, specifiers...]) + add_saver(fmt, :Package=>uuid) + add_saver(fmt, [:Package=>uuid, specifiers...]) Declare that format `fmt` can be saved with package `:Package`. Specifiers include `OSX`, `Unix`, `Windows` and `Linux` to restrict usage to particular operating systems. + +See also [`add_format`](@ref) which can combine package support with the format declaration. 
""" add_saver """ - `load(filename)` loads the contents of a formatted file, trying to infer -the format from `filename` and/or magic bytes in the file. + the format from `filename` and/or magic bytes in the file (see [`query`](@ref)). - `load(strm)` loads from an `IOStream` or similar object. In this case, -there is no filename extension, so we rely on the magic bytes for format -identification. -- `load(File(format"PNG", filename))` specifies the format directly, and bypasses inference. -- `load(Stream(format"PNG", io))` specifies the format directly, and bypasses inference. + there is no filename extension, so we rely on the magic bytes for format + identification. +- `load(File{format"PNG"}(filename))` specifies the format directly, and bypasses the format [`query`](@ref). +- `load(Stream{format"PNG"}(io))` specifies the format directly, and bypasses the format [`query`](@ref). - `load(f; options...)` passes keyword arguments on to the loader. """ load @@ -93,25 +62,27 @@ higher-level streams should return a formatted object, like an image or chunk of video or audio. - `loadstreaming(filename)` loads the contents of a formatted file, trying to infer -the format from `filename` and/or magic bytes in the file. It returns a streaming -type that can be read from in chunks, rather than loading the whole contents all -at once + the format from `filename` and/or magic bytes in the file. It returns a streaming + type that can be read from in chunks, rather than loading the whole contents all + at once. - `loadstreaming(strm)` loads the stream from an `IOStream` or similar object. -In this case, there is no filename extension, so we rely on the magic bytes -for format identification. -- `loadstreaming(File(format"WAV",filename))` specifies the format directly, and -bypasses inference. -- `loadstreaming(Stream(format"WAV", io))` specifies the format directly, and -bypasses inference. 
+ In this case, there is no filename extension, so we rely on the magic bytes + for format identification. +- `loadstreaming(File{format"WAV"}(filename))` specifies the format directly, and + bypasses the format [`query`](@ref). +- `loadstreaming(Stream{format"WAV"}(io))` specifies the format directly, and + bypasses the format [`query`](@ref). - `loadstreaming(f; options...)` passes keyword arguments on to the loader. """ loadstreaming """ - `save(filename, data...)` saves the contents of a formatted file, -trying to infer the format from `filename`. -- `save(Stream(format"PNG",io), data...)` specifies the format directly, and bypasses inference. -- `save(File(format"PNG",filename), data...)` specifies the format directly, and bypasses inference. + trying to infer the format from `filename`. +- `save(Stream{format"PNG"}(io), data...)` specifies the format directly, and + bypasses the format [`query`](@ref). +- `save(File{format"PNG"}(filename), data...)` specifies the format directly, and + bypasses the format [`query`](@ref). - `save(f, data...; options...)` passes keyword arguments on to the saver. """ save @@ -122,11 +93,11 @@ be written in chunks, rather than all at once. These higher-level streams should accept formatted objects, like an image or chunk of video or audio. - `savestreaming(filename, data...)` saves the contents of a formatted file, -trying to infer the format from `filename`. -- `savestreaming(File(format"WAV",filename))` specifies the format directly, and -bypasses inference. -- `savestreaming(Stream(format"WAV", io))` specifies the format directly, and -bypasses inference. + trying to infer the format from `filename`. +- `savestreaming(File{format"WAV"}(filename))` specifies the format directly, and + bypasses the format [`query`](@ref). +- `savestreaming(Stream{format"WAV"}(io))` specifies the format directly, and + bypasses the format [`query`](@ref). - `savestreaming(f, data...; options...)` passes keyword arguments on to the saver. 
""" savestreaming @@ -134,38 +105,48 @@ savestreaming # if a bare filename or IO stream are given, query for the format and dispatch # to the formatted handlers below for fn in (:load, :loadstreaming, :metadata) - @eval $fn(file, args...; options...) = $fn(query(file), args...; options...) + fnq = QuoteNode(fn) + @eval function $fn(file, args...; options...) + checkpath_load(file) + sym = querysym(file) + libraries = applicable_loaders(sym) + return action($fnq, libraries, sym, file, args...; options...) + end + # Version that bypasses format-inference + @eval function $fn(@nospecialize(file::Formatted), args...; options...) + checkpath_load(filename(file)) + sym = formatname(file)::Symbol + libraries = applicable_loaders(sym) + return action($fnq, libraries, file, args...; options...) + end end for fn in (:save, :savestreaming) - @eval $fn(file, args...; options...) = $fn(query(file; checkfile=false), args...; options...) + fnq = QuoteNode(fn) + @eval function $fn(file, args...; options...) + checkpath_save(file) + sym = querysym(file; checkfile=false) + libraries = applicable_savers(sym) + return action($fnq, libraries, sym, file, args...; options...) + end + @eval function $fn(@nospecialize(file::Formatted), args...; options...) + checkpath_save(filename(file)) + sym = formatname(file)::Symbol + libraries = applicable_savers(sym) + return action($fnq, libraries, file, args...; options...) + end + @eval function $fn(@nospecialize(fmt::Type), file, args...; options...) + checkpath_save(file) + sym = formatname(fmt)::Symbol + libraries = applicable_savers(sym) + return action($fnq, libraries, sym, file, args...; options...) + end end # return a save function, so you can do `thing_to_save |> save("filename.ext")` -save(file; options...) = data -> save(file, data; options...) - -# Allow format to be overridden with first argument -function save(df::Type{DataFormat{sym}}, filename, data...; options...) 
where sym - libraries = applicable_savers(df) - checked_import(libraries[1]) - return Base.invokelatest(save, File(DataFormat{sym}, filename), data...; options...) -end - -function savestreaming(df::Type{DataFormat{sym}}, s::IO, data...; options...) where sym - libraries = applicable_savers(df) - checked_import(libraries[1]) - return Base.invokelatest(savestreaming, Stream(DataFormat{sym}, s), data...; options...) -end - -function save(df::Type{DataFormat{sym}}, s::IO, data...; options...) where sym - libraries = applicable_savers(df) - checked_import(libraries[1]) - return Base.invokelatest(save, Stream(DataFormat{sym}, s), data...; options...) -end - -function savestreaming(df::Type{DataFormat{sym}}, filename, data...; options...) where sym - libraries = applicable_savers(df) - checked_import(libraries[1]) - return Base.invokelatest(savestreaming, File(DataFormat{sym}, filename), data...; options...) +function save(file; options...) + sym = querysym(file; checkfile=false) + libraries = applicable_savers(sym) # savers, not loaders: the returned closure performs a :save action + return data -> action(:save, libraries, sym, file, data; options...) end # do-syntax for streaming IO @@ -180,79 +161,61 @@ for fn in (:loadstreaming, :savestreaming) end end -# Handlers for formatted files/streams - -for fn in (:load, :loadstreaming, :metadata) - fn_func_name = Symbol(fn, "_filename") - gen2_func_name = Symbol("fileio_", fn) - @eval function $fn(@nospecialize(q::Formatted), @nospecialize(args...); @nospecialize(options...)) - Base.invokelatest($fn_func_name, q, filename(q), args...; options...) 
- end - @eval function $fn_func_name(@nospecialize(q::Formatted), filename, @nospecialize(args...); @nospecialize(options...)) - if unknown(q) - isfile(filename) || open(filename) # force systemerror - throw(UnknownFormat(q)) - end - if q isa File - !isfile(filename) && throw(ArgumentError("No file exists at given path: $(filename)")) - end - libraries = applicable_loaders(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if isdefined(Library, $(QuoteNode(gen2_func_name))) - return Base.invokelatest(Library.$gen2_func_name, q, args...; options...) - end - if !has_method_from(methods(Library.$fn), Library) - throw(LoaderError(string(library), "$($fn) not defined")) - end - return Base.invokelatest(Library.$fn, q, args...; options...) - catch e - push!(failures, (e, q)) - end - end - handle_exceptions(failures, "loading $(repr(filename))") - end +function checkpath_load(file) + (file === nothing || isa(file, IO)) && return nothing # stream io: there is no path to check + !isfile(file) && throw(ArgumentError("No file exists at given path: $file")) + return nothing +end +function checkpath_save(file) + file === nothing && return nothing + isa(file, IO) && return nothing + isdir(file) && throw(ArgumentError("Given file path is a directory: $file")) + dn = dirname(file) + !isdir(dn) && mkpath(dn) + return nothing end -for fn in (:save, :savestreaming) - gen2_func_name = Symbol("fileio_", fn) - @eval function $fn(@nospecialize(q::Formatted), @nospecialize(data...); @nospecialize(options...)) - unknown(q) && throw(UnknownFormat(q)) - if q isa File - isdir(filename(q)) && throw(ArgumentError("Given file path is a directory: $(filename(q))")) - !isdir(dirname(filename(q))) && mkpath(dirname(filename(q))) - end - libraries = applicable_savers(q) - failures = Any[] - for library in libraries - try - Library = checked_import(library) - if isdefined(Library, $(QuoteNode(gen2_func_name))) - return Base.invokelatest(Library.$gen2_func_name, q, data...; options...) 
- end - if !has_method_from(methods(Library.$fn), Library) - throw(WriterError(string(library), "$($fn) not defined")) +action(call::Symbol, libraries::Vector{ActionSource}, sym::Symbol, io::IO, args...; options...) = + action(call, libraries, Stream{DataFormat{sym}}(io), args...; options...) +action(call::Symbol, libraries::Vector{ActionSource}, sym::Symbol, file, args...; options...) = + action(call, libraries, File{DataFormat{sym}}(file), args...; options...) + +# To test for broken packages which extend FileIO functions; keys must be the `call` symbols passed to `action` +const fileiofuncs = Dict{Symbol,Function}(:load => load, + :loadstreaming => loadstreaming, + :metadata => metadata, + :save => save, + :savestreaming => savestreaming) + +function action(call::Symbol, libraries::Vector{ActionSource}, @nospecialize(file::Formatted), args...; options...) + issave = call ∈ (:save, :savestreaming) + failures = Tuple{Any,ActionSource}[] + pkgfuncname = Symbol("fileio_", call) + local mod + for library in libraries + try + mod = isa(library, Module) ? library : Base.require(library) + f = if isdefined(mod, pkgfuncname) + getfield(mod, pkgfuncname) + else + getfield(mod, call) + end + if f === get(fileiofuncs, call, nothing) + argtyps = map(Core.Typeof, args) + m = which(f, (typeof(file), argtyps...)) + if m == which(f, (Formatted, argtyps...)) + throw(SpecError(mod, call)) end - return Base.invokelatest(Library.$fn, q, data...; options...) - catch e - push!(failures, (e, q)) + @warn "$mod incorrectly extends FileIO functions (see FileIO documentation)" end - end - handle_exceptions(failures, "saving $(repr(filename(q)))") - end -end - -# returns true if the given method table includes a method defined by the given -# module, false otherwise -function has_method_from(mt, Library) - for m in mt - if getmodule(m) == Library - return true + return Base.invokelatest(f, file, args...; options...) + catch e + if isa(e, MethodError) || isa(e, SpecError) + str = "neither $call nor $pkgfuncname is defined" + e = issave ? 
WriterError(string(mod), str) : LoaderError(string(mod), str) + end + push!(failures, (e, library)) end end - false + handle_exceptions(failures, "$call $(repr(file))") end - -getmodule(m) = m.module diff --git a/src/precompile.jl b/src/precompile.jl index 39392b62..54257271 100644 --- a/src/precompile.jl +++ b/src/precompile.jl @@ -1,59 +1,22 @@ function _precompile_() ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - @assert precompile(Tuple{typeof(detect_bedgraph),IOStream}) - @assert precompile(Tuple{typeof(detect_noometiff),IOStream}) - @assert precompile(Tuple{typeof(detect_rdata),IOStream}) - @assert precompile(Tuple{typeof(detect_rdata_single),IOStream}) - @assert precompile(Tuple{typeof(detectwav),IOStream}) - - @assert precompile(Tuple{typeof(load),File}) - @assert precompile(Tuple{typeof(load),Formatted}) - @assert precompile(Tuple{typeof(load),String}) - @assert precompile(Tuple{typeof(FileIO.load_filename),Formatted,String}) - if isdefined(Base, :bodyfunction) - fbody = Base.bodyfunction(which(FileIO.load_filename, (Formatted, String))) - @assert precompile(fbody, (Any, typeof(FileIO.load_filename), Formatted, String)) - @assert precompile(fbody, (Any, typeof(FileIO.load_filename), Formatted, String, Vararg{Any,100})) + for f in (detect_rdata, detect_rdata_single, detectwav, detect_bedgraph, + detecttiff, detect_noometiff, detect_ometiff, detectavi, + detecthdf5, detect_stlascii, detect_stlbinary, detect_gadget2) + @assert precompile(f, (IOStream,)) end - @assert precompile(Tuple{typeof(query),String}) - @assert precompile(Tuple{typeof(query),IOStream}) - @assert precompile(Tuple{typeof(query),IOStream,String}) - @assert precompile(Tuple{typeof(query),IOStream,Nothing}) - - @assert precompile(Tuple{typeof(hasfunction),Function}) - @assert precompile(Tuple{typeof(hasmagic),Function}) - - @assert precompile(Tuple{typeof(applicable_loaders),Type{<:DataFormat}}) - @assert precompile(Tuple{typeof(applicable_loaders),Formatted}) - @assert 
precompile(Tuple{typeof(applicable_savers),Type{<:DataFormat}}) - @assert precompile(Tuple{typeof(applicable_savers),Formatted}) - @assert precompile(Tuple{typeof(add_loader),Type{<:DataFormat},Symbol}) - @assert precompile(Tuple{typeof(add_saver),Type{<:DataFormat},Symbol}) - - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{10,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{20,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{30,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{32,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{35,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{4,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{6,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{7,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},NTuple{8,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},Tuple{UInt8,UInt8,UInt8}}) - @assert precompile(Tuple{typeof(iter_eq),Array{UInt8,1},Tuple{UInt8,UInt8}}) - - if isdefined(Base, :bodyfunction) - m = which(query, (String,)) - f = Base.bodyfunction(m) - @assert precompile(f, (Bool, typeof(query), String)) - m = which(load, (String,)) - f = Base.bodyfunction(m) - @assert precompile(f, (Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, typeof(load), String)) - m = which(load, (Formatted,)) - f = Base.bodyfunction(m) - @assert precompile(f, (Any, typeof(load), Formatted)) - @assert precompile(f, (Iterators.Pairs{Union{},Union{},Tuple{},NamedTuple{(),Tuple{}}}, typeof(load), File)) + for F in (String, IOStream, Formatted) + @assert precompile(query, (F,)) + @assert precompile(load, (F,)) + @assert precompile(save, (F,Nothing,)) + @assert precompile(loadstreaming, (F,)) + @assert precompile(savestreaming, (F,)) end - + @assert precompile(action, (Symbol,Vector{Union{PkgId, Module}},Symbol,String)) + 
@assert precompile(action, (Symbol,Vector{Union{PkgId, Module}},Symbol,IOStream)) + @assert precompile(action, (Symbol,Vector{Union{PkgId, Module}},Formatted)) + @assert precompile(loadstreaming, (Function, Any)) + @assert precompile(savestreaming, (Function, Any)) + @assert precompile(skipmagic, (IOStream,Vector{Vector{UInt8}},)) end diff --git a/src/query.jl b/src/query.jl index 3b0c9847..73de294c 100644 --- a/src/query.jl +++ b/src/query.jl @@ -3,48 +3,43 @@ """ `unknown(f)` returns true if the format of `f` is unknown. """ -unknown(::Type{format"UNKNOWN"}) = true -unknown(::Type{DataFormat{sym}}) where {sym} = false - -unknown(::File{F}) where {F} = unknown(F) -unknown(::Stream{F}) where {F} = unknown(F) +unknown(@nospecialize(f::Union{Formatted,Type})) = unknown(formatname(f)::Symbol) +unknown(name::Symbol) = name === :UNKNOWN const unknown_df = DataFormat{:UNKNOWN} """ `info(fmt)` returns the magic bytes/extension information for -`DataFormat` `fmt`. +`fmt`. """ -info(::Type{DataFormat{sym}}) where {sym} = sym2info[sym] +info(@nospecialize(f::Union{Formatted,Type})) = info(formatname(f)::Symbol) +info(sym::Symbol) = sym2info[sym] "`magic(fmt)` returns the magic bytes of format `fmt`" -magic(fmt::Type{<:DataFormat})= UInt8[info(fmt)[1]...] - +magic(@nospecialize(fmt::Type)) = magic(formatname(fmt)::Symbol) +magic(sym::Symbol) = info(sym)[1] """ -`skipmagic(s)` sets the position of `Stream` `s` to be just after the magic bytes. +`skipmagic(s::Stream)` sets the position of `s` to be just after the magic bytes. For a plain IO object, you can use `skipmagic(io, fmt)`. 
""" -skipmagic(s::Stream{F}) where {F} = (skipmagic(stream(s), F); s) -function skipmagic(io, fmt::Type{DataFormat{sym}}) where sym +skipmagic(@nospecialize(s::Stream)) = (skipmagic(stream(s), formatname(s)::Symbol); s) +skipmagic(io, @nospecialize(fmt::Type)) = skipmagic(io, formatname(fmt)::Symbol) +function skipmagic(io, sym::Symbol) magic, _ = sym2info[sym] skipmagic(io, magic) nothing end -skipmagic(io, magic::Function) = nothing -skipmagic(io, magic::NTuple{N,UInt8}) where {N} = seek(io, length(magic)) -function skipmagic(io, magic::Tuple) - lengths = map(length, magic) - all(x-> lengths[1] == x, lengths) && return seek(io, lengths[1]) # it doesn't matter what magic bytes get skipped as they all have the same length - magic = [magic...] - sort!(magic, lt = (a,b)-> length(a) >= length(b)) # start with longest first, to avoid overlapping magic bytes - seekend(io) - len = position(io) - seekstart(io) - filter!(x-> length(x) <= len, magic) # throw out magic bytes that are longer than IO - tmp = read(io, length(first(magic))) # now, first is both the longest and guaranteed to fit into io, so we can just read the bytes - for m in magic +skipmagic(io, @nospecialize(magic::Function)) = nothing +skipmagic(io, magic::Vector{UInt8}) = seek(io, length(magic)) +function skipmagic(io, magics::Vector{Vector{UInt8}}) + lengths = map(length, magics) + l1 = lengths[1] + all(isequal(l1), lengths) && return seek(io, l1) # it doesn't matter what magic bytes get skipped as they all have the same length + len = getlength(io) + tmp = read(io, min(len, maximum(lengths))) + for m in reverse(magics) # start with the longest since they are most specific if magic_equal(m, tmp) seek(io, length(m)) return nothing @@ -52,14 +47,21 @@ function skipmagic(io, magic::Tuple) end error("tried to skip magic bytes of an IO that does not contain the magic bytes of the format. 
IO: $io") end + function magic_equal(magic, buffer) + length(magic) > length(buffer) && return false for (i,elem) in enumerate(magic) buffer[i] != elem && return false end true end - +function getlength(io, pos=position(io)) + seekend(io) + len = position(io) + seek(io, pos) + return len +end """ query(filename; checkfile=true) @@ -70,121 +72,140 @@ If `filename` already exists, the file's magic bytes will take priority unless `checkfile` is false. """ function query(filename; checkfile::Bool=true) + filename = abspath(filename) + sym = querysym(filename; checkfile=checkfile) + return File{DataFormat{sym}}(filename) +end +query(@nospecialize(f::Formatted)) = f + +# This is recommended for internal use because it returns Symbol (or errors) +function querysym(filename; checkfile::Bool=true) + hasmagic(@nospecialize(magic)) = !(isa(magic, Vector{UInt8}) && isempty(magic)) + checkfile &= isfile(filename) _, ext = splitext(filename) if haskey(ext2sym, ext) sym = ext2sym[ext] - no_magic = !hasmagic(sym) - if lensym(sym) == 1 && (no_magic || !checkfile) # we only found one candidate and there is no magic bytes, or no file, trust the extension - return File{DataFormat{sym}}(filename) - elseif !checkfile && lensym(sym) > 1 - return File{DataFormat{sym[1]}}(filename) - end - no_function = !hasfunction(sym) - if no_magic && no_function - error("Some formats with extension ", ext, " have no magic bytes; use `File{format\"FMT\"}(filename)` to resolve the ambiguity.") + if isa(sym, Symbol) # there's only one format with this extension + checkfile || return sym # since we're not checking, we can return it immediately + magic = sym2info[sym][1] + hasmagic(magic) || return sym + return open(filename) do io + match(io, magic) && return sym + # if it doesn't match, we prioritize the magic bytes over the guess based on extension + return querysym_all(io)[1] + end end - if no_magic && !no_function - # try specific function first, if available - ret = query(open(filename), 
abspath(filename), sym) - ret !== nothing && return file!(ret) + # There are multiple formats consistent with this extension + syms = sym::Vector{Symbol} + checkfile || return syms[1] # with !checkfile we default to the first. TODO?: change to an error? + return open(filename) do io + badmagic = false + for sym in syms + magic = sym2info[sym][1] + if !hasmagic(magic) + badmagic = true + continue + end + match(io, magic) && return sym + end + badmagic && error("Some formats with extension ", ext, " have no magic bytes; use `File{format\"FMT\"}(filename)` to resolve the ambiguity.") + return querysym_all(io)[1] end end - !checkfile && return File{unknown_df}(filename) # (no extension || no magic byte || no function) && no file - # Otherwise, check against all magic bytes, then functions - file!(query(open(filename), abspath(filename))) + !checkfile && return :UNKNOWN + return open(filename) do io + return querysym_all(io)[1] + end end -lensym(s::Symbol) = 1 -lensym(v::Vector) = length(v) - -hasmagic(s::Symbol) = hasmagic(sym2info[s][1]) -hasmagic(v::Vector) = any(hasmagic, v) +function match(io, magic::Vector{UInt8}) + len = getlength(io) + len < length(magic) && return false + return magic_equal(magic, read(io, length(magic))) +end -hasmagic(t::Tuple) = !isempty(t) -hasmagic(::Any) = false # for when magic is a function +function match(io, magics::Vector{Vector{UInt8}}) + lengths = map(length, magics) + len = getlength(io) + tmp = read(io, min(len, maximum(lengths))) + for m in reverse(magics) # start with the longest since they are most specific + if magic_equal(m, tmp) + return true + end + end + return false +end -hasfunction(s::Symbol) = hasfunction(sym2info[s][1]) -hasfunction(v::Vector) = any(hasfunction, v) -hasfunction(s::Any) = true #has function -hasfunction(s::Tuple) = false #has magic +function match(io, @nospecialize(magic::Function)) + seekstart(io) + try + magic(io) + catch e + @error("""There was an error in magic function $magic. 
+ Please open an issue at FileIO.jl.""", exception=(e, catch_backtrace())) + false + end +end -""" -`query(io, [filename])` returns a `Stream` object with information about the -format inferred from the magic bytes. -""" -function query(io::IO, filename = nothing) - magic = Vector{UInt8}() - pos = position(io) - for p in magic_list - m = first(p) - length(m) == 0 && continue - while length(m) > length(magic) - if eof(io) - seek(io, pos) - return Stream{unknown_df, typeof(io)}(io, filename) - end - push!(magic, read(io, UInt8)) - end - if iter_eq(magic, m) - seek(io, pos) - return Stream{DataFormat{last(p)},typeof(io)}(io, filename) - end +# Returns sym, magic (the latter may be empty if a magic-function matched) +# Upon return the stream position is set to the end of magic. +function querysym_all(io) + seekstart(io) + len = getlength(io) + lengths = map(magic_list) do p + length(p.first) end - if seekable(io) - for p in magic_func - seek(io, pos) - f = first(p) - try - if f(io) - return Stream{DataFormat{last(p)},typeof(io)}(seek(io, pos), filename) - end - catch e - println("There was an error in magick function $f") - println("Please open an issue at FileIO.jl. 
Error:") - println(e) - end + tmp = read(io, min(len, maximum(lengths))) + for (magic, sym) in reverse(magic_list) + isempty(magic) && break + if magic_equal(magic, tmp) + seek(io, length(magic)) + return sym, magic end - seek(io, pos) end - Stream{unknown_df,typeof(io)}(io, filename) + for (magic, sym) in magic_func + seekstart(io) + match(io, magic) && return sym, empty_magic + end + seekstart(io) + return :UNKNOWN, empty_magic end -function query(io::IO, filename::String, sym::Vector{Symbol}) - magic = Vector{UInt8}() - pos = position(io) + +function querysym(io::IO) if seekable(io) - for (f, fmtsym) in magic_func - fmtsym in sym || continue - seek(io, pos) - try - if f(io) - return Stream{DataFormat{fmtsym},typeof(io)}(seek(io, pos), filename) - end - catch e - println("There was an error in magick function $f") - println("Please open an issue at FileIO.jl. Error:") - println(e) - end + sym, _ = querysym_all(io) + seekstart(io) + return sym + end + # When it's not seekable, we can only work our way upwards in length of magic bytes + # We're essentially counting on the fact that one of them will match, otherwise the stream + # is corrupted. + buffer = UInt8[] + for (magic, sym) in magic_list + isempty(magic) && continue + while length(buffer) < length(magic) && !eof(io) + push!(buffer, read(io, UInt8)) end - seek(io, pos) + if magic_equal(magic, buffer) + return sym + end + eof(io) && break end - close(io) - nothing + return :UNKNOWN +end + + +""" +`query(io, [filename])` returns a `Stream` object with information about the +format inferred from the magic bytes. 
+""" +function query(io::IO, filename = nothing) + sym = querysym(io) + return Stream{DataFormat{sym}}(io, filename) end +query(io::IO, @nospecialize(file::Formatted)) = Stream{DataFormat{formatname(file)::Symbol}}(io, filename(file)) seekable(io::IOBuffer) = io.seekable seekable(::IOStream) = true seekable(::Any) = false - -function iter_eq(A, B) - length(A) == length(B) || return false - i,j = 1,1 - for _=1:length(A) - a=A[i]; b=B[j] - a == b && (i+=1; j+=1; continue) - a == UInt32('\r') && (i+=1; continue) # this seems like the shadiest solution to deal with windows \r\n - b == UInt32('\r') && (j+=1; continue) - return false #now both must be unequal, and no \r windows excemption any more - end - true -end diff --git a/src/registry.jl b/src/registry.jl index 16f05299..d18fe1e3 100644 --- a/src/registry.jl +++ b/src/registry.jl @@ -1,13 +1,26 @@ +### "Package registry" +# Useful for packages that get used more than once below +# Please alphabetize +const idCSVFiles = :CSVFiles => UUID("5d742f6a-9f54-50ce-8119-2520741973ca") +const idImageIO = :ImageIO => UUID("82e4d734-157c-48bb-816b-45c225c6df19") +const idImageMagick = :ImageMagick => UUID("6218d12a-5da1-5696-b52f-db25d2ecc6d1") +const idMeshIO = :MeshIO => UUID("7269a6da-0436-5bbc-96c2-40638cbb6118") +const idNetpbm = :Netpbm => UUID("f09324ee-3d7c-5217-9330-fc30815ba969") +const idQuartzImageIO = :QuartzImageIO => UUID("dca85d43-d64c-5e67-8c65-017450d5d020") +const idRData = :RData => UUID("df47a6cb-8c03-5eed-afd8-b6050d6c41da") +const idStatFiles = :StatFiles => UUID("1463e38c-9381-5320-bcd4-4134955f093a") +const idVegaLite = :VegaLite => UUID("112f6efa-9a02-5b7d-90c0-432ed331239a") + ### Simple cases # data formats add_format(format"JLD", (unsafe_wrap(Vector{UInt8}, "Julia data file (HDF5), version 0.0"), - unsafe_wrap(Vector{UInt8}, "Julia data file (HDF5), version 0.1")), ".jld", [:JLD]) + unsafe_wrap(Vector{UInt8}, "Julia data file (HDF5), version 0.1")), ".jld", [:JLD => 
UUID("4138dd39-2aa7-5051-a626-17a0bb65d9c8")]) add_format(format"JLD2", (unsafe_wrap(Vector{UInt8},"Julia data file (HDF5), version 0.2"), - unsafe_wrap(Vector{UInt8}, "HDF5-based Julia Data Format, version ")), ".jld2", [:JLD2]) -add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz]) -add_format(format"BSON",(),".bson", [:BSON]) -add_format(format"JLSO", (), ".jlso", [:JLSO]) + unsafe_wrap(Vector{UInt8}, "HDF5-based Julia Data Format, version ")), ".jld2", [:JLD2 => UUID("033835bb-8acc-5ee8-8aae-3f567f8a3819")]) +add_format(format"GZIP", [0x1f, 0x8b], ".gz", [:Libz => UUID("2ec943e9-cfe8-584d-b93d-64dcb6d567b7")]) +add_format(format"BSON",(),".bson", [:BSON => UUID("fbb218c0-5317-5bc6-957e-2ee96dd4b1f0")]) +add_format(format"JLSO", (), ".jlso", [:JLSO => UUID("9da8a3cd-07a3-59c0-a743-3fdc52c30d11")]) # test for RD?n magic sequence at the beginning of R data input stream function detect_rdata(io) @@ -19,7 +32,7 @@ function detect_rdata(io) (c = read(io, UInt8); c == UInt8('\n') || (c == UInt8('\r') && read(io, UInt8) == UInt8('\n'))) end -add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [:RData, LOAD]) +add_format(format"RData", detect_rdata, [".rda", ".RData", ".rdata"], [idRData, LOAD]) function detect_rdata_single(io) seekstart(io) @@ -29,100 +42,100 @@ function detect_rdata_single(io) return res end -add_format(format"RDataSingle", detect_rdata_single, [".rds"], [:RData, LOAD]) +add_format(format"RDataSingle", detect_rdata_single, [".rds"], [idRData, LOAD]) -add_format(format"CSV", (), [".csv"], [:CSVFiles]) -add_format(format"TSV", (), [".tsv"], [:CSVFiles]) -add_format(format"Feather", "FEA1", [".feather"], [:FeatherFiles]) -add_format(format"Excel", (), [".xls", ".xlsx"], [:ExcelFiles]) -add_format(format"Stata", (), [".dta"], [:StatFiles, LOAD]) -add_format(format"SPSS", "\$FL2", [".sav"], [:StatFiles, LOAD]) +add_format(format"CSV", (), [".csv"], [idCSVFiles]) +add_format(format"TSV", (), [".tsv"], [idCSVFiles]) 
+add_format(format"Feather", "FEA1", [".feather"], [:FeatherFiles => UUID("b675d258-116a-5741-b937-b79f054b0542")]) +add_format(format"Excel", (), [".xls", ".xlsx"], [:ExcelFiles => UUID("89b67f3b-d1aa-5f6f-9ca4-282e8d98620d")]) +add_format(format"Stata", (), [".dta"], [idStatFiles, LOAD]) +add_format(format"SPSS", "\$FL2", [".sav"], [idStatFiles, LOAD]) add_format(format"SAS", UInt8[0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xc2, 0xea, 0x81, 0x60,0xb3, 0x14, 0x11, 0xcf, 0xbd, 0x92, 0x08, 0x00, 0x09, 0xc7, 0x31, 0x8c, 0x18, 0x1f, - 0x10, 0x11], [".sas7bdat"], [:StatFiles, LOAD]) -add_format(format"Parquet", "PAR1", [".parquet"], [:ParquetFiles, LOAD]) + 0x10, 0x11], [".sas7bdat"], [idStatFiles, LOAD]) +add_format(format"Parquet", "PAR1", [".parquet"], [:ParquetFiles => UUID("46a55296-af5a-53b0-aaa0-97023b66127f"), LOAD]) # Image formats -add_format(format"PBMBinary", b"P4", ".pbm", [:ImageIO], [:Netpbm], [:ImageMagick]) -add_format(format"PGMBinary", b"P5", ".pgm", [:ImageIO], [:Netpbm]) -add_format(format"PPMBinary", b"P6", ".ppm", [:ImageIO], [:Netpbm]) -add_format(format"PBMText", b"P1", ".pbm", [:ImageIO], [:Netpbm], [:ImageMagick, LOAD]) -add_format(format"PGMText", b"P2", ".pgm", [:ImageIO], [:Netpbm], [:ImageMagick, LOAD]) -add_format(format"PPMText", b"P3", ".ppm", [:ImageIO], [:Netpbm], [:ImageMagick, LOAD]) - -add_format(format"NRRD", "NRRD", [".nrrd", ".nhdr"], [:NRRD]) - -add_format(format"AndorSIF", "Andor Technology Multi-Channel File", ".sif", [:AndorSIF, LOAD]) - -add_format(format"FLO", b"PIEH", ".flo", [:OpticalFlowUtils]) - -add_format(format"CRW", UInt8[0x49,0x49,0x1a,0x00,0x00,0x00,0x48,0x45], ".crw", [:ImageMagick]) -add_format(format"CUR", UInt8[0x00,0x00,0x02,0x00], ".cur", [:ImageMagick]) -add_format(format"DCX", UInt8[0xb1,0x68,0xde,0x3a], ".dcx", [:ImageMagick]) -add_format(format"DOT", UInt8[0xd0,0xcf,0x11,0xe0,0xa1,0xb1,0x1a,0xe1], ".dot", [:ImageMagick]) -add_format(format"EPS", 
UInt8[0x25,0x21,0x50,0x53,0x2d,0x41,0x64,0x6f], ".eps", [:ImageMagick], [:MimeWriter, SAVE]) -add_format(format"HDR", UInt8[0x23,0x3f,0x52,0x41,0x44,0x49,0x41,0x4e], ".hdr", [:ImageMagick]) -add_format(format"ICO", UInt8[0x00,0x00,0x01,0x00], ".ico", [:ImageMagick]) -add_format(format"INFO", UInt8[0x7a,0x62,0x65,0x78], ".info",[:ImageMagick]) -add_format(format"JP2", UInt8[0x00,0x00,0x00,0x0c,0x6a,0x50,0x20,0x20], ".jp2", [:ImageMagick]) -add_format(format"PDB", UInt8[0x73,0x7a,0x65,0x7a], ".pdb", [:ImageMagick]) -add_format(format"PDF", UInt8[0x25,0x50,0x44,0x46], ".pdf", [:ImageMagick], [:MimeWriter, SAVE]) -add_format(format"PGM", UInt8[0x50,0x35,0x0a], ".pgm", [:ImageMagick]) -add_format(format"PSD", UInt8[0x38,0x42,0x50,0x53], ".psd", [:ImageMagick]) -add_format(format"RGB", UInt8[0x01,0xda,0x01,0x01,0x00,0x03], ".rgb", [:ImageMagick]) -add_format(format"WMF", UInt8[0xd7,0xcd,0xc6,0x9a], ".wmf", [:ImageMagick]) -add_format(format"WPG", UInt8[0xff,0x57,0x50,0x43], ".wpg", [:ImageMagick]) -add_format(format"Imagine", "IMAGINE", ".imagine", [:ImagineFormat]) +add_format(format"PBMBinary", "P4", ".pbm", [idImageIO], [idNetpbm], [idImageMagick]) +add_format(format"PGMBinary", "P5", ".pgm", [idImageIO], [idNetpbm]) +add_format(format"PPMBinary", "P6", ".ppm", [idImageIO], [idNetpbm]) +add_format(format"PBMText", "P1", ".pbm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) +add_format(format"PGMText", "P2", ".pgm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) +add_format(format"PPMText", "P3", ".ppm", [idImageIO], [idNetpbm], [idImageMagick, LOAD]) + +add_format(format"NRRD", "NRRD", [".nrrd", ".nhdr"], [:NRRD => UUID("9bb6cfbd-7763-5393-b1b5-1c8e09872146")]) + +add_format(format"AndorSIF", "Andor Technology Multi-Channel File", ".sif", [:AndorSIF => UUID("d04cd5f8-5917-4006-ac6f-d139328806a7"), LOAD]) + +add_format(format"FLO", "PIEH", ".flo", [:OpticalFlowUtils => UUID("ab0dad50-ab19-448c-b796-13553ec8b2d3")]) + +add_format(format"CRW", 
UInt8[0x49,0x49,0x1a,0x00,0x00,0x00,0x48,0x45], ".crw", [idImageMagick]) +add_format(format"CUR", UInt8[0x00,0x00,0x02,0x00], ".cur", [idImageMagick]) +add_format(format"DCX", UInt8[0xb1,0x68,0xde,0x3a], ".dcx", [idImageMagick]) +add_format(format"DOT", UInt8[0xd0,0xcf,0x11,0xe0,0xa1,0xb1,0x1a,0xe1], ".dot", [idImageMagick]) +add_format(format"EPS", UInt8[0x25,0x21,0x50,0x53,0x2d,0x41,0x64,0x6f], ".eps", [idImageMagick], [MimeWriter, SAVE]) +add_format(format"HDR", UInt8[0x23,0x3f,0x52,0x41,0x44,0x49,0x41,0x4e], ".hdr", [idImageMagick]) +add_format(format"ICO", UInt8[0x00,0x00,0x01,0x00], ".ico", [idImageMagick]) +add_format(format"INFO", UInt8[0x7a,0x62,0x65,0x78], ".info",[idImageMagick]) +add_format(format"JP2", UInt8[0x00,0x00,0x00,0x0c,0x6a,0x50,0x20,0x20], ".jp2", [idImageMagick]) +add_format(format"PDB", UInt8[0x73,0x7a,0x65,0x7a], ".pdb", [idImageMagick]) +add_format(format"PDF", UInt8[0x25,0x50,0x44,0x46], ".pdf", [idImageMagick], [MimeWriter, SAVE]) +add_format(format"PGM", UInt8[0x50,0x35,0x0a], ".pgm", [idImageMagick]) +add_format(format"PSD", UInt8[0x38,0x42,0x50,0x53], ".psd", [idImageMagick]) +add_format(format"RGB", UInt8[0x01,0xda,0x01,0x01,0x00,0x03], ".rgb", [idImageMagick]) +add_format(format"WMF", UInt8[0xd7,0xcd,0xc6,0x9a], ".wmf", [idImageMagick]) +add_format(format"WPG", UInt8[0xff,0x57,0x50,0x43], ".wpg", [idImageMagick]) +add_format(format"Imagine", "IMAGINE", ".imagine", [:ImagineFormat => UUID("4bab44a2-5ff2-5a6b-8e10-825fb9ac126a")]) add_format( format"TGA", (), ".tga", - [:QuartzImageIO, OSX], - [:ImageMagick] + [idQuartzImageIO, OSX], + [idImageMagick] ) add_format( format"GIF", UInt8[0x47,0x49,0x46,0x38], ".gif", - [:QuartzImageIO, OSX], - [:ImageMagick] + [idQuartzImageIO, OSX], + [idImageMagick] ) add_format( format"PNG", UInt8[0x89,0x50,0x4e,0x47,0x0d,0x0a,0x1a,0x0a], ".png", - [:ImageIO], - [:QuartzImageIO, OSX], - [:ImageMagick], - [:MimeWriter, SAVE] + [idImageIO], + [idQuartzImageIO, OSX], + [idImageMagick], + [MimeWriter, 
SAVE] ) add_format( format"JPEG", UInt8[0xff,0xd8,0xff], [".jpeg", ".jpg", ".JPG"], - [:QuartzImageIO, OSX], - [:ImageMagick] + [idQuartzImageIO, OSX], + [idImageMagick] ) # 0xe1 add_format( format"BMP", UInt8[0x42,0x4d], ".bmp", - [:QuartzImageIO, OSX], - [:ImageMagick] + [idQuartzImageIO, OSX], + [idImageMagick] ) add_format( format"PCX", (UInt8[0x0a,0x02], UInt8[0x0a,0x05]), ".pcx", - [:ImageMagick] + [idImageMagick] ) add_format( format"SVG", (), ".svg", - [:MimeWriter, SAVE] + [MimeWriter, SAVE] ) #= @@ -136,37 +149,37 @@ add_saver(format"ZIP", :ZipeFile) =# #Shader files -add_format(format"GLSLShader", (), [".frag", ".vert", ".geom", ".comp"], [:GLAbstraction]) +# add_format(format"GLSLShader", (), [".frag", ".vert", ".geom", ".comp"], [:GLAbstraction]) # Mesh formats -add_format(format"OBJ", (), ".obj", [:MeshIO]) -add_format(format"PLY_ASCII", "ply\nformat ascii 1.0", ".ply", [:MeshIO]) -add_format(format"PLY_BINARY", "ply\nformat binary_little_endian 1.0", ".ply", [:MeshIO]) -add_format(format"2DM", "MESH2D", ".2dm", [:MeshIO]) -add_format(format"OFF", "OFF", ".off", [:MeshIO]) -add_format(format"MSH", (), ".msh", [:MeshIO]) +add_format(format"OBJ", (), ".obj", [idMeshIO]) +add_format(format"PLY_ASCII", "ply\nformat ascii 1.0", ".ply", [idMeshIO]) +add_format(format"PLY_BINARY", "ply\nformat binary_little_endian 1.0", ".ply", [idMeshIO]) +add_format(format"2DM", "MESH2D", ".2dm", [idMeshIO]) +add_format(format"OFF", "OFF", ".off", [idMeshIO]) +add_format(format"MSH", (), ".msh", [idMeshIO]) # Bundler SfM format -add_format(format"OUT", "# Bundle file v0.3\n", ".out", [:BundlerIO]) +add_format(format"OUT", "# Bundle file v0.3\n", ".out", [:BundlerIO => UUID("654bb1e1-1cb7-4447-b770-09a16346af94")]) # GSLIB/SGeMS format (http://gslib.com) -add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO]) +add_format(format"GSLIB", (), [".gslib",".sgems"], [:GslibIO => UUID("4610876b-9b01-57c8-9ad9-06315f1a66a5")]) ### Audio formats function detectwav(io) - 
seekstart(io) - magic = read!(io, Vector{UInt8}(undef, 4)) - magic == b"RIFF" || return false + getlength(io) >= 12 || return false + buf = Vector{UInt8}(undef, 4) + read!(io, buf) + buf == b"RIFF" || return false seek(io, 8) - submagic = read!(io, Vector{UInt8}(undef, 4)) - + read!(io, buf) submagic == b"WAVE" end -add_format(format"WAV", detectwav, ".wav", [:WAV]) -add_format(format"FLAC","fLaC",".flac",[:FLAC]) +add_format(format"WAV", detectwav, ".wav", [:WAV => UUID("8149f6b0-98f6-5db9-b78f-408fbbb8ef88")]) +add_format(format"FLAC","fLaC",".flac",[:FLAC => UUID("abae9e3b-a9a0-4778-b5c6-ca109b507d99")]) ## Profile data -add_format(format"JLPROF", [0x4a, 0x4c, 0x50, 0x52, 0x4f, 0x46, 0x01, 0x00], ".jlprof", [:FlameGraphs]) # magic is "JLPROF" followed by [0x01, 0x00] +add_format(format"JLPROF", [0x4a, 0x4c, 0x50, 0x52, 0x4f, 0x46, 0x01, 0x00], ".jlprof", [:FlameGraphs => UUID("08572546-2f56-4bcf-ba4e-bab62c3a3f89")]) # magic is "JLPROF" followed by [0x01, 0x00] ### Complex cases @@ -249,22 +262,22 @@ function detect_bedgraph(io) return false end -add_format(format"bedGraph", detect_bedgraph, [".bedgraph"], [:BedgraphFiles]) +add_format(format"bedGraph", detect_bedgraph, [".bedgraph"], [:BedgraphFiles => UUID("85eb9095-274b-55ce-be28-9e90f41ac741")]) # Handle OME-TIFFs, which are identical to normal TIFFs with the primary difference being the filename and embedded XML metadata const tiff_magic = (UInt8[0x4d,0x4d,0x00,0x2a], UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00],UInt8[0x49,0x49,0x2b,0x00]) function detecttiff(io) - seekstart(io) + getlength(io) >= 4 || return false magic = read!(io, Vector{UInt8}(undef, 4)) # do any of the first 4 bytes match any of the 4 possible combinations of tiff magics return any(map(x->all(magic .== x), tiff_magic)) end # normal TIFF detect_noometiff(io) = detecttiff(io) && ((:name ∉ propertynames(io)) || !(endswith(io.name, ".ome.tif>") || endswith(io.name, ".ome.tiff>"))) -add_format(format"TIFF", detect_noometiff, 
[".tiff", ".tif"], [:QuartzImageIO, OSX], [:ImageMagick]) +add_format(format"TIFF", detect_noometiff, [".tiff", ".tif"], [idQuartzImageIO, OSX], [idImageMagick]) # OME-TIFF detect_ometiff(io) = detecttiff(io) && (:name ∈ propertynames(io)) && (endswith(io.name, ".ome.tif>") || endswith(io.name, ".ome.tiff>")) -add_format(format"OMETIFF", detect_ometiff, [".tif", ".tiff"], [:OMETIFF]) +add_format(format"OMETIFF", detect_ometiff, [".tif", ".tiff"], [:OMETIFF => UUID("2d0ec36b-e807-5756-994b-45af29551fcf")]) # custom skipmagic functions for function-based tiff magic detection skipmagic(io, ::typeof(detect_ometiff)) = seek(io, 4) @@ -272,7 +285,7 @@ skipmagic(io, ::typeof(detect_noometiff)) = seek(io, 4) # AVI is a subtype of RIFF, as is WAV function detectavi(io) - seekstart(io) + getlength(io) >= 12 || return false magic = read!(io, Vector{UInt8}(undef, 4)) magic == b"RIFF" || return false seek(io, 8) @@ -280,21 +293,19 @@ function detectavi(io) submagic == b"AVI " end -add_format(format"AVI", detectavi, ".avi", [:ImageMagick]) +add_format(format"AVI", detectavi, ".avi", [idImageMagick]) # HDF5: the complication is that the magic bytes may start at # 0, 512, 1024, 2048, or any multiple of 2 thereafter -h5magic = (0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a) +const h5magic = [0x89,0x48,0x44,0x46,0x0d,0x0a,0x1a,0x0a] function detecthdf5(io) position(io) == 0 || return false - seekend(io) - len = position(io) - seekstart(io) + len = getlength(io) magic = Vector{UInt8}(undef, length(h5magic)) pos = position(io) while pos+length(h5magic) <= len read!(io, magic) - if iter_eq(magic, h5magic) + if magic == h5magic return true end pos = pos == 0 ? 
512 : 2*pos @@ -304,14 +315,12 @@ function detecthdf5(io) end false end -add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5]) +add_format(format"HDF5", detecthdf5, [".h5", ".hdf5"], [:HDF5 => UUID("f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f")]) function detect_stlascii(io) pos = position(io) try - seekend(io) - len = position(io) - seek(io, pos) + len = getlength(io, pos) len < 80 && return false header = read(io, 80) # skip header seek(io, pos) @@ -325,9 +334,7 @@ function detect_stlbinary(io) size_header = 80 + sizeof(UInt32) size_triangleblock = (4 * 3 * sizeof(Float32)) + sizeof(UInt16) pos = position(io) - seekend(io) - len = position(io) - seek(io, pos) + len = getlength(io, pos) len < size_header && return false skip(io, 80) # skip header @@ -340,14 +347,14 @@ function detect_stlbinary(io) result = eof(io) # if end of file, we have a stl! return result end -add_format(format"STL_ASCII", detect_stlascii, [".stl", ".STL"], [:MeshIO]) -add_format(format"STL_BINARY", detect_stlbinary, [".stl", ".STL"], [:MeshIO]) +add_format(format"STL_ASCII", detect_stlascii, [".stl", ".STL"], [idMeshIO]) +add_format(format"STL_BINARY", detect_stlbinary, [".stl", ".STL"], [idMeshIO]) # Astro Data add_format(format"FITS", # See https://www.loc.gov/preservation/digital/formats/fdd/fdd000317.shtml#sign [0x53,0x49,0x4d,0x50,0x4c,0x45,0x20,0x20,0x3d,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x54], - [".fit", ".fits", ".fts", ".FIT", ".FITS", ".FTS"], [:FITSIO]) + [".fit", ".fits", ".fts", ".FIT", ".FITS", ".FTS"], [:FITSIO => UUID("525bcba6-941b-5504-bd06-fd0dc1a4d2eb")]) function detect_gadget2(io) pos = position(io) @@ -361,16 +368,15 @@ function detect_gadget2(io) seek(io, pos) return temp1 == temp2 end -add_format(format"Gadget2", detect_gadget2, [".gadget2", ".Gadget2", ".GADGET2"], [:AstroIO]) - +add_format(format"Gadget2", detect_gadget2, [".gadget2", ".Gadget2", ".GADGET2"], [:AstroIO => 
UUID("c85a633c-0c3f-44a2-bffe-7f9d0681b3e7")]) -add_format(format"RawArray", [0x61,0x72,0x61,0x77,0x72,0x72,0x79,0x61], ".ra", [:RawArray]) +add_format(format"RawArray", [0x61,0x72,0x61,0x77,0x72,0x72,0x79,0x61], ".ra", [:RawArray => UUID("d3d335b2-f152-507c-820e-958e337efb65")]) -add_format(format"MetaImage", "ObjectType", ".mhd", [:MetaImageFormat]) +add_format(format"MetaImage", "ObjectType", ".mhd", [:MetaImageFormat => UUID("1950589f-4d68-56f0-9b94-9d8646217309")]) -add_format(format"vegalite", (), [".vegalite"], [:VegaLite]) -add_format(format"vega", (), [".vega"], [:Vega], [:VegaLite, SAVE]) +add_format(format"vegalite", (), [".vegalite"], [idVegaLite]) +add_format(format"vega", (), [".vega"], [:Vega => UUID("239c3e63-733f-47ad-beb7-a12fde22c578")], [idVegaLite, SAVE]) -add_format(format"FCS", "FCS", [".fcs"], [:FCSFiles]) +add_format(format"FCS", "FCS", [".fcs"], [:FCSFiles => UUID("d76558cf-badf-52d4-a17e-381ab0b0d937")]) -add_format(format"HTML", (), [".html", ".htm"], [:MimeWriter, SAVE]) +add_format(format"HTML", (), [".html", ".htm"], [MimeWriter, SAVE]) diff --git a/src/registry_setup.jl b/src/registry_setup.jl index 337162cc..0adca9ff 100644 --- a/src/registry_setup.jl +++ b/src/registry_setup.jl @@ -1,58 +1,66 @@ # This file contains the code that allows things to be added to the registry const ext2sym = Dict{String, Union{Symbol,Vector{Symbol}}}() -const magic_list = Vector{Pair}() # sorted, see magic_cmp below -const sym2info = Dict{Symbol,Any}() # Symbol=>(magic, extension) -const magic_func = Vector{Pair{Function,Symbol}}() # for formats with complex magic #s +const magic_list = Vector{Pair{Vector{UInt8},Symbol}}() # sorted, see magic_cmp below +const sym2info = Dict{Symbol,Tuple{Any,Any}}() # Symbol=>(magic, extension) +const magic_func = Vector{Pair{Function,Symbol}}() # for formats with complex magic detection +const empty_magic = UInt8[] ## OS: -abstract type OS end -abstract type Unix <: OS end -struct Windows <: OS end -struct OSX <: Unix 
end -struct Linux <: Unix end - -split_predicates(list) = filter(x-> x <: OS, list), filter(x-> !(x <: OS), list) -applies_to_os(os::Vector) = isempty(os) || any(applies_to_os, os) -applies_to_os(os::Type{<:OS}) = false - -applies_to_os(os::Type{<:Unix}) = Sys.isunix() -applies_to_os(os::Type{Windows}) = Sys.iswindows() -applies_to_os(os::Type{OSX}) = Sys.isapple() -applies_to_os(os::Type{Linux}) = Sys.islinux() +@enum OS Unix Windows OSX Linux + +applies_to_os(oslist) = isempty(oslist) || any(applies_to_os, oslist) +function applies_to_os(os::OS) + os == Unix && return Sys.isunix() + os == Windows && return Sys.iswindows() + os == OSX && return Sys.isapple() + os == Linux && return Sys.islinux() + return false +end ## Magic bytes: # magic_cmp results in magic_list being sorted in order of increasing -# length(magic), then (among tuples with the same length) in -# dictionary order. This ordering has the advantage that you can +# length(magic), then (among sequences with the same length) in +# lexographic order. This ordering has the advantage that you can # incrementally read bytes from the stream without worrying that # you'll encounter an EOF yet still have potential matches later in # the list. -function magic_cmp(p::Pair, t::Tuple) - pt = first(p) - lp, lt = length(pt), length(t) - lp < lt && return true - lp > lt && return false - pt < t -end -function magic_cmp(t::Tuple, p::Pair) - pt = first(p) - lp, lt = length(pt), length(t) - lt < lp && return true - lt > lp && return false - t < pt -end - -canonicalize_magic(m::NTuple{N,UInt8}) where {N} = m -canonicalize_magic(m::AbstractVector{UInt8}) = tuple(m...) 
+function magic_cmp(a::Vector{UInt8}, b::Vector{UInt8}) + la, lb = length(a), length(b) + la < lb && return true + la > lb && return false + for (ia, ib) in zip(a, b) + ia < ib && return true + ia > ib && return false + end + return false +end +magic_cmp(p::Pair, m::Vector{UInt8}) = magic_cmp(p.first, m) +magic_cmp(m::Vector{UInt8}, p::Pair) = magic_cmp(m, p.first) + +canonicalize_magic(@nospecialize(m::Tuple{Vararg{UInt8}})) = UInt8[m...] +canonicalize_magic(m::AbstractVector{UInt8}) = convert(Vector{UInt8}, m) canonicalize_magic(m::String) = canonicalize_magic(codeunits(m)) ## Load/Save -struct LOAD end -struct SAVE end +@enum IOSupport LOAD SAVE + +function split_predicates(list) + os = OS[] + ls = IOSupport[] + for item in list + if isa(item, OS) + push!(os, item) + else + push!(ls, item) + end + end + return os, ls +end + function add_loadsave(format, predicates) library = popfirst!(predicates) @@ -70,15 +78,16 @@ end ## Add Format: function add_format(fmt, magic, extension, load_save_libraries...) - add_format(fmt, magic, extension) for library in load_save_libraries add_loadsave(fmt, library) end + # Add the format after we've validated the packages (to prevent a partially-registered format) + add_format(fmt, magic, extension) fmt end """ -`add_format(fmt, magic, extension)` registers a new `DataFormat`. +`add_format(fmt, magic, extension)` registers a new [`DataFormat`](@ref). For example: add_format(format"TIFF", (UInt8[0x4d,0x4d,0x00,0x2b], UInt8[0x49,0x49,0x2a,0x00]), [".tiff", ".tif"]) @@ -90,57 +99,65 @@ Note that extensions, magic numbers, and format-identifiers are case-sensitive. 
You can also specify particular packages that support the format with `add_format(fmt, magic, extension, pkgspecifiers...)`, where example `pkgspecifiers` are: - add_format(fmt, magic, extension, [:PkgA]) # only PkgA supports the format (load & save) - add_format(fmt, magic, extension, [:PkgA], [:PkgB]) # try PkgA first, but if it fails try PkgB - add_format(fmt, magic, extension, [:PkgA, LOAD], [:PkgB]) # try PkgA first for `load`, otherwise use PkgB - add_format(fmt, magic, extension, [:PkgA, OSX], [:PkgB]) # use PkgA on OSX, and PkgB otherwise + add_format(fmt, magic, extension, [:PkgA=>UUID(...)]) # only PkgA supports the format (load & save) + add_format(fmt, magic, extension, [:PkgA=>uuidA], [:PkgB=>uuidB]) # try PkgA first, but if it fails try PkgB + add_format(fmt, magic, extension, [:PkgA=>uuidA, LOAD], [:PkgB=>uuidB]) # try PkgA first for `load`, otherwise use PkgB + add_format(fmt, magic, extension, [:PkgA=>uuidA, OSX], [:PkgB=>uuidB]) # use PkgA on OSX, and PkgB otherwise + +The `uuid`s are all of type `UUID` and can be obtained from the package's `Project.toml` file. You can combine `LOAD`, `SAVE`, `OSX`, `Unix`, `Windows` and `Linux` arbitrarily to narrow `pkgspecifiers`. """ -function add_format(fmt::Type{DataFormat{sym}}, magic::Union{Tuple,AbstractVector,String}, extension) where sym - haskey(sym2info, sym) && error("format ", fmt, " is already registered") - m = canonicalize_magic(magic) - rng = searchsorted(magic_list, m, lt=magic_cmp) - if !isempty(m) && !isempty(rng) - error("magic bytes ", m, " are already registered") +add_format(@nospecialize(fmt::Type), args...) = add_format(formatname(fmt)::Symbol, args...) 
+add_format(sym::Symbol, magic::Union{Tuple,AbstractVector{UInt8},String}, extension) = + add_format(sym, canonicalize_magic(magic), extension) +function add_format(sym::Symbol, + @nospecialize(magics::Tuple{Vector{UInt8},Vararg{Vector{UInt8}}}), extension) + add_format(sym, [magics...], extension) +end + +function add_format(sym::Symbol, magic::Vector{UInt8}, extension) + haskey(sym2info, sym) && error("format ", sym, " is already registered") + rng = searchsorted(magic_list, magic, lt=magic_cmp) + if !isempty(magic) && !isempty(rng) + error("magic bytes ", magic, " are already registered") end - insert!(magic_list, first(rng), Pair(m, sym)) # m=>sym in 0.4 - sym2info[sym] = (m, extension) + insert!(magic_list, first(rng), magic=>sym) + sym2info[sym] = (magic, extension) add_extension(extension, sym) - fmt + nothing end # for multiple magic bytes -function add_format(fmt::Type{DataFormat{sym}}, - magics::Tuple{T,Vararg{T}}, extension) where {sym, T <: Vector{UInt8}} - haskey(sym2info, sym) && error("format ", fmt, " is already registered") - magics = map(canonicalize_magic, magics) +function add_format(sym::Symbol, magics::Vector{Vector{UInt8}}, extension) + haskey(sym2info, sym) && error("format ", sym, " is already registered") for magic in magics rng = searchsorted(magic_list, magic, lt=magic_cmp) if !isempty(magic) && !isempty(rng) error("magic bytes ", magic, " are already registered") end - insert!(magic_list, first(rng), Pair(magic, sym)) # m=>sym in 0.4 + insert!(magic_list, first(rng), magic=>sym) end - sym2info[sym] = (magics, extension) + sym2info[sym] = (sort(magics; lt=magic_cmp), extension) add_extension(extension, sym) - fmt + nothing end # For when "magic" is supplied as a function (see the HDF5 example in # registry.jl) -function add_format(fmt::Type{DataFormat{sym}}, magic, extension) where sym - haskey(sym2info, sym) && error("format ", fmt, " is already registered") +function add_format(sym::Symbol, @nospecialize(magic::Function), extension) + 
haskey(sym2info, sym) && error("format ", sym, " is already registered") push!(magic_func, Pair(magic,sym)) # magic=>sym in 0.4 sym2info[sym] = (magic, extension) add_extension(extension, sym) - fmt + nothing end """ `del_format(fmt::DataFormat)` deletes `fmt` from the format registry. """ -function del_format(fmt::Type{DataFormat{sym}}) where sym +del_format(@nospecialize(fmt::Type)) = del_format(formatname(fmt)::Symbol) +function del_format(sym::Symbol) magic, extension = sym2info[sym] del_magic(magic, sym) delete!(sym2info, sym) @@ -148,12 +165,13 @@ function del_format(fmt::Type{DataFormat{sym}}) where sym nothing end -# Deletes multiple magic bytes -del_magic(magic::Tuple, sym) = for m in magic - del_magic(m, sym) -end +# # Deletes multiple magic bytes +# del_magic(magic::Tuple, sym) = for m in magic +# del_magic(m, sym) +# end # Deletes single magic bytes -function del_magic(magic::NTuple{N, UInt8}, sym) where N +del_magic(@nospecialize(magic), sym::Symbol) = del_magic(canonicalize_magic(magic), sym) +function del_magic(magic::Vector{UInt8}, sym::Symbol) rng = searchsorted(magic_list, magic, lt=magic_cmp) if length(magic) == 0 fullrng = rng @@ -171,15 +189,18 @@ function del_magic(magic::NTuple{N, UInt8}, sym) where N deleteat!(magic_list, first(rng)) nothing end +del_magic(magics::Vector{Vector{UInt8}}, sym::Symbol) = foreach(magics) do magic + del_magic(magic, sym) +end -function del_magic(magic::Function, sym) - deleteat!(magic_func, something(findfirst(isequal(Pair(magic,sym)), magic_func), 0)) +function del_magic(@nospecialize(magic::Function), sym::Symbol) + deleteat!(magic_func, something(findfirst(isequal(Pair{Function,Symbol}(magic,sym)), magic_func), 0)) nothing end ## File Extensions: -function add_extension(ext::String, sym) +function add_extension(ext::String, sym::Symbol) if haskey(ext2sym, ext) v = ext2sym[ext] if isa(v, Symbol) diff --git a/src/types.jl b/src/types.jl index 3f952da2..5c6f5a93 100644 --- a/src/types.jl +++ b/src/types.jl @@ 
-1,4 +1,7 @@ -# The core types that represent the file formats +# "Public" types that represent the file formats. These are used +# to communicate results externally, but are generally avoided for +# internal operations because they trigger excessive specialization +# and inference failures. ## DataFormat: """ @@ -24,64 +27,75 @@ formatname(::Formatted{F}) where F<:DataFormat = formatname(F) ## File: """ -`File(fmt, filename)` indicates that `filename` is a file of known -DataFormat `fmt`. For example, `File{fmtpng}(filename)` would indicate a PNG +`File{fmt}(filename)` indicates that `filename` is a file of known +[`DataFormat`](@ref) `fmt`. For example, `File{format"PNG"}(filename)` would indicate a PNG file. """ -struct File{F<:DataFormat} <: Formatted{F} - filename +struct File{F<:DataFormat, Name} <: Formatted{F} + filename::Name end -File(fmt::Type{DataFormat{sym}}, filename) where {sym} = File{fmt}(filename) +File{F}(file::File{F}) where F<:DataFormat = file +File{DataFormat{sym}}(@nospecialize(file::Formatted)) where sym = throw(ArgumentError("cannot change the format of $file to $sym")) +File{F}(file::AbstractString) where F<:DataFormat = File{F,String}(String(file)) # canonicalize to limit type-diversity +File{F}(file) where F<:DataFormat = File{F,typeof(file)}(file) # The docs are separated from the definition because of https://github.com/JuliaLang/julia/issues/34122 filename(@nospecialize(f::File)) = f.filename """ -`filename(file)` returns the filename associated with `File` `file`. +`filename(file)` returns the filename associated with [`File`](@ref) `file`. """ filename(::File) file_extension(@nospecialize(f::File)) = splitext(filename(f))[2] """ -`file_extension(file)` returns the file extension associated with `File` `file`. +`file_extension(file)` returns the file extension associated with [`File`](@ref) `file`. """ file_extension(::File) ## Stream: """ -`Stream(fmt, io, [filename])` indicates that the stream `io` is -written in known `Format`. 
For example, `Stream{PNG}(io)` would -indicate PNG format. If known, the optional `filename` argument can +`Stream{fmt}(io, filename=nothing)` indicates that the stream `io` is +written in known format [`DataFormat`](@ref) `fmt`. +For example, `Stream{format"PNG"}(io)` would indicate PNG format. +If known, the optional `filename` argument can be used to improve error messages, etc. """ -struct Stream{F <: DataFormat, IOtype <: IO} <: Formatted{F} +struct Stream{F <: DataFormat, IOtype <: IO, Name} <: Formatted{F} io::IOtype - filename + filename::Name end -Stream(::Type{F}, io::IO) where {F<:DataFormat} = Stream{F,typeof(io)}(io, nothing) -Stream(::Type{F}, io::IO, filename::AbstractString) where {F<:DataFormat} = Stream{F, typeof(io)}(io, String(filename)) -Stream(::Type{F}, io::IO, filename) where {F<:DataFormat} = Stream{F, typeof(io)}(io, filename) -Stream(file::File{F}, io::IO) where {F} = Stream{F, typeof(io)}(io, filename(file)) +Stream{F,IOtype}(io::IO, filename::AbstractString) where {F<:DataFormat,IOtype} = Stream{F, IOtype, String}(io, String(filename)) +Stream{F,IOtype}(io::IO, filename) where {F<:DataFormat,IOtype} = Stream{F, IOtype, typeof(filename)}(io, filename) +Stream{F,IOtype}(io::IO) where {F<:DataFormat,IOtype} = Stream{F, IOtype}(io, nothing) + +Stream{F,IOtype}(file::Formatted{F}, io::IO) where {F<:DataFormat,IOtype} = Stream{F,IOtype}(io, filename(file)) +Stream{F,IOtype}(@nospecialize(file::Formatted), io::IO) where {F<:DataFormat,IOtype} = + throw(ArgumentError("cannot change the format of $file to $(formatname(F)::Symbol)")) + +Stream{F}(io::IO, args...) where {F<:DataFormat} = Stream{F, typeof(io)}(io, args...) 
+Stream{F}(file::File, io::IO) where {F<:DataFormat} = Stream{F, typeof(io)}(file, io) +Stream(file::File{F}, io::IO) where {F<:DataFormat} = Stream{F}(io, filename(file)) stream(@nospecialize(s::Stream)) = s.io -"`stream(s)` returns the stream associated with `Stream` `s`" +"`stream(s)` returns the stream associated with [`Stream`](@ref) `s`" stream(::Stream) filename(@nospecialize(s::Stream)) = s.filename """ `filename(stream)` returns a string of the filename -associated with `Stream` `stream`, or nothing if there is no file associated. +associated with [`Stream`](@ref) `stream`, or nothing if there is no file associated. """ filename(::Stream) function file_extension(@nospecialize(f::Stream)) fname = filename(f) - (fname == nothing) && return nothing + (fname === nothing) && return nothing splitext(fname)[2] end """ -`file_extension(file)` returns a nullable-string for the file extension associated with `Stream` `stream`. +`file_extension(file)` returns a nullable-string for the file extension associated with [`Stream`](@ref) `stream`. 
""" file_extension(::Stream) @@ -98,7 +112,7 @@ end # Implement standard I/O operations for File and Stream @inline function Base.open(@nospecialize(file::File{F}), @nospecialize(args...)) where F<:DataFormat fn = filename(file) - Stream(F, open(fn, args...), abspath(fn)) + Stream{F}(open(fn, args...), abspath(fn)) end Base.close(@nospecialize(s::Stream)) = close(stream(s)) diff --git a/test/error_handling.jl b/test/error_handling.jl index 652fd81b..e699e754 100644 --- a/test/error_handling.jl +++ b/test/error_handling.jl @@ -5,7 +5,7 @@ import FileIO: File, @format_str save(file::File{format"PATHERROR"}, data) = nothing load(file::File{format"PATHERROR"}) = nothing end -add_format(format"PATHERROR", (), ".patherror", [:PathError]) +add_format(format"PATHERROR", (), ".patherror", [PathError]) @testset "Path errors" begin # handling a nonexistent parent directory, during save @@ -14,16 +14,18 @@ add_format(format"PATHERROR", (), ".patherror", [:PathError]) fn = joinpath(temp_dir, "file.patherror") save(fn, "test content") @test isdir(temp_dir) - + # handling a filepath that's an existing directory, during save @test_throws ArgumentError save(format"PATHERROR", mktempdir(), "test content") - + # handling a nonexistent filepath, during load @test_throws ArgumentError load(joinpath(mktempdir(), "dummy.patherror")) end @testset "Not installed" begin - add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled]) + @test_logs (:warn, r"supply `pkg` as a Module or `name=>uuid`") @test_throws ArgumentError add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled]) + # Give it a fake UUID + add_format(format"NotInstalled", (), ".not_installed", [:NotInstalled=>UUID("79e393ae-7a7b-11eb-1530-bf4d98024096")]) @test_throws ArgumentError save("test.not_installed", nothing) # Core.eval(Base, :(is_interactive = true)) # for interactive error handling @@ -54,13 +56,13 @@ end module BrokenIO using FileIO end -add_format(format"BROKEN", (), ".brok", 
[:BrokenIO]) +add_format(format"BROKEN", (), ".brok", [BrokenIO]) @testset "Absent implementation" begin stderr_copy = stderr rserr, wrerr = redirect_stderr() - @test_throws FileIO.LoaderError load(Stream(format"BROKEN",stdin)) - @test_throws FileIO.WriterError save(Stream(format"BROKEN",stdout), nothing) + @test_throws FileIO.LoaderError load(Stream{format"BROKEN"}(stdin)) + @test_throws FileIO.WriterError save(Stream{format"BROKEN"}(stdout), nothing) redirect_stderr(stderr_copy) close(rserr);close(wrerr) end @@ -79,8 +81,8 @@ end format"MultiError", (), ".multierr", - [:MultiError1], - [:MultiError2] + [MultiError1], + [MultiError2] ) tmpfile = joinpath(mktempdir(), "test.multierr") open(tmpfile, "w") do io diff --git a/test/loadsave.jl b/test/loadsave.jl index aa1478ad..3eb55bc3 100644 --- a/test/loadsave.jl +++ b/test/loadsave.jl @@ -8,7 +8,7 @@ import FileIO: File, @format_str load(file::File{format"PBMText"}) = "PBMText" load(file::File{format"PBMBinary"}) = "PBMBinary" load(file::File{format"JLD"}) = "JLD" -load(file::File{format"GZIP"}) = "GZIP" +load(file::File{format"GZIP"}) = "GZIP" end module TestLoadSave2 import FileIO: File, @format_str @@ -27,11 +27,11 @@ try @testset "Load $(typeof(fp))" for fp in (file_dir, file_path) - add_loader(format"PBMText", :TestLoadSave) - add_loader(format"PBMBinary", :TestLoadSave) - add_loader(format"HDF5", :TestLoadSave2) - add_loader(format"JLD", :TestLoadSave) - add_loader(format"GZIP", :TestLoadSave) + add_loader(format"PBMText", TestLoadSave) + add_loader(format"PBMBinary", TestLoadSave) + add_loader(format"HDF5", TestLoadSave2) + add_loader(format"JLD", TestLoadSave) + add_loader(format"GZIP", TestLoadSave) @test load(joinpath(fp,"file1.pbm")) == "PBMText" @test load(joinpath(fp,"file2.pbm")) == "PBMBinary" @@ -172,8 +172,8 @@ function save(s::Stream{format"DUMMY"}, data; extra=UInt8[]) write(s, extra) end -add_loader(format"DUMMY", :Dummy) -add_saver(format"DUMMY", :Dummy) +add_loader(format"DUMMY", Dummy) 
+add_saver(format"DUMMY", Dummy) end # module Dummy @@ -189,7 +189,7 @@ end # module Dummy f = query(fnrel) @test isabspath(filename(f)) @test endswith(filename(f),fn) # TravisOSX prepends "/private" - f = File(format"DUMMY", fnrel) + f = File{format"DUMMY"}(fnrel) @test !(isabspath(filename(f))) open(f) do s @test isabspath(filename(s)) @@ -206,7 +206,7 @@ end # module Dummy f = query(fnrel) @test isabspath(filename(f)) @test endswith(filename(f),fn2) # TravisOSX prepends "/private" - f = File(format"DUMMY", fnrel) + f = File{format"DUMMY"}(fnrel) @test !(isabspath(filename(f))) open(f) do s @test isabspath(filename(s)) @@ -318,7 +318,7 @@ del_format(format"DUMMY") # PPM/PBM can be either binary or text. Test that the defaults work, # and that we can force a choice. module AmbigExt -import FileIO: File, @format_str, Stream, stream, skipmagic +using FileIO: File, @format_str, Stream, stream, skipmagic load(f::File{format"AmbigExt1"}) = open(f) do io skipmagic(io) @@ -342,8 +342,8 @@ end end @testset "Ambiguous extension" begin - add_format(format"AmbigExt1", "ambigext1", ".aext", [:AmbigExt]) - add_format(format"AmbigExt2", "ambigext2", ".aext", [:AmbigExt]) + add_format(format"AmbigExt1", "ambigext1", ".aext", [AmbigExt]) + add_format(format"AmbigExt2", "ambigext2", ".aext", [AmbigExt]) A = "this is a test" fn = string(tempname(), ".aext") # Test the forced version first: we wouldn't want some method in Netpbm @@ -354,13 +354,13 @@ end B = load(fn) @test B == A - @test typeof(query(fn)) == File{format"AmbigExt2"} + @test typeof(query(fn)) <: File{format"AmbigExt2"} rm(fn) save(fn, A) B = load(fn) @test B == A - @test typeof(query(fn)) == File{format"AmbigExt1"} + @test typeof(query(fn)) <: File{format"AmbigExt1"} rm(fn) del_format(format"AmbigExt1") @@ -368,5 +368,20 @@ end end @testset "Absent file" begin - @test_throws SystemError load("nonexistent.oops") + @test_throws Union{ArgumentError,SystemError} load("nonexistent.oops") +end + +module BadOverride +using 
FileIO +FileIO.load(::File{format"OVERRIDE"}) = 22 +add_format(format"OVERRIDE", "OVRD0101", ".ovr", [BadOverride]) +end + +@testset "Warn FileIO overrides" begin + fn = string(tempname(), ".ovr") + open(fn, "w") do io + write(io, magic(:OVERRIDE)) + print(io, "\nDone") + end + @test (@test_logs (:warn, r"incorrectly extends FileIO functions \(see FileIO documentation\)") load(fn)) == 22 end diff --git a/test/query.jl b/test/query.jl index 24de6eaf..e26f40bd 100644 --- a/test/query.jl +++ b/test/query.jl @@ -67,12 +67,12 @@ try @test unknown(format"UNKNOWN") add_format(format"CSV", UInt8[], ".csv") - @test FileIO.info(format"CSV") == ((),".csv") + @test FileIO.info(format"CSV") == ([],".csv") add_format(format"FOO", (), ".foo") # issue #17 @test_throws Exception FileIO.info(format"OOPS") @test FileIO.ext2sym[".csv"] == :CSV del_format(format"FOO") - @test FileIO.magic_list == [Pair((),:CSV)] + @test FileIO.magic_list == [Pair([],:CSV)] del_format(format"CSV") @test isempty(FileIO.ext2sym) @test isempty(FileIO.magic_list) @@ -81,24 +81,27 @@ try add_format(format"JUNK", "JUNK", [".jnk",".junk",".JNK"]) - @test FileIO.info(format"JUNK") == (tuple(b"JUNK"...),[".jnk",".junk",".JNK"]) + @test FileIO.info(format"JUNK") == (b"JUNK",[".jnk",".junk",".JNK"]) @test FileIO.ext2sym[".jnk"] == :JUNK @test FileIO.ext2sym[".junk"] == :JUNK @test FileIO.ext2sym[".JNK"] == :JUNK - @test FileIO.magic_list == [Pair((0x4a,0x55,0x4e,0x4b),:JUNK)] + @test FileIO.magic_list == [Pair([0x4a,0x55,0x4e,0x4b],:JUNK)] + add_format(format"OTHER", [0x01, 0x02], ".othr") end @testset "streams" begin io = IOBuffer() - s = Stream(format"JUNK", io) - @test typeof(s) == Stream{DataFormat{:JUNK},IOBuffer} + s = Stream{format"JUNK"}(io) + @test typeof(s) <: Stream{DataFormat{:JUNK},IOBuffer} @test filename(s) == nothing - @test_throws Exception FileIO.file!(s) - s = Stream(format"JUNK", io, "junk.jnk") + @test_throws ErrorException("filename unknown") FileIO.file!(s) + s = Stream{format"JUNK"}(io, 
"junk.jnk") @test filename(s) == "junk.jnk" - s = Stream(format"JUNK", io, "junk2.jnk") + s = Stream{format"JUNK"}(io, "junk2.jnk") @test filename(s) == "junk2.jnk" + s = Stream{format"JUNK"}(io, "somefile.jnk") + @test FileIO.file!(s) isa File{format"JUNK"} end @testset "query" begin @@ -120,9 +123,23 @@ try write(io, "JUNK and some more stuff") seek(io, 0) q = query(io) - @test typeof(q) == Stream{format"JUNK",typeof(io)} + @test typeof(q) <: Stream{format"JUNK",typeof(io)} @test !(unknown(q)) @test file_extension(q) == nothing + # unseekable IO + seek(io, 0) + io.seekable = false + @test !FileIO.seekable(io) + q = query(io) + @test typeof(q) <: Stream{format"JUNK",typeof(io)} + io.seekable = true + # too short to match + io2 = IOBuffer() + write(io2, "JU") + seek(io2, 0) + io2.seekable = false + q = query(io2) + @test unknown(q) # File with correct extension str = String(take!(io)) @@ -131,8 +148,19 @@ try write(file, str) end q = query(fn) - @test typeof(q) == File{format"JUNK"} + @test typeof(q) <: File{format"JUNK"} @test file_extension(q) == ".jnk" + # for good measure, test some constructors & other query calls + @test query(q) == q + @test File{format"JUNK"}(q) == q + @test_throws ArgumentError("cannot change the format of $q to OTHER") File{format"OTHER"}(q) + open(fn) do io + @test query(io) isa Stream{format"JUNK", typeof(io)} + @test query(io, q) isa Stream{format"JUNK", typeof(io)} + @test Stream(q, io) isa Stream{format"JUNK", typeof(io)} + @test Stream{format"JUNK"}(q, io) isa Stream{format"JUNK", typeof(io)} + @test_throws ArgumentError Stream{format"OTHER"}(q, io) + end rm(fn) @@ -142,9 +170,18 @@ try write(file, str) end q = query(fn) - @test typeof(q) == File{format"JUNK"} + @test typeof(q) <: File{format"JUNK"} @test file_extension(q) == ".csv" rm(fn) + # erroneous extension with a file that has magic bytes + fn = string(tempname(), ".othr") + open(fn, "w") do file + write(file, str) + end + q = query(fn) + @test typeof(q) <: File{format"JUNK"} 
+ @test query(fn; checkfile=false) isa File{format"OTHER"} + rm(fn) # Format with no magic bytes add_format(format"BAD", (), ".bad") @@ -153,12 +190,12 @@ try write(file, "Here's some data") end q = query(fn) - @test typeof(q) == File{format"BAD"} + @test typeof(q) <: File{format"BAD"} @test file_extension(q) == ".bad" rm(fn) q = query( "some_non_existant_file.bad") - @test typeof(q) == File{format"BAD"} + @test typeof(q) <: File{format"BAD"} # Unknown extension fn = string("tempname", ".wrd") @@ -176,13 +213,22 @@ try write(file, "test1") end q = query(fn) - @test typeof(q) == File{format"DOUBLE_1"} + @test typeof(q) <: File{format"DOUBLE_1"} rm(fn) + # Busted detection function + busted(io) = error("whoops") + add_format(format"BUSTED", busted, ".bstd") + fn = string(tempname(), ".bstd") + open(fn, "w") do file + write(file, "JUNK stuff") + end + @test (@test_logs (:error,r"There was an error in magic function .*busted") query(fn)) isa File{format"JUNK"} + del_format(format"BUSTED") add_format(format"MAGIC", "this so magic", ".mmm") q = query( "some_non_existant_file.mmm") - @test typeof(q) == File{format"MAGIC"} + @test typeof(q) <: File{format"MAGIC"} add_format(format"DOUBLE_MAGIC", (UInt8[0x4d,0x4d,0x00,0x2a], UInt8[0x4d,0x4d,0x00]), ".dd2") @@ -192,7 +238,7 @@ try write(file, randstring(19)) end q = query(fn) - @test typeof(q) == File{format"DOUBLE_MAGIC"} + @test typeof(q) <: File{format"DOUBLE_MAGIC"} io = open(q) skipmagic(io) @test position(io) == 4 @@ -204,7 +250,7 @@ try write(file, randstring(19)) end q = query(fn) - @test typeof(q) == File{format"DOUBLE_MAGIC"} + @test typeof(q) <: File{format"DOUBLE_MAGIC"} io = open(q) @test file_extension(q) == ".dd2" skipmagic(io) @@ -214,7 +260,10 @@ try write(file, randstring(19)) # corrupt magic bytes end open(fn, "r") do file - @test_throws Exception skipmagic(file) + @test_throws ErrorException("tried to skip magic bytes of an IO that does not contain the magic bytes of the format. 
IO: $file") skipmagic(Stream{format"DOUBLE_MAGIC"}(file, fn)) + end + open(fn, "r") do file + @test_throws ErrorException("tried to skip magic bytes of an IO that does not contain the magic bytes of the format. IO: $file") skipmagic(file, format"DOUBLE_MAGIC") end rm(fn) lene0 = length(FileIO.ext2sym) @@ -235,8 +284,8 @@ try format"MultiLib", UInt8[0x42,0x4d], ".mlb", - [:LoadTest1, FileIO.LOAD, OSKey], - [:LoadTest2] + [LoadTest1, FileIO.LOAD, OSKey], + [LoadTest2] ) @test lensave0 + 1 == length(FileIO.sym2saver) @test lenload0 + 1 == length(FileIO.sym2loader) @@ -272,7 +321,7 @@ file_path = Path(file_dir) @testset "Querying with $(typeof(fp))" for fp in (file_dir, file_path) @testset "bedGraph" begin q = query(joinpath(file_dir, "file.bedgraph")) - @test typeof(q) == File{format"bedGraph"} + @test typeof(q) <: File{format"bedGraph"} open(q) do io @test position(io) == 0 skipmagic(io) @@ -285,9 +334,9 @@ file_path = Path(file_dir) end @testset "STL detection" begin q = query(joinpath(file_dir, "ascii.stl")) - @test typeof(q) == File{format"STL_ASCII"} + @test typeof(q) <: File{format"STL_ASCII"} q = query(joinpath(file_dir, "binary_stl_from_solidworks.STL")) - @test typeof(q) == File{format"STL_BINARY"} + @test typeof(q) <: File{format"STL_BINARY"} open(q) do io @test position(io) == 0 skipmagic(io) @@ -296,16 +345,16 @@ file_path = Path(file_dir) end @testset "PLY detection" begin q = query(joinpath(file_dir, "ascii.ply")) - @test typeof(q) == File{format"PLY_ASCII"} + @test typeof(q) <: File{format"PLY_ASCII"} q = query(joinpath(file_dir, "binary.ply")) - @test typeof(q) == File{format"PLY_BINARY"} + @test typeof(q) <: File{format"PLY_BINARY"} end @testset "Multiple Magic bytes" begin q = query(joinpath(file_dir, "magic1.tiff")) - @test typeof(q) == File{format"TIFF"} + @test typeof(q) <: File{format"TIFF"} q = query(joinpath(file_dir, "magic2.tiff")) - @test typeof(q) == File{format"TIFF"} + @test typeof(q) <: File{format"TIFF"} open(q) do io @test position(io) 
== 0 skipmagic(io) @@ -323,11 +372,11 @@ file_path = Path(file_dir) @test !(FileIO.detectavi(s)) end q = query(joinpath(file_dir, "bees.avi")) - @test typeof(q) == File{format"AVI"} + @test typeof(q) <: File{format"AVI"} end @testset "RDA detection" begin q = query(joinpath(file_dir, "minimal_ascii.rda")) - @test typeof(q) == File{format"RData"} + @test typeof(q) <: File{format"RData"} open(q) do io @test position(io) == 0 @test FileIO.detect_rdata(io) @@ -337,7 +386,7 @@ file_path = Path(file_dir) end @testset "RDS detection" begin q = query(joinpath(file_dir, "minimal_ascii.rds")) - @test typeof(q) == File{format"RDataSingle"} + @test typeof(q) <: File{format"RDataSingle"} open(q) do io @test position(io) == 0 @test FileIO.detect_rdata_single(io) @@ -359,6 +408,6 @@ end end @testset "Format with function for magic bytes" begin - add_format(format"FUNCTION_FOR_MAGIC_BYTES", x -> 0x00, ".wav", [:WAV]) + add_format(format"FUNCTION_FOR_MAGIC_BYTES", io -> true, ".wav", [LoadTest1]) del_format(format"FUNCTION_FOR_MAGIC_BYTES") end diff --git a/test/runtests.jl b/test/runtests.jl index b8adb3bc..5d2c2a2b 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,6 +1,7 @@ using FileIO using FilePathsBase using Test +using UUIDs Threads.nthreads() <= 1 && @info "Threads.nthreads() = $(Threads.nthreads()), multithread tests will be disabled"