diff --git a/Project.toml b/Project.toml index 0a30182..8527d49 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "Legolas" uuid = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd" authors = ["Beacon Biosignals, Inc."] -version = "0.5.21" +version = "0.5.22" [deps] Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45" @@ -18,9 +18,10 @@ ArrowTypes = "2.3" Compat = "3.34, 4" ConstructionBase = "1.5.7" DataFrames = "1" +Pkg = "<0.0.1, 1" Tables = "1.4" -Test = "1" -UUIDs = "1" +Test = "<0.0.1, 1" +UUIDs = "<0.0.1, 1" julia = "1.6" [extensions] @@ -31,11 +32,12 @@ Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" Compat = "34da2185-b29b-5c13-b0c7-acf172513d20" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" +Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [targets] -test = ["Accessors", "Aqua", "Compat", "DataFrames", "Test", "UUIDs"] +test = ["Accessors", "Aqua", "Compat", "DataFrames", "Pkg", "Test", "UUIDs"] [weakdeps] ConstructionBase = "187b0558-2788-49d3-abe0-74a17ed4e7c9" diff --git a/docs/src/index.md b/docs/src/index.md index e366f78..9b55ea3 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -20,6 +20,7 @@ Legolas.parse_identifier Legolas.name Legolas.version Legolas.identifier +Legolas.schema_provider Legolas.parent Legolas.declared_fields Legolas.declaration diff --git a/src/Legolas.jl b/src/Legolas.jl index 9c27d94..258597b 100644 --- a/src/Legolas.jl +++ b/src/Legolas.jl @@ -3,6 +3,8 @@ module Legolas using Tables, Arrow, UUIDs const LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY = "legolas_schema_qualified" +const LEGOLAS_SCHEMA_PROVIDER_NAME_METADATA_KEY = "legolas_julia_schema_provider_name" +const LEGOLAS_SCHEMA_PROVIDER_VERSION_METADATA_KEY = "legolas_julia_schema_provider_version" include("lift.jl") include("constraints.jl") diff --git a/src/schemas.jl b/src/schemas.jl index 6d01bd1..ef10daa 100644 --- a/src/schemas.jl +++ b/src/schemas.jl @@ -98,8 +98,12 @@ end struct UnknownSchemaVersionError <: Exception schema_version::SchemaVersion + schema_provider_name::Union{Missing,Symbol} + schema_provider_version::Union{Missing,VersionNumber} end +UnknownSchemaVersionError(schema_version::SchemaVersion) = UnknownSchemaVersionError(schema_version, missing, missing) + function Base.showerror(io::IO, e::UnknownSchemaVersionError) print(io, """ UnknownSchemaVersionError: encountered unknown Legolas schema version: @@ -110,13 +114,33 @@ function Base.showerror(io::IO, e::UnknownSchemaVersionError) This generally indicates that this schema has not been declared (i.e. the corresponding `@schema` and/or `@version` statements have not been executed) in the current Julia session. + """) + println(io) + + if !ismissing(e.schema_provider_name) + provider_string = string(e.schema_provider_name) + if !ismissing(e.schema_provider_version) + provider_string *= string(" ", e.schema_provider_version) + end + print(io, """ + The table's metadata indicates that the table was created with a schema defined in: - In practice, this can arise if you try to read a Legolas table with a - prescribed schema, but haven't actually loaded the schema definition - (or commonly, haven't loaded the dependency that contains the schema - definition - check the versions of loaded packages/modules to confirm - your environment is as expected). + $(provider_string) + + You likely need to load a compatible version of this package to populate your session with the schema definition. + """) + else + print(io, """ + In practice, this can arise if you try to read a Legolas table with a + prescribed schema, but haven't actually loaded the schema definition + (or commonly, haven't loaded the dependency that contains the schema + definition - check the versions of loaded packages/modules to confirm + your environment is as expected). + """) + end + println(io) + print(io, """ Note that if you're in this particular situation, you can still load the raw table as-is without Legolas (e.g. via `Arrow.Table(path_to_table)`). """) @@ -165,6 +189,24 @@ written via [`Legolas.write`](@ref). """ identifier(sv::SchemaVersion) = throw(UnknownSchemaVersionError(sv)) +""" + Legolas.schema_provider(::SchemaVersion) + +Returns a NamedTuple with keys `name` and `version`. The name is a `Symbol` corresponding to the package which defines the schema version, if known; otherwise `nothing`. Likewise the `version` is a `VersionNumber` or `nothing`. +""" +schema_provider(::SchemaVersion) = (; name=nothing, version=nothing) +# shadow `pkgversion` so we don't fail on pre-1.9 +pkgversion(m::Module) = isdefined(Base, :pkgversion) ? Base.pkgversion(m) : nothing + +# Used in the implementation of `schema_provider`. +function defining_package_version(m::Module) + rootmodule = Base.moduleroot(m) + # Check if this module was defined in a package. + path = pathof(rootmodule) + path === nothing && return (; name=nothing, version=nothing) + return (; name=Symbol(rootmodule), version=pkgversion(rootmodule)) +end + """ Legolas.declared_fields(sv::Legolas.SchemaVersion) @@ -375,7 +417,7 @@ macro schema(schema_name, schema_prefix) schema_prefix isa Symbol || return :(throw(ArgumentError(string("`Prefix` provided to `@schema` is not a valid type name: ", $(Base.Meta.quot(schema_prefix)))))) return quote # This approach provides some safety against accidentally replacing another module's schema's name, - # without making it annoying to reload code/modules in an interactice development context. + # without making it annoying to reload code/modules in an interactive development context. m = $Legolas._schema_declared_in_module(Val(Symbol($schema_name))) if m isa Module && string(m) != string(@__MODULE__) throw(ArgumentError(string("A schema with this name was already declared by a different module: ", m))) @@ -476,6 +518,7 @@ function _generate_schema_version_definitions(schema_version::SchemaVersion, par return quote @inline $Legolas.declared(::$quoted_schema_version_type) = true @inline $Legolas.identifier(::$quoted_schema_version_type) = $identifier_string + $Legolas.schema_provider(::$quoted_schema_version_type) = $Legolas.defining_package_version(@__MODULE__) @inline $Legolas.parent(::$quoted_schema_version_type) = $(Base.Meta.quot(parent)) $Legolas.declared_fields(::$quoted_schema_version_type) = $declared_field_names_types $Legolas.declaration(::$quoted_schema_version_type) = $(Base.Meta.quot(schema_version_declaration)) diff --git a/src/tables.jl b/src/tables.jl index 22aee02..464c854 100644 --- a/src/tables.jl +++ b/src/tables.jl @@ -132,10 +132,16 @@ return `first(parse_identifier(s))` Otherwise, return `nothing`. """ function extract_schema_version(table) + v = extract_metadata(table, LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY) + isnothing(v) && return nothing + return first(parse_identifier(v)) +end + +function extract_metadata(table, key) metadata = Arrow.getmetadata(table) if !isnothing(metadata) for (k, v) in metadata - k == LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY && return first(parse_identifier(v)) + k == key && return v end end return nothing @@ -165,6 +171,15 @@ function read(io_or_path; validate::Bool=true) via `Legolas.read`; is it missing the expected custom metadata and/or the expected \"$LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY\" field? """)) + + provider_name = lift(Symbol, extract_metadata(table, LEGOLAS_SCHEMA_PROVIDER_NAME_METADATA_KEY)) + provider_version = lift(VersionNumber, extract_metadata(table, LEGOLAS_SCHEMA_PROVIDER_VERSION_METADATA_KEY)) + # If we don't have the schema declared in our session, + # then throw an error with all the information we have available about where + # the schema was defined. + if !declared(sv) + throw(UnknownSchemaVersionError(sv, provider_name, provider_version)) + end try Legolas.validate(Tables.schema(table), sv) catch @@ -212,12 +227,14 @@ function write(io_or_path, table, sv::SchemaVersion; validate::Bool=true, @warn "could not determine `Tables.Schema` from table provided to `Legolas.write`; skipping schema validation" end end - schema_metadata = LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => identifier(sv) - if isnothing(metadata) - metadata = (schema_metadata,) - else - metadata = Set(metadata) - push!(metadata, schema_metadata) + metadata = Set{Pair{String,String}}(isnothing(metadata) ? [] : metadata) + push!(metadata, LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => identifier(sv)) + provider = schema_provider(sv) + if !isnothing(provider.name) + push!(metadata, LEGOLAS_SCHEMA_PROVIDER_NAME_METADATA_KEY => string(provider.name)) + end + if !isnothing(provider.version) + push!(metadata, LEGOLAS_SCHEMA_PROVIDER_VERSION_METADATA_KEY => string(provider.version)) end write_arrow(io_or_path, table; metadata=metadata, kwargs...) return table @@ -237,4 +254,3 @@ function tobuffer(args...; kwargs...) seekstart(io) return io end - diff --git a/test/TestProviderPkg/Project.toml b/test/TestProviderPkg/Project.toml new file mode 100644 index 0000000..e03090c --- /dev/null +++ b/test/TestProviderPkg/Project.toml @@ -0,0 +1,6 @@ +name = "TestProviderPkg" +uuid = "0abfdf01-ee0b-4279-9694-f097aec3ad32" +version = "0.1.0" + +[deps] +Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd" diff --git a/test/TestProviderPkg/src/TestProviderPkg.jl b/test/TestProviderPkg/src/TestProviderPkg.jl new file mode 100644 index 0000000..020d43b --- /dev/null +++ b/test/TestProviderPkg/src/TestProviderPkg.jl @@ -0,0 +1,11 @@ +module TestProviderPkg + +using Legolas: @schema, @version + +@schema "test-provider-pkg.foo" Foo + +@version FooV1 begin + a::Int +end + +end # module TestProviderPkg diff --git a/test/runtests.jl b/test/runtests.jl index 2744369..9fd1664 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -4,6 +4,46 @@ using Legolas: @schema, @version, CheckConstraintError, SchemaVersion, SchemaVersionDeclarationError, DeclaredFieldInfo using Accessors using Aqua +using Pkg + +# This test set goes before we load `TestProviderPkg` +@testset "#46: Informative errors when reading unknown schemas from packages" begin + err = Legolas.UnknownSchemaVersionError(Legolas.SchemaVersion("test-provider-pkg.foo", 1), :TestProviderPkg, v"0.1.0") + @test_throws err Legolas.read("test_provider_pkg.arrow") + @test contains(sprint(Base.showerror, err), "TestProviderPkg") + + # Test we can load the table with Arrow.Table as the error message suggests + table = Arrow.Table("test_provider_pkg.arrow") + @test table.a[1] == 1 + @test length(table.a) == 1 + + # Let's test some more error printing while we're here; if we did not have the VersionNumber + # (e.g. since the table was generated on Julia pre-1.9), we should still print a reasonable message: + err = Legolas.UnknownSchemaVersionError(Legolas.SchemaVersion("test-provider-pkg.foo", 1), :TestProviderPkg, missing) + @test contains(sprint(Base.showerror, err), "TestProviderPkg") + + # Test a table that does not have the metadata + err = Legolas.UnknownSchemaVersionError(Legolas.SchemaVersion("test.issue-94-parent", 1), missing, missing) + @test_throws err Legolas.read("issue-94.arrow") + # Still a reasonable message + @test contains(sprint(Base.showerror, err), "UnknownSchemaVersionError: encountered unknown Legolas schema") +end + +# Now load the package, and verify we can write the tables with this metadata +Pkg.develop(; path=joinpath(@__DIR__, "TestProviderPkg")) +using TestProviderPkg + +@testset "#46: Writing informative metadata about packages providing schemas" begin + table = [TestProviderPkg.FooV1(; a=1)] + Legolas.write("test_provider_pkg.arrow", table, TestProviderPkg.FooV1SchemaVersion()) + table = Legolas.read("test_provider_pkg.arrow") + v = Legolas.extract_metadata(table, Legolas.LEGOLAS_SCHEMA_PROVIDER_NAME_METADATA_KEY) + @test v == "TestProviderPkg" + + v = Legolas.extract_metadata(table, Legolas.LEGOLAS_SCHEMA_PROVIDER_VERSION_METADATA_KEY) + # We currently only write the version on 1.9+ where we can use `pkgversion` + @test v == (VERSION >= v"1.9-" ? "0.1.0" : nothing) +end @test_throws SchemaVersionDeclarationError("no prior `@schema` declaration found in current module") @version(TestV1, begin x end) @@ -352,7 +392,9 @@ end @test_throws SchemaVersionDeclarationError("malformed `@version` field expression: f()") @version(ChildV2, begin f() end) end - @test_throws UndefVarError(:UnknownV1) @version(ChildV1 > UnknownV1, begin x end) + # Workaround https://github.com/JuliaLang/julia/issues/54082 + err = v"1.11-" <= VERSION <= v"1.12-" ? UndefVarError(:UnknownV1, Main) : UndefVarError(:UnknownV1) + @test_throws err @version(ChildV1 > UnknownV1, begin x end) undeclared = SchemaVersion("undeclared", 3) diff --git a/test/test_provider_pkg.arrow b/test/test_provider_pkg.arrow new file mode 100644 index 0000000..e9320a4 Binary files /dev/null and b/test/test_provider_pkg.arrow differ