Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Store package names in arrow metadata #122

Merged
merged 22 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Legolas"
uuid = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
authors = ["Beacon Biosignals, Inc."]
version = "0.5.19"
version = "0.5.20"

[deps]
Arrow = "69666777-d1a9-59fb-9406-91d4454c9d45"
Expand All @@ -24,6 +24,7 @@ ArrowTypes = "2.3"
Compat = "3.34, 4"
ConstructionBase = "1.5"
DataFrames = "1"
Pkg = "1"
Tables = "1.4"
Test = "1"
UUIDs = "1"
ericphanson marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -34,8 +35,9 @@ Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"

[targets]
test = ["Accessors", "Aqua", "Compat", "DataFrames", "Test", "UUIDs"]
test = ["Accessors", "Aqua", "Compat", "DataFrames", "Pkg", "Test", "UUIDs"]
1 change: 1 addition & 0 deletions src/Legolas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module Legolas
using Tables, Arrow, UUIDs

# Arrow metadata key whose value is the table's schema version identifier
# (the entry is built as `KEY => identifier(sv)` when a table is written).
const LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY = "legolas_schema_qualified"
# Arrow metadata key whose value names the Julia package that provided the schema
# (the entry is built from `schema_provider(...)` and is only added when that
# result is not `nothing`).
const LEGOLAS_SCHEMA_PROVIDER_METADATA_KEY = "legolas_julia_schema_provider"

include("lift.jl")
include("schemas.jl")
Expand Down
53 changes: 46 additions & 7 deletions src/schemas.jl
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,11 @@

# Exception describing a `SchemaVersion` that is not known in the current session.
struct UnknownSchemaVersionError <: Exception
# The schema version that could not be resolved.
schema_version::SchemaVersion
# Name of the package indicated as defining the schema, or `nothing` when no such
# hint is available; `showerror` uses it to suggest which package to load.
schema_provider::Union{Nothing, Symbol}
end

# Single-argument convenience constructor: builds the error without any
# schema-provider hint (the provider field is set to `nothing`).
function UnknownSchemaVersionError(schema_version::SchemaVersion)
    return UnknownSchemaVersionError(schema_version, nothing)
end

function Base.showerror(io::IO, e::UnknownSchemaVersionError)
print(io, """
UnknownSchemaVersionError: encountered unknown Legolas schema version:
Expand All @@ -110,13 +113,30 @@
This generally indicates that this schema has not been declared (i.e.
the corresponding `@schema` and/or `@version` statements have not been
executed) in the current Julia session.
""")
println(io)

if e.schema_provider !== nothing
print(io, """
The table's metadata indicates that the schema was defined in:

$(e.schema_provider)

You likely need to load this package (`using $(e.schema_provider)`)
to populate your session with the schema definition.
""")
else
print(io, """

Check warning on line 129 in src/schemas.jl

View check run for this annotation

Codecov / codecov/patch

src/schemas.jl#L129

Added line #L129 was not covered by tests
In practice, this can arise if you try to read a Legolas table with a
prescribed schema, but haven't actually loaded the schema definition
(or commonly, haven't loaded the dependency that contains the schema
definition - check the versions of loaded packages/modules to confirm
your environment is as expected).
""")
end
println(io)

In practice, this can arise if you try to read a Legolas table with a
prescribed schema, but haven't actually loaded the schema definition
(or commonly, haven't loaded the dependency that contains the schema
definition - check the versions of loaded packages/modules to confirm
your environment is as expected).

print(io, """
Note that if you're in this particular situation, you can still load the raw
table as-is without Legolas (e.g. via `Arrow.Table(path_to_table)`).
""")
Expand Down Expand Up @@ -165,6 +185,24 @@
"""
function identifier(sv::SchemaVersion)
    # Generic fallback: reaching this method means no more specific `identifier`
    # method exists for `sv`, so it is reported as an unknown schema version.
    throw(UnknownSchemaVersionError(sv))
end

"""
Legolas.schema_provider(::Val{schema_name}) where schema_name

Returns a `Symbol` corresponding to the package which defines the schema, if known.
Otherwise returns `nothing`.
"""
function schema_provider(::Val)
    # Generic fallback: no provider is known for this schema name. More specific
    # methods (dispatching on `Val{schema_name}`) take precedence when defined.
    return nothing
end
ericphanson marked this conversation as resolved.
Show resolved Hide resolved

# Helper backing `schema_provider`: maps a module to the name of the package that
# its root module belongs to, or to `nothing` when there is no such package.
function defining_package(m::Module)
    root = Base.moduleroot(m)
    # A root module without a `pathof` result was not defined in a package
    # (e.g. it was declared interactively), so there is no package name to report.
    return pathof(root) === nothing ? nothing : Symbol(root)
end

"""
Legolas.declared_fields(sv::Legolas.SchemaVersion)

Expand Down Expand Up @@ -375,12 +413,13 @@
schema_prefix isa Symbol || return :(throw(ArgumentError(string("`Prefix` provided to `@schema` is not a valid type name: ", $(Base.Meta.quot(schema_prefix))))))
return quote
# This approach provides some safety against accidentally replacing another module's schema's name,
# without making it annoying to reload code/modules in an interactice development context.
# without making it annoying to reload code/modules in an interactive development context.
m = $Legolas._schema_declared_in_module(Val(Symbol($schema_name)))
if m isa Module && string(m) != string(@__MODULE__)
throw(ArgumentError(string("A schema with this name was already declared by a different module: ", m)))
else
$Legolas._schema_declared_in_module(::Val{Symbol($schema_name)}) = @__MODULE__
$Legolas.schema_provider(::Val{Symbol($schema_name)}) = $Legolas.defining_package(@__MODULE__)
ericphanson marked this conversation as resolved.
Show resolved Hide resolved
if !isdefined(@__MODULE__, :__legolas_schema_name_from_prefix__)
$(esc(:__legolas_schema_name_from_prefix__))(::Val) = nothing
end
Expand Down
28 changes: 25 additions & 3 deletions src/tables.jl
Original file line number Diff line number Diff line change
Expand Up @@ -132,10 +132,16 @@
Otherwise, return `nothing`.
"""
function extract_schema_version(table)
    # Look up the qualified schema identifier stored in the table's metadata.
    qualified = extract_metadata(table, LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY)
    if isnothing(qualified)
        return nothing
    end
    # Only the first parsed element is returned to the caller.
    parsed = parse_identifier(qualified)
    return first(parsed)
end

function extract_metadata(table, key)
metadata = Arrow.getmetadata(table)
if !isnothing(metadata)
for (k, v) in metadata
k == LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY && return first(parse_identifier(v))
k == key && return v
end
end
return nothing
Expand Down Expand Up @@ -165,6 +171,14 @@
via `Legolas.read`; is it missing the expected custom metadata and/or the
expected \"$LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY\" field?
"""))

provider = extract_metadata(table, LEGOLAS_SCHEMA_PROVIDER_METADATA_KEY)
# If we don't have the schema defined in our session (i.e. `Legolas.schema_provider` is `nothing`),
# but we do have a hint of where the schema was defined via the metadata, then throw an informative
# error. If we don't error now, we will throw an `UnknownSchemaVersionError` with less information later.
if Legolas.schema_provider(Val(Legolas.name(sv))) === nothing && provider !== nothing
ericphanson marked this conversation as resolved.
Show resolved Hide resolved
throw(UnknownSchemaVersionError(sv, Symbol(provider)))
end
try
Legolas.validate(Tables.schema(table), sv)
catch
Expand Down Expand Up @@ -213,11 +227,20 @@
end
end
schema_metadata = LEGOLAS_SCHEMA_QUALIFIED_METADATA_KEY => identifier(sv)
provider = schema_provider(Val(Legolas.name(sv)))
provider_metadata = LEGOLAS_SCHEMA_PROVIDER_METADATA_KEY => provider
if isnothing(metadata)
metadata = (schema_metadata,)
if isnothing(provider)
metadata = (schema_metadata,)
else
metadata = (schema_metadata, provider_metadata)
end
else
metadata = Set(metadata)
push!(metadata, schema_metadata)
if !isnothing(provider)
push!(metadata, provider_metadata)

Check warning on line 242 in src/tables.jl

View check run for this annotation

Codecov / codecov/patch

src/tables.jl#L242

Added line #L242 was not covered by tests
end
end
write_arrow(io_or_path, table; metadata=metadata, kwargs...)
return table
Expand All @@ -237,4 +260,3 @@
seekstart(io)
return io
end

6 changes: 6 additions & 0 deletions test/TestProviderPkg/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
name = "TestProviderPkg"
uuid = "0abfdf01-ee0b-4279-9694-f097aec3ad32"
version = "0.1.0"

[deps]
Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
11 changes: 11 additions & 0 deletions test/TestProviderPkg/src/TestProviderPkg.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Minimal package used by the Legolas test suite: it declares a single schema so
# that tests can check which package is recorded as that schema's provider.
module TestProviderPkg

using Legolas: @schema, @version

# Declare the schema named "test-provider-pkg.foo", using `Foo` as its prefix.
@schema "test-provider-pkg.foo" Foo

# Version 1 of the schema, declaring a single field `a::Int`.
@version FooV1 begin
a::Int
end

end # module TestProviderPkg
20 changes: 20 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,26 @@ using Legolas, Test, DataFrames, Arrow, UUIDs
using Legolas: SchemaVersion, @schema, @version, SchemaVersionDeclarationError, DeclaredFieldInfo
using Accessors
using Aqua
using Pkg

# This test set must run before `TestProviderPkg` is loaded, so that the schema
# stored in the fixture file is still unknown to the session at this point.
@testset "#46: Informative errors when reading unknown schemas from packages" begin
    unknown_sv = Legolas.SchemaVersion("test-provider-pkg.foo", 1)
    expected = Legolas.UnknownSchemaVersionError(unknown_sv, :TestProviderPkg)
    # Reading the fixture must fail with an error that carries the provider hint.
    @test_throws expected Legolas.read("test_provider_pkg.arrow")
    # The rendered error message must mention the package to load.
    message = sprint(Base.showerror, expected)
    @test contains(message, "TestProviderPkg")
end

# Now load the package, and verify we can write the tables with this metadata.
# `TestProviderPkg` is a local package in a directory next to this file (see the
# `path` argument below), so it is `develop`ed before `using` it.
Pkg.develop(; path=joinpath(@__DIR__, "TestProviderPkg"))
using TestProviderPkg

@testset "#46: Writing informative metadata about packages providing schemas" begin
    # Write into a scratch directory instead of the working directory: the file
    # name `test_provider_pkg.arrow` is also the checked-in fixture read by the
    # "unknown schemas" test set, and writing to it in place would overwrite that
    # fixture on every test run. The temporary directory is removed on process exit.
    path = joinpath(mktempdir(), "test_provider_pkg.arrow")
    rows = [TestProviderPkg.FooV1(; a=1)]
    Legolas.write(path, rows, TestProviderPkg.FooV1SchemaVersion())
    # Round-trip through `Legolas.read` and check that the provider package name
    # was stored in the table's metadata.
    roundtripped = Legolas.read(path)
    provider = Legolas.extract_metadata(roundtripped,
                                        Legolas.LEGOLAS_SCHEMA_PROVIDER_METADATA_KEY)
    @test provider == "TestProviderPkg"
end

@test_throws SchemaVersionDeclarationError("no prior `@schema` declaration found in current module") @version(TestV1, begin x end)

Expand Down
Binary file added test/test_provider_pkg.arrow
Binary file not shown.
Loading