Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

handle conversion of custom use types for files #4225

Merged
merged 7 commits into from
Jan 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions app/actors/hyrax/actors/file_actor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def perform_ingest_file_through_active_fedora(io)
def perform_ingest_file_through_valkyrie(io)
# Skip versioning because versions will be minted by VersionCommitter as necessary during save_characterize_and_record_committer.
unsaved_file_metadata = io.to_file_metadata
unsaved_file_metadata.use = relation
unsaved_file_metadata.type = [relation]
begin
saved_file_metadata = file_metadata_builder.create(io_wrapper: io, file_metadata: unsaved_file_metadata, file_set: file_set)
rescue StandardError => e # Handle error persisting file metadata
Expand All @@ -103,21 +103,26 @@ def normalize_relation_for_active_fedora(relation)
return relation if relation.is_a? Symbol
return relation.to_sym if relation.respond_to? :to_sym

# TODO: whereever these are set, they should use Valkyrie::Vocab::PCDMUse... making the casecmp unnecessary
return :original_file if relation.to_s.casecmp(Valkyrie::Vocab::PCDMUse.original_file.to_s)
return :extracted_file if relation.to_s.casecmp(Valkyrie::Vocab::PCDMUse.extracted_file.to_s)
return :thumbnail_file if relation.to_s.casecmp(Valkyrie::Vocab::PCDMUse.thumbnail_file.to_s)
# TODO: wherever these are set, they should use FileSet.*_use... making the casecmp unnecessary
return :original_file if relation.to_s.casecmp(Hyrax::FileSet::ORIGINAL_FILE_USE.to_s)
return :extracted_file if relation.to_s.casecmp(Hyrax::FileSet::EXTRACTED_TEXT_USE.to_s)
return :thumbnail_file if relation.to_s.casecmp(Hyrax::FileSet::THUMBNAIL_USE.to_s)
:original_file
end

# Maps a relation name to the use constant declared on Hyrax::FileSet.
#
# The relation is compared by name after `to_s.to_sym`, so Symbols and Strings
# are treated alike. Any value that is not one of the three recognised names
# (including nil) falls back to the original-file use.
#
# @param relation [#to_s] relation name, e.g. :original_file, 'thumbnail_file'
# @return [Object] one of Hyrax::FileSet::ORIGINAL_FILE_USE, EXTRACTED_TEXT_USE or THUMBNAIL_USE
def normalize_relation_for_valkyrie(relation)
  # TODO: When this is fully switched to valkyrie, this should probably be removed and relation should always be passed
  #   in as a valid URI already set to the file's use
  case relation.to_s.to_sym
  when :extracted_file
    # Constant lookup needs `::`; `Hyrax::FileSet.EXTRACTED_TEXT_USE` would be a method call.
    Hyrax::FileSet::EXTRACTED_TEXT_USE
  when :thumbnail_file
    Hyrax::FileSet::THUMBNAIL_USE
  else
    # Covers :original_file as well as every unrecognised relation.
    Hyrax::FileSet::ORIGINAL_FILE_USE
  end
end
end
end
Expand Down
10 changes: 5 additions & 5 deletions app/models/hyrax/file_metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class FileMetadata < Valkyrie::Resource
attribute :label, ::Valkyrie::Types::Set
attribute :original_filename, ::Valkyrie::Types::Set
attribute :mime_type, ::Valkyrie::Types::Set
attribute :use, ::Valkyrie::Types::Set # AF::File type
attribute :type, ::Valkyrie::Types::Set # AF::File type
straleyb marked this conversation as resolved.
Show resolved Hide resolved
attribute :content, ::Valkyrie::Types::Set

# attributes set by fits
Expand Down Expand Up @@ -76,19 +76,19 @@ def self.for(file:)
new(label: file.original_filename,
original_filename: file.original_filename,
mime_type: file.content_type,
use: file.try(:use) || [::Valkyrie::Vocab::PCDMUse.OriginalFile])
type: file.try(:type) || [Hyrax::FileSet::ORIGINAL_FILE_USE])
end

def original_file?
use.include?(::Valkyrie::Vocab::PCDMUse.OriginalFile)
type.include?(Hyrax::FileSet::ORIGINAL_FILE_USE)
end

def thumbnail_file?
use.include?(::Valkyrie::Vocab::PCDMUse.ThumbnailImage)
type.include?(Hyrax::FileSet::THUMBNAIL_USE)
end

def extracted_file?
use.include?(::Valkyrie::Vocab::PCDMUse.ExtractedImage)
type.include?(Hyrax::FileSet::EXTRACTED_TEXT_USE)
end

def title
Expand Down
35 changes: 35 additions & 0 deletions app/models/hyrax/file_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ module Hyrax
class FileSet < Hyrax::Resource
include Hyrax::Schema(:core_metadata)

ORIGINAL_FILE_USE = ::Valkyrie::Vocab::PCDMUse.OriginalFile
EXTRACTED_TEXT_USE = ::Valkyrie::Vocab::PCDMUse.ExtractedText
THUMBNAIL_USE = ::Valkyrie::Vocab::PCDMUse.Thumbnail

attribute :file_ids, Valkyrie::Types::Array.of(Valkyrie::Types::ID) # id for FileMetadata resources
attribute :original_file_id, Valkyrie::Types::ID # id for FileMetadata resource
attribute :thumbnail_id, Valkyrie::Types::ID # id for FileMetadata resource
Expand All @@ -24,5 +28,36 @@ def pcdm_object?
def file_set?
true
end

##
# Gives file metadata for the file filling the http://pcdm.org/use#OriginalFile use
# @return [FileMetadata] the FileMetadata resource of the original file
def original_file
  # Several files may carry this use; only the first one found is returned (nil when none).
  matches = filter_files_by_type(Hyrax::FileSet::ORIGINAL_FILE_USE)
  matches.first
end

##
# Gives file metadata for the file filling the http://pcdm.org/use#ExtractedText use
# @return [FileMetadata] the FileMetadata resource of the extracted text
def extracted_text
  # Several files may carry this use; only the first one found is returned (nil when none).
  matches = filter_files_by_type(Hyrax::FileSet::EXTRACTED_TEXT_USE)
  matches.first
end

##
# Gives file metadata for the file filling the http://pcdm.org/use#Thumbnail use
# @return [FileMetadata] the FileMetadata resource of the thumbnail
def thumbnail
  # Several files may carry this use; only the first one found is returned (nil when none).
  matches = filter_files_by_type(Hyrax::FileSet::THUMBNAIL_USE)
  matches.first
end

##
# Gives file metadata for files that have the requested RDF Type for use
# @param uri [RDF::URI] the RDF Type (use) to filter by
# @return [Enumerable<FileMetadata>] the FileMetadata resources
# @example
# filter_files_by_type(::RDF::URI("http://pcdm.org/use#ExtractedText"))
def filter_files_by_type(uri)
  # Delegates to the registered custom query, scoping the lookup to this resource.
  custom_queries = Hyrax.query_service.custom_queries
  custom_queries.find_many_file_metadata_by_use(resource: self, use: uri)
end
end
end
2 changes: 1 addition & 1 deletion app/models/job_io_wrapper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def to_file_metadata
Hyrax::FileMetadata.new(label: original_name,
original_filename: original_name,
mime_type: mime_type,
use: [Valkyrie::Vocab::PCDMUse.OriginalFile])
use: [Hyrax::FileSet::ORIGINAL_FILE_USE])
end

# The magic that switches *once* between local filepath and CarrierWave file
Expand Down
10 changes: 10 additions & 0 deletions app/services/hyrax/custom_queries/find_file_metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ def find_many_file_metadata_by_ids(ids:)
results = query_service.find_many_by_ids(ids: ids)
results.select { |resource| resource.is_a? Hyrax::FileMetadata }
end

# Find file metadata for files within a resource that have the requested use.
# @param use [RDF::URI] uri for the desired use Type
# @return [Array<Hyrax::FileMetadata>] or empty array if there are no files with the requested use
# @example
# Hyrax.query_service.custom_queries.find_many_file_metadata_by_use(resource: file_set, use: ::RDF::URI("http://pcdm.org/use#ExtractedText"))
def find_many_file_metadata_by_use(resource:, use:)
  # Resolve every file id on the resource first, then keep only the file
  # metadata whose `type` collection contains the requested use.
  candidates = find_many_file_metadata_by_ids(ids: resource.file_ids)
  candidates.select { |file_metadata| file_metadata.type.include?(use) }
end
end
end
end
2 changes: 1 addition & 1 deletion app/services/hyrax/custom_queries/navigators/find_files.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class FindFiles
# @example
# Hyrax.query_service.custom_queries.find_files(file_set: file_set_resource)
# Hyrax.query_service.custom_queries.find_original_file(file_set: file_set_resource)
# Hyrax.query_service.custom_queries.find_extracted_text_file(file_set: file_set_resource)
# Hyrax.query_service.custom_queries.find_extracted_text(file_set: file_set_resource)
# Hyrax.query_service.custom_queries.find_thumbnail(file_set: file_set_resource)

def self.queries
Expand Down
50 changes: 22 additions & 28 deletions lib/wings/active_fedora_converter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -120,54 +120,48 @@ def initialize(uri = RDF::Node.new, _parent = ActiveTriples::Resource.new)
def convert_members(af_object)
return unless resource.respond_to?(:member_ids) && resource.member_ids
# TODO: It would be better to find a way to add the members without resuming all the member AF objects
ordered_members = []
resource.member_ids.each do |valkyrie_id|
ordered_members << ActiveFedora::Base.find(valkyrie_id.id)
end
af_object.ordered_members = ordered_members
af_object.ordered_members = resource.member_ids.map { |valkyrie_id| ActiveFedora::Base.find(valkyrie_id.id) }
end

def convert_member_of_collections(af_object)
return unless resource.respond_to?(:member_of_collection_ids) && resource.member_of_collection_ids
# TODO: It would be better to find a way to set the parent collections without resuming all the collection AF objects
member_of_collections = []
resource.member_of_collection_ids.each do |valkyrie_id|
member_of_collections << ActiveFedora::Base.find(valkyrie_id.id)
end
af_object.member_of_collections = member_of_collections
af_object.member_of_collections = resource.member_of_collection_ids.map { |valkyrie_id| ActiveFedora::Base.find(valkyrie_id.id) }
end

def convert_files(af_object)
convert_file(af_object, :original_file)
convert_file(af_object, :thumbnail_file)
convert_file(af_object, :extracted_text_file)
# TODO: How to identify and convert files added with URI relationships (e.g. Valkyrie::Vocab::PCDMUse.Transcript)
# TODO: How to identify and convert customizations that add file relationships (e.g. :remastered)
return unless resource.respond_to? :file_ids
af_object.files = resource.file_ids.map do |fid|
pcdm_file = Hydra::PCDM::File.new(fid.id)
assign_association_target(af_object, pcdm_file)
end
end

def convert_file(af_object, relation)
resource_relation = "#{relation}_ids".to_sym
related_file_ids = resource.try(resource_relation)
return unless related_file_ids.present?
pcdm_file = Hydra::PCDM::File.new(related_file_ids.first.to_s)
af_object.association(relation).target = pcdm_file
# Points the matching association on af_object at pcdm_file, chosen by the
# use URIs present in the file's metadata node types.
#
# @param af_object [Object] object responding to `association(name)`
# @param pcdm_file [Object] file responding to `metadata_node.type`
# @return [Object] pcdm_file, whether or not an association was assigned
def assign_association_target(af_object, pcdm_file)
  node_types = pcdm_file.metadata_node.type
  # Checked in this order; the first use URI found among the node's types wins.
  relation_by_use = {
    'http://pcdm.org/use#OriginalFile' => :original_file,
    'http://pcdm.org/use#ExtractedText' => :extracted_text,
    'http://pcdm.org/use#Thumbnail' => :thumbnail
  }
  _use, relation = relation_by_use.find { |use, _relation| node_types.include?(RDF::URI.new(use)) }
  af_object.association(relation).target = pcdm_file if relation
  pcdm_file
end

# Normalizes the attributes parsed from the resource
# (This ensures that scalar values are passed to the constructor for the
# ActiveFedora::Base Class)
# @return [Hash]
def normal_attributes
normalized = {}
attributes.each_pair do |attr, value|
attributes.each_with_object({}) do |(attr, value), hash|
property = active_fedora_class.properties[attr.to_s]
# This handles some cases where the attributes do not directly map to an
# RDF property value
normalized[attr] = value
# This handles some cases where the attributes do not directly map to an RDF property value
hash[attr] = value
next if property.nil?
normalized[attr] = Array.wrap(value) if property.multiple?
hash[attr] = Array.wrap(value) if property.multiple?
end
normalized
end

def apply_depositor_to(af_object)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,11 +132,6 @@ def in_collection_ids(valkyrie: false)
in_collections(valkyrie: valkyrie).map(&:id)
end

def original_file
af_object = Wings::ActiveFedoraConverter.new(resource: self).convert
af_object.original_file
end

##
# @return [Boolean] whether this instance is an audio.
def audio?
Expand Down
6 changes: 3 additions & 3 deletions lib/wings/hydra/works/services/add_file_to_file_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ def association_type(type)
end

def type_to_association_type(type)
return :original_file if type.to_s.casecmp?(Valkyrie::Vocab::PCDMUse.original_file.to_s)
return :extracted_text if type.to_s.casecmp?(Valkyrie::Vocab::PCDMUse.extracted_text.to_s)
return :thumbnail if type.to_s.casecmp?(Valkyrie::Vocab::PCDMUse.thumbnail.to_s)
return :original_file if type.to_s.casecmp?(Hyrax::FileSet::ORIGINAL_FILE_USE.to_s)
return :extracted_text if type.to_s.casecmp?(Hyrax::FileSet::EXTRACTED_TEXT_USE.to_s)
return :thumbnail if type.to_s.casecmp?(Hyrax::FileSet::THUMBNAIL_USE.to_s)
end

def type_to_rdf_uri(type)
Expand Down
20 changes: 18 additions & 2 deletions lib/wings/services/custom_queries/find_file_metadata.rb
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ class FindFileMetadata
def self.queries
[:find_file_metadata_by,
:find_file_metadata_by_alternate_identifier,
:find_many_file_metadata_by_ids]
:find_many_file_metadata_by_ids,
:find_many_file_metadata_by_use]
end

def initialize(query_service:)
Expand Down Expand Up @@ -53,7 +54,8 @@ def find_file_metadata_by_alternate_identifier(alternate_identifier:, use_valkyr

# Find an array of file metadata using Valkyrie IDs, and map them to Hyrax::FileMetadata maintaining order based on given ids
# @param ids [Array<Valkyrie::ID, String>]
# @return [Array<Hyrax::FileMetadata>] or empty array if there are no ids or none of the ids map to Hyrax::FileMetadata
# @param use_valkyrie [boolean] defaults to true; optionally return ActiveFedora::File objects if false
# @return [Array<Hyrax::FileMetadata, Hydra::PCDM::File>] or empty array if there are no ids or none of the ids map to Hyrax::FileMetadata
# NOTE: Ignores non-existent ids and ids for non-file metadata resources.
def find_many_file_metadata_by_ids(ids:, use_valkyrie: true)
results = []
Expand All @@ -68,6 +70,20 @@ def find_many_file_metadata_by_ids(ids:, use_valkyrie: true)
end
results
end

##
# Find file metadata for files within a resource that have the requested use.
# @param use [RDF::URI] uri for the desired use Type
# @param use_valkyrie [boolean] defaults to true; optionally return ActiveFedora::File objects if false
# @return [Array<Hyrax::FileMetadata, Hydra::PCDM::File>] or empty array if there are no files with the requested use
# @example
# Hyrax.query_service.custom_queries.find_many_file_metadata_by_use(resource: file_set, use: ::RDF::URI("http://pcdm.org/use#ExtractedText"))
def find_many_file_metadata_by_use(resource:, use:, use_valkyrie: true)
  # Always load the ActiveFedora-side files first: the use is read from each file's metadata node.
  af_files = find_many_file_metadata_by_ids(ids: resource.file_ids, use_valkyrie: false)
  af_files.keep_if { |af_file| af_file.metadata_node.type.include?(use) }
  if use_valkyrie == false
    # Only an explicit `false` skips conversion.
    af_files
  else
    af_files.map { |af_file| Wings::FileConverterService.af_file_to_resource(af_file: af_file) }
  end
end
end
end
end
5 changes: 3 additions & 2 deletions lib/wings/services/file_converter_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def base_af_file_attributes(af_file:)
content: af_file.content,
size: af_file.size,
original_filename: [af_file.original_name],
mime_type: [af_file.mime_type] }
mime_type: [af_file.mime_type],
type: af_file.metadata_node.type.to_a }
end

# extracts attributes that come from the metadata_node
Expand All @@ -47,7 +48,7 @@ def metadata_node_to_attributes(metadata_node:, attributes:)

def valkyrie_attributes_to_af_file(attributes:, af_file:)
attributes.each do |k, v|
next if [:id, :content].include? k
next if [:id, :content, :type].include? k
mname = (k.to_s + '=').to_sym
if af_file.respond_to? mname
af_file.send(mname, v)
Expand Down
2 changes: 1 addition & 1 deletion lib/wings/services/file_metadata_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def initialize(storage_adapter:, persister:)
# @param file_set [Valkyrie::Resource, Hydra::Works::FileSet] the associated FileSet # TODO: WINGS - Remove Hydra::Works::FileSet as a potential type when valkyrization is complete.
# @return [Hyrax::FileMetadata] the persisted metadata file_metadata that represents the file
def create(io_wrapper:, file_metadata:, file_set:)
io_wrapper = build_file(io_wrapper, file_metadata.use)
io_wrapper = build_file(io_wrapper, file_metadata.type)
file_set.save unless file_set.persisted?
file_metadata.id = ::Valkyrie::ID.new(assign_id)
file_metadata.file_set_id = file_set.id
Expand Down
9 changes: 4 additions & 5 deletions lib/wings/valkyrie/storage/active_fedora.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,14 @@
module Wings::Storage
# Implements the DataMapper Pattern to store binary data in fedora following the ActiveFedora structures
class ActiveFedora < Valkyrie::Storage::Fedora
# @param file [IO]
# @param file [Wings::FileMetadataBuilder::IoDecorator]
# @param original_filename [String]
# @param resource [Valkyrie::Resource]
# @param content_type [String] content type of file (e.g. 'image/tiff') (default='application/octet-stream')
# @param resource_uri_transformer [Lambda] transforms the resource's id (e.g. 'DDS78RK') into a uri (optional)
# @param resource [Hyrax::FileMetadata] FileMetadata resource
# @param resource_uri_transformer [Proc] transforms the resource's id (e.g. 'DDS78RK') into a uri (optional)
# @param extra_arguments [Hash] additional arguments which may be passed to other adapters
# @return [Valkyrie::StorageAdapter::StreamFile]
def upload(file:, original_filename:, resource:, resource_uri_transformer: default_resource_uri_transformer, **_extra_arguments) # rubocop:disable Lint/UnusedMethodArgument
Wings::Works::AddFileToFileSet.call(file_set: file_set(resource), file: file, type: resource.use)
Wings::Works::AddFileToFileSet.call(file_set: file_set(resource), file: file, type: resource.type)
identifier = resource_uri_transformer.call(resource, base_url)
find_by(id: Valkyrie::ID.new(identifier.to_s.sub(/^.+\/\//, PROTOCOL)))
end
Expand Down
2 changes: 1 addition & 1 deletion spec/actors/hyrax/actors/file_actor_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ class FileSetWithExtras < FileSet
context 'when using valkyrie' do
let(:user) { create(:user) }
let(:file_set) { create(:file_set) }
let(:relation) { Valkyrie::Vocab::PCDMUse.OriginalFile }
let(:relation) { Hyrax::FileSet::ORIGINAL_FILE_USE }
let(:actor) { described_class.new(file_set, relation, user, use_valkyrie: true) }
let(:fixture) { fixture_file_upload('/world.png', 'image/png') }
let(:huf) { Hyrax::UploadedFile.new(user: user, file: fixture) }
Expand Down
Loading