Skip to content

Commit

Permalink
Merge pull request #454 from samvera-labs/accurate-relationship-forming
Browse files Browse the repository at this point in the history
accurate-relationship-forming
  • Loading branch information
alishaevn authored Apr 8, 2022
2 parents a493070 + 7dc4ca2 commit 8d0c632
Show file tree
Hide file tree
Showing 8 changed files with 63 additions and 34 deletions.
7 changes: 4 additions & 3 deletions app/factories/bulkrax/object_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ class ObjectFactory
include DynamicRecordLookup

define_model_callbacks :save, :create
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id

# rubocop:disable Metrics/ParameterLists
def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false)
def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, importer_run_id: nil, update_files: false)
@attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
@replace_files = replace_files
@update_files = update_files
Expand All @@ -19,6 +19,7 @@ def initialize(attributes:, source_identifier_value:, work_identifier:, related_
@related_parents_parsed_mapping = related_parents_parsed_mapping
@source_identifier_value = source_identifier_value
@klass = klass || Bulkrax.default_work_type.constantize
@importer_run_id = importer_run_id
end
# rubocop:enable Metrics/ParameterLists

Expand Down Expand Up @@ -153,7 +154,7 @@ def update_collection(attrs)

# This method is heavily inspired by Hyrax's AttachFilesToWorkJob
def create_file_set(attrs)
work = find_record(attributes[related_parents_parsed_mapping].first)
_, work = find_record(attributes[related_parents_parsed_mapping].first, importer_run_id)
work_permissions = work.permissions.map(&:to_hash)
attrs = clean_attrs(attrs)
file_set_attrs = attrs.slice(*object.attributes.keys)
Expand Down
19 changes: 11 additions & 8 deletions app/jobs/bulkrax/create_relationships_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ def perform(parent_identifier:, importer_run_id:)
end.sort_by(&:order)

@importer_run_id = importer_run_id
@parent_record = find_record(parent_identifier)
@parent_entry, @parent_record = find_record(parent_identifier, importer_run_id)
@child_records = { works: [], collections: [] }
pending_relationships.each do |rel|
raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
child_record = find_record(rel.child_id)
_, child_record = find_record(rel.child_id, importer_run_id)
child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
end

Expand All @@ -53,9 +53,9 @@ def perform(parent_identifier:, importer_run_id:)
return false # stop current job from continuing to run after rescheduling
end

@parent_entry = Bulkrax::Entry.where(identifier: parent_identifier,
importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
importerexporter_type: "Bulkrax::Importer").first
@parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
importerexporter_type: "Bulkrax::Importer").first
create_relationships
pending_relationships.each(&:destroy)
rescue ::StandardError => e
Expand Down Expand Up @@ -91,7 +91,8 @@ def collection_parent_work_child
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
replace_files: false,
user: user,
klass: child_record.class
klass: child_record.class,
importer_run_id: importer_run_id
).run
# TODO: add counters for :processed_parents and :failed_parents
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
Expand All @@ -109,7 +110,8 @@ def collection_parent_collection_child
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
replace_files: false,
user: user,
klass: parent_record.class
klass: parent_record.class,
importer_run_id: importer_run_id
).run
# TODO: add counters for :processed_parents and :failed_parents
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
Expand All @@ -132,7 +134,8 @@ def work_parent_work_child
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
replace_files: false,
user: user,
klass: parent_record.class
klass: parent_record.class,
importer_run_id: importer_run_id
).run
# TODO: add counters for :processed_parents and :failed_parents
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
Expand Down
6 changes: 5 additions & 1 deletion app/jobs/bulkrax/import_file_set_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ class ImportFileSetJob < ApplicationJob

queue_as :import

attr_reader :importer_run_id

def perform(entry_id, importer_run_id)
@importer_run_id = importer_run_id
entry = Entry.find(entry_id)
parent_identifier = entry.raw_metadata[entry.related_parents_raw_mapping]&.strip

Expand Down Expand Up @@ -63,7 +66,8 @@ def check_parent_is_a_work!(parent_identifier)
end

def find_parent_record(parent_identifier)
@parent_record ||= find_record(parent_identifier)
@parent_record ||= find_record(parent_identifier, importer_run_id)
@parent_record = parent_record.last if parent_record.is_a? Array
end
end
end
16 changes: 12 additions & 4 deletions app/models/concerns/bulkrax/dynamic_record_lookup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,25 @@ module DynamicRecordLookup
# has the provided identifier.
#
# @param identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
# @return [Work, Collection, nil] Work or Collection if found, otherwise nil
def find_record(identifier)
record = Entry.find_by(identifier: identifier)
# @param importer_run_id [Number] ID of the current_run of this Importer Job
# @return [Entry, nil], [Work, Collection, nil] Entry if found, otherwise nil and a Work or Collection if found, otherwise nil
def find_record(identifier, importer_run_id = nil)
# check for our entry in our current importer first
importer_id = ImporterRun.find(importer_run_id).importer_id
record = Entry.find_by(identifier: identifier, importerexporter_id: importer_id) || Entry.find_by(identifier: identifier)

# TODO(alishaevn): discuss whether we are only looking for Collection models here
# use ActiveFedora::Base.find(identifier) instead?
record ||= ::Collection.where(id: identifier).first # rubocop:disable Rails/FindBy
if record.blank?
available_work_types.each do |work_type|
record ||= work_type.where(id: identifier).first # rubocop:disable Rails/FindBy
end
end

record.is_a?(Entry) ? record.factory.find : record
# return the found entry here instead of searching for it again in the CreateRelationshipsJob
# also accounts for when the found entry isn't a part of this importer
record.is_a?(Entry) ? [record, record.factory.find] : [nil, record]
end

# Check if the record is a Work
Expand Down
9 changes: 8 additions & 1 deletion app/models/concerns/bulkrax/file_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def parsed_remote_files
end

def new_remote_files
return if object.is_a? FileSet

@new_remote_files ||= if object.present? && object.file_sets.present?
parsed_remote_files.select do |file|
# is the url valid?
Expand Down Expand Up @@ -101,7 +103,12 @@ def set_removed_filesets
end

def local_file_sets
@local_file_sets ||= object&.ordered_file_sets
@local_file_sets ||= ordered_file_sets
end

def ordered_file_sets
# OVERRIDE Hyrda-works 1.2.0 - this method was deprecated in v1.0
object&.ordered_members.to_a.select(&:file_set?)
end

def import_files
Expand Down
1 change: 1 addition & 0 deletions app/models/concerns/bulkrax/import_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def factory
replace_files: replace_files,
user: user,
klass: factory_class,
importer_run_id: importerexporter.last_run.id,
update_files: update_files)
end

Expand Down
15 changes: 9 additions & 6 deletions spec/jobs/bulkrax/create_relationships_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ module Bulkrax

before do
allow(::Hyrax.config).to receive(:curation_concerns).and_return([Work])
allow(Entry).to receive(:find_by).with(identifier: child_entry.identifier).and_return(child_entry)
allow(Entry).to receive(:find_by).with(identifier: parent_entry.identifier).and_return(parent_entry)
allow(Entry).to receive(:find_by).with(identifier: child_entry.identifier, importerexporter_id: importer.id).and_return(child_entry)
allow(Entry).to receive(:find_by).with(identifier: parent_entry.identifier, importerexporter_id: importer.id).and_return(parent_entry)
allow(parent_entry).to receive(:factory).and_return(parent_factory)
allow(child_entry).to receive(:factory).and_return(child_factory)
end
Expand All @@ -45,7 +45,8 @@ module Bulkrax
id: child_record.id,
member_of_collections_attributes: { 0 => { id: parent_record.id } }
},
klass: child_record.class
klass: child_record.class,
importer_run_id: importer.current_run.id
)
end

Expand Down Expand Up @@ -150,7 +151,8 @@ module Bulkrax
id: parent_record.id,
child_collection_id: child_record.id
},
klass: parent_record.class
klass: parent_record.class,
importer_run_id: importer.current_run.id
)
end

Expand Down Expand Up @@ -249,7 +251,8 @@ module Bulkrax
id: parent_record.id,
work_members_attributes: { 0 => { id: child_record.id } }
},
klass: parent_record.class
klass: parent_record.class,
importer_run_id: importer.current_run.id
)
end

Expand Down Expand Up @@ -366,7 +369,7 @@ module Bulkrax
importer_run_id: importer.current_run.id
)

expect(importer.last_run.failed_relationships).to eq(1)
expect(Bulkrax::Importer.find(importer.id).last_run.failed_relationships).to eq(1)
end
end

Expand Down
24 changes: 13 additions & 11 deletions spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
module Bulkrax
RSpec.shared_examples 'dynamic record lookup' do
let(:importer) { FactoryBot.create(:bulkrax_importer_csv_complex) }
let(:importer_id) { importer.id }
let(:importer_run_id) { importer.current_run.id }

before do
allow(::Hyrax.config).to receive(:curation_concerns).and_return([Work])
Expand All @@ -19,11 +21,11 @@ module Bulkrax
let(:source_identifier) { 'bulkrax_identifier_1' }

it 'looks through entries, collections, and all work types' do
expect(Entry).to receive(:find_by).with(identifier: source_identifier).once
expect(Entry).to receive(:find_by).with(identifier: source_identifier, importerexporter_id: importer_id).once
expect(::Collection).to receive(:where).with(id: source_identifier).once.and_return([])
expect(::Work).to receive(:where).with(id: source_identifier).once.and_return([])

subject.find_record(source_identifier)
subject.find_record(source_identifier, importer_run_id)
end

context 'when an entry is found' do
Expand All @@ -32,27 +34,27 @@ module Bulkrax
let(:record) { instance_double(::Work, title: ["Found through Entry's factory"]) }

before do
allow(Entry).to receive(:find_by).with(identifier: source_identifier).and_return(entry)
allow(Entry).to receive(:find_by).with(identifier: source_identifier, importerexporter_id: importer_id).and_return(entry)
allow(entry).to receive(:factory).and_return(factory)
end

it "returns the entry's record" do
expect(subject.find_record(source_identifier)).to eq(record)
expect(subject.find_record(source_identifier, importer_run_id)[1]).to eq(record)
end

it "uses the entry's factory to find its record" do
expect(entry).to receive(:factory)
expect(factory).to receive(:find)

found_record = subject.find_record(source_identifier)
_, found_record = subject.find_record(source_identifier, importer_run_id)

expect(found_record.title).to eq(record.title)
end
end

context 'when nothing is found' do
it 'returns nil' do
expect(subject.find_record(source_identifier)).to be_nil
expect(subject.find_record(source_identifier, importer_run_id)[1]).to be_nil
end
end
end
Expand All @@ -61,11 +63,11 @@ module Bulkrax
let(:id) { 'xyz6789' }

it 'looks through entries, collections, and all work types' do
expect(Entry).to receive(:find_by).with(identifier: id).once
expect(Entry).to receive(:find_by).with(identifier: id, importerexporter_id: importer_id).once
expect(::Collection).to receive(:where).with(id: id).once.and_return([])
expect(::Work).to receive(:where).with(id: id).once.and_return([])

subject.find_record(id)
subject.find_record(id, importer_run_id)
end

context 'when a collection is found' do
Expand All @@ -76,7 +78,7 @@ module Bulkrax
end

it 'returns the collection' do
expect(subject.find_record(id)).to eq(collection)
expect(subject.find_record(id, importer_run_id)[1]).to eq(collection)
end
end

Expand All @@ -88,13 +90,13 @@ module Bulkrax
end

it 'returns the work' do
expect(subject.find_record(id)).to eq(work)
expect(subject.find_record(id, importer_run_id)[1]).to eq(work)
end
end

context 'when nothing is found' do
it 'returns nil' do
expect(subject.find_record(id)).to be_nil
expect(subject.find_record(id, importer_run_id)[1]).to be_nil
end
end
end
Expand Down

0 comments on commit 8d0c632

Please sign in to comment.