diff --git a/app/factories/bulkrax/object_factory.rb b/app/factories/bulkrax/object_factory.rb index cc5a6d79..c18ee888 100644 --- a/app/factories/bulkrax/object_factory.rb +++ b/app/factories/bulkrax/object_factory.rb @@ -7,10 +7,10 @@ class ObjectFactory include DynamicRecordLookup define_model_callbacks :save, :create - attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping + attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id # rubocop:disable Metrics/ParameterLists - def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false) + def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, importer_run_id: nil, update_files: false) @attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes) @replace_files = replace_files @update_files = update_files @@ -19,6 +19,7 @@ def initialize(attributes:, source_identifier_value:, work_identifier:, related_ @related_parents_parsed_mapping = related_parents_parsed_mapping @source_identifier_value = source_identifier_value @klass = klass || Bulkrax.default_work_type.constantize + @importer_run_id = importer_run_id end # rubocop:enable Metrics/ParameterLists @@ -152,7 +153,7 @@ def update_collection(attrs) # This method is heavily inspired by Hyrax's AttachFilesToWorkJob def create_file_set(attrs) - work = find_record(attributes[related_parents_parsed_mapping].first) + _, work = find_record(attributes[related_parents_parsed_mapping].first, importer_run_id) work_permissions = work.permissions.map(&:to_hash) file_set_attrs = attrs.slice(*object.attributes.keys) object.assign_attributes(file_set_attrs) diff --git a/app/jobs/bulkrax/create_relationships_job.rb b/app/jobs/bulkrax/create_relationships_job.rb index b5b3b361..838b4ae5 100644 --- a/app/jobs/bulkrax/create_relationships_job.rb +++ b/app/jobs/bulkrax/create_relationships_job.rb @@ -37,11 +37,11 @@ def perform(parent_identifier:, importer_run_id:) end.sort_by(&:order) @importer_run_id = importer_run_id - @parent_record = find_record(parent_identifier) + @parent_entry, @parent_record = find_record(parent_identifier, importer_run_id) @child_records = { works: [], collections: [] } pending_relationships.each do |rel| raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil? - child_record = find_record(rel.child_id) + _, child_record = find_record(rel.child_id, importer_run_id) child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record end @@ -53,9 +53,9 @@ def perform(parent_identifier:, importer_run_id:) return false # stop current job from continuing to run after rescheduling end - @parent_entry = Bulkrax::Entry.where(identifier: parent_identifier, - importerexporter_id: ImporterRun.find(importer_run_id).importer_id, - importerexporter_type: "Bulkrax::Importer").first + @parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier, + importerexporter_id: ImporterRun.find(importer_run_id).importer_id, + importerexporter_type: "Bulkrax::Importer").first create_relationships pending_relationships.each(&:destroy) rescue ::StandardError => e @@ -91,7 +91,8 @@ def collection_parent_work_child related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping, replace_files: false, user: user, - klass: child_record.class + klass: child_record.class, + importer_run_id: importer_run_id ).run # TODO: add counters for :processed_parents and :failed_parents Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations @@ -109,7 +110,8 @@ def collection_parent_collection_child related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping, replace_files: false, user: user, - klass: parent_record.class + klass: parent_record.class, + importer_run_id: importer_run_id ).run # TODO: add counters for :processed_parents and :failed_parents Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations @@ -132,7 +134,8 @@ def work_parent_work_child related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping, replace_files: false, user: user, - klass: parent_record.class + klass: parent_record.class, + importer_run_id: importer_run_id ).run # TODO: add counters for :processed_parents and :failed_parents Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations diff --git a/app/jobs/bulkrax/import_file_set_job.rb b/app/jobs/bulkrax/import_file_set_job.rb index da12d738..0e461838 100644 --- a/app/jobs/bulkrax/import_file_set_job.rb +++ b/app/jobs/bulkrax/import_file_set_job.rb @@ -7,7 +7,10 @@ class ImportFileSetJob < ApplicationJob queue_as :import + attr_reader :importer_run_id + def perform(entry_id, importer_run_id) + @importer_run_id = importer_run_id entry = Entry.find(entry_id) parent_identifier = entry.raw_metadata[entry.related_parents_raw_mapping]&.strip @@ -63,7 +66,8 @@ def check_parent_is_a_work!(parent_identifier) end def find_parent_record(parent_identifier) - @parent_record ||= find_record(parent_identifier) + @parent_record ||= find_record(parent_identifier, importer_run_id) + @parent_record = parent_record.last if parent_record.is_a? Array end end end diff --git a/app/models/concerns/bulkrax/dynamic_record_lookup.rb b/app/models/concerns/bulkrax/dynamic_record_lookup.rb index 0332414c..14d04baa 100644 --- a/app/models/concerns/bulkrax/dynamic_record_lookup.rb +++ b/app/models/concerns/bulkrax/dynamic_record_lookup.rb @@ -6,9 +6,15 @@ module DynamicRecordLookup # has the provided identifier. # # @param identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier - # @return [Work, Collection, nil] Work or Collection if found, otherwise nil - def find_record(identifier) - record = Entry.find_by(identifier: identifier) + # @param importer_run_id [Number] ID of the current_run of this Importer Job + # @return [Entry, nil], [Work, Collection, nil] Entry if found, otherwise nil and a Work or Collection if found, otherwise nil + def find_record(identifier, importer_run_id = nil) + # check for our entry in our current importer first + importer_id = ImporterRun.find(importer_run_id).importer_id + record = Entry.find_by(identifier: identifier, importerexporter_id: importer_id) || Entry.find_by(identifier: identifier) + + # TODO(alishaevn): discuss whether we are only looking for Collection models here + # use ActiveFedora::Base.find(identifier) instead? record ||= ::Collection.where(id: identifier).first # rubocop:disable Rails/FindBy if record.blank? available_work_types.each do |work_type| @@ -16,7 +22,9 @@ def find_record(identifier) end end - record.is_a?(Entry) ? record.factory.find : record + # return the found entry here instead of searching for it again in the CreateRelationshipsJob + # also accounts for when the found entry isn't a part of this importer + record.is_a?(Entry) ? [record, record.factory.find] : [nil, record] end # Check if the record is a Work diff --git a/app/models/concerns/bulkrax/file_factory.rb b/app/models/concerns/bulkrax/file_factory.rb index 3c6311a8..1cf783b6 100644 --- a/app/models/concerns/bulkrax/file_factory.rb +++ b/app/models/concerns/bulkrax/file_factory.rb @@ -45,6 +45,8 @@ def parsed_remote_files end def new_remote_files + return if object.is_a? FileSet + @new_remote_files ||= if object.present? && object.file_sets.present? parsed_remote_files.select do |file| # is the url valid? @@ -101,7 +103,12 @@ def set_removed_filesets end def local_file_sets - @local_file_sets ||= object&.ordered_file_sets + @local_file_sets ||= ordered_file_sets + end + + def ordered_file_sets + # OVERRIDE Hyrda-works 1.2.0 - this method was deprecated in v1.0 + object&.ordered_members.to_a.select(&:file_set?) end def import_files diff --git a/app/models/concerns/bulkrax/import_behavior.rb b/app/models/concerns/bulkrax/import_behavior.rb index 141d051c..47d6080e 100644 --- a/app/models/concerns/bulkrax/import_behavior.rb +++ b/app/models/concerns/bulkrax/import_behavior.rb @@ -98,6 +98,7 @@ def factory replace_files: replace_files, user: user, klass: factory_class, + importer_run_id: importerexporter.last_run.id, update_files: update_files) end diff --git a/spec/jobs/bulkrax/create_relationships_job_spec.rb b/spec/jobs/bulkrax/create_relationships_job_spec.rb index 51952ed8..4b6b5fb7 100644 --- a/spec/jobs/bulkrax/create_relationships_job_spec.rb +++ b/spec/jobs/bulkrax/create_relationships_job_spec.rb @@ -27,8 +27,8 @@ module Bulkrax before do allow(::Hyrax.config).to receive(:curation_concerns).and_return([Work]) - allow(Entry).to receive(:find_by).with(identifier: child_entry.identifier).and_return(child_entry) - allow(Entry).to receive(:find_by).with(identifier: parent_entry.identifier).and_return(parent_entry) + allow(Entry).to receive(:find_by).with(identifier: child_entry.identifier, importerexporter_id: importer.id).and_return(child_entry) + allow(Entry).to receive(:find_by).with(identifier: parent_entry.identifier, importerexporter_id: importer.id).and_return(parent_entry) allow(parent_entry).to receive(:factory).and_return(parent_factory) allow(child_entry).to receive(:factory).and_return(child_factory) end @@ -45,7 +45,8 @@ module Bulkrax id: child_record.id, member_of_collections_attributes: { 0 => { id: parent_record.id } } }, - klass: child_record.class + klass: child_record.class, + importer_run_id: importer.current_run.id ) end @@ -150,7 +151,8 @@ module Bulkrax id: parent_record.id, child_collection_id: child_record.id }, - klass: parent_record.class + klass: parent_record.class, + importer_run_id: importer.current_run.id ) end @@ -249,7 +251,8 @@ module Bulkrax id: parent_record.id, work_members_attributes: { 0 => { id: child_record.id } } }, - klass: parent_record.class + klass: parent_record.class, + importer_run_id: importer.current_run.id ) end @@ -366,7 +369,7 @@ module Bulkrax importer_run_id: importer.current_run.id ) - expect(importer.last_run.failed_relationships).to eq(1) + expect(Bulkrax::Importer.find(importer.id).last_run.failed_relationships).to eq(1) end end diff --git a/spec/jobs/bulkrax/importer_job_spec.rb b/spec/jobs/bulkrax/importer_job_spec.rb index b3de81e0..11b3f6de 100644 --- a/spec/jobs/bulkrax/importer_job_spec.rb +++ b/spec/jobs/bulkrax/importer_job_spec.rb @@ -27,7 +27,7 @@ module Bulkrax importer_job.perform(1) expect(importer.current_run.total_work_entries).to eq(10) - expect(importer.current_run.total_collection_entries).to eq(421) + expect(importer.current_run.total_collection_entries).to eq(426) end end diff --git a/spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb b/spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb index ded8998c..8af3b967 100644 --- a/spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb +++ b/spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb @@ -5,6 +5,8 @@ module Bulkrax RSpec.shared_examples 'dynamic record lookup' do let(:importer) { FactoryBot.create(:bulkrax_importer_csv_complex) } + let(:importer_id) { importer.id } + let(:importer_run_id) { importer.current_run.id } before do allow(::Hyrax.config).to receive(:curation_concerns).and_return([Work]) @@ -19,11 +21,11 @@ module Bulkrax let(:source_identifier) { 'bulkrax_identifier_1' } it 'looks through entries, collections, and all work types' do - expect(Entry).to receive(:find_by).with(identifier: source_identifier).once + expect(Entry).to receive(:find_by).with(identifier: source_identifier, importerexporter_id: importer_id).once expect(::Collection).to receive(:where).with(id: source_identifier).once.and_return([]) expect(::Work).to receive(:where).with(id: source_identifier).once.and_return([]) - subject.find_record(source_identifier) + subject.find_record(source_identifier, importer_run_id) end context 'when an entry is found' do @@ -32,19 +34,19 @@ module Bulkrax let(:record) { instance_double(::Work, title: ["Found through Entry's factory"]) } before do - allow(Entry).to receive(:find_by).with(identifier: source_identifier).and_return(entry) + allow(Entry).to receive(:find_by).with(identifier: source_identifier, importerexporter_id: importer_id).and_return(entry) allow(entry).to receive(:factory).and_return(factory) end it "returns the entry's record" do - expect(subject.find_record(source_identifier)).to eq(record) + expect(subject.find_record(source_identifier, importer_run_id)[1]).to eq(record) end it "uses the entry's factory to find its record" do expect(entry).to receive(:factory) expect(factory).to receive(:find) - found_record = subject.find_record(source_identifier) + _, found_record = subject.find_record(source_identifier, importer_run_id) expect(found_record.title).to eq(record.title) end @@ -52,7 +54,7 @@ module Bulkrax context 'when nothing is found' do it 'returns nil' do - expect(subject.find_record(source_identifier)).to be_nil + expect(subject.find_record(source_identifier, importer_run_id)[1]).to be_nil end end end @@ -61,11 +63,11 @@ module Bulkrax let(:id) { 'xyz6789' } it 'looks through entries, collections, and all work types' do - expect(Entry).to receive(:find_by).with(identifier: id).once + expect(Entry).to receive(:find_by).with(identifier: id, importerexporter_id: importer_id).once expect(::Collection).to receive(:where).with(id: id).once.and_return([]) expect(::Work).to receive(:where).with(id: id).once.and_return([]) - subject.find_record(id) + subject.find_record(id, importer_run_id) end context 'when a collection is found' do @@ -76,7 +78,7 @@ module Bulkrax end it 'returns the collection' do - expect(subject.find_record(id)).to eq(collection) + expect(subject.find_record(id, importer_run_id)[1]).to eq(collection) end end @@ -88,13 +90,13 @@ module Bulkrax end it 'returns the work' do - expect(subject.find_record(id)).to eq(work) + expect(subject.find_record(id, importer_run_id)[1]).to eq(work) end end context 'when nothing is found' do it 'returns nil' do - expect(subject.find_record(id)).to be_nil + expect(subject.find_record(id, importer_run_id)[1]).to be_nil end end end