Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

accurate-relationship-forming #454

Merged
merged 12 commits into from
Apr 8, 2022
7 changes: 4 additions & 3 deletions app/factories/bulkrax/object_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,10 @@ class ObjectFactory
include DynamicRecordLookup

define_model_callbacks :save, :create
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping
attr_reader :attributes, :object, :source_identifier_value, :klass, :replace_files, :update_files, :work_identifier, :related_parents_parsed_mapping, :importer_run_id

# rubocop:disable Metrics/ParameterLists
def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, update_files: false)
def initialize(attributes:, source_identifier_value:, work_identifier:, related_parents_parsed_mapping: nil, replace_files: false, user: nil, klass: nil, importer_run_id: nil, update_files: false)
@attributes = ActiveSupport::HashWithIndifferentAccess.new(attributes)
@replace_files = replace_files
@update_files = update_files
Expand All @@ -19,6 +19,7 @@ def initialize(attributes:, source_identifier_value:, work_identifier:, related_
@related_parents_parsed_mapping = related_parents_parsed_mapping
@source_identifier_value = source_identifier_value
@klass = klass || Bulkrax.default_work_type.constantize
@importer_run_id = importer_run_id
end
# rubocop:enable Metrics/ParameterLists

Expand Down Expand Up @@ -152,7 +153,7 @@ def update_collection(attrs)

# This method is heavily inspired by Hyrax's AttachFilesToWorkJob
def create_file_set(attrs)
work = find_record(attributes[related_parents_parsed_mapping].first)
_, work = find_record(attributes[related_parents_parsed_mapping].first, importer_run_id)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

find_record now returns two values, but we don't need the entry here

work_permissions = work.permissions.map(&:to_hash)
file_set_attrs = attrs.slice(*object.attributes.keys)
object.assign_attributes(file_set_attrs)
Expand Down
15 changes: 9 additions & 6 deletions app/jobs/bulkrax/create_relationships_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ def perform(parent_identifier:, importer_run_id:)
end.sort_by(&:order)

@importer_run_id = importer_run_id
@parent_record = find_record(parent_identifier)
@parent_entry, @parent_record = find_record(parent_identifier, importer_run_id)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

example use case that made this necessary:
a collection was imported in importer 1, and in importer 2 we are importing a work to add to that collection. in that case, line 56 below would have failed because this importer_run doesn't have the collection.

@child_records = { works: [], collections: [] }
pending_relationships.each do |rel|
raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil?
child_record = find_record(rel.child_id)
_, child_record = find_record(rel.child_id, importer_run_id)
child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record
end

Expand All @@ -53,7 +53,7 @@ def perform(parent_identifier:, importer_run_id:)
return false # stop current job from continuing to run after rescheduling
end

@parent_entry = Bulkrax::Entry.where(identifier: parent_identifier,
@parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier,
importerexporter_id: ImporterRun.find(importer_run_id).importer_id,
importerexporter_type: "Bulkrax::Importer").first
create_relationships
Expand Down Expand Up @@ -91,7 +91,8 @@ def collection_parent_work_child
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
replace_files: false,
user: user,
klass: child_record.class
klass: child_record.class,
importer_run_id: importer_run_id
).run
# TODO: add counters for :processed_parents and :failed_parents
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
Expand All @@ -109,7 +110,8 @@ def collection_parent_collection_child
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
replace_files: false,
user: user,
klass: parent_record.class
klass: parent_record.class,
importer_run_id: importer_run_id
).run
# TODO: add counters for :processed_parents and :failed_parents
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
Expand All @@ -132,7 +134,8 @@ def work_parent_work_child
related_parents_parsed_mapping: parent_entry.parser.related_parents_parsed_mapping,
replace_files: false,
user: user,
klass: parent_record.class
klass: parent_record.class,
importer_run_id: importer_run_id
).run
# TODO: add counters for :processed_parents and :failed_parents
Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations
Expand Down
6 changes: 5 additions & 1 deletion app/jobs/bulkrax/import_file_set_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ class ImportFileSetJob < ApplicationJob

queue_as :import

attr_reader :importer_run_id

def perform(entry_id, importer_run_id)
@importer_run_id = importer_run_id
entry = Entry.find(entry_id)
parent_identifier = entry.raw_metadata[entry.related_parents_raw_mapping]&.strip

Expand Down Expand Up @@ -63,7 +66,8 @@ def check_parent_is_a_work!(parent_identifier)
end

def find_parent_record(parent_identifier)
@parent_record ||= find_record(parent_identifier)
@parent_record ||= find_record(parent_identifier, importer_run_id)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried calling .last on this method but it didn't work.

@parent_record = parent_record.last if parent_record.is_a? Array
end
end
end
16 changes: 12 additions & 4 deletions app/models/concerns/bulkrax/dynamic_record_lookup.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,25 @@ module DynamicRecordLookup
# has the provided identifier.
#
# @param identifier [String] Work/Collection ID or Bulkrax::Entry source_identifier
# @return [Work, Collection, nil] Work or Collection if found, otherwise nil
def find_record(identifier)
record = Entry.find_by(identifier: identifier)
# @param importer_run_id [Number] ID of the current_run of this Importer Job
# @return [Entry, nil], [Work, Collection, nil] Entry if found, otherwise nil and a Work or Collection if found, otherwise nil
def find_record(identifier, importer_run_id = nil)
# check for our entry in our current importer first
importer_id = ImporterRun.find(importer_run_id).importer_id
record = Entry.find_by(identifier: identifier, importerexporter_id: importer_id) || Entry.find_by(identifier: identifier)

# TODO(alishaevn): discuss whether we are only looking for Collection models here
# use ActiveFedora::Base.find(identifier) instead?
record ||= ::Collection.where(id: identifier).first # rubocop:disable Rails/FindBy
if record.blank?
available_work_types.each do |work_type|
record ||= work_type.where(id: identifier).first # rubocop:disable Rails/FindBy
end
end

record.is_a?(Entry) ? record.factory.find : record
# return the found entry here instead of searching for it again in the CreateRelationshipsJob
# also accounts for when the found entry isn't a part of this importer
record.is_a?(Entry) ? [ record, record.factory.find ] : [ nil, record ]
end

# Check if the record is a Work
Expand Down
9 changes: 8 additions & 1 deletion app/models/concerns/bulkrax/file_factory.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ def parsed_remote_files
end

def new_remote_files
return if object.is_a? FileSet

@new_remote_files ||= if object.present? && object.file_sets.present?
parsed_remote_files.select do |file|
# is the url valid?
Expand Down Expand Up @@ -101,7 +103,12 @@ def set_removed_filesets
end

def local_file_sets
@local_file_sets ||= object&.ordered_file_sets
@local_file_sets ||= ordered_file_sets
end

def ordered_file_sets
# OVERRIDE Hyrda-works 1.2.0 - this method was deprecated in v1.0
object&.ordered_members.to_a.select(&:file_set?)
end

def import_files
Expand Down
1 change: 1 addition & 0 deletions app/models/concerns/bulkrax/import_behavior.rb
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def factory
replace_files: replace_files,
user: user,
klass: factory_class,
importer_run_id: importerexporter.last_run.id,
update_files: update_files)
end

Expand Down
4 changes: 2 additions & 2 deletions spec/jobs/bulkrax/create_relationships_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ module Bulkrax

before do
allow(::Hyrax.config).to receive(:curation_concerns).and_return([Work])
allow(Entry).to receive(:find_by).with(identifier: child_entry.identifier).and_return(child_entry)
allow(Entry).to receive(:find_by).with(identifier: parent_entry.identifier).and_return(parent_entry)
allow(Entry).to receive(:find_by).with(identifier: child_entry.identifier, importerexporter_id: importer.id).and_return(child_entry)
allow(Entry).to receive(:find_by).with(identifier: parent_entry.identifier, importerexporter_id: importer.id).and_return(parent_entry)
allow(parent_entry).to receive(:factory).and_return(parent_factory)
allow(child_entry).to receive(:factory).and_return(child_factory)
end
Expand Down
2 changes: 1 addition & 1 deletion spec/jobs/bulkrax/importer_job_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ module Bulkrax
importer_job.perform(1)

expect(importer.current_run.total_work_entries).to eq(10)
expect(importer.current_run.total_collection_entries).to eq(421)
expect(importer.current_run.total_collection_entries).to eq(426)
end
end

Expand Down
24 changes: 13 additions & 11 deletions spec/models/concerns/bulkrax/dynamic_record_lookup_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
module Bulkrax
RSpec.shared_examples 'dynamic record lookup' do
let(:importer) { FactoryBot.create(:bulkrax_importer_csv_complex) }
let(:importer_id) { importer.id }
let(:importer_run_id) { importer.current_run.id }

before do
allow(::Hyrax.config).to receive(:curation_concerns).and_return([Work])
Expand All @@ -19,11 +21,11 @@ module Bulkrax
let(:source_identifier) { 'bulkrax_identifier_1' }

it 'looks through entries, collections, and all work types' do
expect(Entry).to receive(:find_by).with(identifier: source_identifier).once
expect(Entry).to receive(:find_by).with(identifier: source_identifier, importerexporter_id: importer_id).once
expect(::Collection).to receive(:where).with(id: source_identifier).once.and_return([])
expect(::Work).to receive(:where).with(id: source_identifier).once.and_return([])

subject.find_record(source_identifier)
subject.find_record(source_identifier, importer_run_id)
end

context 'when an entry is found' do
Expand All @@ -32,27 +34,27 @@ module Bulkrax
let(:record) { instance_double(::Work, title: ["Found through Entry's factory"]) }

before do
allow(Entry).to receive(:find_by).with(identifier: source_identifier).and_return(entry)
allow(Entry).to receive(:find_by).with(identifier: source_identifier, importerexporter_id: importer_id).and_return(entry)
allow(entry).to receive(:factory).and_return(factory)
end

it "returns the entry's record" do
expect(subject.find_record(source_identifier)).to eq(record)
expect(subject.find_record(source_identifier, importer_run_id)).to eq(record)
end

it "uses the entry's factory to find its record" do
expect(entry).to receive(:factory)
expect(factory).to receive(:find)

found_record = subject.find_record(source_identifier)
found_record = subject.find_record(source_identifier, importer_run_id)

expect(found_record.title).to eq(record.title)
end
end

context 'when nothing is found' do
it 'returns nil' do
expect(subject.find_record(source_identifier)).to be_nil
expect(subject.find_record(source_identifier, importer_run_id)).to be_nil
end
end
end
Expand All @@ -61,11 +63,11 @@ module Bulkrax
let(:id) { 'xyz6789' }

it 'looks through entries, collections, and all work types' do
expect(Entry).to receive(:find_by).with(identifier: id).once
expect(Entry).to receive(:find_by).with(identifier: id, importerexporter_id: importer_id).once
expect(::Collection).to receive(:where).with(id: id).once.and_return([])
expect(::Work).to receive(:where).with(id: id).once.and_return([])

subject.find_record(id)
subject.find_record(id, importer_run_id)
end

context 'when a collection is found' do
Expand All @@ -76,7 +78,7 @@ module Bulkrax
end

it 'returns the collection' do
expect(subject.find_record(id)).to eq(collection)
expect(subject.find_record(id, importer_run_id)).to eq(collection)
end
end

Expand All @@ -88,13 +90,13 @@ module Bulkrax
end

it 'returns the work' do
expect(subject.find_record(id)).to eq(work)
expect(subject.find_record(id, importer_run_id)).to eq(work)
end
end

context 'when nothing is found' do
it 'returns nil' do
expect(subject.find_record(id)).to be_nil
expect(subject.find_record(id, importer_run_id)).to be_nil
end
end
end
Expand Down