Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ultimate batch saving speedup #15761

Merged
merged 14 commits into from
Aug 10, 2017
12 changes: 12 additions & 0 deletions app/models/manager_refresh/inventory_collection.rb
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,8 @@ def stringify_reference(reference)
end

def manager_ref_to_cols
# TODO(lsmola) this should contain the polymorphic _type, otherwise the IC with polymorphic unique key will get
# conflicts
# Convert attributes from unique key to actual db cols
manager_ref.map do |ref|
association_to_foreign_key_mapping[ref] || ref
Expand Down Expand Up @@ -911,6 +913,14 @@ def fixed_foreign_keys
@fixed_foreign_keys_cache
end

def serializable_keys?(all_attribute_keys)
return @serializable_keys_cache unless @serializable_keys_cache.nil?

@serializable_keys_cache = all_attribute_keys.any? do |key|
model_class.type_for_attribute(key.to_s).respond_to?(:coder)
end
end

def base_class_name
return "" unless model_class

Expand Down Expand Up @@ -986,10 +996,12 @@ def db_collection_for_comparison
end

def db_collection_for_comparison_for(manager_uuids_set)
# TODO(lsmola) this should have the build_multi_selection_condition, like in the method above
full_collection_for_comparison.where(manager_ref.first => manager_uuids_set)
end

def db_collection_for_comparison_for_complement_of(manager_uuids_set)
# TODO(lsmola) this should have the build_multi_selection_condition, like in the method above
full_collection_for_comparison.where.not(manager_ref.first => manager_uuids_set)
end

Expand Down
18 changes: 10 additions & 8 deletions app/models/manager_refresh/inventory_object.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ def initialize(inventory_collection, data)
end

def manager_uuid
# TODO(lsmola) should we have a separate function for uuid containing foreign keys? Probably yes, since it could
# speed up the ID fetching.
id_with_keys(manager_ref)
end

Expand Down Expand Up @@ -50,36 +52,36 @@ def attributes(inventory_collection_scope = nil)
data[key] = value.compact.map(&:load).compact
# We can use built in _ids methods to assign array of ids into has_many relations. So e.g. the :key_pairs=
# relation setter will become :key_pair_ids=
attributes_for_saving[key.to_s.singularize + "_ids"] = data[key].map(&:id).compact.uniq
attributes_for_saving[(key.to_s.singularize + "_ids").to_sym] = data[key].map(&:id).compact.uniq
elsif loadable?(value) || inventory_collection_scope.association_to_foreign_key_mapping[key]
# Lets fill also the original data, so other InventoryObject referring to this attribute gets the right
# result
data[key] = value.load if value.respond_to?(:load)
if (foreign_key = inventory_collection_scope.association_to_foreign_key_mapping[key])
# We have an association to fill, lets fill also the :key, cause some other InventoryObject can refer to it
record_id = data[key].try(:id)
attributes_for_saving[foreign_key] = record_id
record_id = data[key].try(:id)
attributes_for_saving[foreign_key.to_sym] = record_id

if (foreign_type = inventory_collection_scope.association_to_foreign_type_mapping[key])
# If we have a polymorphic association, we need to also fill a base class name, but we want to nullify it
# if record_id is missing
base_class = data[key].try(:base_class_name) || data[key].class.try(:base_class).try(:name)
attributes_for_saving[foreign_type] = record_id ? base_class : nil
attributes_for_saving[foreign_type.to_sym] = record_id ? base_class : nil
end
elsif data[key].kind_of?(::ManagerRefresh::InventoryObject)
# We have an association to fill but not an Activerecord association, so e.g. Ancestry, lets just load
# it here. This way of storing ancestry is ineffective in DB call count, but RAM friendly
attributes_for_saving[key] = data[key].base_class_name.constantize.find_by(:id => data[key].id)
attributes_for_saving[key.to_sym] = data[key].base_class_name.constantize.find_by(:id => data[key].id)
else
# We have a normal attribute to fill
attributes_for_saving[key] = data[key]
attributes_for_saving[key.to_sym] = data[key]
end
else
attributes_for_saving[key] = value
attributes_for_saving[key.to_sym] = value
end
end

attributes_for_saving.symbolize_keys
attributes_for_saving
end

def assign_attributes(attributes)
Expand Down
60 changes: 38 additions & 22 deletions app/models/manager_refresh/save_collection/saver/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,33 @@ class Base
include Vmdb::Logging
include ManagerRefresh::SaveCollection::Saver::SqlHelper

attr_reader :inventory_collection
attr_reader :inventory_collection, :association

def initialize(inventory_collection)
@inventory_collection = inventory_collection

# Private attrs
@unique_index_keys = inventory_collection.manager_ref_to_cols
@primary_key = inventory_collection.model_class.primary_key
@arel_primary_key = inventory_collection.model_class.arel_attribute(primary_key)
@unique_index_keys = inventory_collection.manager_ref_to_cols.map(&:to_sym)
@unique_index_keys_to_s = inventory_collection.manager_ref_to_cols.map(&:to_s)
@select_keys = [@primary_key] + @unique_index_keys_to_s
@unique_db_primary_keys = Set.new
@unique_db_indexes = Set.new

# TODO(lsmola) do I need to reload every time? Also it should be enough to clear the associations.
inventory_collection.parent.reload if inventory_collection.parent
@association = inventory_collection.db_collection_for_comparison

# Right now ApplicationRecordIterator in association is used for targeted refresh. Given the small amount of
# records flowing through there, we probably don't need to optimize that association to fetch a pure SQL.
@pure_sql_records_fetching = !inventory_collection.use_ar_object? && !@association.kind_of?(ManagerRefresh::ApplicationRecordIterator)

@batch_size_for_persisting = 10_000

@batch_size = @pure_sql_records_fetching ? @batch_size_for_persisting : inventory_collection.batch_size
@record_key_method = @pure_sql_records_fetching ? :pure_sql_record_key : :ar_record_key
@select_keys_indexes = @select_keys.each_with_object({}).with_index { |(key, obj), index| obj[key.to_s] = index }
end

def save_inventory_collection!
Expand All @@ -22,16 +40,14 @@ def save_inventory_collection!
# job. We want to do 1 :delete_complement job at 1 time, to keep to memory down.
return delete_complement if inventory_collection.all_manager_uuids.present?

# TODO(lsmola) do I need to reload every time? Also it should be enough to clear the associations.
inventory_collection.parent.reload if inventory_collection.parent
association = inventory_collection.db_collection_for_comparison

save!(association)
end

private

attr_reader :unique_index_keys, :unique_db_primary_keys, :unique_db_indexes
attr_reader :unique_index_keys, :unique_index_keys_to_s, :select_keys, :unique_db_primary_keys, :unique_db_indexes,
:primary_key, :arel_primary_key, :record_key_method, :pure_sql_records_fetching, :select_keys_indexes,
:batch_size, :batch_size_for_persisting

def save!(association)
attributes_index = {}
Expand All @@ -50,7 +66,7 @@ def save!(association)
association.find_each do |record|
index = inventory_collection.object_index_with_keys(unique_index_keys, record)

next unless assert_distinct_relation(record)
next unless assert_distinct_relation(record.id)
next unless assert_unique_record(record, index)

inventory_object = inventory_objects_index.delete(index)
Expand All @@ -62,7 +78,7 @@ def save!(association)
delete_record!(record) if inventory_collection.delete_allowed?
else
# Record was found in the DB and sent for saving, we will be updating the DB.
update_record!(record, hash, inventory_object) if assert_referential_integrity(hash, inventory_object)
update_record!(record, hash, inventory_object) if assert_referential_integrity(hash)
end
end
end
Expand All @@ -77,7 +93,7 @@ def save!(association)
inventory_objects_index.each do |index, inventory_object|
hash = attributes_index.delete(index)

create_record!(hash, inventory_object) if assert_referential_integrity(hash, inventory_object)
create_record!(hash, inventory_object) if assert_referential_integrity(hash)
end
end
end
Expand All @@ -94,8 +110,8 @@ def inventory_collection_details
"strategy: #{inventory_collection.strategy}, saver_strategy: #{inventory_collection.saver_strategy}, targeted: #{inventory_collection.targeted?}"
end

def batch_size
inventory_collection.batch_size
def record_key(record, key)
record.public_send(key)
end

def delete_complement
Expand Down Expand Up @@ -132,8 +148,8 @@ def assert_unique_record(_record, _index)
true
end

def assert_distinct_relation(record)
if unique_db_primary_keys.include?(record.id) # Include on Set is O(1)
def assert_distinct_relation(primary_key_value)
if unique_db_primary_keys.include?(primary_key_value) # Include on Set is O(1)
# Change the InventoryCollection's :association or :arel parameter to return distinct results. The :through
# relations can return the same record multiple times. We don't want to do SELECT DISTINCT by default, since
# it can be very slow.
Expand All @@ -145,17 +161,17 @@ def assert_distinct_relation(record)
raise("Please update :association or :arel for #{inventory_collection} to return a DISTINCT result. ")
end
else
unique_db_primary_keys << record.id
unique_db_primary_keys << primary_key_value
end
true
end

def assert_referential_integrity(hash, inventory_object)
inventory_object.inventory_collection.fixed_foreign_keys.each do |x|
next unless hash[x].blank?
subject = "#{inventory_object} of #{inventory_object.inventory_collection} because of missing foreign key #{x} for "\
"#{inventory_object.inventory_collection.parent.class.name}:"\
"#{inventory_object.inventory_collection.parent.try(:id)}"
def assert_referential_integrity(hash)
inventory_collection.fixed_foreign_keys.each do |x|
next unless hash[x].nil?
subject = "#{hash} of #{inventory_collection} because of missing foreign key #{x} for "\
"#{inventory_collection.parent.class.name}:"\
"#{inventory_collection.parent.try(:id)}"
if Rails.env.production?
_log.warn("Referential integrity check violated, ignoring #{subject}")
return false
Expand Down Expand Up @@ -186,7 +202,7 @@ def assign_attributes_for_update!(hash, update_time)
end

def assign_attributes_for_create!(hash, create_time)
hash[:type] = inventory_collection.model_class.name if inventory_collection.supports_sti? && hash[:type].blank?
hash[:type] = inventory_collection.model_class.name if inventory_collection.supports_sti? && hash[:type].nil?
hash[:created_on] = create_time if inventory_collection.supports_timestamps_on_variant?
hash[:created_at] = create_time if inventory_collection.supports_timestamps_at_variant?
assign_attributes_for_update!(hash, create_time)
Expand Down
Loading