Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enhanced dependency and references scanning #13995

Merged
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@ def configured_systems
inventory_object.hostname = host.name
inventory_object.virtual_instance_ref = host.instance_id
inventory_object.inventory_root_group = persister.inventory_root_groups.lazy_find(host.inventory_id.to_s)
# TODO(lsmola) get rid of direct DB access
inventory_object.counterpart = Vm.find_by(:uid_ems => host.instance_id)
inventory_object.counterpart = persister.vms.lazy_find(host.instance_id)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reads so much better too

end
end

Expand All @@ -34,15 +33,13 @@ def configuration_scripts
inventory_object.variables = job_template.extra_vars_hash
inventory_object.inventory_root_group = persister.inventory_root_groups.lazy_find(job_template.inventory_id.to_s)

authentications = []
inventory_object.authentications = []
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did this change actually do anything?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, so now the scanning is as a separate step and you can do inventory_object.authentications = [] and inventory_object.authentications << xy

before, the dependency scan was done on assignment, so inventory_object.authentications = [] would do the scan, but inventory_object.authentications << xy would not, since you access the [] directly

%w(credential_id cloud_credential_id network_credential_id).each do |credential_attr|
next unless job_template.respond_to?(credential_attr)
credential_id = job_template.public_send(credential_attr).to_s
next if credential_id.blank?
authentications << persister.credentials.lazy_find(credential_id)
inventory_object.authentications << persister.credentials.lazy_find(credential_id)
end

inventory_object.authentications = authentications
end
end

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,12 @@ def initialize_inventory_collections
%i(credentials),
:builder_params => {:resource => manager}
)

collections[:vms] = ::ManagerRefresh::InventoryCollection.new(
:model_class => Vm,
:arel => Vm,
:strategy => :local_db_find_references,
:manager_ref => [:uid_ems]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:mode => :lazy

😄 :trollface:

aka - this looks great

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doing non lazy will degrade to SQL query per find, unless you set the references upfront :-(

)
end
end
262 changes: 178 additions & 84 deletions app/models/manager_refresh/inventory_collection.rb
Original file line number Diff line number Diff line change
@@ -1,40 +1,45 @@
module ManagerRefresh
class InventoryCollection
attr_accessor :saved
attr_accessor :saved, :references, :data_collection_finalized

attr_reader :model_class, :strategy, :attributes_blacklist, :attributes_whitelist, :custom_save_block, :parent,
:internal_attributes, :delete_method, :data, :data_index, :dependency_attributes, :manager_ref,
:association, :complete, :update_only, :transitive_dependency_attributes, :custom_manager_uuid,
:custom_db_finder, :check_changed, :arel, :builder_params
:custom_db_finder, :check_changed, :arel, :builder_params, :loaded_references, :db_data_index

delegate :each, :size, :to => :to_a

def initialize(model_class: nil, manager_ref: nil, association: nil, parent: nil, strategy: nil, saved: nil,
custom_save_block: nil, delete_method: nil, data_index: nil, data: nil, dependency_attributes: nil,
attributes_blacklist: nil, attributes_whitelist: nil, complete: nil, update_only: nil,
check_changed: nil, custom_manager_uuid: nil, custom_db_finder: nil, arel: nil, builder_params: {})
@model_class = model_class
@manager_ref = manager_ref || [:ems_ref]
@custom_manager_uuid = custom_manager_uuid
@custom_db_finder = custom_db_finder
@association = association || []
@parent = parent || nil
@arel = arel
@dependency_attributes = dependency_attributes || {}
@transitive_dependency_attributes = Set.new
@data = data || []
@data_index = data_index || {}
@saved = saved || false
@strategy = process_strategy(strategy)
@delete_method = delete_method || :destroy
@model_class = model_class
@manager_ref = manager_ref || [:ems_ref]
@custom_manager_uuid = custom_manager_uuid
@custom_db_finder = custom_db_finder
@association = association || []
@parent = parent || nil
@arel = arel
@dependency_attributes = dependency_attributes || {}
@data = data || []
@data_index = data_index || {}
@saved = saved || false
@strategy = process_strategy(strategy)
@delete_method = delete_method || :destroy
@custom_save_block = custom_save_block
@check_changed = check_changed.nil? ? true : check_changed
@internal_attributes = [:__feedback_edge_set_parent]
@complete = complete.nil? ? true : complete
@update_only = update_only.nil? ? false : update_only
@builder_params = builder_params

@attributes_blacklist = Set.new
@attributes_whitelist = Set.new
@custom_save_block = custom_save_block
@check_changed = check_changed.nil? ? true : check_changed
@internal_attributes = [:__feedback_edge_set_parent]
@complete = complete.nil? ? true : complete
@update_only = update_only.nil? ? false : update_only
@builder_params = builder_params
@transitive_dependency_attributes = Set.new
@references = Set.new
@loaded_references = Set.new
@db_data_index = nil
@data_collection_finalized = false

blacklist_attributes!(attributes_blacklist) if attributes_blacklist.present?
whitelist_attributes!(attributes_whitelist) if attributes_whitelist.present?
Expand All @@ -52,40 +57,16 @@ def to_hash

def process_strategy(strategy_name)
case strategy_name
when :local_db_cache_all, :local_db_find_one
send("process_strategy_#{strategy_name}")
when :find_missing_in_local_db
when :local_db_cache_all
self.data_collection_finalized = true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm assuming this means "I actually hit the db"

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is more a flag for 'no more references are coming' so we can do the query and cache it

self.saved = true
when :local_db_find_references
self.saved = true
when :local_db_find_missing_references
end
strategy_name
end

def load_from_db
return arel unless arel.nil?
parent.send(association)
end

def process_strategy_local_db_cache_all
self.saved = true
# TODO(lsmola) selected need to contain also :keys used in other InventoryCollections pointing to this one, once
# we get list of all keys for each InventoryCollection ,we can uncomnent
# selected = [:id] + manager_ref.map { |x| model_class.reflect_on_association(x).try(:foreign_key) || x }
# selected << :type if model_class.new.respond_to? :type
# load_from_db.select(selected).find_each do |record|
load_from_db.find_each do |record|
index = if custom_manager_uuid.nil?
object_index(record)
else
stringify_reference(custom_manager_uuid.call(record))
end
data_index[index] = new_inventory_object(record.attributes.symbolize_keys)
data_index[index].id = record.id
end
end

def process_strategy_local_db_find_one
self.saved = true
end

def check_changed?
check_changed
end
Expand Down Expand Up @@ -114,12 +95,14 @@ def saveable?
dependencies.all?(&:saved?)
end

def data_collection_finalized?
data_collection_finalized
end

def <<(inventory_object)
unless data_index[inventory_object.manager_uuid]
data_index[inventory_object.manager_uuid] = inventory_object
data << inventory_object

actualize_dependencies(inventory_object)
end
self
end
Expand Down Expand Up @@ -174,9 +157,9 @@ def find_or_build_by(manager_uuid_hash)
def find(manager_uuid)
return if manager_uuid.nil?
case strategy
when :local_db_find_one
when :local_db_find_references, :local_db_cache_all
find_in_db(manager_uuid)
when :find_missing_in_local_db
when :local_db_find_missing_references
data_index[manager_uuid] || find_in_db(manager_uuid)
else
data_index[manager_uuid]
Expand All @@ -190,21 +173,6 @@ def find_by(manager_uuid_hash)
find(object_index(manager_uuid_hash))
end

def find_in_db(manager_uuid)
manager_uuids = manager_uuid.split(stringify_joiner)
hash_uuid_by_ref = manager_ref.zip(manager_uuids).to_h
record = if custom_db_finder.nil?
parent.send(association).where(hash_uuid_by_ref).first
else
custom_db_finder.call(self, hash_uuid_by_ref)
end
return unless record

inventory_object = new_inventory_object(record.attributes.symbolize_keys)
inventory_object.id = record.id
inventory_object
end

def lazy_find(manager_uuid, key: nil, default: nil)
::ManagerRefresh::InventoryObjectLazy.new(self, manager_uuid, :key => key, :default => default)
end
Expand Down Expand Up @@ -354,29 +322,155 @@ def inspect
to_s
end

def actualize_dependency(key, value)
if dependency?(value)
(dependency_attributes[key] ||= Set.new) << value.inventory_collection
transitive_dependency_attributes << key if transitive_dependency?(value)
elsif value.kind_of?(Array) && value.any? { |x| dependency?(x) }
(dependency_attributes[key] ||= Set.new) << value.detect { |x| dependency?(x) }.inventory_collection
transitive_dependency_attributes << key if value.any? { |x| transitive_dependency?(x) }
def scan!
data.each do |inventory_object|
scan_inventory_object(inventory_object)
end
end

def db_collection_for_comparison
return arel unless arel.nil?
parent.send(association)
end

private

attr_writer :attributes_blacklist, :attributes_whitelist
attr_writer :attributes_blacklist, :attributes_whitelist, :db_data_index, :references

# Finds manager_uuid in the DB. Using a configured strategy we cache obtained data in the db_data_index, so the
# same find will not hit database twice. Also if we use lazy_links and this is called when
# data_collection_finalized?, we load all data from the DB, referenced by lazy_links, in one query.
#
# @param manager_uuid [String] a manager_uuid of the InventoryObject we search in the local DB
def find_in_db(manager_uuid)
# TODO(lsmola) selected need to contain also :keys used in other InventoryCollections pointing to this one, once
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what does "once we get list of all keys" mean

Not sure why this is commented out and what is needed to uncomment it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

right, this commented code is pretty old actually, I just moved it. It's for the next step when I will track what attributes are being referenced and I will select only those

# we get list of all keys for each InventoryCollection ,we can uncomnent
# selected = [:id] + manager_ref.map { |x| model_class.reflect_on_association(x).try(:foreign_key) || x }
# selected << :type if model_class.new.respond_to? :type
# load_from_db.select(selected).find_each do |record|

# Use the cached db_data_index only data_collection_finalized?, meaning no new reference can occur
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for this comment - I was just about to ask this very question

if data_collection_finalized? && db_data_index
return db_data_index[manager_uuid]
else
return db_data_index[manager_uuid] if db_data_index && db_data_index[manager_uuid]
# We haven't found the reference, lets add it to the list of references and load it
references << manager_uuid unless references.include?(manager_uuid) # O(C) since references is Set
end

def actualize_dependencies(inventory_object)
populate_db_data_index!

db_data_index[manager_uuid]
end

# Fills db_data_index with InventoryObjects obtained from the DB
def populate_db_data_index!
# Load only new references from the DB
new_references = references - loaded_references
# And store which references we've already loaded
loaded_references.merge(new_references)

# Initialize db_data_index in nil
self.db_data_index ||= {}

# Return the the correct relation based on strategy and selection&projection
case strategy
when :local_db_cache_all
selection = nil
projection = nil
else
selection = extract_references(new_references)
projection = nil
end

db_relation(selection, projection).find_each do |record|
process_db_record!(record)
end
end

# Return a Rails relation or array that will be used to obtain the records we need to load from the DB
#
# @param selection [Hash] A selection hash resulting in Select operation (in Relation algebra terms)
# @param projection [Array] A projection array resulting in Project operation (in Relation algebra terms)
def db_relation(selection = nil, projection = nil)
relation = if !custom_db_finder.blank?
custom_db_finder.call(self, selection, projection)
else
rel = if !parent.nil? && !association.nil?
parent.send(association)
elsif !arel.nil?
arel
end
rel = rel.where(selection) if rel && selection
rel
end

relation || []
end

# Extracting references to a relation friendly format, or a format processable by a custom_db_finder
#
# @param new_references [Array] array of manager_uuids of the InventoryObjects
def extract_references(new_references = [])
# We collect what manager_uuids of this IC were referenced and we load only those
# TODO(lsmola) maybe in can be obj[x] = Set.new, since rails will do a query "col1 IN [a,b,b] AND col2 IN [e,f,e]"
# which is equivalent to "col1 IN [a,b] AND col2 IN [e,f]". The best would be to forcing rails to query
# "(col1, col2) IN [(a,e), (b,f), (b,e)]" which would load exactly what we need. Postgree supports this, but rails
# doesn't seem to. So for now, we can load a bit more from the DB than we need, in case of manager_ref.count > 1
hash_uuids_by_ref = manager_ref.each_with_object({}) { |x, obj| obj[x] = [] }

# TODO(lsmola) hm, if we call find in the parser code, not all references will be here, so this will really work
# only for lazy_find. So if we want to call find, I suppose we can cache all, possibly we could optimize this to
# set references upfront?
new_references.each do |reference|
refs = reference.split(stringify_joiner)

refs.each_with_index do |ref, index|
hash_uuids_by_ref[manager_ref[index]] << ref
end
end
hash_uuids_by_ref
end

# Takes ApplicationRecord record, converts it to the InventoryObject and places it to db_data_index
#
# @param record [ApplicationRecord] ApplicationRecord record we want to place to the db_data_index
def process_db_record!(record)
index = if custom_manager_uuid.nil?
object_index(record)
else
stringify_reference(custom_manager_uuid.call(record))
end
db_data_index[index] = new_inventory_object(record.attributes.symbolize_keys)
db_data_index[index].id = record.id
end

def scan_inventory_object(inventory_object)
inventory_object.data.each do |key, value|
actualize_dependency(key, value)
if value.kind_of?(Array)
value.each { |val| scan_inventory_object_attribute(key, val) }
else
scan_inventory_object_attribute(key, value)
end
end
end

def dependency?(value)
(value.kind_of?(::ManagerRefresh::InventoryObjectLazy) || value.kind_of?(::ManagerRefresh::InventoryObject)) &&
value.dependency?
def scan_inventory_object_attribute(key, value)
return unless inventory_object?(value)

# Storing attributes and their dependencies
(dependency_attributes[key] ||= Set.new) << value.inventory_collection if value.dependency?

# Storing if attribute is a transitive dependency, so a lazy_find :key results in dependency
transitive_dependency_attributes << key if transitive_dependency?(value)

# Storing a reference in the target inventory_collection, then each IC knows about all the references and can
# e.g. load all the referenced uuids from a DB
value.inventory_collection.references << value.to_s
end

def inventory_object?(value)
value.kind_of?(::ManagerRefresh::InventoryObjectLazy) || value.kind_of?(::ManagerRefresh::InventoryObject)
end

def transitive_dependency?(value)
Expand Down
19 changes: 19 additions & 0 deletions app/models/manager_refresh/inventory_collection/scanner.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
module ManagerRefresh
class InventoryCollection
class Scanner
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

even more classes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, a class for every problem? :-D I might move this elsewhere, when scanning will turn to more generic preprocessing.

class << self
# Scanning inventory_collections for dependencies and references, storing the results in the inventory_collections
# themselves. Dependencies are needed for building a graph, references are needed for effective DB querying, where
# we can load all referenced objects of some InventoryCollection by one DB query.
#
# @param inventory_collections [Array] Array fo
def scan!(inventory_collections)
inventory_collections.each do |inventory_collection|
inventory_collection.data_collection_finalized = true
inventory_collection.scan!
end
end
end
end
end
end
Loading