From 3d135d0966c3748f18f2618c1382bd850ee42154 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 28 Mar 2017 18:40:06 +0200 Subject: [PATCH 1/8] Optimize store_ids_for_new_records by getting rid of the O(n^2) lookups Optimize store_ids_for_new_records by getting rid of the O(n^2) lookups. These lookups can take hours when we go over 50-100k records being processed by store_ids_for_new_records. And seems like there might be a Rails issue, since by doing association.push(new_records) at https://github.com/Ladas/manageiq/blob/48aa323551c8825e02170e251afa717ca807d2ed/app/models/ems_refresh/save_inventory_helper.rb#L69-L69 The association has the records doubled, the association is passed into store_ids_for_new_records as 'records' parameter https://github.com/Ladas/manageiq/blob/48aa323551c8825e02170e251afa717ca807d2ed/app/models/ems_refresh/save_inventory_helper.rb#L114-L114 And here when we do: records.count => 50k records.uniq.count => 25k records.reload.count => 25k That was slowing down the store_ids_for_new_records method even more it seems. Partially fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1436176 --- .../ems_refresh/save_inventory_helper.rb | 27 ++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/app/models/ems_refresh/save_inventory_helper.rb b/app/models/ems_refresh/save_inventory_helper.rb index fe041d8a1c6..4042fcb8e20 100644 --- a/app/models/ems_refresh/save_inventory_helper.rb +++ b/app/models/ems_refresh/save_inventory_helper.rb @@ -113,10 +113,31 @@ def save_child_inventory(obj, hashes, child_keys, *args) def store_ids_for_new_records(records, hashes, keys) keys = Array(keys) - hashes.each do |h| - r = records.detect { |r| keys.all? { |k| r.send(k) == r.class.type_for_attribute(k.to_s).cast(h[k]) } } - h[:id] = r.id + # Lets first index the hashes based on keys, so we can do O(1) lookups + hashes_index = {} + hashes.each do |hash| + hashes_index[build_index_from_hash(keys, hash)] = hash + end + + records.find_each do |record| + record_index = build_index_from_record(keys, record) + hash = hashes_index[record_index] + hash[:id] = record.id if hash end + + hashes + end + + def build_index_from_hash(keys, hash) + keys.map { |key| hash[key].to_s }.join(index_joiner) + end + + def build_index_from_record(keys, record) + keys.map { |key| record.send(key).to_s }.join(index_joiner) + end + + def index_joiner + "___" end def link_children_references(records) From 54f71000d2a98145eb8ad9ea631cd8a7a6a3d25f Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 28 Mar 2017 23:36:13 +0200 Subject: [PATCH 2/8] Use the array as the index Use the array as the index --- app/models/ems_refresh/save_inventory_helper.rb | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/app/models/ems_refresh/save_inventory_helper.rb b/app/models/ems_refresh/save_inventory_helper.rb index 4042fcb8e20..e6b13ec8224 100644 --- a/app/models/ems_refresh/save_inventory_helper.rb +++ b/app/models/ems_refresh/save_inventory_helper.rb @@ -129,15 +129,11 @@ def store_ids_for_new_records(records, hashes, keys) end def build_index_from_hash(keys, hash) - keys.map { |key| hash[key].to_s }.join(index_joiner) + keys.map { |key| hash[key].to_s } end def build_index_from_record(keys, record) - keys.map { |key| record.send(key).to_s }.join(index_joiner) - end - - def index_joiner - "___" + keys.map { |key| record.send(key).to_s } end def link_children_references(records) From 6c3b73db1e03a9990116b40a14346e0d6180be41 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 28 Mar 2017 23:39:00 +0200 Subject: [PATCH 3/8] Use uniq instead of find_each to get pass association.push duplicating records Use uniq instead of find_each to get pass association.push duplicating records. Uniq is not doing exra query, so it should be quicker. --- app/models/ems_refresh/save_inventory_helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/ems_refresh/save_inventory_helper.rb b/app/models/ems_refresh/save_inventory_helper.rb index e6b13ec8224..1d577e9108d 100644 --- a/app/models/ems_refresh/save_inventory_helper.rb +++ b/app/models/ems_refresh/save_inventory_helper.rb @@ -119,7 +119,7 @@ def store_ids_for_new_records(records, hashes, keys) hashes_index[build_index_from_hash(keys, hash)] = hash end - records.find_each do |record| + records.uniq.each do |record| record_index = build_index_from_record(keys, record) hash = hashes_index[record_index] hash[:id] = record.id if hash From 8289c25be76cc023d3be23aa735476e66ed7ce98 Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Tue, 28 Mar 2017 23:53:25 +0200 Subject: [PATCH 4/8] Change store_ids_for_new_records to support type cast again Change store_ids_for_new_records to support type cast again --- .../ems_refresh/save_inventory_helper.rb | 23 +++++++++---------- 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/app/models/ems_refresh/save_inventory_helper.rb b/app/models/ems_refresh/save_inventory_helper.rb index 1d577e9108d..2f1c3fc1912 100644 --- a/app/models/ems_refresh/save_inventory_helper.rb +++ b/app/models/ems_refresh/save_inventory_helper.rb @@ -114,26 +114,25 @@ def save_child_inventory(obj, hashes, child_keys, *args) def store_ids_for_new_records(records, hashes, keys) keys = Array(keys) # Lets first index the hashes based on keys, so we can do O(1) lookups - hashes_index = {} - hashes.each do |hash| - hashes_index[build_index_from_hash(keys, hash)] = hash - end - + record_index = {} records.uniq.each do |record| - record_index = build_index_from_record(keys, record) - hash = hashes_index[record_index] - hash[:id] = record.id if hash + record_index[build_index_from_record(keys, record)] = record end - hashes + record_class = records.first.class + + hashes.each do |hash| + record = record_index[build_index_from_hash(keys, hash, record_class)] + hash[:id] = record.try(:id) + end end - def build_index_from_hash(keys, hash) - keys.map { |key| hash[key].to_s } + def build_index_from_hash(keys, hash, record_class) + keys.map { |key| record_class.type_for_attribute(key.to_s).cast(hash[key]) } end def build_index_from_record(keys, record) - keys.map { |key| record.send(key).to_s } + keys.map { |key| record.send(key) } end def link_children_references(records) From 3844890f4fdba0ea61adb7ba2468ca891cbcdcad Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Wed, 29 Mar 2017 00:10:40 +0200 Subject: [PATCH 5/8] Do not use uniq since it filter out the non saved objects Do not use uniq since it filter out the non saved objects, but then the calling .id result in the nil anyway. So the crosslink will get filled only in the second refresh. --- app/models/ems_refresh/save_inventory_helper.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/ems_refresh/save_inventory_helper.rb b/app/models/ems_refresh/save_inventory_helper.rb index 2f1c3fc1912..db9248a9ee1 100644 --- a/app/models/ems_refresh/save_inventory_helper.rb +++ b/app/models/ems_refresh/save_inventory_helper.rb @@ -115,7 +115,7 @@ def store_ids_for_new_records(records, hashes, keys) keys = Array(keys) # Lets first index the hashes based on keys, so we can do O(1) lookups record_index = {} - records.uniq.each do |record| + records.each do |record| record_index[build_index_from_record(keys, record)] = record end @@ -123,7 +123,7 @@ def store_ids_for_new_records(records, hashes, keys) hashes.each do |hash| record = record_index[build_index_from_hash(keys, hash, record_class)] - hash[:id] = record.try(:id) + hash[:id] = record.id end end From 7f60ac3d6ad0c689c13ec24be55fbb8855176ddf Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 30 Mar 2017 17:52:17 +0200 Subject: [PATCH 6/8] Reformat the record_index using index_by Reformat the record_index using index_by --- app/models/ems_refresh/save_inventory_helper.rb | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/app/models/ems_refresh/save_inventory_helper.rb b/app/models/ems_refresh/save_inventory_helper.rb index db9248a9ee1..3c109833aba 100644 --- a/app/models/ems_refresh/save_inventory_helper.rb +++ b/app/models/ems_refresh/save_inventory_helper.rb @@ -114,11 +114,7 @@ def save_child_inventory(obj, hashes, child_keys, *args) def store_ids_for_new_records(records, hashes, keys) keys = Array(keys) # Lets first index the hashes based on keys, so we can do O(1) lookups - record_index = {} - records.each do |record| - record_index[build_index_from_record(keys, record)] = record - end - + record_index = records.index_by { |record| build_index_from_record(keys, record) } record_class = records.first.class hashes.each do |hash| From e928a78baa4a78f68cb5e80ddec432234319903a Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 30 Mar 2017 17:53:26 +0200 Subject: [PATCH 7/8] Check for blank records and return Check for blank records and return --- app/models/ems_refresh/save_inventory_helper.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/models/ems_refresh/save_inventory_helper.rb b/app/models/ems_refresh/save_inventory_helper.rb index 3c109833aba..a0122a40591 100644 --- a/app/models/ems_refresh/save_inventory_helper.rb +++ b/app/models/ems_refresh/save_inventory_helper.rb @@ -112,6 +112,8 @@ def save_child_inventory(obj, hashes, child_keys, *args) end def store_ids_for_new_records(records, hashes, keys) + return if records.blank? + keys = Array(keys) # Lets first index the hashes based on keys, so we can do O(1) lookups record_index = records.index_by { |record| build_index_from_record(keys, record) } From b4c143ac1626892847080df2f60a7c4059509cbb Mon Sep 17 00:00:00 2001 From: Ladislav Smola Date: Thu, 30 Mar 2017 17:56:16 +0200 Subject: [PATCH 8/8] Use base_class for the type casting Use base_class for the type casting --- app/models/ems_refresh/save_inventory_helper.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/models/ems_refresh/save_inventory_helper.rb b/app/models/ems_refresh/save_inventory_helper.rb index a0122a40591..cd132d363c9 100644 --- a/app/models/ems_refresh/save_inventory_helper.rb +++ b/app/models/ems_refresh/save_inventory_helper.rb @@ -117,7 +117,7 @@ def store_ids_for_new_records(records, hashes, keys) keys = Array(keys) # Lets first index the hashes based on keys, so we can do O(1) lookups record_index = records.index_by { |record| build_index_from_record(keys, record) } - record_class = records.first.class + record_class = records.first.class.base_class hashes.each do |hash| record = record_index[build_index_from_hash(keys, hash, record_class)]