Skip to content

Commit

Permalink
Merge pull request solidusio#3982 from nebulab/kennyadsl/uniform-update-task-output
Browse files Browse the repository at this point in the history

Improve address name migration task output
  • Loading branch information
kennyadsl authored Mar 10, 2021
2 parents 558fa4b + a4d278c commit 5368e8a
Show file tree
Hide file tree
Showing 2 changed files with 120 additions and 108 deletions.
214 changes: 109 additions & 105 deletions core/lib/tasks/migrations/migrate_address_names.rake
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,17 @@ namespace :solidus do
concatenation in order to retain historical data when upgrading to new
address name format'
task up: :environment do
puts "Combining addresses' firstname and lastname into name ... "
class Spree::AddressForMigration < ApplicationRecord
self.table_name = 'spree_addresses'
end

count = Spree::AddressForMigration.unscoped.where(name: [nil, '']).count
records = Spree::AddressForMigration.unscoped.where(name: [nil, ''])
count = records.count
connection = ActiveRecord::Base.connection
adapter_name = connection.adapter_name.downcase
shell = Thor::Shell::Basic.new
puts "Your DB contains #{count} addresses that may be affected by this task."
puts " Your DB contains #{count} addresses that may be affected by this task."
# `trim` is not needed for pg or mysql when using `concat_ws`:
# select concat_ws('joinstring', 'foo', null);
# concat_ws
Expand All @@ -37,115 +39,117 @@ namespace :solidus do
when /postgres/, /mysql2/
"name = CONCAT_WS(' ', firstname, lastname)"
else
abort "No migration path available for adapter #{adapter_name}. Please write your own."
abort " No migration path available for adapter #{adapter_name}. Please write your own."
end
end
answer = shell.ask('do you want to proceed?', limited_to: ['Y', 'N'], case_insensitive: true)
if answer == 'Y'
# The batch size should be limited to avoid locking the table records for too long. These are
# the numbers I got with 1_000_000 records in `spree_addresses`, all with different name and
# surname, with postgresql:
#
# Updating 1000000 records in one shot
# batch took 178 seconds
#
# Updating 1000000 addresses in batches of 200000
# batch took 36 seconds
# batch took 31 seconds
# batch took 31 seconds
# batch took 31 seconds
# batch took 30 seconds
#
# Updating 1000000 addresses in batches of 150000
# batch took 29 seconds
# batch took 27 seconds
# batch took 27 seconds
# batch took 27 seconds
# batch took 26 seconds
# batch took 26 seconds
# batch took 19 seconds
#
# Updating 1000000 addresses in batches of 100000
# batch took 17 seconds
# batch took 15 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
#
# This is with mysql:
# Updating 1000000 records in one shot
# batch updated in 153 seconds
#
# Updating 1000000 records in batches of 200000, this may take a while...
# batch took 41 seconds
# batch took 37 seconds
# batch took 35 seconds
# batch took 28 seconds
# batch took 27 seconds
#
# Updating 1000000 records in batches of 150000, this may take a while...
# batch took 30 seconds
# batch took 29 seconds
# batch took 18 seconds
# batch took 18 seconds
# batch took 17 seconds
# batch took 29 seconds
# batch took 12 seconds
#
# Updating 1000000 records in batches of 100000, this may take a while...
# batch took 10 seconds
# batch took 11 seconds
# batch took 12 seconds
# batch took 13 seconds
# batch took 12 seconds
# batch took 12 seconds
# batch took 14 seconds
# batch took 19 seconds
# batch took 20 seconds
# batch took 21 seconds
#
# Please note that the migration will be much faster when there's no index
# on the `name` column. For example, with mysql each batch takes exactly
# the same time:
#
# Updating 1000000 records in batches of 200000, this may take a while...
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 16 seconds
# batch took 17 seconds
#
# So, if special need arise, one can drop the index added with migration
# 20210122110141_add_name_to_spree_addresses.rb and add the index later,
# in non blocking ways. For postgresql:
# add_index(:spree_addresses, :name, algorithm: :concurrently)
#
# For mysql 5.6:
# sql = "ALTER TABLE spree_addresses ADD INDEX index_spree_addresses_on_name (name), ALGORITHM=INPLACE, LOCK=NONE;"
# ActiveRecord::Base.connection.execute sql
#
puts 'Data migration will happen in batches. The default value is 100_000, which should take less than 20 seconds on mysql or postgresql.'
size = shell.ask('Please enter a different batch size, or press return to confirm the default')
size = (size.presence || 100_000).to_i

abort "Invalid batch size number #{size}, please run the task again." unless size.positive?
# The batch size should be limited to avoid locking the table records for too long. These are
# the numbers I got with 1_000_000 records in `spree_addresses`, all with different name and
# surname, with postgresql:
#
# Updating 1000000 records in one shot
# batch took 178 seconds
#
# Updating 1000000 addresses in batches of 200000
# batch took 36 seconds
# batch took 31 seconds
# batch took 31 seconds
# batch took 31 seconds
# batch took 30 seconds
#
# Updating 1000000 addresses in batches of 150000
# batch took 29 seconds
# batch took 27 seconds
# batch took 27 seconds
# batch took 27 seconds
# batch took 26 seconds
# batch took 26 seconds
# batch took 19 seconds
#
# Updating 1000000 addresses in batches of 100000
# batch took 17 seconds
# batch took 15 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
#
# This is with mysql:
# Updating 1000000 records in one shot
# batch updated in 153 seconds
#
# Updating 1000000 records in batches of 200000, this may take a while...
# batch took 41 seconds
# batch took 37 seconds
# batch took 35 seconds
# batch took 28 seconds
# batch took 27 seconds
#
# Updating 1000000 records in batches of 150000, this may take a while...
# batch took 30 seconds
# batch took 29 seconds
# batch took 18 seconds
# batch took 18 seconds
# batch took 17 seconds
# batch took 29 seconds
# batch took 12 seconds
#
# Updating 1000000 records in batches of 100000, this may take a while...
# batch took 10 seconds
# batch took 11 seconds
# batch took 12 seconds
# batch took 13 seconds
# batch took 12 seconds
# batch took 12 seconds
# batch took 14 seconds
# batch took 19 seconds
# batch took 20 seconds
# batch took 21 seconds
#
# Please note that the migration will be much faster when there's no index
# on the `name` column. For example, with mysql each batch takes exactly
# the same time:
#
# Updating 1000000 records in batches of 200000, this may take a while...
# batch took 17 seconds
# batch took 17 seconds
# batch took 17 seconds
# batch took 16 seconds
# batch took 17 seconds
#
# So, if special need arise, one can drop the index added with migration
# 20210122110141_add_name_to_spree_addresses.rb and add the index later,
# in non blocking ways. For postgresql:
# add_index(:spree_addresses, :name, algorithm: :concurrently)
#
# For mysql 5.6:
# sql = "ALTER TABLE spree_addresses ADD INDEX index_spree_addresses_on_name (name), ALGORITHM=INPLACE, LOCK=NONE;"
# ActiveRecord::Base.connection.execute sql
#
puts ' Data migration will happen in batches. The default value is 100_000, which should take less than 20 seconds on mysql or postgresql.'
size = shell.ask(' Please enter a different batch size, or press return to confirm the default: ')
size = (size.presence || 100_000).to_i

abort " Invalid batch size number #{size}, please run the task again." unless size.positive?

puts "We're going to migrate #{count} records in batches of #{size}"
answer = shell.ask('do you want to proceed?', limited_to: ['Y', 'N'], case_insensitive: true)
if answer == 'Y'
puts "Updating #{count} records in batches of #{size}, this may take a while..."
batches_total = (count / size).ceil
puts " We're going to migrate #{count} records in #{batches_total} batches of #{size}."

answer = shell.ask(' Do you want to proceed?', limited_to: ['Y', 'N'], case_insensitive: true)
if answer == 'Y'
puts " Updating #{count} records in batches of #{size}, this may take a while..."

Spree::AddressForMigration.unscoped.where(name: [nil, '']).in_batches(of: size).each do |batch|
now = Time.zone.now
batch.update_all(concat_statement)
puts "batch took #{(Time.zone.now - now).to_i} seconds"
end
records.in_batches(of: size).each.with_index(1) do |batch, index|
now = Time.zone.now
batch.update_all(concat_statement)
puts " Batch #{index}/#{batches_total} done in #{(Time.zone.now - now).to_i} seconds."
end
else
puts " Database not migrated. Please, make sure to fill address's name field on your own."
end
end
end
Expand Down
14 changes: 11 additions & 3 deletions core/spec/lib/tasks/migrations/migrate_address_names_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,16 @@
Spree::Address.ignored_columns = []
Spree::Address.reset_column_information

original_stderr = $stderr
original_stdout = $stdout
$stderr = File.open(File::NULL, "w")
$stdout = File.open(File::NULL, "w")

example.run
ensure
$stderr = original_stderr
$stdout = original_stdout

Spree::Address.ignored_columns = ignored_columns
Spree::Address.reset_column_information
end
Expand All @@ -26,7 +34,7 @@
context "when there are no records to migrate" do
it "simply exits" do
expect { task.invoke }.to output(
"Your DB contains 0 addresses that may be affected by this task.\n"
"Combining addresses' firstname and lastname into name ... \n Your DB contains 0 addresses that may be affected by this task.\n"
).to_stdout
end
end
Expand All @@ -52,13 +60,13 @@
context "when the DB adapter is supported" do
before do
allow_any_instance_of(Thor::Shell::Basic).to receive(:ask).with(
'do you want to proceed?',
' Do you want to proceed?',
limited_to: ['Y', 'N'],
case_insensitive: true
).and_return('Y')

allow_any_instance_of(Thor::Shell::Basic).to receive(:ask).with(
'Please enter a different batch size, or press return to confirm the default'
' Please enter a different batch size, or press return to confirm the default: '
).and_return(size)
end

Expand Down

0 comments on commit 5368e8a

Please sign in to comment.