Skip to content

Commit

Permalink
Merge pull request #23182 from agrare/mark_workers_for_failed_units_stopped
Browse files Browse the repository at this point in the history

Mark workers associated with failed systemd units as stopped
  • Loading branch information
Fryguy committed Sep 27, 2024
2 parents b6ed619 + 728e223 commit de72e9e
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
9 changes: 7 additions & 2 deletions app/models/miq_server/worker_management/systemd.rb
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,16 @@ def cleanup_failed_workers
end

# Stops the systemd units reported as failed and marks the worker records
# tied to those units as stopped.
#
# Returns early when there are no failed unit names. Otherwise the names are
# handed to systemd_stop_services, and every MiqWorker returned by
# find_current_or_starting whose system_uid is one of those names gets its
# status set to MiqWorker::STATUS_STOPPED.
def cleanup_failed_systemd_services
# NOTE(review): this text looks like a rendered diff without +/- markers; the
# next line appears to be the removed, misspelled call and the line after it
# its replacement -- confirm against the repository.
service_names = failed_miq_service_namees
service_names = failed_miq_service_names
# Nothing has failed, so there is nothing to stop or update.
return if service_names.empty?

_log.info("Disabling failed unit files: [#{service_names.join(", ")}]")
systemd_stop_services(service_names)

_log.info("Stopping worker records for failed units: [#{service_names.join(", ")}]")
# Workers are matched to units by system_uid, which is compared against the
# unit names. Presumably update! is the ActiveRecord variant that raises when
# the record cannot be saved -- TODO confirm.
MiqWorker.find_current_or_starting.where(:system_uid => service_names).each do |w|
w.update!(:status => MiqWorker::STATUS_STOPPED)
end
end

private
Expand Down Expand Up @@ -104,7 +109,7 @@ def failed_miq_services
miq_services.select { |service| service[:active_state] == "failed" }
end

# Returns the :name value of each entry produced by failed_miq_services.
#
# NOTE(review): this text looks like a rendered diff without +/- markers; the
# first def line appears to be the removed, misspelled method name and the
# second its corrected replacement -- confirm against the repository.
def failed_miq_service_namees
def failed_miq_service_names
failed_miq_services.pluck(:name)
end

Expand Down
11 changes: 11 additions & 0 deletions spec/models/miq_server/worker_management/systemd_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
context "with failed services" do
let(:service_name) { "manageiq-generic@68400a7e-1747-4f10-be2a-d0fc91b705ca.service" }
let(:units) { [{:name => service_name, :description => "ManageIQ Generic Worker", :load_state => "loaded", :active_state => "failed", :sub_state => "plugged", :job_id => 0, :job_type => "", :job_object_path => "/"}] }
let!(:worker) { FactoryBot.create(:miq_generic_worker, :miq_server => server, :status => "creating", :system_uid => service_name) }

it "calls DisableUnitFiles with the service name" do
expect(systemd_manager).to receive(:StopUnit).with(service_name, "replace")
Expand All @@ -34,6 +35,16 @@

server.worker_manager.cleanup_failed_systemd_services
end

# Runs the cleanup for the failed unit and checks that the `worker` record
# ends up with status "stopped".
it "marks any active workers as stopped" do
# Calls that systemd_manager is expected to receive for the failed unit.
expect(systemd_manager).to receive(:StopUnit).with(service_name, "replace")
expect(systemd_manager).to receive(:ResetFailedUnit).with(service_name)
expect(systemd_manager).to receive(:DisableUnitFiles).with([service_name], false)

server.worker_manager.cleanup_failed_systemd_services

# Reload so the assertion reads the persisted status, not the in-memory copy.
expect(worker.reload.status).to eq("stopped")
end
end
end

Expand Down

0 comments on commit de72e9e

Please sign in to comment.