From 7ce0f5c9e1338fc99eead1702261de077c65e559 Mon Sep 17 00:00:00 2001 From: Steven Hardy Date: Wed, 21 Aug 2019 16:14:52 +0100 Subject: [PATCH] baremetal: Use podman inspect to check ironic service status Some people are hitting issues where the containers appear running in podman ps output, but are in fact unresponsive and podman exec/inspect CLI options fail. This may be a libpod bug (looking for related issues), but as a workaround we can check the inspect status, which should mean we can detect zombie containers and restart the ironic.service which appears to solve the issue. Related: https://github.com/openshift-metal3/dev-scripts/issues/753 --- .../files/usr/local/bin/startironic.sh.template | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/data/data/bootstrap/baremetal/files/usr/local/bin/startironic.sh.template b/data/data/bootstrap/baremetal/files/usr/local/bin/startironic.sh.template index 1ff42b6be46..ed79caa43d6 100755 --- a/data/data/bootstrap/baremetal/files/usr/local/bin/startironic.sh.template +++ b/data/data/bootstrap/baremetal/files/usr/local/bin/startironic.sh.template @@ -114,7 +114,14 @@ sudo podman run -d --net host --privileged --name ironic-api \ # The alternative would be RemainAfterExit=yes but then we lose the ability to restart if something crashes. while true; do for name in ironic-api ironic-conductor ironic-inspector dnsmasq httpd mariadb; do - podman ps | grep -w "$name$" || exit 1 + # Note the --format template is passed as a quoted Go string literal because this file is itself + # rendered by the installer's Go templating, which would otherwise try to evaluate it + state=$(podman inspect ${name} --format {{ "{{.State.Status}}" }}) + if [[ $state != "running" ]]; then + echo "ERROR: Unexpected service status for $name" + podman inspect ${name} + exit 1 + fi done sleep 10 done