From 068c5de4c513ee8f2bde2eda6248383df881d957 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Wed, 25 Mar 2020 12:56:17 -0700 Subject: [PATCH 1/2] ci-operator/step-registry/ipi: Shuffle installer log gathering A few changes here: * Background the 'destroy cluster' call and add a trap, so we can gracefully handle TERM. More on this in 4472ace120 (ci-operator/templates/openshift/installer: Restore backgrounded 'create cluster', 2019-01-23, #2680). * The 'set +e' and related wrapping around the 'wait' follows de3de20d02 (step-registry: add configure and install IPI steps, 2020-01-14, #6708), and ensures we gather logs and other assets in the event of a failed openshift-install invocation. More on this below. * We considered piping the installer's stderr into /dev/null. The same information is going to show up in .openshift_install.log, and .openshift_install.log includes timestamps which are not present in the container's captured standard streams. By using /dev/null, we could DRY up our password redaction, but we really want installer output to end up in the build log [1], so keeping the grep business there (even though that means we end up with largely duplicated assets between the container stderr and .openshift_install.log). * Quote $?. It's never going to contain shell-sensitive characters, but neither is $! and we quote that. * Make the log copy the first thing that happens after the installer exits. This ensures we capture the logs even if the installer fails before creating a kubeconfig or metadata.json. * Shift the log-bundle copy earlier, because it cannot fail, while the SHARED_DIR copy might fail. Although I'm not sure we have a case where we'd generate a log bundle but not generate the kubeconfig and metadata.json. Testing the 'set +e' approach, just to make sure it works as expected: $ echo $BASH_VERSION 5.0.11(1)-release $ cat test.sh #!/bin/bash set -o nounset set -o errexit set -o pipefail trap 'CHILDREN=$(jobs -p); if test -n "${CHILDREN}"; then kill ${CHILDREN} && wait; fi; echo "done cleanup"' TERM "${@}" & set +e wait "$!" ret="$?" set -e echo gather logs exit "$ret" $ ./test.sh sleep 600 & [2] 19397 $ kill 19397 done cleanup gather logs [2]- Exit 143 ./test.sh sleep 20 $ ./test.sh true gather logs $ echo $? 0 $ ./test.sh false gather logs $ echo $? 1 So that all looks good. [1]: https://github.com/openshift/release/pull/7936#discussion_r398690674 --- .../ipi-deprovision-deprovision-commands.sh | 14 ++++++++++++-- .../install/ipi-install-install-commands.sh | 6 +++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-commands.sh b/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-commands.sh index 08707341d325..42e5a74992db 100644 --- a/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-commands.sh +++ b/ci-operator/step-registry/ipi/deprovision/deprovision/ipi-deprovision-deprovision-commands.sh @@ -4,11 +4,21 @@ set -o nounset set -o errexit set -o pipefail +trap 'CHILDREN=$(jobs -p); if test -n "${CHILDREN}"; then kill ${CHILDREN} && wait; fi' TERM + cluster_profile=/var/run/secrets/ci.openshift.io/cluster-profile export AWS_SHARED_CREDENTIALS_FILE=$cluster_profile/.awscred export AZURE_AUTH_LOCATION=$cluster_profile/osServicePrincipal.json echo "Deprovisioning cluster ..." cp -ar "${SHARED_DIR}" /tmp/installer -openshift-install --dir /tmp/installer destroy cluster -cp /tmp/installer/.openshift_install.log "${ARTIFACT_DIR}/" +openshift-install --dir /tmp/installer destroy cluster & + +set +e +wait "$!" +ret="$?" +set -e + +cp /tmp/installer/.openshift_install.log "${ARTIFACT_DIR}" + +exit "$ret" diff --git a/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh b/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh index 91af498fda83..f81112bb0cc3 100755 --- a/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh +++ b/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh @@ -39,13 +39,13 @@ TF_LOG=debug openshift-install --dir="${dir}" create cluster 2>&1 | grep --line- set +e wait "$!" -ret=$? +ret="$?" set -e +sed 's/password: .*/password: REDACTED/' "${dir}/.openshift_install.log" >"${ARTIFACT_DIR}/.openshift_install.log" +cp "${dir}"/log-bundle-*.tar.gz "${ARTIFACT_DIR}/" 2>/dev/null || : cp \ -t "${SHARED_DIR}" \ "${dir}/auth/kubeconfig" \ "${dir}/metadata.json" -cp "${dir}/.openshift_install.log" "${ARTIFACT_DIR}/" -cp "${dir}"/log-bundle-*.tar.gz "${ARTIFACT_DIR}/" 2>/dev/null || : exit "$ret" From b3f7fba09e430bafd05624a1f655e949d0a2d422 Mon Sep 17 00:00:00 2001 From: "W. Trevor King" Date: Mon, 30 Mar 2020 15:21:22 -0700 Subject: [PATCH 2/2] ci-operator/step-registry/ipi/install/install: Drop '|| :' Steve found this opaque [1], so shift the cp into the 'set +e' block instead. The '|| :' syntax was originally from 20157f66e1 (Capture log bundle in install step, 2020-03-24, #7899). [1]: https://github.com/openshift/release/pull/7936/files#r400512879 --- .../ipi/install/install/ipi-install-install-commands.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh b/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh index f81112bb0cc3..888414ff27b3 100755 --- a/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh +++ b/ci-operator/step-registry/ipi/install/install/ipi-install-install-commands.sh @@ -40,10 +40,10 @@ TF_LOG=debug openshift-install --dir="${dir}" create cluster 2>&1 | grep --line- set +e wait "$!" ret="$?" +cp "${dir}"/log-bundle-*.tar.gz "${ARTIFACT_DIR}/" 2>/dev/null set -e sed 's/password: .*/password: REDACTED/' "${dir}/.openshift_install.log" >"${ARTIFACT_DIR}/.openshift_install.log" -cp "${dir}"/log-bundle-*.tar.gz "${ARTIFACT_DIR}/" 2>/dev/null || : cp \ -t "${SHARED_DIR}" \ "${dir}/auth/kubeconfig" \