From 7e56379d0b1d2065038f87931cbd506e5e90f445 Mon Sep 17 00:00:00 2001 From: Alex Crawford Date: Mon, 14 Jan 2019 12:18:55 -0800 Subject: [PATCH] templates/openshift: grab bootstrap log on failure This uses the Terraform state to discover the IP address of the bootstrap node (ideally, the installer will provide this information in a form which easier to consume in the future). It then connects to the gatewayd instance on that machine and pulls the logs for various services. Hopefully, these logs will be useful when diagnosing installation failures. --- .../cluster-launch-installer-e2e.yaml | 32 +++++++++++++++++-- .../cluster-launch-installer-src.yaml | 32 +++++++++++++++++-- 2 files changed, 58 insertions(+), 6 deletions(-) diff --git a/ci-operator/templates/openshift/installer/cluster-launch-installer-e2e.yaml b/ci-operator/templates/openshift/installer/cluster-launch-installer-e2e.yaml index 3c2458995fda..ada4c6542b06 100644 --- a/ci-operator/templates/openshift/installer/cluster-launch-installer-e2e.yaml +++ b/ci-operator/templates/openshift/installer/cluster-launch-installer-e2e.yaml @@ -373,8 +373,7 @@ objects: exit 1 fi - /bin/openshift-install --dir=/tmp/artifacts/installer create cluster & - wait "$!" + /bin/openshift-install --dir=/tmp/artifacts/installer create cluster # Performs cleanup of all created resources - name: teardown @@ -421,7 +420,34 @@ objects: export PATH=$PATH:/tmp/shared echo "Gathering artifacts ..." - mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics + mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap + + if [ -f /tmp/artifacts/installer/terraform.tfstate ] + then + # we don't have jq, so the python equivalent of + # jq '.modules[].resources."aws_instance.bootstrap".primary.attributes."public_ip" | select(.)' + bootstrap_ip=$(python -c \ + 'import sys, json; d=reduce(lambda x,y: dict(x.items() + y.items()), map(lambda x: x["resources"], json.load(sys.stdin)["modules"])); k="aws_instance.bootstrap"; print d[k]["primary"]["attributes"]["public_ip"] if k in d else ""' \ + < /tmp/artifacts/installer/terraform.tfstate + ) + + if [ -n "${bootstrap_ip}" ] + then + for service in bootkube openshift kubelet crio + do + queue "/tmp/artifacts/bootstrap/${service}.service" curl \ + --insecure \ + --silent \ + --connect-timeout 5 \ + --retry 3 \ + --cert /tmp/artifacts/installer/tls/journal-gatewayd.crt \ + --key /tmp/artifacts/installer/tls/journal-gatewayd.key \ + --url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service" + done + fi + else + echo "No terraform statefile found. Skipping collection of bootstrap logs." + fi oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers diff --git a/ci-operator/templates/openshift/installer/cluster-launch-installer-src.yaml b/ci-operator/templates/openshift/installer/cluster-launch-installer-src.yaml index d9d2f38c7163..3f969363e718 100644 --- a/ci-operator/templates/openshift/installer/cluster-launch-installer-src.yaml +++ b/ci-operator/templates/openshift/installer/cluster-launch-installer-src.yaml @@ -338,8 +338,7 @@ objects: exit 1 fi - /bin/openshift-install --dir=/tmp/artifacts/installer create cluster & - wait "$!" + /bin/openshift-install --dir=/tmp/artifacts/installer create cluster # Performs cleanup of all created resources - name: teardown @@ -386,7 +385,34 @@ objects: export PATH=$PATH:/tmp/shared echo "Gathering artifacts ..." - mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics + mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap + + if [ -f /tmp/artifacts/installer/terraform.tfstate ] + then + # we don't have jq, so the python equivalent of + # jq '.modules[].resources."aws_instance.bootstrap".primary.attributes."public_ip" | select(.)' + bootstrap_ip=$(python -c \ + 'import sys, json; d=reduce(lambda x,y: dict(x.items() + y.items()), map(lambda x: x["resources"], json.load(sys.stdin)["modules"])); k="aws_instance.bootstrap"; print d[k]["primary"]["attributes"]["public_ip"] if k in d else ""' \ + < /tmp/artifacts/installer/terraform.tfstate + ) + + if [ -n "${bootstrap_ip}" ] + then + for service in bootkube openshift kubelet crio + do + queue "/tmp/artifacts/bootstrap/${service}.service" curl \ + --insecure \ + --silent \ + --connect-timeout 5 \ + --retry 3 \ + --cert /tmp/artifacts/installer/tls/journal-gatewayd.crt \ + --key /tmp/artifacts/installer/tls/journal-gatewayd.key \ + --url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service" + done + fi + else + echo "No terraform statefile found. Skipping collection of bootstrap logs." + fi oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers