Skip to content

Commit

Permalink
templates/openshift: grab bootstrap log on failure
Browse files Browse the repository at this point in the history
This uses the Terraform state to discover the IP address of the
bootstrap node (ideally, the installer will provide this information in
a form which is easier to consume in the future). It then connects to the
gatewayd instance on that machine and pulls the logs for various
services. Hopefully, these logs will be useful when diagnosing
installation failures.
  • Loading branch information
crawford committed Jan 19, 2019
1 parent bfb2475 commit 7e56379
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -373,8 +373,7 @@ objects:
exit 1
fi
/bin/openshift-install --dir=/tmp/artifacts/installer create cluster &
wait "$!"
/bin/openshift-install --dir=/tmp/artifacts/installer create cluster
# Performs cleanup of all created resources
- name: teardown
Expand Down Expand Up @@ -421,7 +420,34 @@ objects:
export PATH=$PATH:/tmp/shared
echo "Gathering artifacts ..."
mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics
mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap
# Gather service logs from the bootstrap node, using the Terraform state left
# in the installer directory to discover the node's public IP address.
if [ -f /tmp/artifacts/installer/terraform.tfstate ]
then
  # we don't have jq, so the python equivalent of
  # jq '.modules[].resources."aws_instance.bootstrap".primary.attributes."public_ip" | select(.)'
  # The snippet is written to run under both Python 2 and Python 3: it avoids
  # the builtin reduce(), list-concatenation of dict.items() and the print
  # statement, all of which are Python-2-only. Resources from every module are
  # merged into one dict (later modules win on duplicate names); an empty
  # string is printed when there is no bootstrap instance in the state.
  bootstrap_ip=$(python -c \
    'import sys, json; d={}; [d.update(m["resources"]) for m in json.load(sys.stdin)["modules"]]; k="aws_instance.bootstrap"; print(d[k]["primary"]["attributes"]["public_ip"] if k in d else "")' \
    < /tmp/artifacts/installer/terraform.tfstate
  )
  # An empty value means no bootstrap instance was found (or the lookup
  # failed); in that case there is nothing to fetch.
  if [ -n "${bootstrap_ip}" ]
  then
    for service in bootkube openshift kubelet crio
    do
      # `queue` is defined elsewhere in this script; presumably it runs the
      # given command and stores its output at the path in the first
      # argument -- confirm against its definition.
      # The request goes to port 19531 on the bootstrap node and asks for the
      # journal entries of a single systemd unit, authenticating with the
      # journal-gatewayd client certificate from the installer's tls directory.
      queue "/tmp/artifacts/bootstrap/${service}.service" curl \
        --insecure \
        --silent \
        --connect-timeout 5 \
        --retry 3 \
        --cert /tmp/artifacts/installer/tls/journal-gatewayd.crt \
        --key /tmp/artifacts/installer/tls/journal-gatewayd.key \
        --url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service"
    done
  fi
else
  echo "No terraform statefile found. Skipping collection of bootstrap logs."
fi
# Write the name of every node, one per line, to /tmp/nodes.
oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
# Write one line per container and init container of every pod, in the form
# "-n <namespace> <pod> -c <container>", to /tmp/containers.
oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -338,8 +338,7 @@ objects:
exit 1
fi
/bin/openshift-install --dir=/tmp/artifacts/installer create cluster &
wait "$!"
/bin/openshift-install --dir=/tmp/artifacts/installer create cluster
# Performs cleanup of all created resources
- name: teardown
Expand Down Expand Up @@ -386,7 +385,34 @@ objects:
export PATH=$PATH:/tmp/shared
echo "Gathering artifacts ..."
mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics
mkdir -p /tmp/artifacts/pods /tmp/artifacts/nodes /tmp/artifacts/metrics /tmp/artifacts/bootstrap
# Gather service logs from the bootstrap node, using the Terraform state left
# in the installer directory to discover the node's public IP address.
if [ -f /tmp/artifacts/installer/terraform.tfstate ]
then
  # we don't have jq, so the python equivalent of
  # jq '.modules[].resources."aws_instance.bootstrap".primary.attributes."public_ip" | select(.)'
  # The snippet is written to run under both Python 2 and Python 3: it avoids
  # the builtin reduce(), list-concatenation of dict.items() and the print
  # statement, all of which are Python-2-only. Resources from every module are
  # merged into one dict (later modules win on duplicate names); an empty
  # string is printed when there is no bootstrap instance in the state.
  bootstrap_ip=$(python -c \
    'import sys, json; d={}; [d.update(m["resources"]) for m in json.load(sys.stdin)["modules"]]; k="aws_instance.bootstrap"; print(d[k]["primary"]["attributes"]["public_ip"] if k in d else "")' \
    < /tmp/artifacts/installer/terraform.tfstate
  )
  # An empty value means no bootstrap instance was found (or the lookup
  # failed); in that case there is nothing to fetch.
  if [ -n "${bootstrap_ip}" ]
  then
    for service in bootkube openshift kubelet crio
    do
      # `queue` is defined elsewhere in this script; presumably it runs the
      # given command and stores its output at the path in the first
      # argument -- confirm against its definition.
      # The request goes to port 19531 on the bootstrap node and asks for the
      # journal entries of a single systemd unit, authenticating with the
      # journal-gatewayd client certificate from the installer's tls directory.
      queue "/tmp/artifacts/bootstrap/${service}.service" curl \
        --insecure \
        --silent \
        --connect-timeout 5 \
        --retry 3 \
        --cert /tmp/artifacts/installer/tls/journal-gatewayd.crt \
        --key /tmp/artifacts/installer/tls/journal-gatewayd.key \
        --url "https://${bootstrap_ip}:19531/entries?_SYSTEMD_UNIT=${service}.service"
    done
  fi
else
  echo "No terraform statefile found. Skipping collection of bootstrap logs."
fi
# Write the name of every node, one per line, to /tmp/nodes.
oc --request-timeout=5s get nodes -o jsonpath --template '{range .items[*]}{.metadata.name}{"\n"}{end}' > /tmp/nodes
# Write one line per container and init container of every pod, in the form
# "-n <namespace> <pod> -c <container>", to /tmp/containers.
oc --request-timeout=5s get pods --all-namespaces --template '{{ range .items }}{{ $name := .metadata.name }}{{ $ns := .metadata.namespace }}{{ range .spec.containers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ range .spec.initContainers }}-n {{ $ns }} {{ $name }} -c {{ .name }}{{ "\n" }}{{ end }}{{ end }}' > /tmp/containers
Expand Down

0 comments on commit 7e56379

Please sign in to comment.