Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Wrap kubectl calls to properly output an error #74

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 14 additions & 14 deletions hooks/command
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ function cleanup {

if [[ "$jobs_cleanup_via_plugin" == "true" ]]; then
# Delete all jobs older than a day.
kubectl delete job "$(kubectl get job -l buildkite/plugin=k8s | awk 'match($4,/[0-9]+d/) {print $1}')" 2>/dev/null || true
"$basedir/lib/kubectl_wrapper.sh" delete job "$($basedir/lib/kubectl_wrapper.sh get job -l buildkite/plugin=k8s | awk 'match($4,/[0-9]+d/) {print $1}')" 2>/dev/null || true
fi
}

Expand All @@ -75,7 +75,7 @@ function tail_logs {
while true;
do
set +e
log_snapshot="$(timeout "$log_attempt_timeout_sec" kubectl logs --limit-bytes "1024" "pod/$pod_name" --container "$container_name" 2>/dev/null)"
log_snapshot="$(timeout "$log_attempt_timeout_sec" "$basedir/lib/kubectl_wrapper.sh" logs --limit-bytes "1024" "pod/$pod_name" --container "$container_name" 2>/dev/null)"
set -e
if [[ -n "$log_snapshot" ]]; then
break
Expand All @@ -87,7 +87,7 @@ function tail_logs {
# 1) It can fail due to pod not being initialized yet: "Error from server (BadRequest): container "step" in pod "somepod" is waiting to start: PodInitializing"
# 2) It can fail mid-streaming, in this case we unfortunately will display logs multiple times (partially).
# 3) It can hang not providing any result, that's why we check not only exit code but also contents in the loop above.
while ! kubectl logs --follow "pod/$pod_name" --container "$container_name" 2>/dev/null;
while ! "$basedir/lib/kubectl_wrapper.sh" logs --follow "pod/$pod_name" --container "$container_name" 2>/dev/null;
do
sleep "$log_loop_retry_interval_sec"
done
Expand All @@ -100,7 +100,7 @@ if [[ -n $external_secrets || -n $external_secrets_list || -n $secret_store || -
echo "--- :kubernetes: Creating Secrets"

# Check if the ExternalSecrets CRD exists
if [[ -z $(kubectl get crd externalsecrets.external-secrets.io 2>/dev/null) ]]; then
if [[ -z $("$basedir/lib/kubectl_wrapper.sh" get crd externalsecrets.external-secrets.io 2>/dev/null) ]]; then
echo "External Secrets CRD not found. Is the External Secrets Operator installed in the cluster?" >&2
exit 42
fi
Expand Down Expand Up @@ -141,14 +141,14 @@ if [[ -n $external_secrets || -n $external_secrets_list || -n $secret_store || -
readonly secret_apply_end_time=$((SECONDS+30))

set +e
echo "$external_secret_spec" | kubectl apply -f -
echo "$external_secret_spec" | "$basedir/lib/kubectl_wrapper.sh" apply -f -
set -e

echo "Waiting for Secret to become available"
while [ $SECONDS -lt $secret_apply_end_time ]; do
# Check if the secret exists
secret_exists=$(kubectl get secret "$job_name" > /dev/null 2>&1; echo $?)
external_secret_status=$(kubectl get ExternalSecret "$job_name" -o json | jq -r '.status.conditions[0].status')
secret_exists=$("$basedir/lib/kubectl_wrapper.sh" get secret "$job_name" > /dev/null 2>&1; echo $?)
external_secret_status=$("$basedir/lib/kubectl_wrapper.sh" get ExternalSecret "$job_name" -o json | jq -r '.status.conditions[0].status')
if [ "$secret_exists" != 1 ] && [ "$external_secret_status" == "True" ]; then
echo "Secret Created"
break
Expand All @@ -157,7 +157,7 @@ if [[ -n $external_secrets || -n $external_secrets_list || -n $secret_store || -

if [ "$secret_exists" == 1 ] || [ "$external_secret_status" != "True" ]; then
echo "Secret creation failed, check ExternalSecrets logs in your cluster"
status=$(kubectl get ExternalSecret "$job_name" -o json | jq -r '.status.conditions[0].message')
status=$("$basedir/lib/kubectl_wrapper.sh" get ExternalSecret "$job_name" -o json | jq -r '.status.conditions[0].message')
echo "External Secret Status: $status"
return 42
fi
Expand All @@ -177,7 +177,7 @@ job_spec="$(jsonnet \
"${basedir}/lib/job.jsonnet")"

if [[ "$use_agent_node_affinity" == "true" ]]; then
agent_spec="$(kubectl get pod "$(cat /etc/hostname)" -o json)"
agent_spec="$("$basedir/lib/kubectl_wrapper.sh" get pod "$(cat /etc/hostname)" -o json)"
for field in affinity tolerations nodeSelector; do
agent_value=$(echo "$agent_spec" | jq ".spec.$field")
job_spec="$(echo "$job_spec" | jq ".spec.template.spec.$field=$agent_value")"
Expand All @@ -194,7 +194,7 @@ job_apply_exit_code=""
while [[ "$((SECONDS - job_apply_start_time))" -lt "$job_apply_timeout_sec" ]]
do
set +e
echo "$job_spec" | kubectl apply -f -
echo "$job_spec" | "$basedir/lib/kubectl_wrapper.sh" apply -f -
job_apply_exit_code="$?"
set -e

Expand All @@ -220,13 +220,13 @@ while true
do
exit_status=0
set +e
pod_name=$(kubectl get pod -l "job-name=$job_name" --output=jsonpath="{.items[*].metadata.name}" 2>&1)
pod_name=$("$basedir/lib/kubectl_wrapper.sh" get pod -l "job-name=$job_name" --output=jsonpath="{.items[*].metadata.name}" 2>&1)
exit_status=$?
set -e

if [[ $exit_status -ne 0 && "$pod_name" == *NotFound* ]]; then
set +e
jobstatus=$(kubectl get job "$job_name" -o 'jsonpath={.status.conditions[].type}')
jobstatus=$("$basedir/lib/kubectl_wrapper.sh" get job "$job_name" -o 'jsonpath={.status.conditions[].type}')
set -e
if [[ "$jobstatus" == "Failed" ]]; then
echo "Warning: exiting because no pod was found and job status was Failed"
Expand Down Expand Up @@ -262,7 +262,7 @@ counter="$timeout"
jobstatus=""
while [[ -z "$jobstatus" ]] ; do
set +e
jobstatus=$(kubectl get job "${job_name}" -o 'jsonpath={.status.conditions[].type}')
jobstatus=$("$basedir/lib/kubectl_wrapper.sh" get job "${job_name}" -o 'jsonpath={.status.conditions[].type}')
set -e

if [[ -n "$jobstatus" ]]; then
Expand Down Expand Up @@ -293,7 +293,7 @@ else
while true
do
set +e
pod_json=$(kubectl get pod "$pod_name" -o json 2>&1)
pod_json=$("$basedir/lib/kubectl_wrapper.sh" get pod "$pod_name" -o json 2>&1)
get_pod_status=$?
if [[ "$get_pod_status" -eq 0 ]]; then
init_container_status="$(echo "$pod_json" | jq ".status.initContainerStatuses[0].state.terminated.exitCode")"
Expand Down
10 changes: 6 additions & 4 deletions hooks/pre-exit
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

set -euo pipefail

basedir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && cd .. && pwd )"

if [[ $OSTYPE == darwin* ]]; then
exit 0
fi
Expand All @@ -28,7 +30,7 @@ external_secrets_list=${BUILDKITE_PLUGIN_K8S_EXTERNAL_SECRETS_0:-}
# If External Secrets were used, clean them up
if [[ -n $external_secrets || -n $external_secrets_list ]]; then
echo "Deleting ExternalSecrets"
kubectl delete ExternalSecret "$job_name"
"$basedir/lib/kubectl_wrapper.sh" delete ExternalSecret "$job_name"
fi

if [[ "$job_cleanup_after_finished_via_plugin" != "true" ]]; then
Expand All @@ -40,11 +42,11 @@ job_cleanup_exit_code=""
while [[ "$((SECONDS - job_cleanup_start_time))" -lt "$job_cleanup_timeout_sec" ]]
do
set +e
pod=$(kubectl get pod --output=name -l "job-name=${job_name}")
pod=$("$basedir/lib/kubectl_wrapper.sh" get pod --output=name -l "job-name=${job_name}")
if [[ -n "${pod}" ]] ; then
kubectl patch --patch '{"spec":{"activeDeadlineSeconds":1}}' "${pod}"
"$basedir/lib/kubectl_wrapper.sh" patch --patch '{"spec":{"activeDeadlineSeconds":1}}' "${pod}"
fi
kubectl patch --patch '{"spec":{"activeDeadlineSeconds":1}}' "job/${job_name}"
"$basedir/lib/kubectl_wrapper.sh" patch --patch '{"spec":{"activeDeadlineSeconds":1}}' "job/${job_name}"
job_cleanup_exit_code="$?"
set -e

Expand Down
21 changes: 21 additions & 0 deletions lib/kubectl_wrapper.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#!/bin/bash
set -euo pipefail

# Wrapper for kubectl invocations that reports failures in a meaningful way,
# including the kubectl command that was run.
# Without it, a bare kubectl error such as
# "error: You must be logged in to the server (Unauthorized)" can appear in the
# middle of your Buildkite build log with no hint that it came from kubectl.
#
# Usage:       kubectl_wrapper.sh <kubectl arguments...>
# Stdout:      whatever kubectl writes to stdout (not redirected here).
# Stderr:      kubectl's stderr is captured in a temp file; it is replayed after
#              an "Error executing ..." header only when kubectl exits non-zero,
#              and discarded when kubectl succeeds.
# Exit status: the exit status of the kubectl invocation.

# Temp file holding kubectl's stderr; removed on any exit path.
stderr_file=$(mktemp)
trap 'rm -f "$stderr_file"' EXIT

# Record the status with `|| rc=$?` so a failing kubectl does not trip `set -e`
# before we get the chance to report it.
kubectl_exit_code=0
kubectl "$@" 2>"$stderr_file" || kubectl_exit_code=$?

if [[ "$kubectl_exit_code" != "0" ]]; then
  # Use "$*" (arguments joined into one string) rather than "$@": embedding
  # "$@" in a larger quoted string splits the message into several words.
  echo "Error executing 'kubectl $*':" >&2
  cat "$stderr_file" >&2
fi

exit "$kubectl_exit_code"