From 7701ea2dfc0a5a7d47a5d9c8a575db16b43c39a2 Mon Sep 17 00:00:00 2001 From: Luis Sanchez Date: Wed, 11 Mar 2020 17:21:37 -0400 Subject: [PATCH 1/4] gather_audit_logs: add logging and oauth-apiserver logs - simplified logic - added progress logging - also capture oauth-apiserver logs --- collection-scripts/gather_audit_logs | 61 ++++++++++++---------------- 1 file changed, 25 insertions(+), 36 deletions(-) diff --git a/collection-scripts/gather_audit_logs b/collection-scripts/gather_audit_logs index 3356e9fa..45e9c42f 100755 --- a/collection-scripts/gather_audit_logs +++ b/collection-scripts/gather_audit_logs @@ -1,40 +1,29 @@ #!/bin/bash -BASE_COLLECTION_PATH="/must-gather" -AUDIT_LOG_PATH="${BASE_COLLECTION_PATH}/audit_logs/" - -### Helper Functions -function queue() { - local TARGET="${1}" - shift - local LIVE="$(jobs | wc -l)" - while [[ "${LIVE}" -ge 45 ]]; do - sleep 1 - LIVE="$(jobs | wc -l)" - done - echo "${@}" - if [[ -n "${FILTER}" ]]; then - "${@}" | "${FILTER}" >"${TARGET}" & - else - "${@}" >"${TARGET}" & - fi -} - -# Collect System Audit Logs -function collect_audit_logs { ### Takes an input of PATH - ### (openshift-apiserver or kube-apiserver) - - echo "WARNING: Collecting one or more audit logs on ALL masters in your cluster. This could take a large amount of time." >&2 - mkdir -p ${AUDIT_LOG_PATH}/${1} - /usr/bin/oc adm node-logs --role=master --path=${1}/ | grep -v ".terminating" > ${AUDIT_LOG_PATH}/${1}.audit_logs_listing - while IFS=$'\n' read -r line; do - IFS=' ' read -ra log <<< "${line}" - FILTER=gzip queue ${AUDIT_LOG_PATH}/${1}/"${log[0]}"-"${log[1]}".gz /usr/bin/oc adm node-logs "${log[0]}" --path=${1}/"${log[1]}" - done < ${AUDIT_LOG_PATH}/${1}.audit_logs_listing - echo "INFO: Audit logs for $1 collected." -} - -collect_audit_logs openshift-apiserver -collect_audit_logs kube-apiserver +# Downloads the audit.log (and its rotated copies) from +# /var/log/{kube-apiserver,openshift-apiserver,oauth-apiserver} on each +# master node. 
+BASE_COLLECTION_PATH="${BASE_COLLECTION_PATH:-/must-gather}" +echo "WARNING: Collecting one or more audit logs on ALL masters in your cluster. This could take a large amount of time." >&2 +# the command executed by xargs below expects four parameters: +# $1 - node path under /var/log to download +# $2 - local output path +# $3 - node name +# $4 - log file name +paths=(openshift-apiserver kube-apiserver oauth-apiserver) +for path in "${paths[@]}" ; do + output_dir="${BASE_COLLECTION_PATH}/audit_logs/$path" + mkdir -p "$output_dir" + oc adm node-logs --role=master --path="$path" | \ + tee "${BASE_COLLECTION_PATH}/audit_logs/$path.audit_logs_listing" | \ + grep -v ".terminating" | \ + sed "s|^|$path $output_dir |" +done | \ +xargs --max-args=4 --max-procs=45 bash -c \ + 'echo "INFO: Started downloading $1/$4 from $3"; + oc adm node-logs $3 --path=$1/$4 | gzip > $2/$3-$4.gz; + echo "INFO: Finished downloading $1/$4 from $3"' \ + bash +echo "INFO: Audit logs collected." # force disk flush to ensure that all data gathered is accessible in the copy container sync From 9ba1a64a4c7f3771684260948e378f8bbbed8b7a Mon Sep 17 00:00:00 2001 From: Luis Sanchez Date: Fri, 31 Jul 2020 14:53:02 -0400 Subject: [PATCH 2/4] gather_service_logs: limit service logs to one week --- collection-scripts/gather_service_logs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/collection-scripts/gather_service_logs b/collection-scripts/gather_service_logs index a369f963..2c71d092 100755 --- a/collection-scripts/gather_service_logs +++ b/collection-scripts/gather_service_logs @@ -16,14 +16,14 @@ function collect_serivce_logs { ## Takes a node role input (master or worker) PIDS=() DIR_PATH=${SERVICE_LOG_PATH}/${1}s echo "WARNING: Collecting one or more service logs on ALL linux $1 nodes in your cluster. This could take a large amount of time." 
>&2 - mkdir -p ${DIR_PATH} - for service in ${NODE_SERVICES[@]}; do + mkdir -p "${DIR_PATH}" + for service in "${NODE_SERVICES[@]}"; do echo "INFO: Collecting host service logs for $service" - /usr/bin/oc adm node-logs --role=$1 -l kubernetes.io/os=linux -u $service > ${DIR_PATH}/${service}_service.log & + /usr/bin/oc adm node-logs --role=$1 -l kubernetes.io/os=linux -u "$service" --since "${SINCE_TIMEFRAME:--7d}"> "${DIR_PATH}/${service}_service.log" & PIDS+=($!) done echo "INFO: Waiting for worker host service log collection to complete ..." - wait ${PIDS[@]} + wait "${PIDS[@]}" echo "INFO: Worker host service log collection to complete." } From 7b3fff50ac06fa3dd288aaf55679d46a53db8a84 Mon Sep 17 00:00:00 2001 From: Luis Sanchez Date: Sat, 1 Aug 2020 11:34:53 -0400 Subject: [PATCH 3/4] gather: stop gathering audit logs by default --- collection-scripts/gather | 3 --- 1 file changed, 3 deletions(-) diff --git a/collection-scripts/gather b/collection-scripts/gather index d4879607..6312b1bf 100755 --- a/collection-scripts/gather +++ b/collection-scripts/gather @@ -41,9 +41,6 @@ done # Gather etcd information /usr/bin/gather_etcd -# Collect System Audit Logs -/usr/bin/gather_audit_logs - # Gather Service Logs (using a suplamental Script); Scoped to Masters. 
/usr/bin/gather_service_logs master From d3bd4097a9d164d2ca5a62ac4d1985f74e6e9151 Mon Sep 17 00:00:00 2001 From: Luis Sanchez Date: Sun, 6 Sep 2020 00:47:03 -0400 Subject: [PATCH 4/4] gather_service_logs: fix collection & add mco services --- collection-scripts/gather_service_logs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/collection-scripts/gather_service_logs b/collection-scripts/gather_service_logs index 2c71d092..c21e3ed6 100755 --- a/collection-scripts/gather_service_logs +++ b/collection-scripts/gather_service_logs @@ -3,10 +3,10 @@ BASE_COLLECTION_PATH="/must-gather" ROLES=${1:-master} ### Defaults to only collecting things from Masters # Service Lists -GENERAL_SERVICES=(kubelet crio) -MASTER_SERVICES+=${2:-${GENERAL_SERVICES[@]}} +GENERAL_SERVICES=(kubelet crio machine-config-daemon-firstboot machine-config-daemon-host) +MASTER_SERVICES+=(${2:-"${GENERAL_SERVICES[@]}"}) MASTER_SERVICES+=() ### Placeholder to extend Master only services -NODE_SERVICES+=${2:-${GENERAL_SERVICES[@]}} +NODE_SERVICES+=(${2:-"${GENERAL_SERVICES[@]}"}) NODE_SERVICES+=() ### Placeholder to extend Node only services # Collect System Service Logs