From ef455a92838ee0e450d145fd597002632aceed54 Mon Sep 17 00:00:00 2001
From: Erwan Velu
Date: Thu, 9 Mar 2017 14:37:25 +0100
Subject: [PATCH] docker: Adding docker_exec.sh

When debugging containers, it's always a pain to understand why a fault
occurred. This is especially true since the last exec() call causes the
container to stop if the command fails.

This commit introduces a trick to catch errors and prevent the container
from dying.

This first implies adding a 'trap ERR' to catch errors but, in a docker
environment, PID 1 receives the SIGTERM when the 'docker stop' command is
issued. So SIGTERM has to be forwarded to the exec'd process.

The current implementation is split in two parts:
- handling the ERR
- handling exec + SIGTERM

For the ERR, set_trap_err() installs the ERR trap, which executes
trap_error() when triggered. This function just displays information to the
user and then sleeps to keep PID 1 alive forever.

As the container is asleep, it is possible to spawn a new bash in it by
using the 'docker exec' command. A special trick here is that the new bash
will reimport the 'env' context of PID 1 by loading a modified ~/.bashrc.

This file also provides an override function for the built-in exec() call.
It installs the SIGTERM trap, spawns the desired command in the background
and waits for it to die before exiting with its return value.
The spawned process is not PID 1, so the SIGTERM trap forwards the signal to
the proper PID and the script then terminates itself with the appropriate
return value.

The beauty of that is that sourcing docker_exec.sh is enough to get all
those features at once, as this script executes set_trap_err() by default
and overrides the exec() built-in function.

# docker_exec.sh
# (ceph-releases/kraken/ubuntu/16.04/daemon/docker_exec.sh)
#
# Debug helper meant to be *sourced* by a bash container entrypoint.
# Sourcing it:
#   - installs an ERR trap that keeps the shell alive (sleeping) when a
#     command fails, so the container can be inspected with 'docker exec';
#   - overrides the exec built-in with a function that runs the command in
#     the background, forwards SIGTERM to it and exits with its status.
#
# A typical usage looks like:
#   if [ "$MY_DEBUG_CONDITION" = "what_ever_you_want" ]; then
#     source docker_exec.sh
#   fi
#
# Note: I'm not really happy about the NOTRAP variable used to disengage the
# 'trap ERR', but I didn't find a way to remove that trap from within the
# SIGTERM trap in an efficient way :/

# Install the ERR trap so that any failing command ends up in trap_error.
function set_trap_err {
  # Let's propagate traps to all functions
  set -E

  # Let's call trap_error if we catch an ERR
  trap 'trap_error' ERR
}

# Append the exported environment to a bashrc file so that a later
# interactive bash starts in a similar context.
# Arguments: $1 - file to append to (default: /root/.bashrc)
# 'export -p' prints properly quoted declarations that bash can re-read, so
# values containing spaces, quotes or newlines are restored intact (iterating
# over the words of $(env) would split such values into broken lines).
function save_env_for_debug_shell {
  local bashrc="${1:-/root/.bashrc}"
  export -p >> "$bashrc"
}

# When non-empty, trap_error does nothing: used to silence the ERR trap for
# failures that are expected while SIGTERM is being handled.
NOTRAP=

# ERR trap handler: tell the user how to connect, save the environment for the
# debugging shell, then sleep to keep this process (and the container) alive.
function trap_error {
  set +x
  if [ -z "$NOTRAP" ]; then
    echo "An issue occured and you asked me to stay alive."
    echo "You can connect to me with: sudo docker exec -i -t $HOSTNAME /bin/bash"
    echo "The current environment variables will be reloaded by this bash to be in a similar context."
    echo "When debugging is over stop me with: pkill sleep"
    echo "I'll sleep for 365 days waiting for you darling, bye bye"

    # exporting current environment so the next bash will be in the same setup
    save_env_for_debug_shell

    sleep 365d
  else
    # If NOTRAP is defined, we need to return true to avoid triggering an ERR
    true
  fi
}

# PID of the process spawned by exec(); empty until a child exists so that
# _term never signals an unrelated process (such as PID 1).
child_for_exec=
# Set by _term once SIGTERM has been forwarded to the child.
term_forwarded=

# SIGTERM handler: forward the signal to the process spawned by exec().
function _term {
  # Disabling the ERR trap before killing the process
  # That's an expected failure so don't handle it
  # Doing "trap ERR" or "trap - ERR" didn't work :/
  NOTRAP="yes"

  # Nothing has been spawned yet: there is nobody to forward the signal to.
  if [ -z "$child_for_exec" ]; then
    return 0
  fi

  echo "Sending SIGTERM to PID $child_for_exec"
  term_forwarded="yes"

  # Errors are silenced on purpose: the child may already be gone.
  kill -TERM "$child_for_exec" 2>/dev/null
}

# This function overrides the built-in exec() call.
# It starts the process in background to catch ERR but, as per docker
# requirement, forwards the SIGTERM to it.
# Arguments: the command to run, followed by its arguments.
# Exits the current shell with the exit status of the command.
function exec {
  trap _term SIGTERM

  # Quoted so that arguments containing whitespace or glob characters reach
  # the command unchanged.
  "$@" &
  child_for_exec=$!
  echo "exec: PID $child_for_exec: spawning $*"
  wait "$child_for_exec"
  return_code=$?

  # A trapped signal makes 'wait' return immediately with a status > 128,
  # before the child has actually terminated. Once SIGTERM has been
  # forwarded, wait again so the child can finish its shutdown and so that
  # its real exit status is reported.
  if [ -n "$term_forwarded" ] && [ "$return_code" -gt 128 ]; then
    local rewait_code=0
    wait "$child_for_exec" 2>/dev/null || rewait_code=$?
    # 127 means the child had already been reaped: keep the first status.
    if [ "$rewait_code" -ne 127 ]; then
      return_code=$rewait_code
    fi
  fi

  echo "exec: PID $child_for_exec: exit $return_code"
  exit "$return_code"
}

########
# Main #
########
# Let's engage set_trap_err
# and override the default exec() built-in
set_trap_err