From b9450fa537c912d450917e223ea8f1c114f8ed5a Mon Sep 17 00:00:00 2001
From: Zhijian Li <zhijianli@microsoft.com>
Date: Fri, 14 Jul 2023 18:44:31 +0800
Subject: [PATCH] Add param timeout to ansible library shell_cmds (#8931)

What is the motivation for this PR?
1. Add a `timeout` parameter to the ansible library shell_cmds. The default value is 0, which means no time limit.
2. Set the timeout of some commands to 30s so that they cannot block for too long, and update those commands (single quotes replaced with double quotes) so that they work with the timeout wrapper.

How did you verify/test it?
1. Verified by running test_pretest and test_memory_exhaustion on physical testbeds.
2. Verified by PR test.

Signed-off-by: Zhijian Li <zhijianli@microsoft.com>
---
 ansible/library/shell_cmds.py               | 38 +++++++++++++++++----
 docs/api_wiki/ansible_methods/shell_cmds.md | 10 ++++--
 tests/common/devices/sonic.py               |  8 ++---
 3 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/ansible/library/shell_cmds.py b/ansible/library/shell_cmds.py
index 59f85c6044..bfae69369a 100644
--- a/ansible/library/shell_cmds.py
+++ b/ansible/library/shell_cmds.py
@@ -21,7 +21,10 @@
 #         "admin"
 #       ],
 #       "cmd": "ls /home",
-#       "rc": 0
+#       "cmd_with_timeout": "",
+#       "rc": 0,
+#       "timeout": 0,
+#       "err_msg": ""
 #     },
 #     {
 #       "stderr_lines": [],
@@ -31,7 +34,10 @@
 #         "/home/admin"
 #       ],
 #       "cmd": "pwd",
-#       "rc": 0
+#       "cmd_with_timeout": "",
+#       "rc": 0,
+#       "timeout": 0,
+#       "err_msg": ""
 #     }
 #   ],
 #   "cmds": [
@@ -66,6 +72,7 @@
 options:
     cmds: List of commands. Each command should be a string.
     continue_on_fail: Bool. Specify whether to continue running rest of the commands if any of the command failed.
+    timeout: Integer. Specify time limit (in seconds) for each command. 0 means no limit. Default value is 0.
 '''
 
 EXAMPLES = r'''
@@ -76,19 +83,34 @@
         - ls /home
         - pwd
     continue_on_fail: False
+    timeout: 30
 '''
 
 
-def run_cmd(module, cmd):
+def run_cmd(module, cmd, timeout):
+    cmd_with_timeout = ''
+    err_msg = ''
+
+    if int(timeout) != 0 and "'" in cmd:
+        err_msg = "[WARNING] timeout is not supported for command contains single quote, ran without time limit"
+        timeout = 0
+
+    if int(timeout) == 0:
+        rc, out, err = module.run_command(cmd, use_unsafe_shell=True)
+    else:
+        cmd_with_timeout = "echo '{}' | timeout --preserve-status {} bash".format(cmd, timeout)
+        rc, out, err = module.run_command(cmd_with_timeout, use_unsafe_shell=True)
 
-    rc, out, err = module.run_command(cmd, use_unsafe_shell=True)
     result = dict(
         cmd=cmd,
+        cmd_with_timeout=cmd_with_timeout,
+        err_msg=err_msg,
         rc=rc,
         stdout=out,
         stderr=err,
         stdout_lines=out.splitlines(),
-        stderr_lines=err.splitlines()
+        stderr_lines=err.splitlines(),
+        timeout=timeout
     )
     return result
 
@@ -98,18 +120,20 @@ def main():
     module = AnsibleModule(
         argument_spec=dict(
             cmds=dict(type='list', required=True),
-            continue_on_fail=dict(type='bool', default=True)
+            continue_on_fail=dict(type='bool', default=True),
+            timeout=dict(type='int', default=0)
         )
     )
 
     cmds = module.params['cmds']
     continue_on_fail = module.params['continue_on_fail']
+    timeout = module.params['timeout']
 
     startd = datetime.datetime.now()
 
     results = []
     for cmd in cmds:
-        result = run_cmd(module, cmd)
+        result = run_cmd(module, cmd, timeout)
         results.append(result)
         if result['rc'] != 0 and not continue_on_fail:
             break
diff --git a/docs/api_wiki/ansible_methods/shell_cmds.md b/docs/api_wiki/ansible_methods/shell_cmds.md
index d11373f86e..c002db3fc1 100644
--- a/docs/api_wiki/ansible_methods/shell_cmds.md
+++ b/docs/api_wiki/ansible_methods/shell_cmds.md
@@ -25,12 +25,16 @@ def test_fun(duthosts, rand_one_dut_hostname):
     - Reguired: `False`
     - Type: `Boolean`
     - Default: `True`
+- `timeout` - Specify time limit (in seconds) for each command. 0 means no limit.
+    - Required: `False`
+    - Type: `Integer`
+    - Default: `0`
 
 ## Expected Output
 A dictionary with results from commands run. The dictionary hierarchy is described below, with each indentation describing a sub-dictionary:
 
 - `end` - Datetime for when the commands finished running
-- `cmds` - the list of commands that were run
+- `cmds` - the list of commands supplied by the user.
 - `start` - Datetime for when the commands started running
 - `delta` - difference between `start` and `end`
 - `results` - List of dictionaries, each corresponding to the results for one of the commands run
@@ -38,5 +42,7 @@ A dictionary with results from commands run. The dictionary hierarchy is describ
     - `stderr` - What was printed to stderr (as one string) during execution of command
     - `stdout_lines` - What was printed to stdout (split by line) during execution of command
     - `stdout` - What was printed to stdout (as one string) during execution of command
-    - `cmd` - command that was run
+    - `cmd` - the command as supplied by the user. It is what actually ran if `timeout == 0`.
+    - `cmd_with_timeout` - command wrapped with `timeout`. It's what actually ran if `timeout != 0`.
     - `rc` - return code
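+    - `timeout` - time limit (in seconds) applied to the command. 0 means no limit; it is also reported as 0 when the command contains a single quote, because such commands are run without a time limit.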
diff --git a/tests/common/devices/sonic.py b/tests/common/devices/sonic.py
index 94b348fcad..69b7fd431a 100644
--- a/tests/common/devices/sonic.py
+++ b/tests/common/devices/sonic.py
@@ -566,11 +566,11 @@ def critical_group_process(self):
         # Get critical group and process definitions by running cmds in batch to save overhead
         cmds = []
         for service in self.critical_services:
-            cmd = "docker exec {} bash -c '[ -f /etc/supervisor/critical_processes ]" \
-                  " && cat /etc/supervisor/critical_processes'".format(service)
+            cmd = 'docker exec {} bash -c "[ -f /etc/supervisor/critical_processes ]' \
+                  ' && cat /etc/supervisor/critical_processes"'.format(service)
 
             cmds.append(cmd)
-        results = self.shell_cmds(cmds=cmds, continue_on_fail=True, module_ignore_errors=True)['results']
+        results = self.shell_cmds(cmds=cmds, continue_on_fail=True, module_ignore_errors=True, timeout=30)['results']
 
         # Extract service name of each command result, transform results list to a dict keyed by service name
         service_results = {}
@@ -650,7 +650,7 @@ def all_critical_process_status(self):
         for service in self.critical_services:
             cmd = 'docker exec {} supervisorctl status'.format(service)
             cmds.append(cmd)
-        results = self.shell_cmds(cmds=cmds, continue_on_fail=True, module_ignore_errors=True)['results']
+        results = self.shell_cmds(cmds=cmds, continue_on_fail=True, module_ignore_errors=True, timeout=30)['results']
 
         # Extract service name of each command result, transform results list to a dict keyed by service name
         service_results = {}