From 974f9cc485a33d7e2dfe5b06832639691cbdf253 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=C2=A0Miros=C5=82aw?= Date: Thu, 24 Aug 2023 17:54:02 +0200 Subject: [PATCH] zdtm: inhfd: handle child hanging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's kill the child when the test is hanging. Due to PEP 475 the SIGALRM handler needs to throw an exception to be able to interrupt wait(). To improve debugging, close the fd in the child after reading it and detect that in the parent to show whether the child hung part way. Signed-off-by: Michał Mirosław --- test/zdtm.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/test/zdtm.py b/test/zdtm.py index 7a7cdfd3b6..3cec4062b1 100755 --- a/test/zdtm.py +++ b/test/zdtm.py @@ -40,6 +40,7 @@ def alarm(*args): print("==== ALARM ====") + raise InterruptedError("ALARM") def traceit(f, e, a): @@ -717,11 +718,9 @@ def start(self): # regular files, so we loop. data = b'' while not data: - # In python 2.7, peer_file.read() doesn't call the read - # system call if it's read file to the end once. The - # next seek allows to workaround this problem. - data = os.read(peer_file.fileno(), 16) + data = os.read(peer_file.fileno(), len(msg) + 16) time.sleep(0.1) + peer_file.close() except Exception as e: print("Unable to read a peer file: %s" % e) sys.exit(1) @@ -752,7 +751,16 @@ def stop(self): my_file.write(msg) my_file.flush() i += 1 - pid, status = os.waitpid(self.__peer_pid, 0) + signal.alarm(10) + try: + pid, status = os.waitpid(self.__peer_pid, 0) + except InterruptedError: + fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid)) + self.kill() + pid, status = os.waitpid(self.__peer_pid, 0) + fds = self.__fds.difference(fds) + if fds: + print("before SIGKILL, child managed to close fds: " + str(fds)) with open(self.__name + ".out") as output: print(output.read()) self.__peer_pid = 0