Skip to content

Commit

Permalink
zdtm: inhfd: handle child hanging
Browse files Browse the repository at this point in the history
Let's kill the child when the test is hanging.  Due to PEP 475 the
SIGALRM handler needs to throw an exception to be able to interrupt
wait().  To improve debugging, close the fd in the child after reading
it and detect that in the parent to show whether the child hung
part way.

Change-Id: I23ce3de08253bb7182249ae9539e93c36c209c40
Signed-off-by: Michał Mirosław <emmir@google.com>
  • Loading branch information
osctobe committed Nov 14, 2023
1 parent c474816 commit b883c49
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions test/zdtm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

# Alarm callback: print a banner and then raise InterruptedError("ALARM").
# Any positional arguments are accepted and ignored.
# Raising here (rather than just returning) is deliberate: per PEP 475 an
# interrupted system call is retried automatically unless the signal handler
# raises, so the exception is what lets a blocking wait be broken out of.
# NOTE(review): presumably installed as the SIGALRM handler elsewhere in the
# file -- the registration is not visible in this excerpt; confirm.
def alarm(*args):
print("==== ALARM ====")
raise InterruptedError("ALARM")


def traceit(f, e, a):
Expand Down Expand Up @@ -717,11 +718,9 @@ def start(self):
# regular files, so we loop.
data = b''
while not data:
# In python 2.7, peer_file.read() doesn't call the read
# system call if it's read file to the end once. The
# next seek allows to workaround this problem.
data = os.read(peer_file.fileno(), 16)
data = os.read(peer_file.fileno(), len(msg) + 16)
time.sleep(0.1)
peer_file.close()
except Exception as e:
print("Unable to read a peer file: %s" % e)
sys.exit(1)
Expand Down Expand Up @@ -752,7 +751,16 @@ def stop(self):
my_file.write(msg)
my_file.flush()
i += 1
pid, status = os.waitpid(self.__peer_pid, 0)
signal.alarm(10)
try:
pid, status = os.waitpid(self.__peer_pid, 0)
except InterruptedError:
fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid))
self.kill()
pid, status = os.waitpid(self.__peer_pid, 0)
fds = self.__fds.difference(fds)
if fds:
print("before SIGKILL, child managed to close fds: " + str(fds))
with open(self.__name + ".out") as output:
print(output.read())
self.__peer_pid = 0
Expand Down

0 comments on commit b883c49

Please sign in to comment.