Skip to content

Commit

Permalink
zdtm: inhfd: handle child hanging
Browse files Browse the repository at this point in the history
Let's kill the child when the test is hanging.  Due to PEP 475 the
SIGALRM handler needs to throw an exception to be able to interrupt
wait().  To improve debugging, close the fd in the child after reading
it and detect that in the parent to show whether the child hung
part way.

Signed-off-by: Michał Mirosław <emmir@google.com>
  • Loading branch information
osctobe committed Oct 13, 2023
1 parent 42c1c84 commit 3e580c8
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions test/zdtm.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

def alarm(*args):
    """Report an alarm and raise so that a blocked call is interrupted.

    Accepts and ignores any positional arguments it is invoked with.
    Prints a banner to stdout and then always raises
    InterruptedError("ALARM").

    Raising (rather than returning) matters because of PEP 475: when a
    signal handler returns normally, Python retries the interrupted
    system call, so a caller blocked in a wait would never be woken up.

    NOTE(review): presumably installed as the SIGALRM handler; the
    registration is not visible in this part of the file -- confirm.

    Raises:
        InterruptedError: always.
    """
    print("==== ALARM ====")
    # Must raise: a handler that returns lets the interrupted call be retried.
    raise InterruptedError("ALARM")


def traceit(f, e, a):
Expand Down Expand Up @@ -717,11 +718,9 @@ def start(self):
# regular files, so we loop.
data = b''
while not data:
# In python 2.7, peer_file.read() doesn't call the read
# system call if it's read file to the end once. The
# next seek allows to workaround this problem.
data = os.read(peer_file.fileno(), 16)
data = os.read(peer_file.fileno(), len(msg) + 16)
time.sleep(0.1)
peer_file.close()
except Exception as e:
print("Unable to read a peer file: %s" % e)
sys.exit(1)
Expand Down Expand Up @@ -752,7 +751,16 @@ def stop(self):
my_file.write(msg)
my_file.flush()
i += 1
pid, status = os.waitpid(self.__peer_pid, 0)
signal.alarm(10)
try:
pid, status = os.waitpid(self.__peer_pid, 0)
except InterruptedError:
fds = set(os.listdir("/proc/%s/fd" % self.__peer_pid))
self.kill()
pid, status = os.waitpid(self.__peer_pid, 0)
fds = self.__fds.difference(fds)
if fds:
print("before SIGKILL, child managed to close fds: " + str(fds))
with open(self.__name + ".out") as output:
print(output.read())
self.__peer_pid = 0
Expand Down

0 comments on commit 3e580c8

Please sign in to comment.