From 57cb26be89ada5ab1e6f27e88e5ec752864c4ba5 Mon Sep 17 00:00:00 2001 From: Matt Shin Date: Tue, 5 Feb 2019 13:02:53 +0000 Subject: [PATCH] contact file creation: fsync parent directory On a file system such as an NFS mounted file system, we may have a delay before the file becomes visible to other processes running on other hosts. We already have a 1st fsync to ensure that the data of the contact file is written to disk. This change adds a 2nd fsync to ensure that the file metadata of the contact file is written as well - by doing an fsync on its directory. --- lib/cylc/suite_srv_files_mgr.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lib/cylc/suite_srv_files_mgr.py b/lib/cylc/suite_srv_files_mgr.py index 3daba5f5c7a..d08e4941d31 100644 --- a/lib/cylc/suite_srv_files_mgr.py +++ b/lib/cylc/suite_srv_files_mgr.py @@ -198,10 +198,19 @@ def detect_old_contact_file(self, reg, check_host_port=None): def dump_contact_file(self, reg, data): """Create contact file. Data should be a key=value dict.""" + # Note: + # 1st fsync for writing the content of the contact file to disk. + # 2nd fsync for writing the file metadata of the contact file to disk. + # The double fsync logic ensures that if the contact file is written to + # a shared file system e.g. via NFS, it will be immediately visible + # to processes on other hosts after the current process returns. with open(self.get_contact_file(reg), "wb") as handle: for key, value in sorted(data.items()): handle.write("%s=%s\n" % (key, value)) os.fsync(handle.fileno()) + dir_fileno = os.open(self.get_suite_srv_dir(reg), os.O_DIRECTORY) + os.fsync(dir_fileno) + os.close(dir_fileno) def get_contact_file(self, reg): """Return name of contact file."""