From a19904682f6984de21fbf77240ff5fb6eed1b744 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 8 Aug 2018 14:02:51 +0200 Subject: [PATCH 1/4] Introduce a function to lock/unlock file descriptors when appending This function will be used to make write accesses in trace_write() a bit safer. Note: this patch takes a very different approach for cross-platform support than Git is historically taking: the original approach is to first implement everything on Linux, using the functions available on Linux, and then trying to emulate these functions on platforms that do not have those functions such as Windows. This leads to all kinds of undesirable quirks in Git's source code (and performance characteristics) because of the lack of a proper abstraction layer: instead of declaring functions for the functionality we *actually* need, we abuse POSIX functions to say what we need, even if those functions serve much broader purposes (and do not make at all clear what the caller wants of them). For example, when Git wants to determine whether a path refers to a symbolic link, it calls lstat(). That POSIX function has to be emulated on Windows, painstakingly filling all the information lstat() would, only for the caller to throw away everything except that one bit of information, and all of the time figuring out the mtime/atime/ctime and file size and whatnot was spent in vain. If we were to follow that approach, we would extend the fcntl() emulation in compat/mingw.c after this commit, to support the function added in this commit. But fcntl() allows a lot more versatile file region locking that we actually need, so by necessity the fcntl() emulation would be quite complex: To support the `l_whence = SEEK_CUR` (which we would use, if it did not require so much book-keeping due to our writing between locking and unlocking the file), we would have to call `SetFilePointerEx()` (after obtaining the `HANDLE` on which all Win32 API functions work instead of the integer file descriptors used by all POSIX functions). Multiplying the number of Win32 API round-trips. Of course, we could implement an incomplete emulation of fcntl()'s F_WRLCK, but that would be unsatisfying. An alternative approach would be to use the `flock()` function, whose semantics are much closer to existing Win32 API. But `flock()` is not even a POSIX standard, so we would have to implement emulation of `flock()` for *other* platforms. And it would again be the wrong thing to do, as we would once again fail to have an abstraction that clearly says what *exact*functionality we want to use. This commit expressly tries to set a precedent for a better approach: Let's introduce a proper abstraction: a function that says in its name precisely what Git wants it to do (as opposed to *how* it does it on Linux): lock_or_unlock_fd_for_appending(). The next commit will provide a Windows-specific implementation of this function/functionality. Signed-off-by: Johannes Schindelin --- git-compat-util.h | 2 ++ wrapper.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index 9a64998b24b1de..13b83badea6a51 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -1202,6 +1202,8 @@ struct tm *git_gmtime_r(const time_t *, struct tm *); #define getc_unlocked(fh) getc(fh) #endif +extern int lock_or_unlock_fd_for_appending(int fd, int lock_it); + /* * Our code often opens a path to an optional file, to work on its * contents when we can successfully open it. We can ignore a failure diff --git a/wrapper.c b/wrapper.c index e4fa9d84cd0767..5aecbda3402f3c 100644 --- a/wrapper.c +++ b/wrapper.c @@ -690,3 +690,19 @@ int xgethostname(char *buf, size_t len) buf[len - 1] = 0; return ret; } + +#ifndef GIT_WINDOWS_NATIVE +int lock_or_unlock_fd_for_appending(int fd, int lock_it) +{ + struct flock flock; + + flock.l_type = lock_it ? F_WRLCK : F_UNLCK; + + /* (un-)lock the whole file */ + flock.l_whence = SEEK_SET; + flock.l_start = 0; + flock.l_len = 0; + + return fcntl(fd, F_SETLKW, &flock); +} +#endif From fa0c282aab98cd857ea37afd5a00ce8600b77bb1 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 8 Aug 2018 14:09:27 +0200 Subject: [PATCH 2/4] mingw: implement lock_or_unlock_fd_for_appending() For some reason, t/t5552-skipping-fetch-negotiator.sh fails particularly often on Windows due to racy tracing where the `git upload-pack` and the `git fetch` processes compete for the same file. We just introduced a remedy that uses fcntl(), but Windows does not have fcntl(). So let's implement an alternative. Signed-off-by: Johannes Schindelin --- compat/mingw.c | 19 +++++++++++++++++++ compat/mingw.h | 3 +++ 2 files changed, 22 insertions(+) diff --git a/compat/mingw.c b/compat/mingw.c index 6ded1c859f1b5a..6da9ce86174c94 100644 --- a/compat/mingw.c +++ b/compat/mingw.c @@ -514,6 +514,25 @@ int mingw_chmod(const char *filename, int mode) return _wchmod(wfilename, mode); } +int mingw_lock_or_unlock_fd_for_appending(int fd, int lock_it) +{ + HANDLE handle = (HANDLE)_get_osfhandle(fd); + OVERLAPPED overlapped = { 0 }; + DWORD err; + + if (!lock_it && UnlockFileEx(handle, 0, -1, 0, &overlapped)) + return 0; + if (lock_it && + LockFileEx(handle, LOCKFILE_EXCLUSIVE_LOCK, 0, -1, 0, &overlapped)) + return 0; + + err = GetLastError(); + /* LockFileEx() cannot lock pipes */ + errno = err == ERROR_INVALID_FUNCTION ? + EBADF : err_win_to_posix(GetLastError()); + return -1; +} + /* * The unit of FILETIME is 100-nanoseconds since January 1, 1601, UTC. * Returns the 100-nanoseconds ("hekto nanoseconds") since the epoch. diff --git a/compat/mingw.h b/compat/mingw.h index 571019d0bddcea..0f76d89a9df3f6 100644 --- a/compat/mingw.h +++ b/compat/mingw.h @@ -397,6 +397,9 @@ HANDLE winansi_get_osfhandle(int fd); * git specific compatibility */ +int mingw_lock_or_unlock_fd_for_appending(int fd, int lock_it); +#define lock_or_unlock_fd_for_appending mingw_lock_or_unlock_fd_for_appending + #define has_dos_drive_prefix(path) \ (isalpha(*(path)) && (path)[1] == ':' ? 2 : 0) int mingw_skip_dos_drive_prefix(char **path); From 1f5c15c7ed3914b733ccf8a4457fef50d2eff1c5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 8 Aug 2018 14:04:03 +0200 Subject: [PATCH 3/4] trace: lock the trace file to avoid racy trace_write() calls When multiple processes try to write to the same file, it is not guaranteed that everything works as expected: those writes can overlap, and in the worst case even lose messages. This happens in t/t5552-skipping-fetch-negotiator.sh, where we abuse the `GIT_TRACE` facility to write traces of two concurrent processes (`git fetch` and `git upload-pack`) to the same file, and then verify that the trace contains certain expected breadcrumbs. To remedy this, let's lock the file descriptors for exclusive writing, using the function we just introduced in the previous commit. Note: while the POSIX documentation of fcntl() at http://pubs.opengroup.org/onlinepubs/9699919799/functions/fcntl.html suggests that the `errno` is set to `EINVAL` when being asked to lock a file descriptor that cannot be locked, on macOS it results in `EBADF` when trying to lock a redirected `stdout` (which the documentation claims should indicate that the file descriptor is not valid for writing). To cover all our bases, we simply treat both `EINVAL` and `EBADF` as indicators that we cannot lock/unlock this file descriptor. Signed-off-by: Johannes Schindelin --- trace.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/trace.c b/trace.c index fc623e91fdd7ed..16abdb816a7d53 100644 --- a/trace.c +++ b/trace.c @@ -114,11 +114,20 @@ static int prepare_trace_line(const char *file, int line, static void trace_write(struct trace_key *key, const void *buf, unsigned len) { - if (write_in_full(get_trace_fd(key), buf, len) < 0) { + int fd = get_trace_fd(key), locked; + + locked = !lock_or_unlock_fd_for_appending(fd, 1); + if (!locked && errno != EBADF && errno != EINVAL) + warning("unable to lock file descriptor for %s: %s", + key->key, strerror(errno)); + if (write_in_full(fd, buf, len) < 0) { warning("unable to write trace for %s: %s", key->key, strerror(errno)); trace_disable(key); } + if (locked && lock_or_unlock_fd_for_appending(fd, 0) < 0) + warning("failed to remove lock on fd for %s: %s", + key->key, strerror(errno)); } void trace_verbatim(struct trace_key *key, const void *buf, unsigned len) From 38358ac74dcb10a7908f2ee236d544b1fcb6626c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Thu, 9 Aug 2018 17:43:06 +0200 Subject: [PATCH 4/4] trace: verify that locking works Recently, t5552 introduced a pattern where two processes try to write to the same GIT_TRACE file in parallel. This is not safe, as the two processes fighting over who gets to append to the file can cause garbled lines and may even result in data loss on Windows (where buffers are written to before they are flushed). To remedy this, we introduced the lock_or_unlock_fd_for_appending() function. And to make sure that this works, this commit introduces a regression test. Signed-off-by: Johannes Schindelin --- Makefile | 1 + t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/helper/test-trace.c | 130 +++++++++++++++++++++++++++++++++++++++++ t/t0070-fundamental.sh | 6 ++ 5 files changed, 139 insertions(+) create mode 100644 t/helper/test-trace.c diff --git a/Makefile b/Makefile index 617475622b98db..2e3fb5b8d18e30 100644 --- a/Makefile +++ b/Makefile @@ -729,6 +729,7 @@ TEST_BUILTINS_OBJS += test-strcmp-offset.o TEST_BUILTINS_OBJS += test-string-list.o TEST_BUILTINS_OBJS += test-submodule-config.o TEST_BUILTINS_OBJS += test-subprocess.o +TEST_BUILTINS_OBJS += test-trace.o TEST_BUILTINS_OBJS += test-urlmatch-normalization.o TEST_BUILTINS_OBJS += test-wildmatch.o TEST_BUILTINS_OBJS += test-write-cache.o diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index 805a45de9c877d..7adce872bcdf21 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -39,6 +39,7 @@ static struct test_cmd cmds[] = { { "string-list", cmd__string_list }, { "submodule-config", cmd__submodule_config }, { "subprocess", cmd__subprocess }, + { "trace", cmd__trace }, { "urlmatch-normalization", cmd__urlmatch_normalization }, { "wildmatch", cmd__wildmatch }, { "write-cache", cmd__write_cache }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 7116ddfb94398d..c462ac924e723d 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -33,6 +33,7 @@ int cmd__strcmp_offset(int argc, const char **argv); int cmd__string_list(int argc, const char **argv); int cmd__submodule_config(int argc, const char **argv); int cmd__subprocess(int argc, const char **argv); +int cmd__trace(int argc, const char **argv); int cmd__urlmatch_normalization(int argc, const char **argv); int cmd__wildmatch(int argc, const char **argv); int cmd__write_cache(int argc, const char **argv); diff --git a/t/helper/test-trace.c b/t/helper/test-trace.c new file mode 100644 index 00000000000000..04159c77a7feb7 --- /dev/null +++ b/t/helper/test-trace.c @@ -0,0 +1,130 @@ +#include "test-tool.h" +#include "cache.h" +#include "run-command.h" + +static struct child_process children[2] = { + CHILD_PROCESS_INIT, + CHILD_PROCESS_INIT, +}; + +#define SAY(child, what) \ + if (write_in_full(children[child].in, \ + what "\n", strlen(what) + 1) < 0) \ + die("Failed to tell child process #%d to %s", child, what) + +#define LISTEN(child, what) \ + if (strbuf_getwholeline_fd(&buf, children[child].out, '\n') < 0) \ + die("Child process #%d failed to acknowledge %s", child, what) + +#define ACK(what) \ + if (write_in_full(1, what ": ACK\n", strlen(what) + 6) < 0) \ + die_errno("'%s': %s ACK", child_name, what) + +static void contention_orchestrator(const char *argv0) +{ + struct strbuf buf = STRBUF_INIT; + int i; + + /* Spawn two children and simulate write contention */ + trace_printf("start"); + + for (i = 0; i < 2; i++) { + strbuf_reset(&buf); + strbuf_addf(&buf, "child #%d", i); + argv_array_pushl(&children[i].args, + argv0, "trace", "lock", buf.buf, NULL); + children[i].in = children[i].out = -1; + if (start_command(&children[i]) < 0) + die("Could not spawn child process #%d", i); + } + + SAY(1, "lock"); + LISTEN(1, "lock"); + + SAY(0, "trace delayed"); + SAY(1, "trace while-locked"); + LISTEN(1, "trace"); + + SAY(1, "unlock"); + LISTEN(1, "unlock"); + LISTEN(0, "trace"); + + SAY(0, "quit"); + SAY(1, "quit"); + + if (finish_command(&children[0]) < 0 || + finish_command(&children[1]) < 0) + die("Child process failed to finish"); + + strbuf_release(&buf); +} + +static void child(const char *child_name) +{ + struct strbuf buf = STRBUF_INIT; + int fd, locked = 0; + const char *p; + + /* This is the child process */ + trace_printf("child start: '%s'", child_name); + fd = trace_default_key.fd; + if (fd <= 0) + die("child process: not tracing..."); + while (!strbuf_getwholeline_fd(&buf, 0, '\n')) { + strbuf_rtrim(&buf); + if (!strcmp("lock", buf.buf)) { + if (lock_or_unlock_fd_for_appending(fd, 1) < 0 && + errno != EBADF && errno != EINVAL) + die_errno("'%s': lock", child_name); + ACK("lock"); + locked = 1; + } else if (!strcmp("unlock", buf.buf)) { + if (lock_or_unlock_fd_for_appending(fd, 0) < 0 && + errno != EBADF && errno != EINVAL) + die_errno("'%s': unlock", child_name); + ACK("unlock"); + locked = 0; + } else if (skip_prefix(buf.buf, "trace ", &p)) { + if (!locked) + trace_printf("%s: %s", child_name, p); + else { + char *p2 = xstrdup(p); + + /* Give the racy process a run for its money. */ + sleep_millisec(50); + + strbuf_reset(&buf); + strbuf_addf(&buf, "%s: %s\n", + child_name, p2); + free(p2); + + if (write_in_full(fd, buf.buf, buf.len) < 0) + die_errno("'%s': trace", child_name); + } + ACK("trace"); + } else if (!strcmp("quit", buf.buf)) { + close(0); + close(1); + break; + } else + die("Unhandled command: '%s'", buf.buf); + + } + + strbuf_release(&buf); +} + +int cmd__trace(int argc, const char **argv) +{ + const char *argv0 = argv[-1]; + + if (argc > 1 && !strcmp("lock", argv[1])) { + if (argc > 2) + child(argv[2]); + else + contention_orchestrator(argv0); + } else + die("Usage: %s %s lock []", argv0, argv[0]); + + return 0; +} diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh index 23fbe6434abd3f..57f7a1246046a4 100755 --- a/t/t0070-fundamental.sh +++ b/t/t0070-fundamental.sh @@ -34,4 +34,10 @@ test_expect_success 'check for a bug in the regex routines' ' test-tool regex --bug ' +test_expect_success 'multiple processes can GIT_TRACE to the same file' ' + GIT_TRACE="$(pwd)/trace" test-tool trace lock && + sed -n "/while-locked/,\$s/.*delayed$/YES/p" actual && + test YES = "$(cat actual)" +' + test_done