Skip to content

Commit

Permalink
std.Build.Cache.hit: work around macOS kernel bug
Browse files Browse the repository at this point in the history
The previous commit cast doubt upon the initial report about macOS
kernel behavior, identifying another reason that ENOENT could be
returned from file creation.

However, it is demonstrable that ENOENT can be returned for both cases:
1. create file race
2. handle refers to deleted directory

This commit re-introduces the workaround for the file creation race on
macOS however it does not unconditionally retry - it first tries again
with O_EXCL to disambiguate the error condition that has occurred.
  • Loading branch information
andrewrk committed Dec 11, 2024
1 parent d37ee79 commit 7ff42ef
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 8 deletions.
37 changes: 37 additions & 0 deletions lib/std/Build/Cache.zig
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,43 @@ pub const Manifest = struct {
};
break;
},
error.FileNotFound => {
// There are no dir components, so the only possibility
// should be that the directory behind the handle has been
// deleted, however we have observed on macOS two processes
// racing to do openat() with O_CREAT manifest in ENOENT.
//
// As a workaround, we retry with exclusive=true which
// disambiguates by returning EEXIST, indicating original
// failure was a race, or ENOENT, indicating deletion of
// the directory of our open handle.
if (builtin.os.tag != .macos) {
self.diagnostic = .{ .manifest_create = error.FileNotFound };
return error.CacheCheckFailed;
}

if (self.cache.manifest_dir.createFile(&manifest_file_path, .{
.read = true,
.truncate = false,
.lock = .exclusive,
.lock_nonblocking = self.want_shared_lock,
.exclusive = true,
})) |manifest_file| {
self.manifest_file = manifest_file;
self.have_exclusive_lock = true;
break;
} else |excl_err| switch (excl_err) {
error.WouldBlock, error.PathAlreadyExists => continue,
error.FileNotFound => {
self.diagnostic = .{ .manifest_create = error.FileNotFound };
return error.CacheCheckFailed;
},
else => |e| {
self.diagnostic = .{ .manifest_create = e };
return error.CacheCheckFailed;
},
}
},
else => |e| {
self.diagnostic = .{ .manifest_create = e };
return error.CacheCheckFailed;
Expand Down
2 changes: 1 addition & 1 deletion lib/std/Build/Step.zig
Original file line number Diff line number Diff line change
Expand Up @@ -770,7 +770,7 @@ fn failWithCacheError(s: *Step, man: *const Build.Cache.Manifest, err: Build.Cac
},
},
error.OutOfMemory => return error.OutOfMemory,
error.InvalidFormat => return s.fail("failed check cache: invalid manifest file format", .{}),
error.InvalidFormat => return s.fail("failed to check cache: invalid manifest file format", .{}),
}
}

Expand Down
2 changes: 2 additions & 0 deletions lib/std/posix.zig
Original file line number Diff line number Diff line change
Expand Up @@ -1541,6 +1541,8 @@ pub const OpenError = error{
/// * One of the path components does not exist.
/// * Cwd was used, but cwd has been deleted.
/// * The path associated with the open directory handle has been deleted.
/// * On macOS, multiple processes or threads raced to create the same file
/// with `O.EXCL` set to `false`.
FileNotFound,

/// The path exceeded `max_path_bytes` bytes.
Expand Down
2 changes: 1 addition & 1 deletion src/Compilation.zig
Original file line number Diff line number Diff line change
Expand Up @@ -2069,7 +2069,7 @@ pub fn update(comp: *Compilation, main_progress_node: std.Progress.Node) !void {
error.OutOfMemory => return error.OutOfMemory,
error.InvalidFormat => return comp.setMiscFailure(
.check_whole_cache,
"failed check cache: invalid manifest file format",
"failed to check cache: invalid manifest file format",
.{},
),
};
Expand Down
34 changes: 28 additions & 6 deletions src/Zcu/PerThread.zig
Original file line number Diff line number Diff line change
Expand Up @@ -136,12 +136,34 @@ pub fn astGenFile(
error.NoDevice => unreachable, // it's not a pipe
error.WouldBlock => unreachable, // not asking for non-blocking I/O
error.FileNotFound => {
// Since there are no dir components this could only occur if
// `zir_dir` is deleted after the compiler process obtains an
// open directory handle.
std.process.fatal("cache directory '{}' unexpectedly removed during compiler execution", .{
cache_directory,
});
// There are no dir components, so the only possibility should
// be that the directory behind the handle has been deleted,
// however we have observed on macOS two processes racing to do
// openat() with O_CREAT manifest in ENOENT.
//
// As a workaround, we retry with exclusive=true which
// disambiguates by returning EEXIST, indicating original
// failure was a race, or ENOENT, indicating deletion of the
// directory of our open handle.
if (builtin.os.tag != .macos) {
std.process.fatal("cache directory '{}' unexpectedly removed during compiler execution", .{
cache_directory,
});
}
break zir_dir.createFile(&hex_digest, .{
.read = true,
.truncate = false,
.lock = lock,
.exclusive = true,
}) catch |excl_err| switch (excl_err) {
error.PathAlreadyExists => continue,
error.FileNotFound => {
std.process.fatal("cache directory '{}' unexpectedly removed during compiler execution", .{
cache_directory,
});
},
else => |e| return e,
};
},

else => |e| return e, // Retryable errors are handled at callsite.
Expand Down

0 comments on commit 7ff42ef

Please sign in to comment.