Skip to content

Commit

Permalink
core.fsync: introduce granular fsync control
Browse files Browse the repository at this point in the history
This commit introduces the `core.fsync` configuration
knob which can be used to control how components of the
repository are made durable on disk.

This setting allows future extensibility of components
that could be synced in two ways:
* We issue a warning rather than an error for unrecognized
  components, so new configs can be used with old Git versions.
* We support negation, so users can choose one of the default
  aggregate options and then remove components that they don't
  want.

This also support the common request of doing absolutely no
fysncing with the `core.fsync=none` value, which is expected
to make the test suite faster.

This commit introduces the new ability for the user to harden
the index, which is a requirement for being able to actually
find a file that has been added to the repo and then deleted
from the working tree.

Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
  • Loading branch information
neerajsi-msft committed Dec 4, 2021
1 parent 527380d commit 23311a1
Show file tree
Hide file tree
Showing 16 changed files with 179 additions and 37 deletions.
28 changes: 20 additions & 8 deletions Documentation/config/core.txt
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,26 @@ core.whitespace::
is relevant for `indent-with-non-tab` and when Git fixes `tab-in-indent`
errors. The default tab width is 8. Allowed values are 1 to 63.

core.fsync::
A comma-separated list of parts of the repository which should be
hardened via the core.fsyncMethod when created or modified. You can
disable hardening of any component by prefixing it with a '-'. Later
items take precedence over earlier ones in the list. For example,
`core.fsync=all,-index` means "harden everything except the index".
Items that are not hardened may be lost in the event of an unclean
system shutdown.
+
* `none` disables fsync completely. This must be specified alone.
* `loose-object` hardens objects added to the repo in loose-object form.
* `pack` hardens objects added to the repo in packfile form.
* `pack-metadata` hardens packfile bitmaps and indexes.
* `commit-graph` hardens the commit graph file.
* `index` hardens the index when it is modified.
* `objects` is an aggregate option that includes `loose-objects`, `pack`,
`pack-metadata`, and `commit-graph`.
* `default` is an aggregate option that is equivalent to `objects,-loose-object`
* `all` is an aggregate option that syncs all individual components above.

core.fsyncMethod::
A value indicating the strategy Git will use to harden repository data
using fsync and related primitives.
Expand All @@ -556,14 +576,6 @@ core.fsyncMethod::
hardware, but does not send any FLUSH CACHE request. If the operating system
does not support the required interfaces, this falls back to fsync().

core.fsyncObjectFiles::
This boolean will enable 'fsync()' when writing object files.
+
This is a total waste of time and effort on a filesystem that orders
data writes properly, but can be useful for filesystems that do not use
journalling (traditional UNIX filesystems) or that only journal metadata
and not file contents (OS X's HFS+, or Linux ext3 with "data=writeback").

core.preloadIndex::
Enable parallel index preload for operations like 'git diff'
+
Expand Down
2 changes: 1 addition & 1 deletion builtin/fast-import.c
Original file line number Diff line number Diff line change
Expand Up @@ -856,7 +856,7 @@ static void end_packfile(void)
struct tag *t;

close_pack_windows(pack_data);
finalize_hashfile(pack_file, cur_pack_oid.hash, 0);
finalize_hashfile(pack_file, cur_pack_oid.hash, REPO_COMPONENT_PACK, 0);
fixup_pack_header_footer(pack_data->pack_fd, pack_data->hash,
pack_data->pack_name, object_count,
cur_pack_oid.hash, pack_size);
Expand Down
4 changes: 2 additions & 2 deletions builtin/index-pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -1286,7 +1286,7 @@ static void conclude_pack(int fix_thin_pack, const char *curr_pack, unsigned cha
nr_objects - nr_objects_initial);
stop_progress_msg(&progress, msg.buf);
strbuf_release(&msg);
finalize_hashfile(f, tail_hash, 0);
finalize_hashfile(f, tail_hash, REPO_COMPONENT_PACK, 0);
hashcpy(read_hash, pack_hash);
fixup_pack_header_footer(output_fd, pack_hash,
curr_pack, nr_objects,
Expand Down Expand Up @@ -1508,7 +1508,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
if (!from_stdin) {
close(input_fd);
} else {
fsync_or_die(output_fd, curr_pack_name);
fsync_component_or_die(REPO_COMPONENT_PACK, output_fd, curr_pack_name);
err = close(output_fd);
if (err)
die_errno(_("error while closing pack file"));
Expand Down
8 changes: 5 additions & 3 deletions builtin/pack-objects.c
Original file line number Diff line number Diff line change
Expand Up @@ -1204,11 +1204,13 @@ static void write_pack_file(void)
* If so, rewrite it like in fast-import
*/
if (pack_to_stdout) {
finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_CLOSE);
finalize_hashfile(f, hash, REPO_COMPONENT_NONE,
CSUM_HASH_IN_STREAM | CSUM_CLOSE);
} else if (nr_written == nr_remaining) {
finalize_hashfile(f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
finalize_hashfile(f, hash, REPO_COMPONENT_PACK,
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
} else {
int fd = finalize_hashfile(f, hash, 0);
int fd = finalize_hashfile(f, hash, REPO_COMPONENT_PACK, 0);
fixup_pack_header_footer(fd, hash, pack_tmp_name,
nr_written, hash, offset);
close(fd);
Expand Down
5 changes: 3 additions & 2 deletions bulk-checkin.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ static void finish_bulk_checkin(struct bulk_checkin_state *state)
unlink(state->pack_tmp_name);
goto clear_exit;
} else if (state->nr_written == 1) {
finalize_hashfile(state->f, hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
finalize_hashfile(state->f, hash, REPO_COMPONENT_PACK,
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
} else {
int fd = finalize_hashfile(state->f, hash, 0);
int fd = finalize_hashfile(state->f, hash, REPO_COMPONENT_PACK, 0);
fixup_pack_header_footer(fd, hash, state->pack_tmp_name,
state->nr_written, hash,
state->offset);
Expand Down
41 changes: 40 additions & 1 deletion cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -985,7 +985,40 @@ void reset_shared_repository(void);
extern int read_replace_refs;
extern char *git_replace_ref_base;

extern int fsync_object_files;
/*
* These values are used to help identify parts of a repository to fsync.
* REPO_COMPONENT_NONE identifies data that will not be a persistent part of the
* repository and so shouldn't be fsynced.
*/
enum repo_component {
REPO_COMPONENT_NONE = 0,
REPO_COMPONENT_LOOSE_OBJECT = 1 << 0,
REPO_COMPONENT_PACK = 1 << 1,
REPO_COMPONENT_PACK_METADATA = 1 << 2,
REPO_COMPONENT_COMMIT_GRAPH = 1 << 3,
REPO_COMPONENT_INDEX = 1 << 4,
};

#define FSYNC_COMPONENTS_DEFAULT (REPO_COMPONENT_PACK | \
REPO_COMPONENT_PACK_METADATA | \
REPO_COMPONENT_COMMIT_GRAPH)

#define FSYNC_COMPONENTS_OBJECTS (REPO_COMPONENT_LOOSE_OBJECT | \
REPO_COMPONENT_PACK | \
REPO_COMPONENT_PACK_METADATA | \
REPO_COMPONENT_COMMIT_GRAPH)

#define FSYNC_COMPONENTS_ALL (REPO_COMPONENT_LOOSE_OBJECT | \
REPO_COMPONENT_PACK | \
REPO_COMPONENT_PACK_METADATA | \
REPO_COMPONENT_COMMIT_GRAPH | \
REPO_COMPONENT_INDEX)


/*
* A bitmask indicating which components of the repo should be fsynced.
*/
extern enum repo_component fsync_components;

enum fsync_method {
FSYNC_METHOD_FSYNC,
Expand Down Expand Up @@ -1747,6 +1780,12 @@ int copy_file_with_time(const char *dst, const char *src, int mode);
void write_or_die(int fd, const void *buf, size_t count);
void fsync_or_die(int fd, const char *);

inline void fsync_component_or_die(enum repo_component component, int fd, const char *msg)
{
if (fsync_components & component)
fsync_or_die(fd, msg);
}

ssize_t read_in_full(int fd, void *buf, size_t count);
ssize_t write_in_full(int fd, const void *buf, size_t count);
ssize_t pread_in_full(int fd, void *buf, size_t count, off_t offset);
Expand Down
3 changes: 2 additions & 1 deletion commit-graph.c
Original file line number Diff line number Diff line change
Expand Up @@ -1939,7 +1939,8 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
}

close_commit_graph(ctx->r->objects);
finalize_hashfile(f, file_hash, CSUM_HASH_IN_STREAM | CSUM_FSYNC);
finalize_hashfile(f, file_hash, REPO_COMPONENT_COMMIT_GRAPH,
CSUM_HASH_IN_STREAM | CSUM_FSYNC);
free_chunkfile(cf);

if (ctx->split) {
Expand Down
77 changes: 76 additions & 1 deletion config.c
Original file line number Diff line number Diff line change
Expand Up @@ -1213,6 +1213,74 @@ static int git_parse_maybe_bool_text(const char *value)
return -1;
}

static const struct fsync_component_entry {
const char *name;
enum repo_component component_bits;
} fsync_component_table[] = {
{ "loose-object", REPO_COMPONENT_LOOSE_OBJECT },
{ "pack", REPO_COMPONENT_PACK },
{ "pack-metadata", REPO_COMPONENT_PACK_METADATA },
{ "commit-graph", REPO_COMPONENT_COMMIT_GRAPH },
{ "index", REPO_COMPONENT_INDEX },
{ "objects", FSYNC_COMPONENTS_OBJECTS },
{ "default", FSYNC_COMPONENTS_DEFAULT },
{ "all", FSYNC_COMPONENTS_ALL },
};

static enum repo_component parse_fsync_components(const char *var, const char *string)
{
enum repo_component output = 0;

if (!strcmp(string, "none"))
return output;

while (string) {
int i;
size_t len;
const char *ep;
int negated = 0;
int found = 0;

string = string + strspn(string, ", \t\n\r");
ep = strchrnul(string, ',');
len = ep - string;

if (*string == '-') {
negated = 1;
string++;
len--;
if (!len)
warning(_("invalid value for variable %s"), var);
}

if (!len)
break;

for (i = 0; i < ARRAY_SIZE(fsync_component_table); ++i) {
const struct fsync_component_entry *entry = &fsync_component_table[i];

if (strncmp(entry->name, string, len))
continue;

found = 1;
if (negated)
output &= ~entry->component_bits;
else
output |= entry->component_bits;
}

if (!found) {
char *component = xstrndup(string, len);
warning(_("unknown %s value '%s'"), var, component);
free(component);
}

string = ep;
}

return output;
}

int git_parse_maybe_bool(const char *value)
{
int v = git_parse_maybe_bool_text(value);
Expand Down Expand Up @@ -1490,6 +1558,13 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
return 0;
}

if (!strcmp(var, "core.fsync")) {
if (!value)
return config_error_nonbool(var);
fsync_components = parse_fsync_components(var, value);
return 0;
}

if (!strcmp(var, "core.fsyncmethod")) {
if (!value)
return config_error_nonbool(var);
Expand All @@ -1503,7 +1578,7 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
}

if (!strcmp(var, "core.fsyncobjectfiles")) {
fsync_object_files = git_config_bool(var, value);
warning(_("core.fsyncobjectfiles is deprecated; use core.fsync instead"));
return 0;
}

Expand Down
5 changes: 3 additions & 2 deletions csum-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,8 @@ static void free_hashfile(struct hashfile *f)
free(f);
}

int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int flags)
int finalize_hashfile(struct hashfile *f, unsigned char *result,
enum repo_component component, unsigned int flags)
{
int fd;

Expand All @@ -69,7 +70,7 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result, unsigned int fl
if (flags & CSUM_HASH_IN_STREAM)
flush(f, f->buffer, the_hash_algo->rawsz);
if (flags & CSUM_FSYNC)
fsync_or_die(f->fd, f->name);
fsync_component_or_die(component, f->fd, f->name);
if (flags & CSUM_CLOSE) {
if (close(f->fd))
die_errno("%s: sha1 file error on close", f->name);
Expand Down
2 changes: 1 addition & 1 deletion csum-file.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ int hashfile_truncate(struct hashfile *, struct hashfile_checkpoint *);
struct hashfile *hashfd(int fd, const char *name);
struct hashfile *hashfd_check(const char *name);
struct hashfile *hashfd_throughput(int fd, const char *name, struct progress *tp);
int finalize_hashfile(struct hashfile *, unsigned char *, unsigned int);
int finalize_hashfile(struct hashfile *, unsigned char *, enum repo_component, unsigned int);
void hashwrite(struct hashfile *, const void *, unsigned int);
void hashflush(struct hashfile *f);
void crc32_begin(struct hashfile *);
Expand Down
1 change: 1 addition & 0 deletions environment.c
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ const char *git_hooks_path;
int zlib_compression_level = Z_BEST_SPEED;
int pack_compression_level = Z_DEFAULT_COMPRESSION;
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
enum repo_component fsync_components = FSYNC_COMPONENTS_DEFAULT;
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
size_t delta_base_cache_limit = 96 * 1024 * 1024;
Expand Down
3 changes: 2 additions & 1 deletion midx.c
Original file line number Diff line number Diff line change
Expand Up @@ -1406,7 +1406,8 @@ static int write_midx_internal(const char *object_dir,
write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
write_chunkfile(cf, &ctx);

finalize_hashfile(f, midx_hash, CSUM_FSYNC | CSUM_HASH_IN_STREAM);
finalize_hashfile(f, midx_hash, REPO_COMPONENT_PACK_METADATA,
CSUM_FSYNC | CSUM_HASH_IN_STREAM);
free_chunkfile(cf);

if (flags & (MIDX_WRITE_REV_INDEX | MIDX_WRITE_BITMAP))
Expand Down
3 changes: 1 addition & 2 deletions object-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -1809,8 +1809,7 @@ int hash_object_file(const struct git_hash_algo *algo, const void *buf,
/* Finalize a file on disk, and close it. */
static void close_loose_object(int fd)
{
if (fsync_object_files)
fsync_or_die(fd, "loose object file");
fsync_component_or_die(REPO_COMPONENT_LOOSE_OBJECT, fd, "loose object file");
if (close(fd) != 0)
die_errno(_("error when closing loose object file"));
}
Expand Down
3 changes: 2 additions & 1 deletion pack-bitmap-write.c
Original file line number Diff line number Diff line change
Expand Up @@ -719,7 +719,8 @@ void bitmap_writer_finish(struct pack_idx_entry **index,
if (options & BITMAP_OPT_HASH_CACHE)
write_hash_cache(f, index, index_nr);

finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);
finalize_hashfile(f, NULL, REPO_COMPONENT_PACK_METADATA,
CSUM_HASH_IN_STREAM | CSUM_FSYNC | CSUM_CLOSE);

if (adjust_shared_perm(tmp_file.buf))
die_errno("unable to make temporary bitmap file readable");
Expand Down
12 changes: 7 additions & 5 deletions pack-write.c
Original file line number Diff line number Diff line change
Expand Up @@ -159,8 +159,9 @@ const char *write_idx_file(const char *index_name, struct pack_idx_entry **objec
}

hashwrite(f, sha1, the_hash_algo->rawsz);
finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
((opts->flags & WRITE_IDX_VERIFY)
finalize_hashfile(f, NULL, REPO_COMPONENT_PACK_METADATA,
CSUM_HASH_IN_STREAM | CSUM_CLOSE |
((opts->flags & WRITE_IDX_VERIFY)
? 0 : CSUM_FSYNC));
return index_name;
}
Expand Down Expand Up @@ -281,8 +282,9 @@ const char *write_rev_file_order(const char *rev_name,
if (rev_name && adjust_shared_perm(rev_name) < 0)
die(_("failed to make %s readable"), rev_name);

finalize_hashfile(f, NULL, CSUM_HASH_IN_STREAM | CSUM_CLOSE |
((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));
finalize_hashfile(f, NULL, REPO_COMPONENT_PACK_METADATA,
CSUM_HASH_IN_STREAM | CSUM_CLOSE |
((flags & WRITE_IDX_VERIFY) ? 0 : CSUM_FSYNC));

return rev_name;
}
Expand Down Expand Up @@ -390,7 +392,7 @@ void fixup_pack_header_footer(int pack_fd,
the_hash_algo->final_fn(partial_pack_hash, &old_hash_ctx);
the_hash_algo->final_fn(new_pack_hash, &new_hash_ctx);
write_or_die(pack_fd, new_pack_hash, the_hash_algo->rawsz);
fsync_or_die(pack_fd, pack_name);
fsync_component_or_die(REPO_COMPONENT_PACK, pack_fd, pack_name);
}

char *index_pack_lockfile(int ip_out, int *is_well_formed)
Expand Down
Loading

0 comments on commit 23311a1

Please sign in to comment.