Packed-refs v2 Part III: trailing table of contents in chunk-format #25

Closed
wants to merge 10 commits into from
25 changes: 25 additions & 0 deletions Documentation/config/extensions.txt
@@ -7,6 +7,31 @@ Note that this setting should only be set by linkgit:git-init[1] or
linkgit:git-clone[1]. Trying to change it after initialization will not
work and will produce hard-to-diagnose issues.

extensions.refFormat::
Specify the reference storage mechanisms used by the repository as a
multi-valued list. The acceptable values are `files` and `packed`.
If not specified, the list of `files` and `packed` is assumed. It
is an error to specify this key unless `core.repositoryFormatVersion`
is 1.
+
As new ref formats are added, Git commands may modify this list before and
after upgrading the on-disk reference storage files. The specific values
indicate the existence of different layers:
+
* `files`: When present, references may be stored as "loose" reference files
in the `$GIT_DIR/refs/` directory. The name of the reference corresponds
to the filename after `$GIT_DIR` and the file contains an object ID as
a hexadecimal string. If a loose reference file exists, then its value
takes precedence over all other formats.
+
* `packed`: When present, references may be stored as a group in a
`packed-refs` file in its version 1 format. When grouped with `files`
or provided on its own, this file is located at `$GIT_DIR/packed-refs`.
This file contains a list of distinct reference names, paired with their
object IDs. When combined with `files`, the `packed` format will only be
used to group multiple loose reference files upon request via the
`git pack-refs` command or via the `pack-refs` maintenance task.
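
For illustration, a repository opting into both layers explicitly would carry config like the following sketch; the values shown are just the default pair, spelled out as a multi-valued key:

```
[core]
	repositoryFormatVersion = 1
[extensions]
	refFormat = files
	refFormat = packed
```

Note that each `refFormat` line is a separate entry in the multi-valued list, not a single comma-separated value.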

extensions.worktreeConfig::
If enabled, then worktrees will load config settings from the
`$GIT_DIR/config.worktree` file in addition to the
8 changes: 8 additions & 0 deletions Documentation/config/index.txt
@@ -30,3 +30,11 @@ index.version::
Specify the version with which new index files should be
initialized. This does not affect existing repositories.
If `feature.manyFiles` is enabled, then the default is 4.

index.computeHash::
When enabled, compute the hash of the index file as it is written
and store the hash at the end of the content. This is enabled by
default.
+
If you disable `index.computeHash`, then older Git clients may report that
your index is corrupt during `git fsck`.
26 changes: 23 additions & 3 deletions Documentation/gitformat-chunk.txt
@@ -24,8 +24,9 @@ how they use the chunks to describe structured data.

A chunk-based file format begins with some header information custom to
that format. That header should include enough information to identify
the file type, format version, and number of chunks in the file. From this
information, that file can determine the start of the chunk-based region.
the file type, format version, and (optionally) the number of chunks in
the file. From this information, that file can determine the start of the
chunk-based region.

The chunk-based region starts with a table of contents describing where
each chunk starts and ends. This consists of (C+1) rows of 12 bytes each,
@@ -51,8 +52,27 @@ The final entry in the table of contents must be four zero bytes. This
confirms that the table of contents is ending and provides the offset for
the end of the chunk-based data.

The default chunk format assumes the table of contents appears at the
beginning of the file (after the header information) and the chunks are
ordered by increasing offset. Alternatively, the chunk format allows a
table of contents that is placed at the end of the file (before the
trailing hash) and the offsets are in descending order. In this trailing
table of contents case, the data in order looks instead like the following
table:

| Chunk ID (4 bytes) | Chunk Offset (8 bytes) |
|--------------------|------------------------|
| 0x00000000         | OFFSET[C+1]            |
| ID[C] | OFFSET[C] |
| ... | ... |
| ID[0] | OFFSET[0] |

A concrete file format built on the chunk format must state explicitly
whether it uses a trailing table of contents. By default, the table of
contents appears before all chunk data, with offsets in ascending order.

Note: The chunk-based format expects that the file contains _at least_ a
trailing hash after `OFFSET[C+1]`.
trailing hash after either `OFFSET[C+1]` or the trailing table of contents.

Functions for working with chunk-based file formats are declared in
`chunk-format.h`. Using these methods provides extra checks that assist
2 changes: 2 additions & 0 deletions cache.h
@@ -1155,6 +1155,8 @@ struct repository_format {
int hash_algo;
int sparse_index;
char *work_tree;
int ref_format_count;
enum ref_format_flags ref_format;
struct string_list unknown_extensions;
struct string_list v1_only_extensions;
};
109 changes: 92 additions & 17 deletions chunk-format.c
@@ -13,6 +13,7 @@ struct chunk_info {
chunk_write_fn write_fn;

const void *start;
off_t offset;
};

struct chunkfile {
@@ -56,38 +57,59 @@ void add_chunk(struct chunkfile *cf,
cf->chunks_nr++;
}

int write_chunkfile(struct chunkfile *cf, void *data)
int write_chunkfile(struct chunkfile *cf,
enum chunkfile_flags flags,
void *data)
{
int i, result = 0;
uint64_t cur_offset = hashfile_total(cf->f);

trace2_region_enter("chunkfile", "write", the_repository);

/* Add the table of contents to the current offset */
cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
if (!(flags & CHUNKFILE_TRAILING_TOC)) {
uint64_t cur_offset = hashfile_total(cf->f);

for (i = 0; i < cf->chunks_nr; i++) {
hashwrite_be32(cf->f, cf->chunks[i].id);
hashwrite_be64(cf->f, cur_offset);
/* Add the table of contents to the current offset */
cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;

cur_offset += cf->chunks[i].size;
}
for (i = 0; i < cf->chunks_nr; i++) {
hashwrite_be32(cf->f, cf->chunks[i].id);
hashwrite_be64(cf->f, cur_offset);

cur_offset += cf->chunks[i].size;
}

/* Trailing entry marks the end of the chunks */
hashwrite_be32(cf->f, 0);
hashwrite_be64(cf->f, cur_offset);
/* Trailing entry marks the end of the chunks */
hashwrite_be32(cf->f, 0);
hashwrite_be64(cf->f, cur_offset);
}

for (i = 0; i < cf->chunks_nr; i++) {
off_t start_offset = hashfile_total(cf->f);
cf->chunks[i].offset = hashfile_total(cf->f);
result = cf->chunks[i].write_fn(cf->f, data);

if (result)
goto cleanup;

if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
cf->chunks[i].size, cf->chunks[i].id,
hashfile_total(cf->f) - start_offset);
if (!(flags & CHUNKFILE_TRAILING_TOC)) {
if (hashfile_total(cf->f) - cf->chunks[i].offset != cf->chunks[i].size)
BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
cf->chunks[i].size, cf->chunks[i].id,
hashfile_total(cf->f) - cf->chunks[i].offset);
}

cf->chunks[i].size = hashfile_total(cf->f) - cf->chunks[i].offset;
}

if (flags & CHUNKFILE_TRAILING_TOC) {
size_t last_chunk_tail = hashfile_total(cf->f);
/* First entry marks the end of the chunks */
hashwrite_be32(cf->f, 0);
hashwrite_be64(cf->f, last_chunk_tail);

for (i = cf->chunks_nr - 1; i >= 0; i--) {
hashwrite_be32(cf->f, cf->chunks[i].id);
hashwrite_be64(cf->f, cf->chunks[i].offset);
}
}

cleanup:
@@ -151,6 +173,59 @@ int read_table_of_contents(struct chunkfile *cf,
return 0;
}

int read_trailing_table_of_contents(struct chunkfile *cf,
const unsigned char *mfile,
size_t mfile_size)
{
int i;
uint32_t chunk_id;
const unsigned char *table_of_contents = mfile + mfile_size - the_hash_algo->rawsz;

while (1) {
uint64_t chunk_offset;

table_of_contents -= CHUNK_TOC_ENTRY_SIZE;

chunk_id = get_be32(table_of_contents);
chunk_offset = get_be64(table_of_contents + 4);

/* Calculate the previous chunk size, if it exists. */
if (cf->chunks_nr) {
off_t previous_offset = cf->chunks[cf->chunks_nr - 1].offset;

if (chunk_offset < previous_offset ||
chunk_offset > table_of_contents - mfile) {
error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
previous_offset, chunk_offset);
return -1;
}

cf->chunks[cf->chunks_nr - 1].size = chunk_offset - previous_offset;
}

/* Stop at the null chunk. We only need it for the last size. */
if (!chunk_id)
break;

for (i = 0; i < cf->chunks_nr; i++) {
if (cf->chunks[i].id == chunk_id) {
error(_("duplicate chunk ID %"PRIx32" found"),
chunk_id);
return -1;
}
}

ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);

cf->chunks[cf->chunks_nr].id = chunk_id;
cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
cf->chunks[cf->chunks_nr].offset = chunk_offset;
cf->chunks_nr++;
}

return 0;
}

static int pair_chunk_fn(const unsigned char *chunk_start,
size_t chunk_size,
void *data)
18 changes: 17 additions & 1 deletion chunk-format.h
@@ -31,14 +31,30 @@ void add_chunk(struct chunkfile *cf,
uint32_t id,
size_t size,
chunk_write_fn fn);
int write_chunkfile(struct chunkfile *cf, void *data);

enum chunkfile_flags {
CHUNKFILE_TRAILING_TOC = (1 << 0),
};

int write_chunkfile(struct chunkfile *cf,
enum chunkfile_flags flags,
void *data);

int read_table_of_contents(struct chunkfile *cf,
const unsigned char *mfile,
size_t mfile_size,
uint64_t toc_offset,
int toc_length);

/**
* Read the given chunkfile, but read the table of contents from the
* end of the given mfile. The file is expected to be a hashfile with
* the_hash_algo->rawsz bytes at the end storing the hash.
*/
int read_trailing_table_of_contents(struct chunkfile *cf,
const unsigned char *mfile,
size_t mfile_size);

#define CHUNK_NOT_FOUND (-2)

/*
2 changes: 1 addition & 1 deletion commit-graph.c
@@ -1932,7 +1932,7 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
get_num_chunks(cf) * ctx->commits.nr);
}

write_chunkfile(cf, ctx);
write_chunkfile(cf, 0, ctx);

stop_progress(&ctx->progress);
strbuf_release(&progress_title);
14 changes: 11 additions & 3 deletions csum-file.c
@@ -45,7 +45,8 @@ void hashflush(struct hashfile *f)
unsigned offset = f->offset;

if (offset) {
the_hash_algo->update_fn(&f->ctx, f->buffer, offset);
if (!f->skip_hash)
the_hash_algo->update_fn(&f->ctx, f->buffer, offset);
flush(f, f->buffer, offset);
f->offset = 0;
}
@@ -64,7 +65,12 @@ int finalize_hashfile(struct hashfile *f, unsigned char *result,
int fd;

hashflush(f);
the_hash_algo->final_fn(f->buffer, &f->ctx);

if (f->skip_hash)
memset(f->buffer, 0, the_hash_algo->rawsz);
else
the_hash_algo->final_fn(f->buffer, &f->ctx);

if (result)
hashcpy(result, f->buffer);
if (flags & CSUM_HASH_IN_STREAM)
@@ -108,7 +114,8 @@ void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
* the hashfile's buffer. In this block,
* f->offset is necessarily zero.
*/
the_hash_algo->update_fn(&f->ctx, buf, nr);
if (!f->skip_hash)
the_hash_algo->update_fn(&f->ctx, buf, nr);
flush(f, buf, nr);
} else {
/*
@@ -153,6 +160,7 @@ static struct hashfile *hashfd_internal(int fd, const char *name,
f->tp = tp;
f->name = name;
f->do_crc = 0;
f->skip_hash = 0;
the_hash_algo->init_fn(&f->ctx);

f->buffer_len = buffer_len;
7 changes: 7 additions & 0 deletions csum-file.h
@@ -20,6 +20,13 @@ struct hashfile {
size_t buffer_len;
unsigned char *buffer;
unsigned char *check_buffer;

/**
* If set to 1, skip_hash indicates that we should
* not actually compute the hash for this hashfile and
* instead only use it as a buffered write.
*/
unsigned int skip_hash;
};

/* Checkpoint */
2 changes: 1 addition & 1 deletion midx.c
@@ -1480,7 +1480,7 @@ static int write_midx_internal(const char *object_dir,
}

write_midx_header(f, get_num_chunks(cf), ctx.nr - dropped_packs);
write_chunkfile(cf, &ctx);
write_chunkfile(cf, 0, &ctx);

finalize_hashfile(f, midx_hash, FSYNC_COMPONENT_PACK_METADATA,
CSUM_FSYNC | CSUM_HASH_IN_STREAM);
22 changes: 21 additions & 1 deletion read-cache.c
@@ -1817,6 +1817,8 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size)
git_hash_ctx c;
unsigned char hash[GIT_MAX_RAWSZ];
int hdr_version;
int all_zeroes = 1;
unsigned char *start, *end;

if (hdr->hdr_signature != htonl(CACHE_SIGNATURE))
return error(_("bad signature 0x%08x"), hdr->hdr_signature);
@@ -1827,10 +1829,23 @@ static int verify_hdr(const struct cache_header *hdr, unsigned long size)
if (!verify_index_checksum)
return 0;

end = (unsigned char *)hdr + size;
start = end - the_hash_algo->rawsz;
while (start < end) {
if (*start != 0) {
all_zeroes = 0;
break;
}
start++;
}

if (all_zeroes)
return 0;

the_hash_algo->init_fn(&c);
the_hash_algo->update_fn(&c, hdr, size - the_hash_algo->rawsz);
the_hash_algo->final_fn(hash, &c);
if (!hasheq(hash, (unsigned char *)hdr + size - the_hash_algo->rawsz))
if (!hasheq(hash, end - the_hash_algo->rawsz))
return error(_("bad index file sha1 signature"));
return 0;
}
@@ -2917,9 +2932,14 @@ static int do_write_index(struct index_state *istate, struct tempfile *tempfile,
int ieot_entries = 1;
struct index_entry_offset_table *ieot = NULL;
int nr, nr_threads;
int compute_hash;

f = hashfd(tempfile->fd, tempfile->filename.buf);

if (!git_config_get_maybe_bool("index.computehash", &compute_hash) &&
!compute_hash)
f->skip_hash = 1;

for (i = removed = extended = 0; i < entries; i++) {
if (cache[i]->ce_flags & CE_REMOVE)
removed++;