Skip to content

Commit

Permalink
Merge branch 'tb/midx-write-cleanup'
Browse files Browse the repository at this point in the history
Code clean-up around writing the .midx files.

* tb/midx-write-cleanup:
  pack-bitmap.c: reimplement `midx_bitmap_filename()` with helper
  midx: replace `get_midx_rev_filename()` with a generic helper
  midx-write.c: support reading an existing MIDX with `packs_to_include`
  midx-write.c: extract `fill_packs_from_midx()`
  midx-write.c: extract `should_include_pack()`
  midx-write.c: pass `start_pack` to `compute_sorted_entries()`
  midx-write.c: reduce argument count for `get_sorted_entries()`
  midx-write.c: tolerate `--preferred-pack` without bitmaps
  • Loading branch information
gitster committed Jun 7, 2024
2 parents cd77e87 + 4cac79a commit 1b76f06
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 91 deletions.
161 changes: 79 additions & 82 deletions midx-write.c
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,18 @@ struct write_midx_context {
struct string_list *to_include;
};

static int should_include_pack(const struct write_midx_context *ctx,
const char *file_name,
int exclude_from_midx)
{
if (exclude_from_midx && ctx->m && midx_contains_pack(ctx->m, file_name))
return 0;
if (ctx->to_include && !string_list_has_string(ctx->to_include,
file_name))
return 0;
return 1;
}

static void add_pack_to_midx(const char *full_path, size_t full_path_len,
const char *file_name, void *data)
{
Expand All @@ -108,29 +120,11 @@ static void add_pack_to_midx(const char *full_path, size_t full_path_len,

if (ends_with(file_name, ".idx")) {
display_progress(ctx->progress, ++ctx->pack_paths_checked);
/*
* Note that at most one of ctx->m and ctx->to_include are set,
* so we are testing midx_contains_pack() and
* string_list_has_string() independently (guarded by the
* appropriate NULL checks).
*
* We could support passing to_include while reusing an existing
* MIDX, but don't currently since the reuse process drags
* forward all packs from an existing MIDX (without checking
* whether or not they appear in the to_include list).
*
* If we added support for that, these next two conditional
* should be performed independently (likely checking
* to_include before the existing MIDX).
*/
if (ctx->m && midx_contains_pack(ctx->m, file_name))
return;
else if (ctx->to_include &&
!string_list_has_string(ctx->to_include, file_name))

if (!should_include_pack(ctx, file_name, 1))
return;

ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);

p = add_packed_git(full_path, full_path_len, 0);
if (!p) {
warning(_("failed to add packfile '%s'"),
Expand Down Expand Up @@ -299,21 +293,16 @@ static void midx_fanout_add_pack_fanout(struct midx_fanout *fanout,
* Copy only the de-duplicated entries (selected by most-recent modified time
* of a packfile containing the object).
*/
static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
struct pack_info *info,
uint32_t nr_packs,
size_t *nr_objects,
int preferred_pack)
static void compute_sorted_entries(struct write_midx_context *ctx,
uint32_t start_pack)
{
uint32_t cur_fanout, cur_pack, cur_object;
size_t alloc_objects, total_objects = 0;
struct midx_fanout fanout = { 0 };
struct pack_midx_entry *deduplicated_entries = NULL;
uint32_t start_pack = m ? m->num_packs : 0;

for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++)
for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++)
total_objects = st_add(total_objects,
info[cur_pack].p->num_objects);
ctx->info[cur_pack].p->num_objects);

/*
* As we de-duplicate by fanout value, we expect the fanout
Expand All @@ -323,26 +312,26 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
alloc_objects = fanout.alloc = total_objects > 3200 ? total_objects / 200 : 16;

ALLOC_ARRAY(fanout.entries, fanout.alloc);
ALLOC_ARRAY(deduplicated_entries, alloc_objects);
*nr_objects = 0;
ALLOC_ARRAY(ctx->entries, alloc_objects);
ctx->entries_nr = 0;

for (cur_fanout = 0; cur_fanout < 256; cur_fanout++) {
fanout.nr = 0;

if (m)
midx_fanout_add_midx_fanout(&fanout, m, cur_fanout,
preferred_pack);
if (ctx->m)
midx_fanout_add_midx_fanout(&fanout, ctx->m, cur_fanout,
ctx->preferred_pack_idx);

for (cur_pack = start_pack; cur_pack < nr_packs; cur_pack++) {
int preferred = cur_pack == preferred_pack;
for (cur_pack = start_pack; cur_pack < ctx->nr; cur_pack++) {
int preferred = cur_pack == ctx->preferred_pack_idx;
midx_fanout_add_pack_fanout(&fanout,
info, cur_pack,
ctx->info, cur_pack,
preferred, cur_fanout);
}

if (-1 < preferred_pack && preferred_pack < start_pack)
midx_fanout_add_pack_fanout(&fanout, info,
preferred_pack, 1,
if (-1 < ctx->preferred_pack_idx && ctx->preferred_pack_idx < start_pack)
midx_fanout_add_pack_fanout(&fanout, ctx->info,
ctx->preferred_pack_idx, 1,
cur_fanout);

midx_fanout_sort(&fanout);
Expand All @@ -356,17 +345,16 @@ static struct pack_midx_entry *get_sorted_entries(struct multi_pack_index *m,
&fanout.entries[cur_object].oid))
continue;

ALLOC_GROW(deduplicated_entries, st_add(*nr_objects, 1),
ALLOC_GROW(ctx->entries, st_add(ctx->entries_nr, 1),
alloc_objects);
memcpy(&deduplicated_entries[*nr_objects],
memcpy(&ctx->entries[ctx->entries_nr],
&fanout.entries[cur_object],
sizeof(struct pack_midx_entry));
(*nr_objects)++;
ctx->entries_nr++;
}
}

free(fanout.entries);
return deduplicated_entries;
}

static int write_midx_pack_names(struct hashfile *f, void *data)
Expand Down Expand Up @@ -886,6 +874,43 @@ static struct multi_pack_index *lookup_multi_pack_index(struct repository *r,
return result;
}

static int fill_packs_from_midx(struct write_midx_context *ctx,
const char *preferred_pack_name, uint32_t flags)
{
uint32_t i;

for (i = 0; i < ctx->m->num_packs; i++) {
if (!should_include_pack(ctx, ctx->m->pack_names[i], 0))
continue;

ALLOC_GROW(ctx->info, ctx->nr + 1, ctx->alloc);

if (flags & MIDX_WRITE_REV_INDEX || preferred_pack_name) {
/*
* If generating a reverse index, need to have
* packed_git's loaded to compare their
* mtimes and object count.
*
*
* If a preferred pack is specified, need to
* have packed_git's loaded to ensure the chosen
* preferred pack has a non-zero object count.
*/
if (prepare_midx_pack(the_repository, ctx->m, i))
return error(_("could not load pack"));

if (open_pack_index(ctx->m->packs[i]))
die(_("could not open index for %s"),
ctx->m->packs[i]->pack_name);
}

fill_pack_info(&ctx->info[ctx->nr++], ctx->m->packs[i],
ctx->m->pack_names[i], i);
}

return 0;
}

static int write_midx_internal(const char *object_dir,
struct string_list *packs_to_include,
struct string_list *packs_to_drop,
Expand All @@ -895,7 +920,7 @@ static int write_midx_internal(const char *object_dir,
{
struct strbuf midx_name = STRBUF_INIT;
unsigned char midx_hash[GIT_MAX_RAWSZ];
uint32_t i;
uint32_t i, start_pack;
struct hashfile *f = NULL;
struct lock_file lk;
struct write_midx_context ctx = { 0 };
Expand All @@ -912,15 +937,7 @@ static int write_midx_internal(const char *object_dir,
die_errno(_("unable to create leading directories of %s"),
midx_name.buf);

if (!packs_to_include) {
/*
* Only reference an existing MIDX when not filtering which
* packs to include, since all packs and objects are copied
* blindly from an existing MIDX if one is present.
*/
ctx.m = lookup_multi_pack_index(the_repository, object_dir);
}

ctx.m = lookup_multi_pack_index(the_repository, object_dir);
if (ctx.m && !midx_checksum_valid(ctx.m)) {
warning(_("ignoring existing multi-pack-index; checksum mismatch"));
ctx.m = NULL;
Expand All @@ -929,42 +946,23 @@ static int write_midx_internal(const char *object_dir,
ctx.nr = 0;
ctx.alloc = ctx.m ? ctx.m->num_packs : 16;
ctx.info = NULL;
ctx.to_include = packs_to_include;
ALLOC_ARRAY(ctx.info, ctx.alloc);

if (ctx.m) {
for (i = 0; i < ctx.m->num_packs; i++) {
ALLOC_GROW(ctx.info, ctx.nr + 1, ctx.alloc);

if (flags & MIDX_WRITE_REV_INDEX) {
/*
* If generating a reverse index, need to have
* packed_git's loaded to compare their
* mtimes and object count.
*/
if (prepare_midx_pack(the_repository, ctx.m, i)) {
error(_("could not load pack"));
result = 1;
goto cleanup;
}

if (open_pack_index(ctx.m->packs[i]))
die(_("could not open index for %s"),
ctx.m->packs[i]->pack_name);
}

fill_pack_info(&ctx.info[ctx.nr++], ctx.m->packs[i],
ctx.m->pack_names[i], i);
}
if (ctx.m && fill_packs_from_midx(&ctx, preferred_pack_name,
flags) < 0) {
result = 1;
goto cleanup;
}

start_pack = ctx.nr;

ctx.pack_paths_checked = 0;
if (flags & MIDX_PROGRESS)
ctx.progress = start_delayed_progress(_("Adding packfiles to multi-pack-index"), 0);
else
ctx.progress = NULL;

ctx.to_include = packs_to_include;

for_each_file_in_pack_dir(object_dir, add_pack_to_midx, &ctx);
stop_progress(&ctx.progress);

Expand Down Expand Up @@ -1054,8 +1052,7 @@ static int write_midx_internal(const char *object_dir,
}
}

ctx.entries = get_sorted_entries(ctx.m, ctx.info, ctx.nr, &ctx.entries_nr,
ctx.preferred_pack_idx);
compute_sorted_entries(&ctx, start_pack);

ctx.large_offsets_needed = 0;
for (i = 0; i < ctx.entries_nr; i++) {
Expand Down
10 changes: 6 additions & 4 deletions midx.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,15 @@ const unsigned char *get_midx_checksum(struct multi_pack_index *m)

void get_midx_filename(struct strbuf *out, const char *object_dir)
{
strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
get_midx_filename_ext(out, object_dir, NULL, NULL);
}

void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m)
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
const unsigned char *hash, const char *ext)
{
get_midx_filename(out, m->object_dir);
strbuf_addf(out, "-%s.rev", hash_to_hex(get_midx_checksum(m)));
strbuf_addf(out, "%s/pack/multi-pack-index", object_dir);
if (ext)
strbuf_addf(out, "-%s.%s", hash_to_hex(hash), ext);
}

static int midx_read_oid_fanout(const unsigned char *chunk_start,
Expand Down
6 changes: 5 additions & 1 deletion midx.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,13 @@ struct multi_pack_index {
#define MIDX_WRITE_BITMAP_HASH_CACHE (1 << 3)
#define MIDX_WRITE_BITMAP_LOOKUP_TABLE (1 << 4)

#define MIDX_EXT_REV "rev"
#define MIDX_EXT_BITMAP "bitmap"

const unsigned char *get_midx_checksum(struct multi_pack_index *m);
void get_midx_filename(struct strbuf *out, const char *object_dir);
void get_midx_rev_filename(struct strbuf *out, struct multi_pack_index *m);
void get_midx_filename_ext(struct strbuf *out, const char *object_dir,
const unsigned char *hash, const char *ext);

struct multi_pack_index *load_multi_pack_index(const char *object_dir, int local);
int prepare_midx_pack(struct repository *r, struct multi_pack_index *m, uint32_t pack_int_id);
Expand Down
5 changes: 2 additions & 3 deletions pack-bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -309,9 +309,8 @@ static int load_bitmap_entries_v1(struct bitmap_index *index)
char *midx_bitmap_filename(struct multi_pack_index *midx)
{
struct strbuf buf = STRBUF_INIT;

get_midx_filename(&buf, midx->object_dir);
strbuf_addf(&buf, "-%s.bitmap", hash_to_hex(get_midx_checksum(midx)));
get_midx_filename_ext(&buf, midx->object_dir, get_midx_checksum(midx),
MIDX_EXT_BITMAP);

return strbuf_detach(&buf, NULL);
}
Expand Down
3 changes: 2 additions & 1 deletion pack-revindex.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,8 @@ int load_midx_revindex(struct multi_pack_index *m)
trace2_data_string("load_midx_revindex", the_repository,
"source", "rev");

get_midx_rev_filename(&revindex_name, m);
get_midx_filename_ext(&revindex_name, m->object_dir,
get_midx_checksum(m), MIDX_EXT_REV);

ret = load_revindex_from_disk(revindex_name.buf,
m->num_objects,
Expand Down
23 changes: 23 additions & 0 deletions t/t5319-multi-pack-index.sh
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,29 @@ test_expect_success 'preferred packs must be non-empty' '
)
'

test_expect_success 'preferred pack from existing MIDX without bitmaps' '
git init preferred-without-bitmaps &&
(
cd preferred-without-bitmaps &&
test_commit one &&
pack="$(git pack-objects --all $objdir/pack/pack </dev/null)" &&
git multi-pack-index write &&
# make another pack so that the subsequent MIDX write
# has something to do
test_commit two &&
git repack -d &&
# write a new MIDX without bitmaps reusing the singular
# pack from the existing MIDX as the preferred pack in
# the new MIDX
git multi-pack-index write --preferred-pack=pack-$pack.pack
)
'

test_expect_success 'verify multi-pack-index success' '
git multi-pack-index verify --object-dir=$objdir
'
Expand Down

0 comments on commit 1b76f06

Please sign in to comment.