Skip to content

Commit f6f1dad

Browse files
mjcheethamdscho
authored and committed
maintenance: add new cache-local-objects maintenance task (#720)
Introduce a new maintenance task, `cache-local-objects`, that operates on Scalar or VFS for Git repositories with a per-volume, shared object cache (specified by `gvfs.sharedCache`) to migrate packfiles and loose objects from the repository object directory to the shared cache. Older versions of `microsoft/git` incorrectly placed packfiles in the repository object directory instead of the shared cache; this task will help clean up existing clones impacted by that issue. Fixes #716
2 parents ba48074 + 510b7d4 commit f6f1dad

File tree

4 files changed

+336
-5
lines changed

4 files changed

+336
-5
lines changed

Diff for: Documentation/git-maintenance.txt

+8
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ task:
6969
* `prefetch`: hourly.
7070
* `loose-objects`: daily.
7171
* `incremental-repack`: daily.
72+
* `cache-local-objects`: weekly.
7273
--
7374
+
7475
`git maintenance register` will also disable foreground maintenance by
@@ -158,6 +159,13 @@ pack-refs::
158159
need to iterate across many references. See linkgit:git-pack-refs[1]
159160
for more information.
160161

162+
cache-local-objects::
163+
The `cache-local-objects` task only operates on Scalar or VFS for Git
164+
repositories (cloned with either `scalar clone` or `gvfs clone`) that
165+
have the `gvfs.sharedCache` configuration setting present. This task
166+
migrates pack files and loose objects from the repository's object
167+
directory into the shared volume cache.
168+
161169
OPTIONS
162170
-------
163171
--auto::

Diff for: builtin/gc.c

+197-5
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#define USE_THE_REPOSITORY_VARIABLE
1414
#define DISABLE_SIGN_COMPARE_WARNINGS
1515

16+
#include "git-compat-util.h"
1617
#include "builtin.h"
1718
#include "abspath.h"
1819
#include "date.h"
@@ -44,6 +45,8 @@
4445
#include "hook.h"
4546
#include "setup.h"
4647
#include "trace2.h"
48+
#include "copy.h"
49+
#include "dir.h"
4750

4851
#define FAILED_RUN "failed to run %s"
4952

@@ -1154,17 +1157,19 @@ static int write_loose_object_to_stdin(const struct object_id *oid,
11541157
return ++(d->count) > d->batch_size;
11551158
}
11561159

1157-
static const char *object_dir = NULL;
1160+
static const char *shared_object_dir = NULL;
11581161

11591162
static int pack_loose(struct maintenance_run_opts *opts)
11601163
{
11611164
struct repository *r = the_repository;
11621165
int result = 0;
11631166
struct write_loose_object_data data;
11641167
struct child_process pack_proc = CHILD_PROCESS_INIT;
1168+
const char *object_dir = r->objects->odb->path;
11651169

1166-
if (!object_dir)
1167-
object_dir = r->objects->odb->path;
1170+
/* If set, use the shared object directory. */
1171+
if (shared_object_dir)
1172+
object_dir = shared_object_dir;
11681173

11691174
/*
11701175
* Do not start pack-objects process
@@ -1358,6 +1363,186 @@ static int maintenance_task_incremental_repack(struct maintenance_run_opts *opts
13581363
return 0;
13591364
}
13601365

1366+
/*
 * Make 'dst' available as a hard link to 'src', falling back to a copy.
 *
 * link(2) is attempted first. When it fails, copy_file() is tried
 * instead; a warning is printed beforehand unless the failure was EXDEV
 * (source and destination on different file systems), which is the
 * expected reason for needing the fallback. A failed copy is reported
 * through die_errno().
 */
static void link_or_copy_or_die(const char *src, const char *dst)
{
	if (link(src, dst)) {
		/* Only an unexpected link failure deserves a warning. */
		if (errno != EXDEV)
			warning_errno(_("failed to link '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);
	}
}
1378+
1379+
/*
 * Move 'src' to 'dst', falling back to copy-and-delete.
 *
 * rename(2) is attempted first. When it fails, the file is copied with
 * copy_file() and the source is then unlinked; a warning is printed
 * beforehand unless the failure was EXDEV (source and destination on
 * different file systems). A failed copy or a failed unlink of the
 * source is reported through die_errno().
 */
static void rename_or_copy_or_die(const char *src, const char *dst)
{
	if (rename(src, dst)) {
		/* Only an unexpected rename failure deserves a warning. */
		if (errno != EXDEV)
			warning_errno(_("failed to move '%s' to '%s'"), src, dst);

		if (copy_file(dst, src, 0444))
			die_errno(_("failed to copy '%s' to '%s'"), src, dst);

		if (unlink(src))
			die_errno(_("failed to delete '%s'"), src);
	}
}
1394+
1395+
static void migrate_pack(const char *srcdir, const char *dstdir,
1396+
const char *pack_filename)
1397+
{
1398+
size_t basenamelen, srclen, dstlen;
1399+
struct strbuf src = STRBUF_INIT, dst = STRBUF_INIT;
1400+
struct {
1401+
const char *ext;
1402+
unsigned move:1;
1403+
} files[] = {
1404+
{".pack", 0},
1405+
{".keep", 0},
1406+
{".rev", 0},
1407+
{".idx", 1}, /* The index file must be atomically moved last. */
1408+
};
1409+
1410+
trace2_region_enter("maintenance", "migrate_pack", the_repository);
1411+
1412+
basenamelen = strlen(pack_filename) - 5; /* .pack */
1413+
strbuf_addstr(&src, srcdir);
1414+
strbuf_addch(&src, '/');
1415+
strbuf_add(&src, pack_filename, basenamelen);
1416+
strbuf_addstr(&src, ".idx");
1417+
1418+
/* A pack without an index file is not yet ready to be migrated. */
1419+
if (!file_exists(src.buf))
1420+
goto cleanup;
1421+
1422+
strbuf_setlen(&src, src.len - 4 /* .idx */);
1423+
strbuf_addstr(&dst, dstdir);
1424+
strbuf_addch(&dst, '/');
1425+
strbuf_add(&dst, pack_filename, basenamelen);
1426+
1427+
srclen = src.len;
1428+
dstlen = dst.len;
1429+
1430+
/* Move or copy files from the source directory to the destination. */
1431+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1432+
strbuf_setlen(&src, srclen);
1433+
strbuf_addstr(&src, files[i].ext);
1434+
1435+
if (!file_exists(src.buf))
1436+
continue;
1437+
1438+
strbuf_setlen(&dst, dstlen);
1439+
strbuf_addstr(&dst, files[i].ext);
1440+
1441+
if (files[i].move)
1442+
rename_or_copy_or_die(src.buf, dst.buf);
1443+
else
1444+
link_or_copy_or_die(src.buf, dst.buf);
1445+
}
1446+
1447+
/*
1448+
* Now the pack and all associated files exist at the destination we can
1449+
* now clean up the files in the source directory.
1450+
*/
1451+
for (size_t i = 0; i < ARRAY_SIZE(files); i++) {
1452+
/* Files that were moved rather than copied have no clean up. */
1453+
if (files[i].move)
1454+
continue;
1455+
1456+
strbuf_setlen(&src, srclen);
1457+
strbuf_addstr(&src, files[i].ext);
1458+
1459+
/* Files that never existed in originally have no clean up.*/
1460+
if (!file_exists(src.buf))
1461+
continue;
1462+
1463+
if (unlink(src.buf))
1464+
warning_errno(_("failed to delete '%s'"), src.buf);
1465+
}
1466+
1467+
cleanup:
1468+
strbuf_release(&src);
1469+
strbuf_release(&dst);
1470+
1471+
trace2_region_leave("maintenance", "migrate_pack", the_repository);
1472+
}
1473+
1474+
/*
 * Per-file callback used with for_each_file_in_pack_dir(): migrates the
 * pack named by 'file_name' into the directory passed through 'data'.
 *
 * Only ".pack" entries trigger any work. The associated .idx, .keep and
 * .rev files are handled together with the pack file by migrate_pack(),
 * with the index file being moved last; the original non-index files
 * are only deleted once everything has been copied/moved.
 */
static void move_pack_to_shared_cache(const char *full_path, size_t full_path_len,
				      const char *file_name, void *data)
{
	const char *dstdir = data;

	if (ends_with(file_name, ".pack")) {
		/*
		 * Drop the trailing "<separator><file_name>" to get the
		 * containing directory (assumes full_path ends with the
		 * file name preceded by one separator character).
		 */
		size_t dir_len = full_path_len - strlen(file_name) - 1;
		char *srcdir = xstrndup(full_path, dir_len);

		migrate_pack(srcdir, dstdir, file_name);

		free(srcdir);
	}
}
1495+
1496+
static int move_loose_object_to_shared_cache(const struct object_id *oid,
1497+
const char *path,
1498+
UNUSED void *data)
1499+
{
1500+
struct stat st;
1501+
struct strbuf dst = STRBUF_INIT;
1502+
char *hex = oid_to_hex(oid);
1503+
1504+
strbuf_addf(&dst, "%s/%.2s/", shared_object_dir, hex);
1505+
1506+
if (stat(dst.buf, &st)) {
1507+
if (mkdir(dst.buf, 0777))
1508+
die_errno(_("failed to create directory '%s'"), dst.buf);
1509+
} else if (!S_ISDIR(st.st_mode))
1510+
die(_("expected '%s' to be a directory"), dst.buf);
1511+
1512+
strbuf_addstr(&dst, hex+2);
1513+
rename_or_copy_or_die(path, dst.buf);
1514+
1515+
strbuf_release(&dst);
1516+
return 0;
1517+
}
1518+
1519+
/*
 * The `cache-local-objects` maintenance task: migrate pack files and
 * loose objects from the repository's own object directory into the
 * shared object cache.
 *
 * Does nothing when no shared object directory has been configured
 * (shared_object_dir is NULL) or when it compares equal, per
 * fspathcmp(), to the repository's object directory.
 *
 * Both parameters are unused. Always returns 0.
 */
static int maintenance_task_cache_local_objs(UNUSED struct maintenance_run_opts *opts,
					     UNUSED struct gc_config *cfg)
{
	struct repository *r = the_repository;
	struct strbuf dstdir = STRBUF_INIT;
	const char *local_odb;

	/* This task is only applicable with a VFS/Scalar shared cache. */
	if (!shared_object_dir)
		return 0;

	local_odb = r->objects->odb->path;

	/* Only migrate when the destination differs from the local odb. */
	if (fspathcmp(local_odb, shared_object_dir)) {
		strbuf_addf(&dstdir, "%s/pack", shared_object_dir);

		for_each_file_in_pack_dir(local_odb, move_pack_to_shared_cache,
					  dstdir.buf);

		for_each_loose_object(move_loose_object_to_shared_cache, NULL,
				      FOR_EACH_OBJECT_LOCAL_ONLY);
	}

	strbuf_release(&dstdir);
	return 0;
}
1545+
13611546
typedef int maintenance_task_fn(struct maintenance_run_opts *opts,
13621547
struct gc_config *cfg);
13631548

@@ -1387,6 +1572,7 @@ enum maintenance_task_label {
13871572
TASK_GC,
13881573
TASK_COMMIT_GRAPH,
13891574
TASK_PACK_REFS,
1575+
TASK_CACHE_LOCAL_OBJS,
13901576

13911577
/* Leave as final value */
13921578
TASK__COUNT
@@ -1423,6 +1609,10 @@ static struct maintenance_task tasks[] = {
14231609
maintenance_task_pack_refs,
14241610
pack_refs_condition,
14251611
},
1612+
[TASK_CACHE_LOCAL_OBJS] = {
1613+
"cache-local-objects",
1614+
maintenance_task_cache_local_objs,
1615+
},
14261616
};
14271617

14281618
static int compare_tasks_by_selection(const void *a_, const void *b_)
@@ -1517,6 +1707,8 @@ static void initialize_maintenance_strategy(void)
15171707
tasks[TASK_LOOSE_OBJECTS].schedule = SCHEDULE_DAILY;
15181708
tasks[TASK_PACK_REFS].enabled = 1;
15191709
tasks[TASK_PACK_REFS].schedule = SCHEDULE_WEEKLY;
1710+
tasks[TASK_CACHE_LOCAL_OBJS].enabled = 1;
1711+
tasks[TASK_CACHE_LOCAL_OBJS].schedule = SCHEDULE_WEEKLY;
15201712
}
15211713
}
15221714

@@ -1634,8 +1826,8 @@ static int maintenance_run(int argc, const char **argv, const char *prefix,
16341826
*/
16351827
if (!git_config_get_value("gvfs.sharedcache", &tmp_obj_dir) &&
16361828
tmp_obj_dir) {
1637-
object_dir = xstrdup(tmp_obj_dir);
1638-
setenv(DB_ENVIRONMENT, object_dir, 1);
1829+
shared_object_dir = xstrdup(tmp_obj_dir);
1830+
setenv(DB_ENVIRONMENT, shared_object_dir, 1);
16391831
}
16401832

16411833
ret = maintenance_run_tasks(&opts, &cfg);

Diff for: scalar.c

+1
Original file line numberDiff line numberDiff line change
@@ -1170,6 +1170,7 @@ static int cmd_run(int argc, const char **argv)
11701170
{ "fetch", "prefetch" },
11711171
{ "loose-objects", "loose-objects" },
11721172
{ "pack-files", "incremental-repack" },
1173+
{ "cache-local-objects", "cache-local-objects" },
11731174
{ NULL, NULL }
11741175
};
11751176
struct strbuf buf = STRBUF_INIT;

0 commit comments

Comments
 (0)