Merge pull request #227: gvfs-helper: add prefetch action
This replaces #223. That version had a subtle issue with reading the
trailing hash from the downloaded packs, which caused failures when
reading from the origin remote.

Add a `gvfs-helper prefetch` command-line option
and an `objects.prefetch` mode in `gvfs-helper server`.

Sorry, but this contains a major refactor of the packfile and loose-file
handling so that I can share it with the prefetch code. As a side benefit,
I collapsed the tempfile creation that happens before the request goes out
and merged the install_ code that runs after the result is returned.

I also changed the packfile code to use the packfile checksum rather than
a timestamp, so that we look more like normal Git.

More details are in the commit message.
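
The checksum in question is the hash that trails the pack data (the same trailing hash that tripped up #223). As a minimal standalone sketch, assuming a SHA-1 repository and an illustrative pack path, deriving a normal-Git-style name from that trailer could look like this:

#include <stdio.h>

/*
 * Sketch (not part of this commit): build a normal-Git-style packfile
 * name ("pack-<checksum>.pack") from the checksum that trails the pack
 * data. For SHA-1 repositories the trailer is the final 20 bytes of the
 * *.pack file. Error handling is minimal and buffer sizes assume SHA-1.
 */
static int pack_name_from_trailer(const char *pack_path,
				  char *name, size_t name_len)
{
	unsigned char trailer[20];
	char hex[41];
	size_t i;
	FILE *fp = fopen(pack_path, "rb");

	if (!fp)
		return -1;
	if (fseek(fp, -20L, SEEK_END) || fread(trailer, 1, 20, fp) != 20) {
		fclose(fp);
		return -1;
	}
	fclose(fp);

	for (i = 0; i < 20; i++)
		snprintf(hex + 2 * i, 3, "%02x", trailer[i]);
	snprintf(name, name_len, "pack-%s.pack", hex);
	return 0;
}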
derrickstolee authored and dscho committed May 20, 2020
2 parents cefeb98 + fea5e16 commit f7a9dce
Showing 5 changed files with 1,600 additions and 340 deletions.
129 changes: 116 additions & 13 deletions gvfs-helper-client.c
@@ -24,13 +24,14 @@ static struct hashmap gh_server__subprocess_map;
static struct object_directory *gh_client__chosen_odb;

/*
* The "objects" capability has 2 verbs: "get" and "post".
* The "objects" capability has verbs: "get" and "post" and "prefetch".
*/
#define CAP_OBJECTS (1u<<1)
#define CAP_OBJECTS_NAME "objects"

#define CAP_OBJECTS__VERB_GET1_NAME "get"
#define CAP_OBJECTS__VERB_POST_NAME "post"
#define CAP_OBJECTS__VERB_PREFETCH_NAME "prefetch"

static int gh_client__start_fn(struct subprocess_entry *subprocess)
{
@@ -129,6 +130,44 @@ static int gh_client__send__objects_get(struct child_process *process,
return 0;
}

/*
* Send a request to gvfs-helper to prefetch packfiles from either the
* cache-server or the main Git server using "/gvfs/prefetch".
*
* objects.prefetch LF
* [<seconds-since-epoch> LF]
* <flush>
*/
static int gh_client__send__objects_prefetch(struct child_process *process,
timestamp_t seconds_since_epoch)
{
int err;

/*
* We assume that all of the packet_ routines call error()
* so that we don't have to.
*/

err = packet_write_fmt_gently(
process->in,
(CAP_OBJECTS_NAME "." CAP_OBJECTS__VERB_PREFETCH_NAME "\n"));
if (err)
return err;

if (seconds_since_epoch) {
err = packet_write_fmt_gently(process->in, "%" PRItime "\n",
seconds_since_epoch);
if (err)
return err;
}

err = packet_flush_gently(process->in);
if (err)
return err;

return 0;
}
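
/*
 * On the wire, each of the calls above produces a standard pkt-line:
 * a four-hex-digit length (which counts the four length bytes
 * themselves) followed by the payload, with "0000" as the flush packet.
 * The standalone sketch below (not part of this commit) emits the same
 * byte stream for an illustrative timestamp of 1589500000, using plain
 * write(2) in place of the packet_* helpers.
 *
 * 0x0015 == 4 + strlen("objects.prefetch\n");
 * 0x000f == 4 + strlen("1589500000\n").
 */
#include <string.h>
#include <unistd.h>

static int send_prefetch_raw(int fd)
{
	const char *req =
		"0015objects.prefetch\n"
		"000f1589500000\n"
		"0000";			/* flush packet */

	if (write(fd, req, strlen(req)) < 0)
		return -1;	/* partial writes ignored in this sketch */
	return 0;
}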

/*
* Verify that the pathname found in the "odb" response line matches
* what we requested.
@@ -198,7 +237,7 @@ static void gh_client__update_packed_git(const char *line)
}

/*
* Both CAP_OBJECTS verbs return the same format response:
* CAP_OBJECTS verbs return the same format response:
*
* <odb>
* <data>*
@@ -238,6 +277,8 @@ static int gh_client__objects__receive_response(
const char *v1;
char *line;
int len;
int nr_loose = 0;
int nr_packfile = 0;
int err = 0;

while (1) {
@@ -256,13 +297,13 @@
else if (starts_with(line, "packfile")) {
gh_client__update_packed_git(line);
ghc |= GHC__CREATED__PACKFILE;
*p_nr_packfile += 1;
nr_packfile++;
}

else if (starts_with(line, "loose")) {
gh_client__update_loose_cache(line);
ghc |= GHC__CREATED__LOOSE;
*p_nr_loose += 1;
nr_loose++;
}

else if (starts_with(line, "ok"))
@@ -276,6 +317,8 @@
}

*p_ghc = ghc;
*p_nr_loose = nr_loose;
*p_nr_packfile = nr_packfile;

return err;
}
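
/*
 * For readers without the Git internals at hand, the sketch below
 * (not part of this commit) is a dependency-free restatement of the
 * keyword dispatch in the loop above. Only the keywords visible in
 * this hunk ("packfile", "loose", "ok") are handled; the remaining
 * cases sit in the collapsed part of the diff.
 * Returns 1 once the terminating "ok" line is seen, 0 otherwise.
 */
#include <string.h>

static int classify_response_line(const char *line,
				  int *nr_loose, int *nr_packfile)
{
	if (!strncmp(line, "packfile", 8))
		(*nr_packfile)++;
	else if (!strncmp(line, "loose", 5))
		(*nr_loose)++;
	else if (!strncmp(line, "ok", 2))
		return 1;
	return 0;
}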
@@ -332,7 +375,7 @@ static struct gh_server__process *gh_client__find_long_running_process(
/*
* Find an existing long-running process with the above command
* line -or- create a new long-running process for this and
* subsequent 'get' requests.
* subsequent requests.
*/
if (!gh_server__subprocess_map_initialized) {
gh_server__subprocess_map_initialized = 1;
@@ -369,10 +412,14 @@ static struct gh_server__process *gh_client__find_long_running_process(

void gh_client__queue_oid(const struct object_id *oid)
{
// TODO consider removing this trace2. it is useful for interactive
// TODO debugging, but may generate way too much noise for a data
// TODO event.
trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));
/*
* Keep this trace as a printf only, so that it goes to the
* perf log, but not the event log. It is useful for interactive
* debugging, but generates way too much (unuseful) noise for the
* database.
*/
if (trace2_is_enabled())
trace2_printf("gh_client__queue_oid: %s", oid_to_hex(oid));

if (!oidset_insert(&gh_client__oidset_queued, oid))
gh_client__oidset_count++;
@@ -453,10 +500,14 @@ int gh_client__get_immediate(const struct object_id *oid,
int nr_packfile = 0;
int err = 0;

// TODO consider removing this trace2. it is useful for interactive
// TODO debugging, but may generate way too much noise for a data
// TODO event.
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));
/*
* Keep this trace as a printf only, so that it goes to the
* perf log, but not the event log. It is useful for interactive
* debugging, but generates way too much (unuseful) noise for the
* database.
*/
if (trace2_is_enabled())
trace2_printf("gh_client__get_immediate: %s", oid_to_hex(oid));

entry = gh_client__find_long_running_process(CAP_OBJECTS);
if (!entry)
@@ -485,3 +536,55 @@

return err;
}

/*
* Ask gvfs-helper to prefetch commits-and-trees packfiles since a
* given timestamp.
*
* If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
* the last received prefetch and ask for ones newer than that.
*/
int gh_client__prefetch(timestamp_t seconds_since_epoch,
int *nr_packfiles_received)
{
struct gh_server__process *entry;
struct child_process *process;
enum gh_client__created ghc;
int nr_loose = 0;
int nr_packfile = 0;
int err = 0;

entry = gh_client__find_long_running_process(CAP_OBJECTS);
if (!entry)
return -1;

trace2_region_enter("gh-client", "objects/prefetch", the_repository);
trace2_data_intmax("gh-client", the_repository, "prefetch/since",
seconds_since_epoch);

process = &entry->subprocess.process;

sigchain_push(SIGPIPE, SIG_IGN);

err = gh_client__send__objects_prefetch(process, seconds_since_epoch);
if (!err)
err = gh_client__objects__receive_response(
process, &ghc, &nr_loose, &nr_packfile);

sigchain_pop(SIGPIPE);

if (err) {
subprocess_stop(&gh_server__subprocess_map,
(struct subprocess_entry *)entry);
FREE_AND_NULL(entry);
}

trace2_data_intmax("gh-client", the_repository,
"prefetch/packfile_count", nr_packfile);
trace2_region_leave("gh-client", "objects/prefetch", the_repository);

if (nr_packfiles_received)
*nr_packfiles_received = nr_packfile;

return err;
}
18 changes: 18 additions & 0 deletions gvfs-helper-client.h
@@ -66,4 +66,22 @@ void gh_client__queue_oid_array(const struct object_id *oids, int oid_nr);
*/
int gh_client__drain_queue(enum gh_client__created *p_ghc);

/*
* Ask `gvfs-helper server` to fetch any "prefetch packs"
* available on the server more recent than the requested time.
*
* If seconds_since_epoch is zero, gvfs-helper will scan the ODB for
* the last received prefetch and ask for ones newer than that.
*
* A long-running background process is used to make subsequent requests
* (either prefetch or regular immediate/queued requests) more efficient.
*
* One or more packfiles will be created in the shared-cache ODB.
*
* Returns 0 on success, -1 on error. Optionally also returns the
* number of prefetch packs received.
*/
int gh_client__prefetch(timestamp_t seconds_since_epoch,
int *nr_packfiles_received);

#endif /* GVFS_HELPER_CLIENT_H */
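
A minimal sketch of how a caller might use this API; the wrapper function and the one-week cutoff below are illustrative, not part of the commit:

#include <time.h>
#include "gvfs-helper-client.h"

/* Hypothetical caller demonstrating both documented modes. */
static int prefetch_example(void)
{
	int nr_packs = 0;

	/* Let gvfs-helper derive the cutoff from the newest prefetch
	 * pack already present in the shared-cache ODB. */
	if (gh_client__prefetch(0, &nr_packs))
		return -1;

	/* Or request only prefetch packs newer than one week ago. */
	if (gh_client__prefetch((timestamp_t)(time(NULL) - 7 * 24 * 3600),
				&nr_packs))
		return -1;

	return nr_packs;
}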