Skip to content

Commit

Permalink
gvfs-helper: add gvfs.fallback and unit tests (#665)
Browse files Browse the repository at this point in the history
By default, GVFS Protocol-enabled Scalar clones will fall back to the
origin server if there is a network issue with the cache servers.
However (and especially for the prefetch endpoint) this may be a very
expensive operation for the origin server, leading to the user being
throttled. This shows up later in cases such as 'git push' or other web
operations.

To avoid this, create a new config option, 'gvfs.fallback', which
defaults to true. When set to 'false', pass '--no-fallback' from the
gvfs-helper client to the child gvfs-helper server process.

This will allow users who have hit this problem to avoid it in the
future. In case this becomes a more widespread problem, engineering
systems can enable the config option more broadly.

Enabling the config will of course lead to immediate failures for users,
but at least that will help diagnose the problem when it occurs instead
of later when the throttling shows up and the server load has already
passed, damage done.

 This change only applies to interactions with Azure DevOps and the
GVFS Protocol.

---

* [x] This change only applies to interactions with Azure DevOps and the
      GVFS Protocol.
  • Loading branch information
dscho committed Sep 18, 2024
2 parents 5a9c3a6 + e771d82 commit 89f57df
Show file tree
Hide file tree
Showing 4 changed files with 185 additions and 1 deletion.
5 changes: 5 additions & 0 deletions Documentation/config/gvfs.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,8 @@ gvfs.cache-server::

gvfs.sharedcache::
TODO

gvfs.fallback::
If set to `false`, then never fallback to the origin server when the cache
server fails to connect. This will alert users to failures with the cache
server, but avoid causing throttling on the origin server.
11 changes: 10 additions & 1 deletion gvfs-helper-client.c
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "quote.h"
#include "packfile.h"
#include "hex.h"
#include "config.h"

static struct oidset gh_client__oidset_queued = OIDSET_INIT;
static unsigned long gh_client__oidset_count;
Expand Down Expand Up @@ -339,17 +340,25 @@ static struct gh_server__process *gh_client__find_long_running_process(
struct gh_server__process *entry;
struct strvec argv = STRVEC_INIT;
struct strbuf quoted = STRBUF_INIT;
int fallback;

gh_client__choose_odb();

/*
* TODO decide what defaults we want.
*/
strvec_push(&argv, "gvfs-helper");
strvec_push(&argv, "--fallback");
strvec_push(&argv, "--cache-server=trust");
strvec_pushf(&argv, "--shared-cache=%s",
gh_client__chosen_odb->path);

/* If gvfs.fallback=false, then don't add --fallback. */
if (!git_config_get_bool("gvfs.fallback", &fallback) &&
!fallback)
strvec_push(&argv, "--no-fallback");
else
strvec_push(&argv, "--fallback");

strvec_push(&argv, "server");

sq_quote_argv_pretty(&quoted, argv.v);
Expand Down
8 changes: 8 additions & 0 deletions t/helper/test-gvfs-protocol.c
Original file line number Diff line number Diff line change
Expand Up @@ -1581,6 +1581,14 @@ static enum worker_result dispatch(struct req *req)
const char *method;
enum worker_result wr;

if (strstr(req->uri_base.buf, MY_SERVER_TYPE__CACHE)) {
if (string_list_has_string(&mayhem_list, "cache_http_503")) {
logmayhem("cache_http_503");
return send_http_error(1, 503, "Service Unavailable", 2,
WR_MAYHEM | WR_HANGUP);
}
}

if (string_list_has_string(&mayhem_list, "close_no_write")) {
logmayhem("close_no_write");
return WR_MAYHEM | WR_HANGUP;
Expand Down
162 changes: 162 additions & 0 deletions t/t5799-gvfs-helper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,12 @@ test_set_port GIT_TEST_GVFS_PROTOCOL_PORT
# actually use it). We are only testing explicit object
# fetching using gvfs-helper.exe in isolation.
#
# repo_t2:
# Another empty repo to use after we contaminate t1.
#
REPO_SRC="$(pwd)"/repo_src
REPO_T1="$(pwd)"/repo_t1
REPO_T2="$(pwd)"/repo_t2

# Setup some loopback URLs where test-gvfs-protocol.exe will be
# listening. We will spawn it directly inside the repo_src directory,
Expand All @@ -45,6 +49,7 @@ ORIGIN_URL=http://$HOST_PORT/servertype/origin
CACHE_URL=http://$HOST_PORT/servertype/cache

SHARED_CACHE_T1="$(pwd)"/shared_cache_t1
SHARED_CACHE_T2="$(pwd)"/shared_cache_t2

# The pid-file is created by test-gvfs-protocol.exe when it starts.
# The server will shut down if/when we delete it. (This is a little
Expand Down Expand Up @@ -182,6 +187,10 @@ test_expect_success 'setup repos' '
mkdir "$SHARED_CACHE_T1/pack" &&
mkdir "$SHARED_CACHE_T1/info" &&
#
mkdir "$SHARED_CACHE_T2" &&
mkdir "$SHARED_CACHE_T2/pack" &&
mkdir "$SHARED_CACHE_T2/info" &&
#
# setup repo_t1 and point all of the gvfs.* values to repo_src.
#
test_create_repo "$REPO_T1" &&
Expand All @@ -191,6 +200,13 @@ test_expect_success 'setup repos' '
git -C "$REPO_T1" config --local gvfs.sharedCache "$SHARED_CACHE_T1" &&
echo "$SHARED_CACHE_T1" >> "$REPO_T1"/.git/objects/info/alternates &&
#
test_create_repo "$REPO_T2" &&
git -C "$REPO_T2" branch -M main &&
git -C "$REPO_T2" remote add origin $ORIGIN_URL &&
git -C "$REPO_T2" config --local gvfs.cache-server $CACHE_URL &&
git -C "$REPO_T2" config --local gvfs.sharedCache "$SHARED_CACHE_T2" &&
echo "$SHARED_CACHE_T2" >> "$REPO_T2"/.git/objects/info/alternates &&
#
#
#
cat <<-EOF >creds.txt &&
Expand All @@ -203,6 +219,7 @@ test_expect_success 'setup repos' '
EOF
chmod 755 creds.sh &&
git -C "$REPO_T1" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" &&
git -C "$REPO_T2" config --local credential.helper "!f() { cat \"$(pwd)\"/creds.txt; }; f" &&
#
# Create some test data sets.
#
Expand Down Expand Up @@ -1036,6 +1053,70 @@ test_expect_success 'successful retry after http-error: origin get' '
verify_connection_count 2
'

#################################################################
# So far we have confirmed that gvfs-helper can recover from a network
# error (with retries, since the cache-server was disabled in all of
# the above tests). Try again with fallback turned on.
#
# With mayhem "http_503" turned on both the cache and origin server
# will always throw a 503 error.
#
# Confirm that we tried to make six connections: we should hit the
# cache-server 3 times (one initial attempt and two retries) and then
# try the origin server 3 times.
#
#################################################################

test_expect_success 'http-error: 503 Service Unavailable (with retry and fallback)' '
test_when_finished "per_test_cleanup" &&
start_gvfs_protocol_server_with_mayhem http_503 &&
test_expect_code $GH__ERROR_CODE__HTTP_503 \
git -C "$REPO_T1" gvfs-helper \
--cache-server=trust \
--remote=origin \
--fallback \
get \
--max-retries=2 \
<"$OIDS_FILE" >OUT.output 2>OUT.stderr &&
stop_gvfs_protocol_server &&
grep -q "error: get: (http:503)" OUT.stderr &&
verify_connection_count 6
'

#################################################################
# Now repeat the above, but explicitly turn off fallback.
#
# Again, we use mayhem "http_503". However, with fallback turned
# off, we will only attempt the 3 connections to the cache server.
# We will not try to hit the origin server.
#
# So we should only see a total of 3 connections rather than the
# six in the previous test.
#
#################################################################

test_expect_success 'http-error: 503 Service Unavailable (with retry and no-fallback)' '
test_when_finished "per_test_cleanup" &&
start_gvfs_protocol_server_with_mayhem http_503 &&
test_expect_code $GH__ERROR_CODE__HTTP_503 \
git -C "$REPO_T1" gvfs-helper \
--cache-server=trust \
--remote=origin \
--no-fallback \
get \
--max-retries=2 \
<"$OIDS_FILE" >OUT.output 2>OUT.stderr &&
stop_gvfs_protocol_server &&
grep -q "error: get: (http:503)" OUT.stderr &&
verify_connection_count 3
'

#################################################################
# Test HTTP Auth
#
Expand Down Expand Up @@ -1193,6 +1274,87 @@ test_expect_success 'integration: fully implicit: diff 2 commits' '
>OUT.output 2>OUT.stderr
'

# T1 should be considered contaminated at this point.

#################################################################
# gvfs-helper.exe defaults to no fallback.
# gvfs-helper-client.c defaults to adding `--fallback` to child process.
#
# `gvfs.fallback` was added to change the default behavior in the
# gvfs-helper-client.c code to add either `--fallback` or `--no-fallback`
# (for origin server load reasons).
#
# When `gvfs.fallback` is unset, we default to TRUE and pass `--fallback`.
# Otherwise, we use the boolean value to decide.
#
# NOTE: We DO NOT attempt to count connection requests in the
# following tests. Since we are using a normal `git` command to drive
# the `gvfs-helper-client.c` code (and spawn `git-gvfs-helper.exe`) we
# cannot make assumptions on the number of child processes or
# reqeusts. The "promisor" logic may drive one or more single-item
# GETs or a series of bulk POST attempts. Therefore, we must rely
# only on the result of the command and (implicitly) whether all
# missing objects were resolved. We use mayhem features to selectively
# break the cache and origin servers.
#################################################################

test_expect_success 'integration: implicit-get: http_503: diff 2 commits' '
test_when_finished "per_test_cleanup" &&
# Tell both servers to always send 503.
start_gvfs_protocol_server_with_mayhem http_503 &&
# Implicitly demand-load everything without any pre-seeding.
# (We cannot tell from whether fallback was used or not in this
# limited test.)
#
test_must_fail \
git -C "$REPO_T2" -c core.useGVFSHelper=true \
diff $(cat m1.branch)..$(cat m3.branch) \
>OUT.output 2>OUT.stderr &&
stop_gvfs_protocol_server
'

test_expect_success 'integration: implicit-get: cache_http_503,no-fallback: diff 2 commits' '
test_when_finished "per_test_cleanup" &&
# Tell cache server to send 503 and origin server to send 200.
start_gvfs_protocol_server_with_mayhem cache_http_503 &&
# Implicitly demand-load everything without any pre-seeding.
# This should fail because we do not allow fallback.
#
test_must_fail \
git -C "$REPO_T2" \
-c core.useGVFSHelper=true \
-c gvfs.fallback=false \
diff $(cat m1.branch)..$(cat m3.branch) \
>OUT.output 2>OUT.stderr &&
stop_gvfs_protocol_server
'

test_expect_success 'integration: implicit-get: cache_http_503,with-fallback: diff 2 commits' '
test_when_finished "per_test_cleanup" &&
# Tell cache server to send 503 and origin server to send 200.
start_gvfs_protocol_server_with_mayhem cache_http_503 &&
# Implicitly demand-load everything without any pre-seeding.
#
git -C "$REPO_T2" \
-c core.useGVFSHelper=true \
-c gvfs.fallback=true \
diff $(cat m1.branch)..$(cat m3.branch) \
>OUT.output 2>OUT.stderr &&
stop_gvfs_protocol_server
'

# T2 should be considered contaminated at this point.


#################################################################
# Duplicate packfile tests.
#
Expand Down

0 comments on commit 89f57df

Please sign in to comment.