Skip to content

Commit 93b7999

Browse files
committed
Merge branch 'scalar-with-gvfs'
Prepare `scalar` to use the GVFS protocol instead of partial clone (required to support Azure Repos). Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
2 parents 63dfa6e + 21fa0fd commit 93b7999

16 files changed

+1200
-54
lines changed

Diff for: Documentation/scalar.txt

+55-1
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,16 @@ SYNOPSIS
99
--------
1010
[verse]
1111
scalar clone [--single-branch] [--branch <main-branch>] [--full-clone]
12-
[--[no-]src] <url> [<enlistment>]
12+
[--[no-]src] [--local-cache-path <path>] [--cache-server-url <url>]
13+
<url> [<enlistment>]
1314
scalar list
1415
scalar register [<enlistment>]
1516
scalar unregister [<enlistment>]
1617
scalar run ( all | config | commit-graph | fetch | loose-objects | pack-files ) [<enlistment>]
1718
scalar reconfigure [ --all | <enlistment> ]
1819
scalar diagnose [<enlistment>]
1920
scalar delete <enlistment>
21+
scalar cache-server ( --get | --set <url> | --list [<remote>] ) [<enlistment>]
2022

2123
DESCRIPTION
2224
-----------
@@ -97,6 +99,37 @@ cloning. If the HEAD at the remote did not point at any branch when
9799
A sparse-checkout is initialized by default. This behavior can be
98100
turned off via `--full-clone`.
99101

102+
--local-cache-path <path>::
103+
Override the path to the local cache root directory. Pre-fetched objects
104+
are stored into a repository-dependent subdirectory of that path.
105+
+
106+
The default is `<drive>:\.scalarCache` on Windows (on the same drive as the
107+
clone), and `~/.scalarCache` on macOS.
108+
109+
--cache-server-url <url>::
110+
Retrieve missing objects from the specified remote, which is expected to
111+
understand the GVFS protocol.
112+
113+
--[no-]gvfs-protocol::
114+
When cloning from a `<url>` with either `dev.azure.com` or
115+
`visualstudio.com` in the name, `scalar clone` will attempt to use the GVFS
116+
Protocol to access Git objects, specifically from a cache server when
117+
available, and will fail to clone if there is an error over that protocol.
118+
119+
To enable the GVFS Protocol regardless of the origin `<url>`, use
120+
`--gvfs-protocol`. This will cause `scalar clone` to fail when the origin
121+
server fails to provide a valid response to the `gvfs/config` endpoint.
122+
123+
To disable the GVFS Protocol, use `--no-gvfs-protocol` and `scalar clone`
124+
will only use the Git protocol, starting with a partial clone. This can be
125+
helpful if your `<url>` points to Azure Repos but the repository does not
126+
have GVFS cache servers enabled. It is likely more efficient to use its
127+
partial clone functionality through the Git protocol.
128+
129+
Previous versions of `scalar clone` could fall back to a partial clone over
130+
the Git protocol if there was any issue gathering GVFS configuration
131+
information from the origin server.
132+
100133
List
101134
~~~~
102135

@@ -170,6 +203,27 @@ delete <enlistment>::
170203
This subcommand lets you delete an existing Scalar enlistment from your
171204
local file system, unregistering the repository.
172205

206+
Cache-server
207+
~~~~~~~~~~~~
208+
209+
cache-server ( --get | --set <url> | --list [<remote>] ) [<enlistment>]::
210+
This command lets you query or set the GVFS-enabled cache server used
211+
to fetch missing objects.
212+
213+
--get::
214+
This is the default command mode: query the currently-configured cache
215+
server URL, if any.
216+
217+
--list::
218+
Access the `gvfs/config` endpoint of the specified remote (default:
219+
`origin`) to figure out which cache servers are available, if any.
220+
+
221+
In contrast to the `--get` command mode (which only accesses the local
222+
repository), this command mode triggers a request via the network that
223+
potentially requires authentication. If authentication is required, the
224+
configured credential helper is employed (see linkgit:git-credential[1]
225+
for details).
226+
173227
SEE ALSO
174228
--------
175229
linkgit:git-clone[1], linkgit:git-maintenance[1].

Diff for: Makefile

+2-1
Original file line numberDiff line numberDiff line change
@@ -2787,6 +2787,7 @@ GIT_OBJS += git.o
27872787
.PHONY: git-objs
27882788
git-objs: $(GIT_OBJS)
27892789

2790+
SCALAR_OBJS := json-parser.o
27902791
SCALAR_OBJS += scalar.o
27912792
.PHONY: scalar-objs
27922793
scalar-objs: $(SCALAR_OBJS)
@@ -2938,7 +2939,7 @@ $(REMOTE_CURL_PRIMARY): remote-curl.o http.o http-walker.o $(LAZYLOAD_LIBCURL_OB
29382939
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) \
29392940
$(CURL_LIBCURL) $(EXPAT_LIBEXPAT) $(LIBS)
29402941

2941-
scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS)
2942+
scalar$X: $(SCALAR_OBJS) GIT-LDFLAGS $(GITLIBS)
29422943
$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) \
29432944
$(filter %.o,$^) $(LIBS)
29442945

Diff for: contrib/buildsystems/CMakeLists.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -804,7 +804,7 @@ target_link_libraries(git-sh-i18n--envsubst common-main)
804804
add_executable(git-shell ${CMAKE_SOURCE_DIR}/shell.c)
805805
target_link_libraries(git-shell common-main)
806806

807-
add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c)
807+
add_executable(scalar ${CMAKE_SOURCE_DIR}/scalar.c ${CMAKE_SOURCE_DIR}/json-parser.c)
808808
target_link_libraries(scalar common-main)
809809

810810
if(CURL_FOUND)

Diff for: contrib/scalar/docs/getting-started.md

+20-4
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ Creating a new Scalar clone
1818
---------------------------------------------------
1919

2020
The `clone` verb creates a local enlistment of a remote repository using the
21-
partial clone feature available e.g. on GitHub.
22-
21+
partial clone feature available e.g. on GitHub, or using the
22+
[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md),
23+
which is supported by services such as Azure Repos.
2324

2425
```
2526
scalar clone [options] <url> [<dir>]
@@ -68,11 +69,26 @@ in `<path>`.
6869
These options allow a user to customize their initial enlistment.
6970

7071
* `--full-clone`: If specified, do not initialize the sparse-checkout feature.
71-
All files will be present in your `src` directory. This uses a Git partial
72-
clone: blobs are downloaded on demand.
72+
All files will be present in your `src` directory. This behaves very similarly
73+
to a Git partial clone in that blobs are downloaded on demand. However, it
74+
will use the GVFS protocol to download all Git objects.
75+
76+
* `--cache-server-url=<url>`: If specified, set the intended cache server to
77+
the specified `<url>`. All object queries will use the GVFS protocol to this
78+
`<url>` instead of the origin remote. If the remote supplies a list of
79+
cache servers via the `<url>/gvfs/config` endpoint, then the `clone` command
80+
will select a nearby cache server from that list.
7381

7482
* `--branch=<ref>`: Specify the branch to checkout after clone.
7583

84+
* `--local-cache-path=<path>`: Use this option to override the path for the
85+
local Scalar cache. If not specified, then Scalar will select a default
86+
path to share objects with your other enlistments. On Windows, this path
87+
is a subdirectory of `<Volume>:\.scalarCache\`. On Mac, this path is a
88+
subdirectory of `~/.scalarCache/`. The default cache path is recommended so
89+
multiple enlistments of the same remote repository share objects on the
90+
same device.
91+
7692
### Advanced Options
7793

7894
The options below are not intended for use by a typical user. These are

Diff for: contrib/scalar/docs/index.md

+9-5
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,14 @@ these features for that repo (except partial clone) and start running suggested
2828
maintenance in the background using
2929
[the `git maintenance` feature](https://git-scm.com/docs/git-maintenance).
3030

31-
Repos cloned with the `scalar clone` command use partial clone to significantly
32-
reduce the amount of data required to get started using a repository. By
33-
delaying all blob downloads until they are required, Scalar allows you to work
34-
with very large repositories quickly.
31+
Repos cloned with the `scalar clone` command use partial clone or the
32+
[GVFS protocol](https://github.com/microsoft/VFSForGit/blob/HEAD/Protocol.md)
33+
to significantly reduce the amount of data required to get started
34+
using a repository. By delaying all blob downloads until they are required,
35+
Scalar allows you to work with very large repositories quickly. The GVFS
36+
protocol allows a network of _cache servers_ to serve objects with lower
37+
latency and higher throughput. The cache servers also reduce load on the
38+
central server.
3539

3640
Documentation
3741
-------------
@@ -42,7 +46,7 @@ Documentation
4246

4347
* [Troubleshooting](troubleshooting.md):
4448
Collect diagnostic information or update custom settings. Includes
45-
`scalar diagnose`.
49+
`scalar diagnose` and `scalar cache-server`.
4650

4751
* [The Philosophy of Scalar](philosophy.md): Why does Scalar work the way
4852
it does, and how do we make decisions about its future?

Diff for: contrib/scalar/docs/philosophy.md

+19-14
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,27 @@ Scalar only to configure those new settings. In particular, we ported
1313
features like background maintenance to Git to make Scalar simpler and
1414
make Git more powerful.
1515

16-
Services such as GitHub support partial clone , a standard adopted by the Git
17-
project to download only part of the Git objects when cloning, and fetching
18-
further objects on demand. If your hosting service supports partial clone, then
19-
we absolutely recommend it as a way to greatly speed up your clone and fetch
20-
times and to reduce how much disk space your Git repository requires. Scalar
21-
will help with this!
16+
Scalar ships inside [a custom version of Git][microsoft-git], but we are
17+
working to make it available in other forks of Git. The only feature
18+
that is not intended to ever reach the standard Git client is Scalar's use
19+
of [the GVFS Protocol][gvfs-protocol], which is essentially an older
20+
version of [Git's partial clone feature](https://github.blog/2020-12-21-get-up-to-speed-with-partial-clone-and-shallow-clone/)
21+
that was available first in Azure Repos. Services such as GitHub support
22+
only partial clone instead of the GVFS protocol because that is the
23+
standard adopted by the Git project. If your hosting service supports
24+
partial clone, then we absolutely recommend it as a way to greatly speed
25+
up your clone and fetch times and to reduce how much disk space your Git
26+
repository requires. Scalar will help with this!
2227

23-
Most of the value of Scalar can be found in the core Git client. However, most
24-
of the advanced features that really optimize Git's performance are off by
25-
default for compatibility reasons. To really take advantage of Git's latest and
26-
greatest features, you either need to study the [`git config`
27-
documentation](https://git-scm.com/docs/git-config) and regularly read [the Git
28-
release notes](https://github.com/git/git/tree/master/Documentation/RelNotes).
28+
If you don't use the GVFS Protocol, then most of the value of Scalar can
29+
be found in the core Git client. However, most of the advanced features
30+
that really optimize Git's performance are off by default for compatibility
31+
reasons. To really take advantage of Git's latest and greatest features,
32+
you either need to study the [`git config` documentation](https://git-scm.com/docs/git-config)
33+
and regularly read [the Git release notes](https://github.com/git/git/tree/master/Documentation/RelNotes).
2934
Even if you do all that work and customize your Git settings on your machines,
30-
you likely will want to share those settings with other team members. Or, you
31-
can just use Scalar!
35+
you likely will want to share those settings with other team members.
36+
Or, you can just use Scalar!
3237

3338
Using `scalar register` on an existing Git repository will give you these
3439
benefits:

Diff for: contrib/scalar/docs/troubleshooting.md

+20
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,23 @@ files for that repository. This includes:
1818

1919
As the `diagnose` command completes, it provides the path of the resulting
2020
zip file. This zip can be attached to bug reports to make the analysis easier.
21+
22+
Modifying Configuration Values
23+
------------------------------
24+
25+
The Scalar-specific configuration is only available for repos using the
26+
GVFS protocol.
27+
28+
### Cache Server URL
29+
30+
When using an enlistment cloned with `scalar clone` and the GVFS protocol,
31+
you will have a value called the cache server URL. Cache servers are a feature
32+
of the GVFS protocol to provide low-latency access to the on-demand object
33+
requests. The cache server URL is stored in the `gvfs.cache-server` setting in your local Git config
34+
file.
35+
36+
Run `scalar cache-server --get` to see the current cache server.
37+
38+
Run `scalar cache-server --list` to see the available cache server URLs.
39+
40+
Run `scalar cache-server --set=<url>` to set your cache server to `<url>`.

Diff for: diagnose.c

+91-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include "packfile.h"
1414
#include "parse-options.h"
1515
#include "write-or-die.h"
16+
#include "config.h"
1617

1718
struct archive_dir {
1819
const char *path;
@@ -72,6 +73,39 @@ static int dir_file_stats(struct object_directory *object_dir, void *data)
7273
return 0;
7374
}
7475

76+
/*
 * Append a listing of the regular files found directly inside `path` to `buf`.
 *
 * The listing starts with a "Contents of <absolute path>:" header line and is
 * followed by one line per entry: the file name left-justified in a 70-column
 * field, then the size reported by stat() right-aligned in a 16-column field.
 *
 * Nothing is appended when the directory cannot be opened, and entries whose
 * stat() call fails are skipped silently.
 *
 * NOTE(review): entries are selected with `e->d_type == DT_REG`; where
 * readdir() reports DT_UNKNOWN instead of a concrete type, such files would
 * be left out of the listing -- confirm this is acceptable on all platforms.
 */
static void dir_stats(struct strbuf *buf, const char *path)
77+
{
78+
DIR *dir = opendir(path);
79+
struct dirent *e;
80+
struct stat e_stat;
81+
struct strbuf file_path = STRBUF_INIT;
82+
size_t base_path_len;
83+
84+
if (!dir)
85+
return;
86+
87+
strbuf_addstr(buf, "Contents of ");
88+
strbuf_add_absolute_path(buf, path);
89+
strbuf_addstr(buf, ":\n");
90+
91+
strbuf_add_absolute_path(&file_path, path);
92+
strbuf_addch(&file_path, '/');
93+
/* Remember the length of the "<dir>/" prefix so each entry name can replace the previous one. */
base_path_len = file_path.len;
94+
95+
while ((e = readdir(dir)) != NULL)
96+
if (!is_dot_or_dotdot(e->d_name) && e->d_type == DT_REG) {
97+
strbuf_setlen(&file_path, base_path_len);
98+
strbuf_addstr(&file_path, e->d_name);
99+
if (!stat(file_path.buf, &e_stat))
100+
strbuf_addf(buf, "%-70s %16"PRIuMAX"\n",
101+
e->d_name,
102+
(uintmax_t)e_stat.st_size);
103+
}
104+
105+
strbuf_release(&file_path);
106+
closedir(dir);
107+
}
108+
75109
static int count_files(struct strbuf *path)
76110
{
77111
DIR *dir = opendir(path->buf);
@@ -184,7 +218,8 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
184218
struct strvec archiver_args = STRVEC_INIT;
185219
char **argv_copy = NULL;
186220
int stdout_fd = -1, archiver_fd = -1;
187-
struct strbuf buf = STRBUF_INIT;
221+
char *cache_server_url = NULL, *shared_cache = NULL;
222+
struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT;
188223
int res;
189224
struct archive_dir archive_dirs[] = {
190225
{ ".git", 0 },
@@ -219,6 +254,13 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
219254
get_version_info(&buf, 1);
220255

221256
strbuf_addf(&buf, "Repository root: %s\n", the_repository->worktree);
257+
258+
git_config_get_string("gvfs.cache-server", &cache_server_url);
259+
git_config_get_string("gvfs.sharedCache", &shared_cache);
260+
strbuf_addf(&buf, "Cache Server: %s\nLocal Cache: %s\n\n",
261+
cache_server_url ? cache_server_url : "None",
262+
shared_cache ? shared_cache : "None");
263+
222264
get_disk_info(&buf);
223265
write_or_die(stdout_fd, buf.buf, buf.len);
224266
strvec_pushf(&archiver_args,
@@ -249,6 +291,52 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
249291
}
250292
}
251293

294+
if (shared_cache) {
295+
size_t path_len;
296+
297+
strbuf_reset(&buf);
298+
strbuf_addf(&path, "%s/pack", shared_cache);
299+
strbuf_reset(&buf);
300+
strbuf_addstr(&buf, "--add-virtual-file=packs-cached.txt:");
301+
dir_stats(&buf, path.buf);
302+
strvec_push(&archiver_args, buf.buf);
303+
304+
strbuf_reset(&buf);
305+
strbuf_addstr(&buf, "--add-virtual-file=objects-cached.txt:");
306+
loose_objs_stats(&buf, shared_cache);
307+
strvec_push(&archiver_args, buf.buf);
308+
309+
strbuf_reset(&path);
310+
strbuf_addf(&path, "%s/info", shared_cache);
311+
path_len = path.len;
312+
313+
if (is_directory(path.buf)) {
314+
DIR *dir = opendir(path.buf);
315+
struct dirent *e;
316+
317+
while ((e = readdir(dir))) {
318+
if (!strcmp(".", e->d_name) || !strcmp("..", e->d_name))
319+
continue;
320+
if (e->d_type == DT_DIR)
321+
continue;
322+
323+
strbuf_reset(&buf);
324+
strbuf_addf(&buf, "--add-virtual-file=info/%s:", e->d_name);
325+
326+
strbuf_setlen(&path, path_len);
327+
strbuf_addch(&path, '/');
328+
strbuf_addstr(&path, e->d_name);
329+
330+
if (strbuf_read_file(&buf, path.buf, 0) < 0) {
331+
res = error_errno(_("could not read '%s'"), path.buf);
332+
goto diagnose_cleanup;
333+
}
334+
strvec_push(&archiver_args, buf.buf);
335+
}
336+
closedir(dir);
337+
}
338+
}
339+
252340
strvec_pushl(&archiver_args, "--prefix=",
253341
oid_to_hex(the_hash_algo->empty_tree), "--", NULL);
254342

@@ -276,6 +364,8 @@ int create_diagnostics_archive(struct strbuf *zip_path, enum diagnose_mode mode)
276364
free(argv_copy);
277365
strvec_clear(&archiver_args);
278366
strbuf_release(&buf);
367+
free(cache_server_url);
368+
free(shared_cache);
279369

280370
return res;
281371
}

Diff for: dir.c

+2
Original file line numberDiff line numberDiff line change
@@ -3226,6 +3226,8 @@ static int cmp_icase(char a, char b)
32263226
{
32273227
if (a == b)
32283228
return 0;
3229+
if (is_dir_sep(a))
3230+
return is_dir_sep(b) ? 0 : -1;
32293231
if (ignore_case)
32303232
return toupper(a) - toupper(b);
32313233
return a - b;

0 commit comments

Comments
 (0)