From cde69e6ac4d4f7e0542485152267c7c38673ba3d Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Wed, 5 Apr 2017 10:58:09 -0600 Subject: [PATCH 01/15] reset --stdin: trim carriage return from the paths While using the reset --stdin feature on windows path added may have a \r at the end of the path that wasn't getting removed so didn't match the path in the index and wasn't reset. Signed-off-by: Kevin Willford --- t/t7108-reset-stdin.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/t/t7108-reset-stdin.sh b/t/t7108-reset-stdin.sh index b7cbcbf869296c..db5483b8f10052 100755 --- a/t/t7108-reset-stdin.sh +++ b/t/t7108-reset-stdin.sh @@ -29,4 +29,13 @@ test_expect_success '--stdin requires --mixed' ' git reset --mixed --stdin list && + git reset --stdin Date: Tue, 24 Jan 2017 17:44:31 +0100 Subject: [PATCH 02/15] gvfs: start by adding the -gvfs suffix to the version Signed-off-by: Saeed Noursalehi --- GIT-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index df00ddfd8433d3..4be27bfa50e307 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,7 +1,7 @@ #!/bin/sh GVF=GIT-VERSION-FILE -DEF_VER=v2.29.0-rc2 +DEF_VER=v2.29.0.vfs.0.0 LF=' ' From 3bc2ce6ca7771a05d6401d18d8814450f389abf5 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Tue, 4 Apr 2017 12:04:11 +0200 Subject: [PATCH 03/15] gvfs: ensure that the version is based on a GVFS tag Signed-off-by: Johannes Schindelin --- GIT-VERSION-GEN | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index 4be27bfa50e307..5e9882b36fc195 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -12,7 +12,7 @@ if test -f version then VN=$(cat version) || VN="$DEF_VER" elif test -d ${GIT_DIR:-.git} -o -f .git && - VN=$(git describe --match "v[0-9]*" HEAD 2>/dev/null) && + VN=$(git describe --match "v[0-9]*vfs*" HEAD 2>/dev/null) && case "$VN" in *$LF*) (exit 1) ;; v[0-9]*) From 
dd37042125dade511657b2b37b2817f1c9f3ed7e Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 24 Jan 2017 17:30:59 +0100 Subject: [PATCH 04/15] gvfs: add a GVFS-specific header file This header file will accumulate GVFS-specific definitions. Signed-off-by: Kevin Willford --- gvfs.h | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 gvfs.h diff --git a/gvfs.h b/gvfs.h new file mode 100644 index 00000000000000..b6dbe85eae4071 --- /dev/null +++ b/gvfs.h @@ -0,0 +1,9 @@ +#ifndef GVFS_H +#define GVFS_H + +/* + * This file is for the specific settings and methods + * used for GVFS functionality + */ + +#endif /* GVFS_H */ From f8993187bde939cd061730127574765f337b194d Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 24 Jan 2017 17:34:12 +0100 Subject: [PATCH 05/15] gvfs: add the core.gvfs config setting This does not do anything yet. The next patches will add various values for that config setting that correspond to the various features offered/required by GVFS. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 3 +++ cache.h | 1 + config.c | 6 ++++++ environment.c | 1 + gvfs.h | 31 +++++++++++++++++++++++++++++++ 5 files changed, 42 insertions(+) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index ef3987dcae4d54..16dd853137b468 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -622,6 +622,9 @@ core.multiPackIndex:: single index. See link:technical/multi-pack-index.html[the multi-pack-index design document]. +core.gvfs:: + Enable the features needed for GVFS. + core.sparseCheckout:: Enable "sparse checkout" feature. See linkgit:git-sparse-checkout[1] for more information. 
diff --git a/cache.h b/cache.h index 3e979666c86524..1309a0351b7ead 100644 --- a/cache.h +++ b/cache.h @@ -955,6 +955,7 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; +extern int core_gvfs; extern int precomposed_unicode; extern int protect_hfs; extern int protect_ntfs; diff --git a/config.c b/config.c index bf87d02603d25a..11c2db289a40ef 100644 --- a/config.c +++ b/config.c @@ -20,6 +20,7 @@ #include "dir.h" #include "color.h" #include "refs.h" +#include "gvfs.h" struct config_source { struct config_source *prev; @@ -1374,6 +1375,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.gvfs")) { + gvfs_load_config_value(value); + return 0; + } + if (!strcmp(var, "core.sparsecheckout")) { core_apply_sparse_checkout = git_config_bool(var, value); return 0; diff --git a/environment.c b/environment.c index bb518c61cd259c..4c349365bd0f93 100644 --- a/environment.c +++ b/environment.c @@ -69,6 +69,7 @@ char *notes_ref_name; int grafts_replace_parents = 1; int core_apply_sparse_checkout; int core_sparse_checkout_cone; +int core_gvfs; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; diff --git a/gvfs.h b/gvfs.h index b6dbe85eae4071..2e9e10f4e49ef6 100644 --- a/gvfs.h +++ b/gvfs.h @@ -1,9 +1,40 @@ #ifndef GVFS_H #define GVFS_H +#include "cache.h" +#include "config.h" + /* * This file is for the specific settings and methods * used for GVFS functionality */ +static inline int gvfs_config_is_set(int mask) { + return (core_gvfs & mask) == mask; +} + +static inline int gvfs_config_is_set_any(void) { + return core_gvfs != 0; /* a boolean "true" is stored as -1 (all bits on), so test for non-zero rather than > 0 */ +} + +static inline void gvfs_load_config_value(const char *value) { + int is_bool = 0; + + if (value) + core_gvfs = git_config_bool_or_int("core.gvfs", value, &is_bool); + else + git_config_get_bool_or_int("core.gvfs", &is_bool, &core_gvfs); + + /* Turn 
on all bits if a bool was set in the settings */ + if (is_bool && core_gvfs) + core_gvfs = -1; +} + + +static inline int gvfs_config_load_and_is_set(int mask) { + gvfs_load_config_value(0); + return gvfs_config_is_set(mask); +} + + #endif /* GVFS_H */ From 13e59955a238b984c8eac874825ba1591f5adea2 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 24 Jan 2017 17:38:59 +0100 Subject: [PATCH 06/15] gvfs: add the feature to skip writing the index' SHA-1 This takes a substantial amount of time, and if the user is reasonably sure that the files' integrity is not compromised, that time can be saved. Git no longer verifies the SHA-1 by default, anyway. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 10 +++++++++- gvfs.h | 6 ++++++ read-cache.c | 11 ++++++++--- t/t1016-read-tree-skip-sha-on-read.sh | 22 ++++++++++++++++++++++ 4 files changed, 45 insertions(+), 4 deletions(-) create mode 100755 t/t1016-read-tree-skip-sha-on-read.sh diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 16dd853137b468..e8a397776548fa 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -623,7 +623,15 @@ core.multiPackIndex:: multi-pack-index design document]. core.gvfs:: - Enable the features needed for GVFS. + Enable the features needed for GVFS. This value can be set to true + to indicate all features should be turned on or the bit values listed + below can be used to turn on specific features. ++ +-- + GVFS_SKIP_SHA_ON_INDEX:: + Bit value 1 + Disables the calculation of the sha when writing the index +-- core.sparseCheckout:: Enable "sparse checkout" feature. 
See linkgit:git-sparse-checkout[1] diff --git a/gvfs.h b/gvfs.h index 2e9e10f4e49ef6..690419127a72dd 100644 --- a/gvfs.h +++ b/gvfs.h @@ -9,6 +9,12 @@ * used for GVFS functionality */ + +/* + * The list of bits in the core_gvfs setting + */ +#define GVFS_SKIP_SHA_ON_INDEX (1 << 0) + static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; } diff --git a/read-cache.c b/read-cache.c index 99d1c5b84b3e39..6d8b6e5aaa9d0a 100644 --- a/read-cache.c +++ b/read-cache.c @@ -25,6 +25,7 @@ #include "fsmonitor.h" #include "thread-utils.h" #include "progress.h" +#include "gvfs.h" /* Mask for the name length in ce_flags in the on-disk index */ @@ -2468,7 +2469,9 @@ static int ce_write_flush(git_hash_ctx *context, int fd) { unsigned int buffered = write_buffer_len; if (buffered) { - the_hash_algo->update_fn(context, write_buffer, buffered); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->update_fn(context, write_buffer, + buffered); if (write_in_full(fd, write_buffer, buffered) < 0) return -1; write_buffer_len = 0; @@ -2517,7 +2520,8 @@ static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash) if (left) { write_buffer_len = 0; - the_hash_algo->update_fn(context, write_buffer, left); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->update_fn(context, write_buffer, left); } /* Flush first if not enough space for hash signature */ @@ -2528,7 +2532,8 @@ static int ce_flush(git_hash_ctx *context, int fd, unsigned char *hash) } /* Append the hash signature at the end */ - the_hash_algo->final_fn(write_buffer + left, context); + if (!gvfs_config_is_set(GVFS_SKIP_SHA_ON_INDEX)) + the_hash_algo->final_fn(write_buffer + left, context); hashcpy(hash, write_buffer + left); left += the_hash_algo->rawsz; return (write_in_full(fd, write_buffer, left) < 0) ? 
-1 : 0; diff --git a/t/t1016-read-tree-skip-sha-on-read.sh b/t/t1016-read-tree-skip-sha-on-read.sh new file mode 100755 index 00000000000000..5b76a80a0020dc --- /dev/null +++ b/t/t1016-read-tree-skip-sha-on-read.sh @@ -0,0 +1,22 @@ +#!/bin/sh + +test_description='check that read-tree works with core.gvfs config value' + +. ./test-lib.sh +. "$TEST_DIRECTORY"/lib-read-tree.sh + +test_expect_success setup ' + echo one >a && + git add a && + git commit -m initial +' +test_expect_success 'read-tree without core.gvsf' ' + read_tree_u_must_succeed -m -u HEAD +' + +test_expect_success 'read-tree with core.gvfs set to 1' ' + git config core.gvfs 1 && + read_tree_u_must_succeed -m -u HEAD +' + +test_done From d0a1de0949daab600aa7f520d8c0b158359e8080 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Tue, 24 Jan 2017 17:54:55 +0100 Subject: [PATCH 07/15] gvfs: add the feature that blobs may be missing Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 4 ++++ cache-tree.c | 4 +++- gvfs.h | 1 + t/t0000-basic.sh | 5 +++++ 4 files changed, 13 insertions(+), 1 deletion(-) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index e8a397776548fa..25c125cd74b83e 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -631,6 +631,10 @@ core.gvfs:: GVFS_SKIP_SHA_ON_INDEX:: Bit value 1 Disables the calculation of the sha when writing the index + GVFS_MISSING_OK:: + Bit value 4 + Normally git write-tree ensures that the objects referenced by the + directory exist in the object database. This option disables this check. 
-- core.sparseCheckout:: diff --git a/cache-tree.c b/cache-tree.c index a537a806c16e03..9a59dafa26a07d 100644 --- a/cache-tree.c +++ b/cache-tree.c @@ -6,6 +6,7 @@ #include "object-store.h" #include "replace-object.h" #include "promisor-remote.h" +#include "gvfs.h" #ifndef DEBUG_CACHE_TREE #define DEBUG_CACHE_TREE 0 @@ -244,7 +245,8 @@ static int update_one(struct cache_tree *it, int flags) { struct strbuf buffer; - int missing_ok = flags & WRITE_TREE_MISSING_OK; + int missing_ok = gvfs_config_is_set(GVFS_MISSING_OK) ? + WRITE_TREE_MISSING_OK : (flags & WRITE_TREE_MISSING_OK); int dryrun = flags & WRITE_TREE_DRY_RUN; int repair = flags & WRITE_TREE_REPAIR; int to_invalidate = 0; diff --git a/gvfs.h b/gvfs.h index 690419127a72dd..dabbf67f94c852 100644 --- a/gvfs.h +++ b/gvfs.h @@ -14,6 +14,7 @@ * The list of bits in the core_gvfs setting */ #define GVFS_SKIP_SHA_ON_INDEX (1 << 0) +#define GVFS_MISSING_OK (1 << 2) static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; diff --git a/t/t0000-basic.sh b/t/t0000-basic.sh index 923281af93981d..92873f02b89c14 100755 --- a/t/t0000-basic.sh +++ b/t/t0000-basic.sh @@ -1186,6 +1186,11 @@ test_expect_success 'writing this tree with --missing-ok' ' git write-tree --missing-ok ' +test_expect_success 'writing this tree with missing ok config value' ' + git config core.gvfs 4 && + git write-tree +' + ################################################################ test_expect_success 'git read-tree followed by write-tree should be idempotent' ' From d28d0b67211155daa8d9752cb6743daae9bfd3ba Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Wed, 18 May 2016 13:40:39 +0000 Subject: [PATCH 08/15] gvfs: prevent files to be deleted outside the sparse checkout Prevent the sparse checkout to delete files that were marked with skip-worktree bit and are not in the sparse-checkout file. 
This is because everything with the skip-worktree bit turned on is being virtualized and will be removed with the change of HEAD. There was only one failing test when running with these changes that was checking to make sure the worktree narrows on checkout which was expected since we would no longer be narrowing the worktree. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 9 +++++++++ gvfs.h | 1 + t/t1090-sparse-checkout-scope.sh | 17 +++++++++++++++++ unpack-trees.c | 22 ++++++++++++++++++++++ 4 files changed, 49 insertions(+) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 25c125cd74b83e..5135ac15f25e04 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -635,6 +635,15 @@ core.gvfs:: Bit value 4 Normally git write-tree ensures that the objects referenced by the directory exist in the object database. This option disables this check. + GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT:: + Bit value 8 + When marking entries to remove from the index and the working + directory this option will take into account what the + skip-worktree bit was set to so that if the entry has the + skip-worktree bit set it will not be removed from the working + directory. This will allow virtualized working directories to + detect the change to HEAD and use the new commit tree to show + the files that are in the working directory. 
-- core.sparseCheckout:: diff --git a/gvfs.h b/gvfs.h index dabbf67f94c852..f9144d2ad9ab92 100644 --- a/gvfs.h +++ b/gvfs.h @@ -15,6 +15,7 @@ */ #define GVFS_SKIP_SHA_ON_INDEX (1 << 0) #define GVFS_MISSING_OK (1 << 2) +#define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; diff --git a/t/t1090-sparse-checkout-scope.sh b/t/t1090-sparse-checkout-scope.sh index e7dd921be5f956..d267e21db83720 100755 --- a/t/t1090-sparse-checkout-scope.sh +++ b/t/t1090-sparse-checkout-scope.sh @@ -81,6 +81,23 @@ test_expect_success 'in partial clone, sparse checkout only fetches needed blobs test_cmp expect actual ' +test_expect_success 'checkout does not delete items outside the sparse checkout file' ' + git checkout master && + git config core.gvfs 8 && + git checkout -b outside && + echo "new file1" >d && + git add d && + git commit -m "branch initial" && + echo "new file1" >e && + git add e && + git commit -m "skipped worktree" && + git update-index --skip-worktree e && + echo "/d" >.git/info/sparse-checkout && + git checkout HEAD^ && + test_path_is_file d && + test_path_is_file e +' + test_expect_success MINGW 'no unnecessary opendir() with fscache' ' git clone . 
fscache-test && ( diff --git a/unpack-trees.c b/unpack-trees.c index de1d0dcbe421ee..cf542868c109f2 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -16,6 +16,7 @@ #include "fsmonitor.h" #include "object-store.h" #include "promisor-remote.h" +#include "gvfs.h" /* * Error messages expected by scripts out of plumbing commands such as @@ -2240,6 +2241,27 @@ static int deleted_entry(const struct cache_entry *ce, } if (!(old->ce_flags & CE_CONFLICTED) && verify_uptodate(old, o)) return -1; + + /* + * When marking entries to remove from the index and the working + * directory this option will take into account what the + * skip-worktree bit was set to so that if the entry has the + * skip-worktree bit set it will not be removed from the working + * directory. This will allow virtualized working directories to + * detect the change to HEAD and use the new commit tree to show + * the files that are in the working directory. + * + * old is the cache_entry that will have the skip-worktree bit set + * which will need to be preserved when the CE_REMOVE entry is added + */ + if (gvfs_config_is_set(GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT) && + old && + old->ce_flags & CE_SKIP_WORKTREE) { + add_entry(o, old, CE_REMOVE, 0); + invalidate_ce_path(old, o); + return 1; + } + add_entry(o, ce, CE_REMOVE, 0); invalidate_ce_path(ce, o); return 1; From 3d74f55ce60baddec2c4e2ff74b1edc6146c57f9 Mon Sep 17 00:00:00 2001 From: Kevin Willford Date: Mon, 30 May 2016 10:55:53 -0400 Subject: [PATCH 09/15] gvfs: optionally skip reachability checks/upload pack during fetch While performing a fetch with a virtual file system we know that there will be missing objects and we don't want to download them just because of the reachability of the commits. We also don't want to download a pack file with commits, trees, and blobs since these will be downloaded on demand. This flag will skip the first connectivity check and by returning zero will skip the upload pack. 
It will also skip the second connectivity check but continue to update the branches to the latest commit ids. Signed-off-by: Kevin Willford --- Documentation/config/core.txt | 9 +++++++++ connected.c | 19 +++++++++++++++++++ gvfs.h | 1 + t/t5583-vfs.sh | 24 ++++++++++++++++++++++++ 4 files changed, 53 insertions(+) create mode 100755 t/t5583-vfs.sh diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index 5135ac15f25e04..ae055e8c959046 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -644,6 +644,15 @@ core.gvfs:: directory. This will allow virtualized working directories to detect the change to HEAD and use the new commit tree to show the files that are in the working directory. + GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK:: + Bit value 16 + While performing a fetch with a virtual file system we know + that there will be missing objects and we don't want to download + them just because of the reachability of the commits. We also + don't want to download a pack file with commits, trees, and blobs + since these will be downloaded on demand. This flag will skip the + checks on the reachability of objects during a fetch as well as + the upload pack so that extraneous objects don't get downloaded. -- core.sparseCheckout:: diff --git a/connected.c b/connected.c index b18299fdf0e522..ea7166bde682d6 100644 --- a/connected.c +++ b/connected.c @@ -6,6 +6,7 @@ #include "transport.h" #include "packfile.h" #include "promisor-remote.h" +#include "gvfs.h" /* * If we feed all the commits we want to verify to this command @@ -30,6 +31,24 @@ int check_connected(oid_iterate_fn fn, void *cb_data, struct transport *transport; size_t base_len; + /* + * Running a virtual file system there will be objects that are + * missing locally and we don't want to download a bunch of + * commits, trees, and blobs just to make sure everything is + * reachable locally so this option will skip reachability + * checks below that use rev-list. 
This will stop the check + * before uploadpack runs to determine if there is anything to + * fetch. Returning zero for the first check will also prevent the + * uploadpack from happening. It will also skip the check after + * the fetch is finished to make sure all the objects were + * downloaded in the pack file. This will allow the fetch to + * run and get all the latest tip commit ids for all the branches + * in the fetch but not pull down commits, trees, or blobs via + * upload pack. + */ + if (gvfs_config_is_set(GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK)) + return 0; + if (!opt) opt = &defaults; transport = opt->transport; diff --git a/gvfs.h b/gvfs.h index f9144d2ad9ab92..8ee12df31c8a1d 100644 --- a/gvfs.h +++ b/gvfs.h @@ -16,6 +16,7 @@ #define GVFS_SKIP_SHA_ON_INDEX (1 << 0) #define GVFS_MISSING_OK (1 << 2) #define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) +#define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; diff --git a/t/t5583-vfs.sh b/t/t5583-vfs.sh new file mode 100755 index 00000000000000..8a703cbb640387 --- /dev/null +++ b/t/t5583-vfs.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +test_description='fetch using the flag to skip reachability and upload pack' + +. ./test-lib.sh + + +test_expect_success setup ' + echo inital >a && + git add a && + git commit -m initial && + git clone . one +' + +test_expect_success "fetch test" ' + cd one && + git config core.gvfs 16 && + rm -rf .git/objects/* && + git -C .. 
cat-file commit HEAD | git hash-object -w --stdin -t commit && + git fetch && + test_must_fail git rev-parse --verify HEAD^{tree} +' + +test_done \ No newline at end of file From 14427db28a7c63a7135ca5c74bc2d2b1fe6dd7e4 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Wed, 15 Jun 2016 14:59:16 +0000 Subject: [PATCH 10/15] gvfs: ensure all filters and EOL conversions are blocked Ensure all filters and EOL conversions are blocked when running under GVFS so that our projected file sizes will match the actual file size when it is hydrated on the local machine. Signed-off-by: Ben Peart --- Documentation/config/core.txt | 9 +++++++++ convert.c | 22 +++++++++++++++++++++ gvfs.h | 1 + t/t0021-conversion.sh | 37 +++++++++++++++++++++++++++++++++++ t/t0027-auto-crlf.sh | 12 ++++++++++++ 5 files changed, 81 insertions(+) diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index ae055e8c959046..380ffb4d9a4b81 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -653,6 +653,15 @@ core.gvfs:: since these will be downloaded on demand. This flag will skip the checks on the reachability of objects during a fetch as well as the upload pack so that extraneous objects don't get downloaded. + GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS:: + Bit value 64 + With a virtual file system we only know the file size before any + CRLF or smudge/clean filters processing is done on the client. + To prevent file corruption due to truncation or expansion with + garbage at the end, these filters must not run when the file + is first accessed and brought down to the client. Git.exe can't + currently tell the first access vs subsequent accesses so this + flag just blocks them from occurring at all. 
-- core.sparseCheckout:: diff --git a/convert.c b/convert.c index ee360c2f07ced0..f23cd679dc85d4 100644 --- a/convert.c +++ b/convert.c @@ -9,6 +9,7 @@ #include "sub-process.h" #include "utf8.h" #include "ll-merge.h" +#include "gvfs.h" /* * convert.c - convert a file when checking it out and checking it in. @@ -559,6 +560,9 @@ static int crlf_to_git(const struct index_state *istate, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -598,6 +602,9 @@ static int crlf_to_worktree(const char *src, size_t len, if (!will_convert_lf_to_crlf(&stats, crlf_action)) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? */ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -709,6 +716,9 @@ static int apply_single_file_filter(const char *path, const char *src, size_t le struct async async; struct filter_params params; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("Filter \"%s\" not supported when running under GVFS", cmd); + memset(&async, 0, sizeof(async)); async.proc = filter_buffer_or_fd; async.data = &params; @@ -1113,6 +1123,9 @@ static int ident_to_git(const char *src, size_t len, if (!buf) return 1; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* only grow if not in place */ if (strbuf_avail(buf) + buf->len < len) strbuf_grow(buf, len - buf->len); @@ -1160,6 +1173,9 @@ static int ident_to_worktree(const char *src, size_t len, if (!cnt) return 0; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + /* are we "faking" in place editing ? 
*/ if (src == buf->buf) to_free = strbuf_detach(buf, NULL); @@ -1614,6 +1630,9 @@ static int lf_to_crlf_filter_fn(struct stream_filter *filter, size_t count, o = 0; struct lf_to_crlf_filter *lf_to_crlf = (struct lf_to_crlf_filter *)filter; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("CRLF conversions not supported when running under GVFS"); + /* * We may be holding onto the CR to see if it is followed by a * LF, in which case we would need to go to the main loop. @@ -1858,6 +1877,9 @@ static int ident_filter_fn(struct stream_filter *filter, struct ident_filter *ident = (struct ident_filter *)filter; static const char head[] = "$Id"; + if (gvfs_config_is_set(GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS)) + die("ident conversions not supported when running under GVFS"); + if (!input) { /* drain upon eof */ switch (ident->state) { diff --git a/gvfs.h b/gvfs.h index 8ee12df31c8a1d..2d6de575bf4a65 100644 --- a/gvfs.h +++ b/gvfs.h @@ -17,6 +17,7 @@ #define GVFS_MISSING_OK (1 << 2) #define GVFS_NO_DELETE_OUTSIDE_SPARSECHECKOUT (1 << 3) #define GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK (1 << 4) +#define GVFS_BLOCK_FILTERS_AND_EOL_CONVERSIONS (1 << 6) static inline int gvfs_config_is_set(int mask) { return (core_gvfs & mask) == mask; diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh index 5508e0bf6fbbb3..75e5d77435b4f8 100755 --- a/t/t0021-conversion.sh +++ b/t/t0021-conversion.sh @@ -314,6 +314,43 @@ test_expect_success "filter: smudge empty file" ' test_cmp expected filtered-empty-in-repo ' +test_expect_success "filter: clean filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + echo dead data walking >empty-in-repo && + test_must_fail git add empty-in-repo +' + +test_expect_success "filter: smudge filters blocked when under GVFS" ' + test_config filter.empty-in-repo.clean "cat >/dev/null" && + 
test_config filter.empty-in-repo.smudge "echo smudged && cat" && + test_config core.gvfs 64 && + + test_must_fail git checkout +' + +test_expect_success "ident blocked on add when under GVFS" ' + test_config core.gvfs 64 && + test_config core.autocrlf false && + + echo "*.i ident" >.gitattributes && + echo "\$Id\$" > ident.i && + + test_must_fail git add ident.i +' + +test_expect_success "ident blocked when under GVFS" ' + git add ident.i && + + git commit -m "added ident.i" && + test_config core.gvfs 64 && + rm ident.i && + + test_must_fail git checkout -- ident.i +' + test_expect_success 'disable filter with empty override' ' test_config_global filter.disable.smudge false && test_config_global filter.disable.clean false && diff --git a/t/t0027-auto-crlf.sh b/t/t0027-auto-crlf.sh index 9fcd56fab37314..4d43e15bb96727 100755 --- a/t/t0027-auto-crlf.sh +++ b/t/t0027-auto-crlf.sh @@ -333,6 +333,18 @@ checkout_files () { " } +test_expect_success 'crlf conversions blocked when under GVFS' ' + git checkout -b gvfs && + test_commit initial && + rm initial.t && + test_config core.gvfs 64 && + test_config core.autocrlf true && + test_must_fail git read-tree --reset -u HEAD && + + git config core.autocrlf false && + git read-tree --reset -u HEAD +' + # Test control characters # NUL SOH CR EOF==^Z test_expect_success 'ls-files --eol -o Text/Binary' ' From 0b3a52c58949984167e46ad001944bc0aeeb79bf Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 30 Nov 2016 23:11:36 +0100 Subject: [PATCH 11/15] Add a new run_hook_strvec() function The two existing members of the run_hook*() family, run_hook_ve() and run_hook_le(), are good for callers that know the precise number of parameters already. Let's introduce a new sibling that takes a strvec for callers that want to pass a variable number of parameters. 
Signed-off-by: Johannes Schindelin --- run-command.c | 20 +++++++++++++++++--- run-command.h | 2 ++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/run-command.c b/run-command.c index 2ee59acdc8c828..afcda96f1d0ffa 100644 --- a/run-command.c +++ b/run-command.c @@ -1343,7 +1343,8 @@ const char *find_hook(const char *name) return path.buf; } -int run_hook_ve(const char *const *env, const char *name, va_list args) +int run_hook_strvec(const char *const *env, const char *name, + struct strvec *argv) { struct child_process hook = CHILD_PROCESS_INIT; const char *p; @@ -1353,8 +1354,7 @@ int run_hook_ve(const char *const *env, const char *name, va_list args) return 0; strvec_push(&hook.args, p); - while ((p = va_arg(args, const char *))) - strvec_push(&hook.args, p); + strvec_pushv(&hook.args, argv->v); hook.env = env; hook.no_stdin = 1; hook.stdout_to_stderr = 1; @@ -1363,6 +1363,20 @@ int run_hook_ve(const char *const *env, const char *name, va_list args) return run_command(&hook); } +int run_hook_ve(const char *const *env, const char *name, va_list args) +{ + struct strvec argv = STRVEC_INIT; + const char *p; + int ret; + + while ((p = va_arg(args, const char *))) + strvec_push(&argv, p); + + ret = run_hook_strvec(env, name, &argv); + strvec_clear(&argv); + return ret; +} + int run_hook_le(const char *const *env, const char *name, ...) 
{ va_list args; diff --git a/run-command.h b/run-command.h index 6472b38bde448c..d6d6b850fdd339 100644 --- a/run-command.h +++ b/run-command.h @@ -217,6 +217,8 @@ const char *find_hook(const char *name); LAST_ARG_MUST_BE_NULL int run_hook_le(const char *const *env, const char *name, ...); int run_hook_ve(const char *const *env, const char *name, va_list args); +int run_hook_strvec(const char *const *env, const char *name, + struct strvec *argv); /* * Trigger an auto-gc From a1d0128e90e0862611ac1d1ff95bf353a33e708b Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 10 Jan 2017 18:47:14 +0000 Subject: [PATCH 12/15] gvfs: allow "virtualizing" objects The idea is to allow blob objects to be missing from the local repository, and to load them lazily on demand. After discussing this idea on the mailing list, we will rename the feature to "lazy clone" and work more on this. Signed-off-by: Ben Peart --- cache.h | 2 ++ config.c | 5 +++++ connected.c | 2 ++ environment.c | 1 + sha1-file.c | 22 ++++++++++++++++++++++ 5 files changed, 32 insertions(+) diff --git a/cache.h b/cache.h index 1309a0351b7ead..5010acf0dfee9d 100644 --- a/cache.h +++ b/cache.h @@ -984,6 +984,8 @@ int use_optional_locks(void); extern char comment_line_char; extern int auto_comment_line_char; +extern int core_virtualize_objects; + enum log_refs_config { LOG_REFS_UNSET = -1, LOG_REFS_NONE = 0, diff --git a/config.c b/config.c index 11c2db289a40ef..bd7fcc512e4faa 100644 --- a/config.c +++ b/config.c @@ -1410,6 +1410,11 @@ static int git_default_core_config(const char *var, const char *value, void *cb) return 0; } + if (!strcmp(var, "core.virtualizeobjects")) { + core_virtualize_objects = git_config_bool(var, value); + return 0; + } + /* Add other config variables here and to Documentation/config.txt. 
*/ return platform_core_config(var, value, cb); } diff --git a/connected.c b/connected.c index ea7166bde682d6..a9168bcca65bea 100644 --- a/connected.c +++ b/connected.c @@ -48,6 +48,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data, */ if (gvfs_config_is_set(GVFS_FETCH_SKIP_REACHABILITY_AND_UPLOADPACK)) return 0; + if (core_virtualize_objects) + return 0; if (!opt) opt = &defaults; diff --git a/environment.c b/environment.c index 4c349365bd0f93..80fb6df4971d44 100644 --- a/environment.c +++ b/environment.c @@ -73,6 +73,7 @@ int core_gvfs; int merge_log_config = -1; int precomposed_unicode = -1; /* see probe_utf8_pathname_composition() */ unsigned long pack_size_limit_cfg; +int core_virtualize_objects; enum log_refs_config log_all_ref_updates = LOG_REFS_UNSET; #ifndef PROTECT_HFS_DEFAULT diff --git a/sha1-file.c b/sha1-file.c index dd65bd5c681513..2319c1060bb905 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1454,6 +1454,21 @@ void disable_obj_read_lock(void) pthread_mutex_destroy(&obj_read_mutex); } +static int run_read_object_hook(const struct object_id *oid) +{ + struct strvec args = STRVEC_INIT; + int ret; + uint64_t start; + + start = getnanotime(); + strvec_push(&args, oid_to_hex(oid)); + ret = run_hook_strvec(NULL, "read-object", &args); + strvec_clear(&args); + trace_performance_since(start, "run_read_object_hook"); + + return ret; +} + int fetch_if_missing = 1; static int do_oid_object_info_extended(struct repository *r, @@ -1466,6 +1481,7 @@ static int do_oid_object_info_extended(struct repository *r, int rtype; const struct object_id *real = oid; int already_retried = 0; + int tried_hook = 0; if (flags & OBJECT_INFO_LOOKUP_REPLACE) @@ -1477,6 +1493,7 @@ static int do_oid_object_info_extended(struct repository *r, if (!oi) oi = &blank_oi; +retry: co = find_cached_object(real); if (co) { if (oi->typep) @@ -1511,6 +1528,11 @@ static int do_oid_object_info_extended(struct repository *r, reprepare_packed_git(r); if (find_pack_entry(r, real, &e)) break; 
+ if (core_virtualize_objects && !tried_hook) { + tried_hook = 1; + if (!run_read_object_hook(oid)) + goto retry; + } } /* Check if it is a missing object */ From b07fa566fa85d2ba304a4dd1210ed56495ab3d83 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Wed, 15 Mar 2017 18:43:05 +0000 Subject: [PATCH 13/15] Hydrate missing loose objects in check_and_freshen() Hydrate missing loose objects in check_and_freshen() when running virtualized. Add test cases to verify read-object hook works when running virtualized. This hook is called in check_and_freshen() rather than check_and_freshen_local() to make the hook work also with alternates. Helped-by: Kevin Willford Signed-off-by: Ben Peart --- sha1-file.c | 46 +++++++++++++++++++++++++++--------------- t/t0411-read-object.sh | 27 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 16 deletions(-) create mode 100755 t/t0411-read-object.sh diff --git a/sha1-file.c b/sha1-file.c index 2319c1060bb905..45f5d285b03fc2 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -878,6 +878,24 @@ void prepare_alt_odb(struct repository *r) r->objects->loaded_alternates = 1; } +static int run_read_object_hook(const struct object_id *oid) +{ + struct child_process hook = CHILD_PROCESS_INIT; + const char *p; + + p = find_hook("read-object"); + if (!p) + return 1; + + strvec_push(&hook.args, p); + strvec_push(&hook.args, oid_to_hex(oid)); + hook.env = NULL; + hook.no_stdin = 1; + hook.stdout_to_stderr = 1; + + return run_command(&hook); +} + /* Returns 1 if we have successfully freshened the file, 0 otherwise. 
*/ static int freshen_file(const char *fn) { @@ -928,8 +946,19 @@ static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen) static int check_and_freshen(const struct object_id *oid, int freshen) { - return check_and_freshen_local(oid, freshen) || + int ret; + int tried_hook = 0; + +retry: + ret = check_and_freshen_local(oid, freshen) || check_and_freshen_nonlocal(oid, freshen); + if (!ret && core_virtualize_objects && !tried_hook) { + tried_hook = 1; + if (!run_read_object_hook(oid)) + goto retry; + } + + return ret; } int has_loose_object_nonlocal(const struct object_id *oid) @@ -1454,21 +1483,6 @@ void disable_obj_read_lock(void) pthread_mutex_destroy(&obj_read_mutex); } -static int run_read_object_hook(const struct object_id *oid) -{ - struct strvec args = STRVEC_INIT; - int ret; - uint64_t start; - - start = getnanotime(); - strvec_push(&args, oid_to_hex(oid)); - ret = run_hook_strvec(NULL, "read-object", &args); - strvec_clear(&args); - trace_performance_since(start, "run_read_object_hook"); - - return ret; -} - int fetch_if_missing = 1; static int do_oid_object_info_extended(struct repository *r, diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh new file mode 100755 index 00000000000000..0c3bfea1886cf9 --- /dev/null +++ b/t/t0411-read-object.sh @@ -0,0 +1,27 @@ +#!/bin/sh + +test_description='tests for read-object hook' + +. 
./test-lib.sh + +test_expect_success 'setup host and guest repos' ' + test_commit zero && + hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) && + git init guest-repo && + cd guest-repo && + git config core.virtualizeobjects true && + write_script .git/hooks/read-object <<-\EOF + # pass core.virtualizeobjects=false so we dont end up calling the hook proc recursively + git --git-dir=../.git/ cat-file blob "$1" | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1 + EOF +' + +test_expect_success 'blobs can be retrieved from the host repo' ' + git cat-file blob "$hash1" +' + +test_expect_success 'invalid blobs generate errors' ' + test_must_fail git cat-file blob "invalid" +' + +test_done From 9af634a5958403cdc50279cd7c1128fdf4442046 Mon Sep 17 00:00:00 2001 From: Ben Peart Date: Tue, 18 Jul 2017 12:04:44 +0200 Subject: [PATCH 14/15] Add support for read-object as a background process to retrieve missing objects This commit converts the existing read_object hook proc model for downloading missing blobs to use a background process that is started the first time git encounters a missing blob and stays running until git exits. Git and the read-object process communicate via stdin/stdout and a versioned, capability negotiated interface as documented in Documentation/technical/read-object-protocol.txt. The advantage of this over the previous hook proc is that it saves the overhead of spawning a new hook process for every missing blob. The model for the background process was refactored from the recent git LFS work. I refactored that code into a shared module (sub-process.c/h) and then updated convert.c to consume the new library. I then used the same sub-process module when implementing the read-object background process. The read-object hook feature was designed before the SHA-256 support was even close to be started. As a consequence, its protocol hard-codes the key `sha1`, even if we now also support SHA-256 object IDs. 
Technically, this is wrong, and probably the best way forward would be to rename the key to `oid` (or `sha256`, but that is less future-proof). However, there are existing setups out there, with existing read-object hooks that most likely have no idea what to do with `oid` requests. So let's leave the key as `sha1` for the time being, even if it will be technically incorrect in SHA-256 repositories. Signed-off-by: Ben Peart --- .../technical/read-object-protocol.txt | 102 +++++++++++++++ contrib/long-running-read-object/example.pl | 114 ++++++++++++++++ sha1-file.c | 122 ++++++++++++++++-- t/t0410/read-object | 114 ++++++++++++++++ t/t0411-read-object.sh | 24 ++-- 5 files changed, 450 insertions(+), 26 deletions(-) create mode 100644 Documentation/technical/read-object-protocol.txt create mode 100644 contrib/long-running-read-object/example.pl create mode 100755 t/t0410/read-object diff --git a/Documentation/technical/read-object-protocol.txt b/Documentation/technical/read-object-protocol.txt new file mode 100644 index 00000000000000..a893b46e7c28a9 --- /dev/null +++ b/Documentation/technical/read-object-protocol.txt @@ -0,0 +1,102 @@ +Read Object Process +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The read-object process enables Git to read all missing blobs with a +single process invocation for the entire life of a single Git command. +This is achieved by using a packet format (pkt-line, see technical/ +protocol-common.txt) based protocol over standard input and standard +output as follows. All packets, except for the "*CONTENT" packets and +the "0000" flush packet, are considered text and therefore are +terminated by a LF. + +Git starts the process when it encounters the first missing object that +needs to be retrieved. After the process is started, Git sends a welcome +message ("git-read-object-client"), a list of supported protocol version +numbers, and a flush packet. 
Git expects to read a welcome response +message ("git-read-object-server"), exactly one protocol version number +from the previously sent list, and a flush packet. All further +communication will be based on the selected version. + +The remaining protocol description below documents "version=1". Please +note that "version=42" in the example below does not exist and is only +there to illustrate how the protocol would look with more than one +version. + +After the version negotiation Git sends a list of all capabilities that +it supports and a flush packet. Git expects to read a list of desired +capabilities, which must be a subset of the supported capabilities list, +and a flush packet as response: +------------------------ +packet: git> git-read-object-client +packet: git> version=1 +packet: git> version=42 +packet: git> 0000 +packet: git< git-read-object-server +packet: git< version=1 +packet: git< 0000 +packet: git> capability=get +packet: git> capability=have +packet: git> capability=put +packet: git> capability=not-yet-invented +packet: git> 0000 +packet: git< capability=get +packet: git< 0000 +------------------------ +The only supported capability in version 1 is "get". + +Afterwards Git sends a list of "key=value" pairs terminated with a flush +packet. The list will contain at least the command (based on the +supported capabilities) and the sha1 of the object to retrieve. Please +note that the process must not send any response before it has received the +final flush packet. + +When the process receives the "get" command, it should make the requested +object available in the git object store and then return success. Git will +then check the object store again and this time find it and proceed. +------------------------ +packet: git> command=get +packet: git> sha1=0a214a649e1b3d5011e14a3dc227753f2bd2be05 +packet: git> 0000 +------------------------ + +The process is expected to respond with a list of "key=value" pairs +terminated with a flush packet.
If the process does not experience +problems then the list must contain a "success" status. +------------------------ +packet: git< status=success +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content, it +is expected to respond with an "error" status. +------------------------ +packet: git< status=error +packet: git< 0000 +------------------------ + +In case the process cannot or does not want to process the content as +well as any future content for the lifetime of the Git process, then it +is expected to respond with an "abort" status at any point in the +protocol. +------------------------ +packet: git< status=abort +packet: git< 0000 +------------------------ + +Git neither stops nor restarts the process in case the "error"/"abort" +status is set. + +If the process dies during the communication or does not adhere to the +protocol then Git will stop the process and restart it with the next +object that needs to be processed. + +After the read-object process has processed an object it is expected to +wait for the next "key=value" list containing a command. Git will close +the command pipe on exit. The process is expected to detect EOF and exit +gracefully on its own. Git will wait until the process has stopped. + +A long running read-object process demo implementation can be found in +`contrib/long-running-read-object/example.pl` located in the Git core +repository. If you develop your own long running process then the +`GIT_TRACE_PACKET` environment variable can be very helpful for +debugging (see linkgit:git[1]).
diff --git a/contrib/long-running-read-object/example.pl b/contrib/long-running-read-object/example.pl new file mode 100644 index 00000000000000..b8f37f836a813c --- /dev/null +++ b/contrib/long-running-read-object/example.pl @@ -0,0 +1,114 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. +# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "/host_repo/.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! 
+ exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . "\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? 
"status=error" : "status=success"); + packet_flush(); + } else { + die "bad command '$command'"; + } +} diff --git a/sha1-file.c b/sha1-file.c index 45f5d285b03fc2..551068a93284ba 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -32,6 +32,9 @@ #include "packfile.h" #include "object-store.h" #include "promisor-remote.h" +#include "sigchain.h" +#include "sub-process.h" +#include "pkt-line.h" /* The maximum size for an object header. */ #define MAX_HEADER_LEN 32 @@ -878,22 +881,113 @@ void prepare_alt_odb(struct repository *r) r->objects->loaded_alternates = 1; } -static int run_read_object_hook(const struct object_id *oid) +#define CAP_GET (1u<<0) + +static int subprocess_map_initialized; +static struct hashmap subprocess_map; + +struct read_object_process { + struct subprocess_entry subprocess; + unsigned int supported_capabilities; +}; + +static int start_read_object_fn(struct subprocess_entry *subprocess) { - struct child_process hook = CHILD_PROCESS_INIT; - const char *p; + struct read_object_process *entry = (struct read_object_process *)subprocess; + static int versions[] = {1, 0}; + static struct subprocess_capability capabilities[] = { + { "get", CAP_GET }, + { NULL, 0 } + }; - p = find_hook("read-object"); - if (!p) - return 1; + return subprocess_handshake(subprocess, "git-read-object", versions, + NULL, capabilities, + &entry->supported_capabilities); +} + +static int read_object_process(const struct object_id *oid) +{ + int err; + struct read_object_process *entry; + struct child_process *process; + struct strbuf status = STRBUF_INIT; + const char *cmd = find_hook("read-object"); + uint64_t start; + + start = getnanotime(); + + if (!subprocess_map_initialized) { + subprocess_map_initialized = 1; + hashmap_init(&subprocess_map, (hashmap_cmp_fn)cmd2process_cmp, + NULL, 0); + entry = NULL; + } else { + entry = (struct read_object_process *) subprocess_find_entry(&subprocess_map, cmd); + } + + if (!entry) { + entry = xmalloc(sizeof(*entry)); + 
entry->supported_capabilities = 0; + + if (subprocess_start(&subprocess_map, &entry->subprocess, cmd, + start_read_object_fn)) { + free(entry); + return -1; + } + } + process = &entry->subprocess.process; + + if (!(CAP_GET & entry->supported_capabilities)) + return -1; + + sigchain_push(SIGPIPE, SIG_IGN); + + err = packet_write_fmt_gently(process->in, "command=get\n"); + if (err) + goto done; + + err = packet_write_fmt_gently(process->in, "sha1=%s\n", oid_to_hex(oid)); + if (err) + goto done; + + err = packet_flush_gently(process->in); + if (err) + goto done; + + err = subprocess_read_status(process->out, &status); + err = err ? err : strcmp(status.buf, "success"); + +done: + sigchain_pop(SIGPIPE); + + if (err || errno == EPIPE) { + err = err ? err : errno; + if (!strcmp(status.buf, "error")) { + /* The process signaled a problem with the file. */ + } + else if (!strcmp(status.buf, "abort")) { + /* + * The process signaled a permanent problem. Don't try to read + * objects with the same command for the lifetime of the current + * Git process. + */ + entry->supported_capabilities &= ~CAP_GET; + } + else { + /* + * Something went wrong with the read-object process. + * Force shutdown and restart if needed. + */ + error("external process '%s' failed", cmd); + subprocess_stop(&subprocess_map, + (struct subprocess_entry *)entry); + free(entry); + } + } - strvec_push(&hook.args, p); - strvec_push(&hook.args, oid_to_hex(oid)); - hook.env = NULL; - hook.no_stdin = 1; - hook.stdout_to_stderr = 1; + trace_performance_since(start, "read_object_process"); - return run_command(&hook); + return err; } /* Returns 1 if we have successfully freshened the file, 0 otherwise. 
*/ @@ -954,7 +1048,7 @@ static int check_and_freshen(const struct object_id *oid, int freshen) check_and_freshen_nonlocal(oid, freshen); if (!ret && core_virtualize_objects && !tried_hook) { tried_hook = 1; - if (!run_read_object_hook(oid)) + if (!read_object_process(oid)) goto retry; } @@ -1544,7 +1638,7 @@ static int do_oid_object_info_extended(struct repository *r, break; if (core_virtualize_objects && !tried_hook) { tried_hook = 1; - if (!run_read_object_hook(oid)) + if (!read_object_process(oid)) goto retry; } } diff --git a/t/t0410/read-object b/t/t0410/read-object new file mode 100755 index 00000000000000..2b8feacc78577f --- /dev/null +++ b/t/t0410/read-object @@ -0,0 +1,114 @@ +#!/usr/bin/perl +# +# Example implementation for the Git read-object protocol version 1 +# See Documentation/technical/read-object-protocol.txt +# +# Allows you to test the ability for blobs to be pulled from a host git repo +# "on demand." Called when git needs a blob it couldn't find locally due to +# a lazy clone that only cloned the commits and trees. +# +# A lazy clone can be simulated via the following commands from the host repo +# you wish to create a lazy clone of: +# +# cd /host_repo +# git rev-parse HEAD +# git init /guest_repo +# git cat-file --batch-check --batch-all-objects | grep -v 'blob' | +# cut -d' ' -f1 | git pack-objects /guest_repo/.git/objects/pack/noblobs +# cd /guest_repo +# git config core.virtualizeobjects true +# git reset --hard +# +# Please note, this sample is a minimal skeleton. No proper error handling +# was implemented. +# + +use strict; +use warnings; + +# +# Point $DIR to the folder where your host git repo is located so we can pull +# missing objects from it +# +my $DIR = "../.git/"; + +sub packet_bin_read { + my $buffer; + my $bytes_read = read STDIN, $buffer, 4; + if ( $bytes_read == 0 ) { + + # EOF - Git stopped talking to us! 
+ exit(); + } + elsif ( $bytes_read != 4 ) { + die "invalid packet: '$buffer'"; + } + my $pkt_size = hex($buffer); + if ( $pkt_size == 0 ) { + return ( 1, "" ); + } + elsif ( $pkt_size > 4 ) { + my $content_size = $pkt_size - 4; + $bytes_read = read STDIN, $buffer, $content_size; + if ( $bytes_read != $content_size ) { + die "invalid packet ($content_size bytes expected; $bytes_read bytes read)"; + } + return ( 0, $buffer ); + } + else { + die "invalid packet size: $pkt_size"; + } +} + +sub packet_txt_read { + my ( $res, $buf ) = packet_bin_read(); + unless ( $buf =~ s/\n$// ) { + die "A non-binary line MUST be terminated by an LF."; + } + return ( $res, $buf ); +} + +sub packet_bin_write { + my $buf = shift; + print STDOUT sprintf( "%04x", length($buf) + 4 ); + print STDOUT $buf; + STDOUT->flush(); +} + +sub packet_txt_write { + packet_bin_write( $_[0] . "\n" ); +} + +sub packet_flush { + print STDOUT sprintf( "%04x", 0 ); + STDOUT->flush(); +} + +( packet_txt_read() eq ( 0, "git-read-object-client" ) ) || die "bad initialize"; +( packet_txt_read() eq ( 0, "version=1" ) ) || die "bad version"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad version end"; + +packet_txt_write("git-read-object-server"); +packet_txt_write("version=1"); +packet_flush(); + +( packet_txt_read() eq ( 0, "capability=get" ) ) || die "bad capability"; +( packet_bin_read() eq ( 1, "" ) ) || die "bad capability end"; + +packet_txt_write("capability=get"); +packet_flush(); + +while (1) { + my ($command) = packet_txt_read() =~ /^command=([^=]+)$/; + + if ( $command eq "get" ) { + my ($sha1) = packet_txt_read() =~ /^sha1=([0-9a-f]{40,64})$/; + packet_bin_read(); + + system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); + packet_txt_write(($?) ? 
"status=error" : "status=success"); + packet_flush(); + } else { + die "bad command '$command'"; + } +} diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh index 0c3bfea1886cf9..b8d7521c2c9106 100755 --- a/t/t0411-read-object.sh +++ b/t/t0411-read-object.sh @@ -1,27 +1,27 @@ #!/bin/sh -test_description='tests for read-object hook' +test_description='tests for long running read-object process' . ./test-lib.sh -test_expect_success 'setup host and guest repos' ' +test_expect_success 'setup host repo with a root commit' ' test_commit zero && - hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) && - git init guest-repo && - cd guest-repo && - git config core.virtualizeobjects true && - write_script .git/hooks/read-object <<-\EOF - # pass core.virtualizeobjects=false so we dont end up calling the hook proc recursively - git --git-dir=../.git/ cat-file blob "$1" | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1 - EOF + hash1=$(git ls-tree HEAD | grep zero.t | cut -f1 | cut -d\ -f3) ' test_expect_success 'blobs can be retrieved from the host repo' ' - git cat-file blob "$hash1" + git init guest-repo && + (cd guest-repo && + mkdir -p .git/hooks && + cp $TEST_DIRECTORY/t0410/read-object .git/hooks/ && + git config core.virtualizeobjects true && + git cat-file blob "$hash1") ' test_expect_success 'invalid blobs generate errors' ' - test_must_fail git cat-file blob "invalid" + (cd guest-repo && + test_must_fail git cat-file blob "invalid") ' + test_done From 87e39d2dcc7d7b2d60879acaa445fe4fca201d62 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 8 Sep 2017 11:32:43 +0200 Subject: [PATCH 15/15] sha1_file: when writing objects, skip the read_object_hook If we are going to write an object there is no use in calling the read object hook to get an object from a potentially remote source. We would rather just write out the object and avoid the potential round trip for an object that doesn't exist. 
This change adds a flag to the check_and_freshen() and freshen_loose_object() functions' signatures so that the hook is bypassed when the functions are called before writing loose objects. The check for a local object is still performed so we don't overwrite something that has already been written to one of the objects directories. Based on a patch by Kevin Willford. Signed-off-by: Johannes Schindelin --- sha1-file.c | 17 ++++++++++------- t/t0410/read-object | 4 ++++ t/t0411-read-object.sh | 7 +++++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/sha1-file.c b/sha1-file.c index 551068a93284ba..eb749414a0ea2d 100644 --- a/sha1-file.c +++ b/sha1-file.c @@ -1038,7 +1038,8 @@ static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen) return 0; } -static int check_and_freshen(const struct object_id *oid, int freshen) +static int check_and_freshen(const struct object_id *oid, int freshen, + int skip_virtualized_objects) { int ret; int tried_hook = 0; @@ -1046,7 +1047,8 @@ static int check_and_freshen(const struct object_id *oid, int freshen) retry: ret = check_and_freshen_local(oid, freshen) || check_and_freshen_nonlocal(oid, freshen); - if (!ret && core_virtualize_objects && !tried_hook) { + if (!ret && core_virtualize_objects && !skip_virtualized_objects && + !tried_hook) { tried_hook = 1; if (!read_object_process(oid)) goto retry; @@ -1062,7 +1064,7 @@ int has_loose_object_nonlocal(const struct object_id *oid) static int has_loose_object(const struct object_id *oid) { - return check_and_freshen(oid, 0); + return check_and_freshen(oid, 0, 0); } static void mmap_limit_check(size_t length) @@ -2040,9 +2042,10 @@ static int write_loose_object(const struct object_id *oid, char *hdr, return finalize_object_file(tmp_file.buf, filename.buf); } -static int freshen_loose_object(const struct object_id *oid) +static int freshen_loose_object(const struct object_id *oid, + int skip_virtualized_objects) { - return check_and_freshen(oid, 1); + 
return check_and_freshen(oid, 1, skip_virtualized_objects); } static int freshen_packed_object(const struct object_id *oid) @@ -2069,7 +2072,7 @@ int write_object_file(const void *buf, unsigned long len, const char *type, */ write_object_file_prepare(the_hash_algo, buf, len, type, oid, hdr, &hdrlen); - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) return 0; return write_loose_object(oid, hdr, hdrlen, buf, len, 0); } @@ -2089,7 +2092,7 @@ int hash_object_file_literally(const void *buf, unsigned long len, if (!(flags & HASH_WRITE_OBJECT)) goto cleanup; - if (freshen_packed_object(oid) || freshen_loose_object(oid)) + if (freshen_packed_object(oid) || freshen_loose_object(oid, 1)) goto cleanup; status = write_loose_object(oid, header, hdrlen, buf, len, 0); diff --git a/t/t0410/read-object b/t/t0410/read-object index 2b8feacc78577f..02c799837f4057 100755 --- a/t/t0410/read-object +++ b/t/t0410/read-object @@ -108,6 +108,10 @@ while (1) { system ('git --git-dir="' . $DIR . '" cat-file blob ' . $sha1 . ' | git -c core.virtualizeobjects=false hash-object -w --stdin >/dev/null 2>&1'); packet_txt_write(($?) ? "status=error" : "status=success"); packet_flush(); + + open my $log, '>>.git/read-object-hook.log'; + print $log "Read object $sha1, exit code $?\n"; + close $log; } else { die "bad command '$command'"; } diff --git a/t/t0411-read-object.sh b/t/t0411-read-object.sh index b8d7521c2c9106..af97ec5b50f356 100755 --- a/t/t0411-read-object.sh +++ b/t/t0411-read-object.sh @@ -23,5 +23,12 @@ test_expect_success 'invalid blobs generate errors' ' test_must_fail git cat-file blob "invalid") ' +test_expect_success 'read-object-hook is bypassed when writing objects' ' + (cd guest-repo && + echo hello >hello.txt && + git add hello.txt && + hash="$(git rev-parse --verify :hello.txt)" && + ! grep "$hash" .git/read-object-hook.log) +' test_done