From c91c92d20bdc2cfd253c0d8475c4c45254883be3 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 29 Jun 2017 14:33:38 -0400 Subject: [PATCH 01/21] dir: refactor add_excludes() Refactor add_excludes() to separate the reading of the exclude file into a buffer and the parsing of the buffer into exclude_list items. Add add_excludes_from_blob_to_list() to allow an exclude file be specified with an OID. Signed-off-by: Jeff Hostetler --- dir.c | 51 +++++++++++++++++++++++++++++++++++++++++++++++++-- dir.h | 3 +++ 2 files changed, 52 insertions(+), 2 deletions(-) diff --git a/dir.c b/dir.c index 1c55dc3e366f8c..3132cb01607e65 100644 --- a/dir.c +++ b/dir.c @@ -739,6 +739,10 @@ static void invalidate_directory(struct untracked_cache *uc, dir->dirs[i]->recurse = 0; } +static int add_excludes_from_buffer(char *buf, size_t size, + const char *base, int baselen, + struct exclude_list *el); + /* * Given a file with name "fname", read it (either from disk, or from * an index if 'istate' is non-null), parse it and store the @@ -754,9 +758,9 @@ static int add_excludes(const char *fname, const char *base, int baselen, struct sha1_stat *sha1_stat) { struct stat st; - int fd, i, lineno = 1; + int fd; size_t size = 0; - char *buf, *entry; + char *buf; fd = open(fname, O_RDONLY); if (fd < 0 || fstat(fd, &st) < 0) { @@ -813,6 +817,17 @@ static int add_excludes(const char *fname, const char *base, int baselen, } } + add_excludes_from_buffer(buf, size, base, baselen, el); + return 0; +} + +static int add_excludes_from_buffer(char *buf, size_t size, + const char *base, int baselen, + struct exclude_list *el) +{ + int i, lineno = 1; + char *entry; + el->filebuf = buf; if (skip_utf8_bom(&buf, size)) @@ -841,6 +856,38 @@ int add_excludes_from_file_to_list(const char *fname, const char *base, return add_excludes(fname, base, baselen, el, istate, NULL); } +int add_excludes_from_blob_to_list( + struct object_id *oid, + const char *base, int baselen, + struct exclude_list *el) +{ + char 
*buf; + unsigned long size; + enum object_type type; + + buf = read_sha1_file(oid->hash, &type, &size); + if (!buf) + return -1; + + if (type != OBJ_BLOB) { + free(buf); + return -1; + } + + if (size == 0) { + free(buf); + return 0; + } + + if (buf[size - 1] != '\n') { + buf = xrealloc(buf, st_add(size, 1)); + buf[size++] = '\n'; + } + + add_excludes_from_buffer(buf, size, base, baselen, el); + return 0; +} + struct exclude_list *add_exclude_list(struct dir_struct *dir, int group_type, const char *src) { diff --git a/dir.h b/dir.h index e3717055d19336..1bcf39123ad7fd 100644 --- a/dir.h +++ b/dir.h @@ -256,6 +256,9 @@ extern struct exclude_list *add_exclude_list(struct dir_struct *dir, extern int add_excludes_from_file_to_list(const char *fname, const char *base, int baselen, struct exclude_list *el, struct index_state *istate); extern void add_excludes_from_file(struct dir_struct *, const char *fname); +extern int add_excludes_from_blob_to_list(struct object_id *oid, + const char *base, int baselen, + struct exclude_list *el); extern void parse_exclude_pattern(const char **string, int *patternlen, unsigned *flags, int *nowildcardlen); extern void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *el, int srcpos); From 2b621d6289d117a0ca431afe6183a4847dad162a Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Mon, 26 Jun 2017 15:35:36 -0400 Subject: [PATCH 02/21] oidset2: create oidset subclass with object length and pathname Create subclass of oidset where each entry has a field to store the length of the object's content and an optional pathname. This will be used in a future commit to build a manifest of omitted objects in a partial/narrow clone/fetch. TODO Evaluate if the new oidmap routines can replace this. 
Signed-off-by: Jeff Hostetler --- Makefile | 1 + oidset2.c | 167 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ oidset2.h | 64 +++++++++++++++++++++++++ 3 files changed, 232 insertions(+) create mode 100644 oidset2.c create mode 100644 oidset2.h diff --git a/Makefile b/Makefile index ed4ca438bd9c6d..57135ef103f41b 100644 --- a/Makefile +++ b/Makefile @@ -822,6 +822,7 @@ LIB_OBJS += notes-merge.o LIB_OBJS += notes-utils.o LIB_OBJS += object.o LIB_OBJS += oidset.o +LIB_OBJS += oidset2.o LIB_OBJS += packfile.o LIB_OBJS += pack-bitmap.o LIB_OBJS += pack-bitmap-write.o diff --git a/oidset2.c b/oidset2.c new file mode 100644 index 00000000000000..161aa28adbb701 --- /dev/null +++ b/oidset2.c @@ -0,0 +1,167 @@ +#include "cache.h" +#include "oidset2.h" + +static int oidset2_hashcmp(const void *unused_cmp_data, + const void *va, const void *vb, + const void *vkey) +{ + const struct oidset2_entry *a = va, *b = vb; + const struct object_id *key = vkey; + return oidcmp(&a->oid, key ? key : &b->oid); +} + +struct oidset2_entry *oidset2_get(const struct oidset2 *set, + const struct object_id *oid) +{ + struct hashmap_entry key; + struct oidset2_entry *value; + + if (!set->map.cmpfn) + return NULL; + + hashmap_entry_init(&key, sha1hash(oid->hash)); + value = hashmap_get(&set->map, &key, oid); + + return value; +} + +int oidset2_contains(const struct oidset2 *set, const struct object_id *oid) +{ + return !!oidset2_get(set, oid); +} + +int oidset2_insert(struct oidset2 *set, const struct object_id *oid, + enum object_type type, unsigned long object_length, + const char *pathname) +{ + struct oidset2_entry *entry; + + if (!set->map.cmpfn) + hashmap_init(&set->map, oidset2_hashcmp, NULL, 0); + + if (oidset2_contains(set, oid)) + return 1; + + entry = xcalloc(1, sizeof(*entry)); + hashmap_entry_init(&entry->hash, sha1hash(oid->hash)); + oidcpy(&entry->oid, oid); + + entry->type = type; + entry->object_length = object_length; + entry->object_length_valid = 1; + + if (pathname) +
entry->pathname = xstrdup(pathname); + + hashmap_add(&set->map, entry); + return 0; +} + +int oidset2_insert_without_length(struct oidset2 *set, + const struct object_id *oid, + enum object_type type, + const char *pathname) +{ + struct oidset2_entry *entry; + + if (!set->map.cmpfn) + hashmap_init(&set->map, oidset2_hashcmp, NULL, 0); + + if (oidset2_contains(set, oid)) + return 1; + + entry = xcalloc(1, sizeof(*entry)); + hashmap_entry_init(&entry->hash, sha1hash(oid->hash)); + oidcpy(&entry->oid, oid); + + entry->type = type; + entry->object_length = 0; + entry->object_length_valid = 0; + + if (pathname) + entry->pathname = xstrdup(pathname); + + hashmap_add(&set->map, entry); + return 0; +} + +/* + * Remove `oid` from the set and free its entry (including the pathname + * copy it owns). Removing an oid that is not present, or removing from + * a set that was never initialized, is a no-op. + */ +void oidset2_remove(struct oidset2 *set, const struct object_id *oid) +{ + struct hashmap_entry key; + struct oidset2_entry *e; + + if (!set->map.cmpfn) + return; + + hashmap_entry_init(&key, sha1hash(oid->hash)); + e = hashmap_remove(&set->map, &key, oid); + if (!e) + return; + + free(e->pathname); + free(e); +} + +/* + * Release everything held by the set. hashmap_free() only frees the + * entries themselves, so the pathname copies they own are freed first. + */ +void oidset2_clear(struct oidset2 *set) +{ + struct hashmap_iter iter; + struct oidset2_entry *e; + + if (!set->map.cmpfn) + return; + + hashmap_iter_init(&set->map, &iter); + while ((e = hashmap_iter_next(&iter))) + free(e->pathname); + + hashmap_free(&set->map, 1); +} + +static int oidset2_cmp(const void *a, const void *b) +{ + const struct oidset2_entry *ae = *((const struct oidset2_entry **)a); + const struct oidset2_entry *be = *((const struct oidset2_entry **)b); + + return oidcmp(&ae->oid, &be->oid); +} + +/* + * Call `cb` once for every entry, in ascending oid order. The entries are + * collected into a temporary array and sorted so that the callback order + * does not depend on the hash table layout. + */ +void oidset2_foreach(struct oidset2 *set, oidset2_foreach_cb cb, void *cb_data) +{ + struct hashmap_iter iter; + struct oidset2_entry **array; + struct oidset2_entry *e; + int j, k; + + if (!set || !set->map.cmpfn) + return; + + array = xcalloc(hashmap_get_size(&set->map), sizeof(*array)); + + hashmap_iter_init(&set->map, &iter); + k = 0; + while ((e = hashmap_iter_next(&iter))) + array[k++] = e; + + QSORT(array, k, oidset2_cmp); + + for (j = 0; j < k; j++) { + e = array[j]; + cb(j, k, e, cb_data); + } + + free(array); +} diff --git a/oidset2.h b/oidset2.h new file mode 100644 index 00000000000000..5d3d6289ed3f4b --- /dev/null +++ b/oidset2.h @@ -0,0 +1,64 @@ +#ifndef OIDSET2_H +#define OIDSET2_H + +/**
+ * oidset2 is a variant of oidset, but allows additional fields for each object. + */ + +/** + * A single oidset2; should be zero-initialized (or use OIDSET2_INIT). + */ +struct oidset2 { + struct hashmap map; +}; + +#define OIDSET2_INIT { { NULL } } + +struct oidset2_entry { + struct hashmap_entry hash; + struct object_id oid; + + char *pathname; + unsigned long object_length; + + enum object_type type; + unsigned int object_length_valid : 1; +}; + +struct oidset2_entry *oidset2_get(const struct oidset2 *set, + const struct object_id *oid); + +/** + * Returns true iff `set` contains `oid`. + */ +int oidset2_contains(const struct oidset2 *set, const struct object_id *oid); + +/** + * Insert the oid into the set; a copy is made, so "oid" does not need + * to persist after this function is called. + * + * Returns 1 if the oid was already in the set, 0 otherwise. This can be used + * to perform an efficient check-and-add. + */ +int oidset2_insert(struct oidset2 *set, const struct object_id *oid, + enum object_type type, unsigned long object_length, + const char *pathname); +int oidset2_insert_without_length(struct oidset2 *set, + const struct object_id *oid, + enum object_type type, + const char *pathname); + +void oidset2_remove(struct oidset2 *set, const struct object_id *oid); + +typedef void (*oidset2_foreach_cb)(int i, int i_limit, struct oidset2_entry *e, + void *cb_data); + +void oidset2_foreach(struct oidset2 *set, oidset2_foreach_cb cb, void *cb_data); + +/** + * Remove all entries from the oidset2, freeing any resources associated with + * it. + */ +void oidset2_clear(struct oidset2 *set); + +#endif /* OIDSET2_H */ From 82f739025163a3572532e29af1dd4abcbcc5808e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 30 Jun 2017 13:20:48 -0400 Subject: [PATCH 03/21] list-objects: filter objects in traverse_commit_list Create traverse_commit_list_filtered() and add filtering interface to allow certain objects to be omitted (not shown) during a traversal. 
Update traverse_commit_list() to be a wrapper for the above. Filtering will be used in a future commit by rev-list and pack-objects for narrow/partial clone/fetch to omit certain blobs from the output. traverse_bitmap_commit_list() does not work with filtering. If a packfile bitmap is present, it will not be used. Signed-off-by: Jeff Hostetler --- list-objects.c | 66 ++++++++++++++++++++++++++++++++++++++------------ list-objects.h | 30 +++++++++++++++++++++++ 2 files changed, 80 insertions(+), 16 deletions(-) diff --git a/list-objects.c b/list-objects.c index b3931fa434dc99..3e86008b1fb55c 100644 --- a/list-objects.c +++ b/list-objects.c @@ -13,10 +13,13 @@ static void process_blob(struct rev_info *revs, show_object_fn show, struct strbuf *path, const char *name, - void *cb_data) + void *cb_data, + filter_object_fn filter, + void *filter_data) { struct object *obj = &blob->object; size_t pathlen; + list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_SHOW; if (!revs->blob_objects) return; @@ -24,11 +27,15 @@ static void process_blob(struct rev_info *revs, die("bad blob object"); if (obj->flags & (UNINTERESTING | SEEN)) return; - obj->flags |= SEEN; pathlen = path->len; strbuf_addstr(path, name); - show(obj, path->buf, cb_data); + if (filter) + r = filter(LOFT_BLOB, obj, path->buf, &path->buf[pathlen], filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_SHOW) + show(obj, path->buf, cb_data); strbuf_setlen(path, pathlen); } @@ -69,7 +76,9 @@ static void process_tree(struct rev_info *revs, show_object_fn show, struct strbuf *base, const char *name, - void *cb_data) + void *cb_data, + filter_object_fn filter, + void *filter_data) { struct object *obj = &tree->object; struct tree_desc desc; @@ -77,6 +86,7 @@ static void process_tree(struct rev_info *revs, enum interesting match = revs->diffopt.pathspec.nr == 0 ? 
all_entries_interesting: entry_not_interesting; int baselen = base->len; + list_objects_filter_result r = LOFR_MARK_SEEN | LOFR_SHOW; if (!revs->tree_objects) return; @@ -90,9 +100,13 @@ static void process_tree(struct rev_info *revs, die("bad tree object %s", oid_to_hex(&obj->oid)); } - obj->flags |= SEEN; strbuf_addstr(base, name); - show(obj, base->buf, cb_data); + if (filter) + r = filter(LOFT_BEGIN_TREE, obj, base->buf, &base->buf[baselen], filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_SHOW) + show(obj, base->buf, cb_data); if (base->len) strbuf_addch(base, '/'); @@ -112,7 +126,7 @@ static void process_tree(struct rev_info *revs, process_tree(revs, lookup_tree(entry.oid), show, base, entry.path, - cb_data); + cb_data, filter, filter_data); else if (S_ISGITLINK(entry.mode)) process_gitlink(revs, entry.oid->hash, show, base, entry.path, @@ -121,8 +135,17 @@ static void process_tree(struct rev_info *revs, process_blob(revs, lookup_blob(entry.oid), show, base, entry.path, - cb_data); + cb_data, filter, filter_data); } + + if (filter) { + r = filter(LOFT_END_TREE, obj, base->buf, &base->buf[baselen], filter_data); + if (r & LOFR_MARK_SEEN) + obj->flags |= SEEN; + if (r & LOFR_SHOW) + show(obj, base->buf, cb_data); + } + strbuf_setlen(base, baselen); free_tree_buffer(tree); } @@ -183,10 +206,10 @@ static void add_pending_tree(struct rev_info *revs, struct tree *tree) add_pending_object(revs, &tree->object, ""); } -void traverse_commit_list(struct rev_info *revs, - show_commit_fn show_commit, - show_object_fn show_object, - void *data) +void traverse_commit_list_worker( + struct rev_info *revs, + show_commit_fn show_commit, show_object_fn show_object, void *show_data, + filter_object_fn filter, void *filter_data) { int i; struct commit *commit; @@ -200,7 +223,7 @@ void traverse_commit_list(struct rev_info *revs, */ if (commit->tree) add_pending_tree(revs, commit->tree); - show_commit(commit, data); + show_commit(commit, show_data); } 
for (i = 0; i < revs->pending.nr; i++) { struct object_array_entry *pending = revs->pending.objects + i; @@ -211,19 +234,19 @@ void traverse_commit_list(struct rev_info *revs, continue; if (obj->type == OBJ_TAG) { obj->flags |= SEEN; - show_object(obj, name, data); + show_object(obj, name, show_data); continue; } if (!path) path = ""; if (obj->type == OBJ_TREE) { process_tree(revs, (struct tree *)obj, show_object, - &base, path, data); + &base, path, show_data, filter, filter_data); continue; } if (obj->type == OBJ_BLOB) { process_blob(revs, (struct blob *)obj, show_object, - &base, path, data); + &base, path, show_data, filter, filter_data); continue; } die("unknown pending object %s (%s)", @@ -232,3 +255,14 @@ void traverse_commit_list(struct rev_info *revs, object_array_clear(&revs->pending); strbuf_release(&base); } + +void traverse_commit_list(struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + void *show_data) +{ + traverse_commit_list_worker( + revs, + show_commit, show_object, show_data, + NULL, NULL); +} diff --git a/list-objects.h b/list-objects.h index 0cebf8585cb179..39fcbb5058d775 100644 --- a/list-objects.h +++ b/list-objects.h @@ -8,4 +8,34 @@ void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, voi typedef void (*show_edge_fn)(struct commit *); void mark_edges_uninteresting(struct rev_info *, show_edge_fn); +enum list_objects_filter_result { + LOFR_ZERO = 0, + LOFR_MARK_SEEN = 1<<0, + LOFR_SHOW = 1<<1, +}; + +/* See object.h and revision.h */ +#define FILTER_REVISIT (1<<25) + +enum list_objects_filter_type { + LOFT_BEGIN_TREE, + LOFT_END_TREE, + LOFT_BLOB +}; + +typedef enum list_objects_filter_result list_objects_filter_result; +typedef enum list_objects_filter_type list_objects_filter_type; + +typedef list_objects_filter_result (*filter_object_fn)( + list_objects_filter_type filter_type, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data); + +void 
traverse_commit_list_worker( + struct rev_info *, + show_commit_fn, show_object_fn, void *show_data, + filter_object_fn filter, void *filter_data); + #endif From 80481e6a3b4b598809d8dd3e0be4470d7b504d70 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 1 Sep 2017 17:19:51 +0000 Subject: [PATCH 04/21] list-objects-filter-blobs-none: add filter to omit all blobs Create a simple filter for traverse_commit_list_worker() to omit all blobs from the result. This filter will be used in a future commit by rev-list and pack-objects to create a "commits and trees" result. This is intended for partial clone and fetch support. Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-blobs-none.c | 89 ++++++++++++++++++++++++++++++++ list-objects-filter-blobs-none.h | 18 +++++++ 3 files changed, 108 insertions(+) create mode 100644 list-objects-filter-blobs-none.c create mode 100644 list-objects-filter-blobs-none.h diff --git a/Makefile b/Makefile index 57135ef103f41b..484dab750b8bff 100644 --- a/Makefile +++ b/Makefile @@ -804,6 +804,7 @@ LIB_OBJS += levenshtein.o LIB_OBJS += line-log.o LIB_OBJS += line-range.o LIB_OBJS += list-objects.o +LIB_OBJS += list-objects-filter-blobs-none.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o diff --git a/list-objects-filter-blobs-none.c b/list-objects-filter-blobs-none.c new file mode 100644 index 00000000000000..773e21db1a1947 --- /dev/null +++ b/list-objects-filter-blobs-none.c @@ -0,0 +1,89 @@ +#include "cache.h" +#include "dir.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "diff.h" +#include "tree-walk.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter-blobs-none.h" + +/* + * A filter for list-objects to omit ALL blobs from the traversal. + * And to OPTIONALLY collect a list of the omitted OIDs. 
+ */ +struct filter_blobs_none_data { + struct oidset2 omits; +}; + +static list_objects_filter_result filter_blobs_none( + list_objects_filter_type filter_type, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_blobs_none_data *filter_data = filter_data_; + unsigned long object_length; + enum object_type t; + + switch (filter_type) { + default: + die("unkown filter_type"); + return LOFR_ZERO; + + case LOFT_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + /* always include all tree objects */ + return LOFR_MARK_SEEN | LOFR_SHOW; + + case LOFT_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + case LOFT_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + /* + * Since we always omit all blobs (and never + * provisionally omit), we should never see + * a blob twice. + */ + assert(!oidset2_contains(&filter_data->omits, &obj->oid)); + + t = sha1_object_info(obj->oid.hash, &object_length); + if (t == OBJ_NONE) /* we may not have it locally */ + oidset2_insert_without_length( + &filter_data->omits, &obj->oid, + obj->type, pathname); + else + oidset2_insert(&filter_data->omits, &obj->oid, + obj->type, object_length, + pathname); + + return LOFR_MARK_SEEN; /* but not LOFR_SHOW (hard omit) */ + } +} + +void traverse_commit_list__blobs_none( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *ctx_data) +{ + struct filter_blobs_none_data d; + + memset(&d, 0, sizeof(d)); + + traverse_commit_list_worker(revs, show_commit, show_object, ctx_data, + filter_blobs_none, &d); + + if (print_omitted_object) + oidset2_foreach(&d.omits, print_omitted_object, ctx_data); + + oidset2_clear(&d.omits); +} diff --git a/list-objects-filter-blobs-none.h b/list-objects-filter-blobs-none.h new file mode 100644 index 00000000000000..009be2ff703438 --- /dev/null +++ b/list-objects-filter-blobs-none.h @@ -0,0 +1,18 @@ 
+#ifndef LIST_OBJECTS_FILTER_BLOBS_NONE_H +#define LIST_OBJECTS_FILTER_BLOBS_NONE_H + +#include "oidset2.h" + +/* + * A filter for list-objects to omit ALL blobs + * from the traversal. + */ +void traverse_commit_list__blobs_none( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *ctx_data); + +#endif /* LIST_OBJECTS_FILTER_BLOBS_NONE_H */ + From 48a9413ec488843d490a21f135be10d6749bb451 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 1 Sep 2017 17:24:19 +0000 Subject: [PATCH 05/21] list-objects-filter-blobs-limit: add large blob filtering Create a filter for traverse_commit_list_worker() to omit blobs larger than a requested size from the result, but always include ".git*" special files. Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-blobs-limit.c | 127 ++++++++++++++++++++++++++++++ list-objects-filter-blobs-limit.h | 18 +++++ 3 files changed, 146 insertions(+) create mode 100644 list-objects-filter-blobs-limit.c create mode 100644 list-objects-filter-blobs-limit.h diff --git a/Makefile b/Makefile index 484dab750b8bff..b3dc72aeeefb8f 100644 --- a/Makefile +++ b/Makefile @@ -805,6 +805,7 @@ LIB_OBJS += line-log.o LIB_OBJS += line-range.o LIB_OBJS += list-objects.o LIB_OBJS += list-objects-filter-blobs-none.o +LIB_OBJS += list-objects-filter-blobs-limit.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o diff --git a/list-objects-filter-blobs-limit.c b/list-objects-filter-blobs-limit.c new file mode 100644 index 00000000000000..0e2ead4a12e843 --- /dev/null +++ b/list-objects-filter-blobs-limit.c @@ -0,0 +1,127 @@ +#include "cache.h" +#include "dir.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "diff.h" +#include "tree-walk.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter-blobs-limit.h" + +/* + * A filter for list-objects to omit large blobs, + * but always 
include ".git*" special files. + * And to OPTIONALLY collect a list of the omitted OIDs. + */ +struct filter_blobs_limit_data { + struct oidset2 omits; + unsigned long max_bytes; +}; + +static list_objects_filter_result filter_blobs_limit( + list_objects_filter_type filter_type, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_blobs_limit_data *filter_data = filter_data_; + unsigned long object_length; + enum object_type t; + int is_special_filename; + + switch (filter_type) { + default: + die("unkown filter_type"); + return LOFR_ZERO; + + case LOFT_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + /* always include all tree objects */ + return LOFR_MARK_SEEN | LOFR_SHOW; + + case LOFT_END_TREE: + assert(obj->type == OBJ_TREE); + return LOFR_ZERO; + + case LOFT_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + is_special_filename = ((strncmp(filename, ".git", 4) == 0) && + filename[4]); + + /* + * If previously provisionally omitted this blob (because + * of size), but it now has a special filename, force + * include it. + */ + if (oidset2_contains(&filter_data->omits, &obj->oid)) { + if (is_special_filename) { + oidset2_remove(&filter_data->omits, + &obj->oid); + return LOFR_MARK_SEEN | LOFR_SHOW; + } + return LOFR_ZERO; + } + + /* + * If filename matches ".git*", always include it (regardless + * of size). + */ + if (is_special_filename) + return LOFR_MARK_SEEN | LOFR_SHOW; + + t = sha1_object_info(obj->oid.hash, &object_length); + if (t == OBJ_BLOB && object_length < filter_data->max_bytes) + return LOFR_MARK_SEEN | LOFR_SHOW; + + if (t == OBJ_BLOB) { + /* + * Provisionally omit it. We've already established + * that this blob is too big and doesn't have a special + * filename, so we *WANT* to omit it. However, there + * may be a special file elsewhere in the tree that + * references this same blob, so we cannot reject it + * just yet. 
Leave the LOFR_ bits unset so that *IF* + * the blob appears again in the traversal, we will + * be asked again. + */ + oidset2_insert(&filter_data->omits, &obj->oid, + obj->type, object_length, pathname); + return LOFR_ZERO; + } + + /* + * We DO NOT have the blob locally, so we cannot apply the + * filter criteria. Be conservative and force show it (and + * let the caller deal with the ambiguity). (This matches + * the behavior when the special filename matches.) + */ + return LOFR_MARK_SEEN | LOFR_SHOW; + } +} + +void traverse_commit_list__blobs_limit( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *ctx_data, + unsigned long large_byte_limit) +{ + struct filter_blobs_limit_data d; + + memset(&d, 0, sizeof(d)); + d.max_bytes = large_byte_limit; + + traverse_commit_list_worker(revs, show_commit, show_object, ctx_data, + filter_blobs_limit, &d); + + if (print_omitted_object) + oidset2_foreach(&d.omits, print_omitted_object, ctx_data); + + oidset2_clear(&d.omits); +} diff --git a/list-objects-filter-blobs-limit.h b/list-objects-filter-blobs-limit.h new file mode 100644 index 00000000000000..2f9d08cb8474a4 --- /dev/null +++ b/list-objects-filter-blobs-limit.h @@ -0,0 +1,18 @@ +#ifndef LIST_OBJECTS_FILTER_BLOBS_LIMIT_H +#define LIST_OBJECTS_FILTER_BLOBS_LIMIT_H + +#include "oidset2.h" + +/* + * A filter for list-objects to omit large blobs, + * but always include ".git*" special files. 
+ */ +void traverse_commit_list__blobs_limit( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *ctx_data, + unsigned long large_byte_limit); + +#endif /* LIST_OBJECTS_FILTER_BLOBS_LIMIT_H */ From ac37825abb77a1f449db349eccd8e61f35ea80a9 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 1 Sep 2017 17:27:44 +0000 Subject: [PATCH 06/21] list-objects-filter-sparse: add sparse filter Create a filter for traverse_commit_list_worker() to only include the blobs the would be referenced by a sparse-checkout using the given specification. Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-sparse.c | 227 +++++++++++++++++++++++++++++++++++ list-objects-filter-sparse.h | 30 +++++ 3 files changed, 258 insertions(+) create mode 100644 list-objects-filter-sparse.c create mode 100644 list-objects-filter-sparse.h diff --git a/Makefile b/Makefile index b3dc72aeeefb8f..3f994ef0cf3a44 100644 --- a/Makefile +++ b/Makefile @@ -806,6 +806,7 @@ LIB_OBJS += line-range.o LIB_OBJS += list-objects.o LIB_OBJS += list-objects-filter-blobs-none.o LIB_OBJS += list-objects-filter-blobs-limit.o +LIB_OBJS += list-objects-filter-sparse.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o LIB_OBJS += log-tree.o diff --git a/list-objects-filter-sparse.c b/list-objects-filter-sparse.c new file mode 100644 index 00000000000000..cea29a7757d070 --- /dev/null +++ b/list-objects-filter-sparse.c @@ -0,0 +1,227 @@ +#include "cache.h" +#include "dir.h" +#include "tag.h" +#include "commit.h" +#include "tree.h" +#include "blob.h" +#include "diff.h" +#include "tree-walk.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter-sparse.h" + +/* + * A filter driven by a sparse-checkout specification to only + * include blobs that a sparse checkout would populate. + * + * The sparse-checkout spec can be loaded from a blob with the + * given OID or from a local pathname. 
We allow an OID because + * the repo may be bare or we may be doing the filtering on the + * server. + */ +struct frame { + int defval; + unsigned int child_prov_omit : 1; +}; + +struct filter_use_sparse_data { + struct oidset2 omits; + struct exclude_list el; + + size_t nr, alloc; + struct frame *array_frame; +}; + +static list_objects_filter_result filter_use_sparse( + list_objects_filter_type filter_type, + struct object *obj, + const char *pathname, + const char *filename, + void *filter_data_) +{ + struct filter_use_sparse_data *filter_data = filter_data_; + struct oidset2_entry *entry_prev; + unsigned long object_length; + enum object_type t; + int val, dtype; + struct frame *frame; + + switch (filter_type) { + default: + die("unknown filter_type"); + return LOFR_ZERO; + + case LOFT_BEGIN_TREE: + assert(obj->type == OBJ_TREE); + dtype = DT_DIR; + val = is_excluded_from_list(pathname, strlen(pathname), + filename, &dtype, &filter_data->el, + &the_index); + if (val < 0) + val = filter_data->array_frame[filter_data->nr].defval; + + ALLOC_GROW(filter_data->array_frame, filter_data->nr + 2, + filter_data->alloc); /* the pushed frame lands at index nr + 1 */ + filter_data->nr++; + filter_data->array_frame[filter_data->nr].defval = val; + filter_data->array_frame[filter_data->nr].child_prov_omit = 0; + + /* + * A directory with this tree OID may appear in multiple + * places in the tree. (Think of a directory move, with + * no other changes.) And with a different pathname, the + * is_excluded...() results for this directory and items + * contained within it may be different. So we cannot + * mark it SEEN (yet), since that will prevent process_tree() + * from revisiting this tree object with other pathnames. + * + * Only SHOW the tree object the first time we visit this + * tree object. + * + * We always show all tree objects. A future optimization + * may want to attempt to narrow this.
+ */ + if (obj->flags & FILTER_REVISIT) + return LOFR_ZERO; + obj->flags |= FILTER_REVISIT; + return LOFR_SHOW; + + case LOFT_END_TREE: + assert(obj->type == OBJ_TREE); + assert(filter_data->nr > 0); + + frame = &filter_data->array_frame[filter_data->nr]; + filter_data->nr--; + + /* + * Tell our parent directory if any of our children were + * provisionally omitted. + */ + filter_data->array_frame[filter_data->nr].child_prov_omit |= + frame->child_prov_omit; + + /* + * If there are NO provisionally omitted child objects (ALL child + * objects in this folder were INCLUDED), then we can mark the + * folder as SEEN (so we will not have to revisit it again). + */ + if (!frame->child_prov_omit) + return LOFR_MARK_SEEN; + return LOFR_ZERO; + + case LOFT_BLOB: + assert(obj->type == OBJ_BLOB); + assert((obj->flags & SEEN) == 0); + + frame = &filter_data->array_frame[filter_data->nr]; + + /* + * If we previously provisionally omitted this blob (because + * its pathname was not in the sparse-checkout) *AND* this + * reference to the blob has the same pathname, we can avoid + * repeating the exclusion logic on this pathname and just + * continue to provisionally omit it. + */ + entry_prev = oidset2_get(&filter_data->omits, &obj->oid); + if (entry_prev && !strcmp(pathname, entry_prev->pathname)) { + frame->child_prov_omit = 1; + return LOFR_ZERO; + } + + dtype = DT_REG; + val = is_excluded_from_list(pathname, strlen(pathname), + filename, &dtype, &filter_data->el, + &the_index); + if (val < 0) + val = frame->defval; + if (val > 0) { + if (entry_prev) + oidset2_remove(&filter_data->omits, + &obj->oid); + return LOFR_MARK_SEEN | LOFR_SHOW; + } + + /* + * Provisionally omit it. We've already established + * that this pathname is not in the sparse-checkout + * specification, so we *WANT* to omit this blob. + * + * However, a pathname elsewhere in the tree may also + * reference this same blob, so we cannot reject it + * yet. 
Leave the LOFR_ bits unset so that if the + * blob appears again in the traversal, we will be + * asked again. + * + * The pathname we associate with this omit is just + * the first one we saw for this blob. Other + * instances of this blob may have other pathnames + * and that is fine. We just use it for perf + * because most of the time, the blob will be in + * the same place as we walk the commits. + */ + t = sha1_object_info(obj->oid.hash, &object_length); + if (t == OBJ_NONE) /* we may not have it locally */ + oidset2_insert_without_length(&filter_data->omits, + &obj->oid, obj->type, + pathname); + else + oidset2_insert(&filter_data->omits, &obj->oid, + obj->type, object_length, pathname); + frame->child_prov_omit = 1; + return LOFR_ZERO; + } +} + +void traverse_commit_list__sparse_oid( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *ctx_data, + struct object_id *oid) +{ + struct filter_use_sparse_data d; + + memset(&d, 0, sizeof(d)); + if (add_excludes_from_blob_to_list(oid, NULL, 0, &d.el) < 0) + die("could not load filter specification"); + + ALLOC_GROW(d.array_frame, d.nr + 1, d.alloc); + d.array_frame[d.nr].defval = 0; /* default to include */ + d.array_frame[d.nr].child_prov_omit = 0; + + traverse_commit_list_worker(revs, show_commit, show_object, ctx_data, + filter_use_sparse, &d); + + if (print_omitted_object) + oidset2_foreach(&d.omits, print_omitted_object, ctx_data); + + oidset2_clear(&d.omits); +} + +void traverse_commit_list__sparse_path( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *ctx_data, + const char *path) +{ + struct filter_use_sparse_data d; + + memset(&d, 0, sizeof(d)); + if (add_excludes_from_file_to_list(path, NULL, 0, &d.el, NULL) < 0) + die("could not load filter specification"); + + ALLOC_GROW(d.array_frame, d.nr + 1, d.alloc); + d.array_frame[d.nr].defval = 
0; /* default to include */ + d.array_frame[d.nr].child_prov_omit = 0; + + traverse_commit_list_worker(revs, show_commit, show_object, ctx_data, + filter_use_sparse, &d); + + if (print_omitted_object) + oidset2_foreach(&d.omits, print_omitted_object, ctx_data); + + oidset2_clear(&d.omits); +} diff --git a/list-objects-filter-sparse.h b/list-objects-filter-sparse.h new file mode 100644 index 00000000000000..9aabbf8bc77b5b --- /dev/null +++ b/list-objects-filter-sparse.h @@ -0,0 +1,30 @@ +#ifndef LIST_OBJECTS_FILTERS_SPARSE_H +#define LIST_OBJECTS_FILTERS_SPARSE_H + +#include "oidset2.h" + +/* + * A filter driven by a sparse-checkout specification to only + * include blobs that a sparse checkout would populate. + * + * The sparse-checkout spec can be loaded from a blob with the + * given OID, a blob with a blob-ish path, or from a local pathname. + * We allow an OID because the repo may be bare or we may be doing + * the filtering on the server. + */ +void traverse_commit_list__sparse_oid( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *ctx_data, + struct object_id *oid); +void traverse_commit_list__sparse_path( + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *ctx_data, + const char *path); + +#endif /* LIST_OBJECTS_FILTERS_SPARSE_H */ From 5b0696a793c06ae0450b694cd7c64ce5773194df Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 6 Jul 2017 16:52:36 -0400 Subject: [PATCH 07/21] list-objects-filter-options: common argument parsing Create common routines and defines for parsing list-objects-filter-related command line arguments and pack-protocol fields. 
Signed-off-by: Jeff Hostetler --- Makefile | 1 + list-objects-filter-options.c | 101 ++++++++++++++++++++++++++++++++++ list-objects-filter-options.h | 50 +++++++++++++++++ 3 files changed, 152 insertions(+) create mode 100644 list-objects-filter-options.c create mode 100644 list-objects-filter-options.h diff --git a/Makefile b/Makefile index 3f994ef0cf3a44..3a4ba7a41ebe57 100644 --- a/Makefile +++ b/Makefile @@ -806,6 +806,7 @@ LIB_OBJS += line-range.o LIB_OBJS += list-objects.o LIB_OBJS += list-objects-filter-blobs-none.o LIB_OBJS += list-objects-filter-blobs-limit.o +LIB_OBJS += list-objects-filter-options.o LIB_OBJS += list-objects-filter-sparse.o LIB_OBJS += ll-merge.o LIB_OBJS += lockfile.o diff --git a/list-objects-filter-options.c b/list-objects-filter-options.c new file mode 100644 index 00000000000000..40f48ac275cabf --- /dev/null +++ b/list-objects-filter-options.c @@ -0,0 +1,101 @@ +#include "cache.h" +#include "commit.h" +#include "config.h" +#include "revision.h" +#include "list-objects.h" +#include "list-objects-filter-options.h" + +/* + * Parse value of the argument to the "filter" keyword. + * On the command line this looks like: --filter=<arg> + * and in the pack protocol as: filter <arg> + * + * <arg> ::= blob:none + * blob:limit=<n>[kmg] + * sparse:oid=<oid-ish> + * sparse:path=<path> + */ +int parse_list_objects_filter(struct list_objects_filter_options *filter_options, + const char *arg) +{ + struct object_context oc; + struct object_id sparse_oid; + const char *v0; + const char *v1; + + if (filter_options->choice) + die(_("multiple object filter types cannot be combined")); + + /* + * TODO consider rejecting 'arg' if it contains any + * TODO injection characters (since we might send this + * TODO to a sub-command or to the server and we don't + * TODO want to deal with legacy quoting/escaping for + * TODO a new feature).
+ */ + + filter_options->raw_value = strdup(arg); + + if (skip_prefix(arg, "blob:", &v0) || skip_prefix(arg, "blobs:", &v0)) { + if (!strcmp(v0, "none")) { + filter_options->choice = LOFC_BLOB_NONE; + return 0; + } + + if (skip_prefix(v0, "limit=", &v1) && + git_parse_ulong(v1, &filter_options->blob_limit_value)) { + filter_options->choice = LOFC_BLOB_LIMIT; + return 0; + } + } + else if (skip_prefix(arg, "sparse:", &v0)) { + if (skip_prefix(v0, "oid=", &v1)) { + filter_options->choice = LOFC_SPARSE_OID; + if (!get_oid_with_context(v1, GET_OID_BLOB, + &sparse_oid, &oc)) { + /* + * We successfully converted the + * blob-ish expression into an actual OID. Rewrite the raw_value + * in canonical form with just the OID. + * (If we send this request to the server, we + * want an absolute expression rather than a + * local-ref-relative expression.) + */ + free((char *)filter_options->raw_value); + filter_options->raw_value = + xstrfmt("sparse:oid=%s", + oid_to_hex(&sparse_oid)); + filter_options->sparse_oid_value = + oiddup(&sparse_oid); + } else { + /* + * We could not turn the expression into an + * OID. Leave the raw_value as is in case + * the server can parse it. (It may refer to + * a branch, commit, or blob we don't have.)
+ */ + } + return 0; + } + + if (skip_prefix(v0, "path=", &v1)) { + filter_options->choice = LOFC_SPARSE_PATH; + filter_options->sparse_path_value = strdup(v1); + return 0; + } + } + + die(_("invalid filter expression '%s'"), arg); + return 0; +} + +int opt_parse_list_objects_filter(const struct option *opt, + const char *arg, int unset) +{ + struct list_objects_filter_options *filter_options = opt->value; + + assert(arg); + assert(!unset); + + return parse_list_objects_filter(filter_options, arg); +} diff --git a/list-objects-filter-options.h b/list-objects-filter-options.h new file mode 100644 index 00000000000000..48c0354cc143d9 --- /dev/null +++ b/list-objects-filter-options.h @@ -0,0 +1,50 @@ +#ifndef LIST_OBJECTS_FILTER_OPTIONS_H +#define LIST_OBJECTS_FILTER_OPTIONS_H + +#include "parse-options.h" + +/* + * Common declarations and utilities for filtering objects (such as omitting + * large blobs) in list_objects:traverse_commit_list() and git-rev-list. + */ + +enum list_objects_filter_choice { + LOFC_DISABLED = 0, + LOFC_BLOB_NONE, + LOFC_BLOB_LIMIT, + LOFC_SPARSE_OID, + LOFC_SPARSE_PATH, +}; + +struct list_objects_filter_options { + /* + * The raw argument value given on the command line or + * protocol request. (The part after the "--keyword=".) + */ + const char *raw_value; + + /* + * Parsed values. Only 1 will be set depending on the flags below. 
+ */ + struct object_id *sparse_oid_value; + const char *sparse_path_value; + unsigned long blob_limit_value; + + enum list_objects_filter_choice choice; +}; + +/* Normalized command line arguments */ +#define CL_ARG__FILTER "filter" + +int parse_list_objects_filter(struct list_objects_filter_options *filter_options, + const char *arg); + +int opt_parse_list_objects_filter(const struct option *opt, + const char *arg, int unset); + +#define OPT_PARSE_LIST_OBJECTS_FILTER(fo) \ + { OPTION_CALLBACK, 0, CL_ARG__FILTER, fo, N_("args"), \ + N_("object filtering"), PARSE_OPT_NONEG, \ + opt_parse_list_objects_filter } + +#endif /* LIST_OBJECTS_FILTER_OPTIONS_H */ From d8dfa4479d1009dd4d5f92bd672d7239622ef292 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 1 Sep 2017 18:20:36 +0000 Subject: [PATCH 08/21] list-objects: add traverse_commit_list_filtered method Add traverse_commit_list_filtered() wrapper around the various filter methods using common data in object_filter_options. Signed-off-by: Jeff Hostetler --- list-objects.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ list-objects.h | 13 ++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/list-objects.c b/list-objects.c index 3e86008b1fb55c..f68530813a0173 100644 --- a/list-objects.c +++ b/list-objects.c @@ -7,6 +7,9 @@ #include "tree-walk.h" #include "revision.h" #include "list-objects.h" +#include "list-objects-filter-blobs-none.h" +#include "list-objects-filter-blobs-limit.h" +#include "list-objects-filter-sparse.h" static void process_blob(struct rev_info *revs, struct blob *blob, @@ -266,3 +269,45 @@ void traverse_commit_list(struct rev_info *revs, show_commit, show_object, show_data, NULL, NULL); } + +void traverse_commit_list_filtered( + struct list_objects_filter_options *filter_options, + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *show_data) +{ + switch (filter_options->choice) { + case 
LOFC_DISABLED: + traverse_commit_list(revs, show_commit, show_object, show_data); + return; + + case LOFC_BLOB_NONE: + traverse_commit_list__blobs_none( + revs, show_commit, show_object, print_omitted_object, + show_data); + return; + + case LOFC_BLOB_LIMIT: + traverse_commit_list__blobs_limit( + revs, show_commit, show_object, print_omitted_object, + show_data, filter_options->blob_limit_value); + return; + + case LOFC_SPARSE_OID: + traverse_commit_list__sparse_oid( + revs, show_commit, show_object, print_omitted_object, + show_data, filter_options->sparse_oid_value); + return; + + case LOFC_SPARSE_PATH: + traverse_commit_list__sparse_path( + revs, show_commit, show_object, print_omitted_object, + show_data, filter_options->sparse_path_value); + return; + + default: + die("unspecified list-objects filter"); + } +} diff --git a/list-objects.h b/list-objects.h index 39fcbb5058d775..77dfeb324f6dff 100644 --- a/list-objects.h +++ b/list-objects.h @@ -1,6 +1,9 @@ #ifndef LIST_OBJECTS_H #define LIST_OBJECTS_H +#include "oidset2.h" +#include "list-objects-filter-options.h" + typedef void (*show_commit_fn)(struct commit *, void *); typedef void (*show_object_fn)(struct object *, const char *, void *); void traverse_commit_list(struct rev_info *, show_commit_fn, show_object_fn, void *); @@ -38,4 +41,12 @@ void traverse_commit_list_worker( show_commit_fn, show_object_fn, void *show_data, filter_object_fn filter, void *filter_data); -#endif +void traverse_commit_list_filtered( + struct list_objects_filter_options *filter_options, + struct rev_info *revs, + show_commit_fn show_commit, + show_object_fn show_object, + oidset2_foreach_cb print_omitted_object, + void *show_data); + +#endif /* LIST_OBJECTS_H */ From 58f747e1fe1bf9cce9fe64376a08ad0a15888898 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 30 Jun 2017 13:39:05 -0400 Subject: [PATCH 09/21] rev-list: add list-objects filtering support Teach rev-list to use the filtering provided by the 
traverse_commit_list_filtered() interface to omit unwanted objects from the result. This feature is only enabled when one of the "--objects*" options is used. When the "--filter-print-omitted" option is used, the omitted objects and their sizes are printed at the end. These are marked with a "~". This can be combined with "--quiet" to get a list of just the omitted objects. Signed-off-by: Jeff Hostetler --- builtin/rev-list.c | 87 ++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/builtin/rev-list.c b/builtin/rev-list.c index c1c74d4a795643..73e8736442d084 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -54,6 +54,11 @@ static const char rev_list_usage[] = static struct progress *progress; static unsigned progress_counter; +static struct list_objects_filter_options filter_options; +static struct oidset2 missing_objects; +static int arg_print_missing; +static int arg_print_omitted; +static int arg_relax; static void finish_commit(struct commit *commit, void *data); static void show_commit(struct commit *commit, void *data) @@ -181,8 +186,26 @@ static void finish_commit(struct commit *commit, void *data) static void finish_object(struct object *obj, const char *name, void *cb_data) { struct rev_list_info *info = cb_data; - if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) + if (obj->type == OBJ_BLOB && !has_object_file(&obj->oid)) { + if (arg_print_missing) { + oidset2_insert_without_length(&missing_objects, + &obj->oid, obj->type, + name); + return; + } + if (arg_relax) { + /* + * Relax consistency checks to not complain about + * omitted objects (presumably caused by a + * previous use of the 'filter-objects' feature). + * + * Note that this is independent of any filtering that + * we are doing in this run.
+ */ + return; + } die("missing blob object '%s'", oid_to_hex(&obj->oid)); + } if (info->revs->verify_objects && !obj->parsed && obj->type != OBJ_COMMIT) parse_object(&obj->oid); } @@ -202,6 +225,25 @@ static void show_edge(struct commit *commit) printf("-%s\n", oid_to_hex(&commit->object.oid)); } +static void print_omitted_object(int i, int i_limit, struct oidset2_entry *e, void *cb_data) +{ + /* struct rev_list_info *info = cb_data; */ + const char *tn = typename(e->type); + + if (e->object_length_valid) + printf("~%s %s %lu\n", oid_to_hex(&e->oid), tn, e->object_length); + else + printf("~%s %s\n", oid_to_hex(&e->oid), tn); +} + +static void print_missing_object(int i, int i_limit, struct oidset2_entry *e, void *cb_data) +{ + /* struct rev_list_info *info = cb_data; */ + const char *tn = typename(e->type); + + printf("?%s %s\n", oid_to_hex(&e->oid), tn); +} + static void print_var_str(const char *var, const char *val) { printf("%s='%s'\n", var, val); @@ -335,6 +377,30 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) show_progress = arg; continue; } + + if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { + parse_list_objects_filter(&filter_options, arg); + if (filter_options.choice && !revs.blob_objects) + die(_("object filtering requires --objects")); + if (filter_options.choice == LOFC_SPARSE_OID && + !filter_options.sparse_oid_value) + die(_("invalid sparse value '%s'"), + filter_options.raw_value); + continue; + } + if (!strcmp(arg, "--filter-print-missing")) { + arg_print_missing = 1; + continue; + } + if (!strcmp(arg, "--filter-print-omitted")) { + arg_print_omitted = 1; + continue; + } + if (!strcmp(arg, "--filter-relax")) { + arg_relax = 1; + continue; + } + usage(rev_list_usage); } @@ -360,6 +426,9 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) if (revs.show_notes) die(_("rev-list does not support display of notes")); + if (filter_options.choice && use_bitmap_index) + die(_("cannot combine --use-bitmap-index 
with object filtering")); + save_commit_buffer = (revs.verbose_header || revs.grep_filter.pattern_list || revs.grep_filter.header_list); @@ -404,7 +473,21 @@ int cmd_rev_list(int argc, const char **argv, const char *prefix) return show_bisect_vars(&info, reaches, all); } - traverse_commit_list(&revs, show_commit, show_object, &info); + if (arg_print_missing) + memset(&missing_objects, 0, sizeof(missing_objects)); + + if (filter_options.choice) + traverse_commit_list_filtered(&filter_options, &revs, + show_commit, show_object, + (arg_print_omitted ? print_omitted_object : NULL), + &info); + else + traverse_commit_list(&revs, show_commit, show_object, &info); + + if (arg_print_missing) { + oidset2_foreach(&missing_objects, print_missing_object, &info); + oidset2_clear(&missing_objects); + } stop_progress(&progress); From a3b97298304516da4d72a1df78c04937480d40fa Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 30 Jun 2017 13:47:07 -0400 Subject: [PATCH 10/21] rev-list: document list-objects filtering Signed-off-by: Jeff Hostetler --- Documentation/git-rev-list.txt | 9 ++++++++- Documentation/rev-list-options.txt | 32 ++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/Documentation/git-rev-list.txt b/Documentation/git-rev-list.txt index ef22f1775b6348..b2e825521316d4 100644 --- a/Documentation/git-rev-list.txt +++ b/Documentation/git-rev-list.txt @@ -47,7 +47,14 @@ SYNOPSIS [ --fixed-strings | -F ] [ --date=] [ [ --objects | --objects-edge | --objects-edge-aggressive ] - [ --unpacked ] ] + [ --unpacked ] + [ [ --filter-omit-all-blobs | + --filter-omit-large-blobs=[kmg] | + --filter-use-blob= | + --filter-use-path= ] + [ --filter-print-missing ] + [ --filter-print-omitted ] ] ] + [ --filter-relax ] [ --pretty | --header ] [ --bisect ] [ --bisect-vars ] diff --git a/Documentation/rev-list-options.txt b/Documentation/rev-list-options.txt index 7d860bfca1442e..7b79a999ef10e7 100644 --- a/Documentation/rev-list-options.txt 
+++ b/Documentation/rev-list-options.txt @@ -706,6 +706,38 @@ ifdef::git-rev-list[] --unpacked:: Only useful with `--objects`; print the object IDs that are not in packs. + +--filter-omit-all-blobs:: + Only useful with one of the `--objects*`; omits all blobs from + the printed list of objects. + +--filter-omit-large-blobs=[kmg]:: + Only useful with one of the `--objects*`; omits blobs larger than + n bytes from the printed list of objects. May optionally be + followed by 'k', 'm', or 'g' units. Value may be zero. Special + files (matching ".git*") are always included, regardless of size. + +--filter-use-blob=:: +--filter-use-path=:: + Only useful with one of the `--objects*`; uses a sparse-checkout + specification contained in the given object or file to filter the + result to only contain blobs referenced by such a sparse-checkout. + +--filter-print-missing:: + Prints a list of the missing objects for the requested traversal. + Object IDs are prefixed with a ``?'' character. The object type + is printed after the ID. This may be used with or without any of + the above filtering options. + +--filter-print-omitted:: + Only useful with one of the above `--filter*`; prints a list + of the omitted objects. Object IDs are prefixed with a ``~'' + character. The object type and, when known, its size are printed after the ID. + +--filter-relax:: + Relax consistency checking for missing blobs. Do not warn of + missing blobs during normal (non-filtering) object traversal + following an earlier partial/narrow clone or fetch.
endif::git-rev-list[] --no-walk[=(sorted|unsorted)]:: From 03fb7bf271dc188b5e0253a3b03259bf43f740b3 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Fri, 30 Jun 2017 15:34:06 -0400 Subject: [PATCH 11/21] t6112: rev-list object filtering test Signed-off-by: Jeff Hostetler --- t/t6112-rev-list-filters-objects.sh | 237 ++++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100755 t/t6112-rev-list-filters-objects.sh diff --git a/t/t6112-rev-list-filters-objects.sh b/t/t6112-rev-list-filters-objects.sh new file mode 100755 index 00000000000000..69e9fb81846d35 --- /dev/null +++ b/t/t6112-rev-list-filters-objects.sh @@ -0,0 +1,237 @@ +#!/bin/sh + +test_description='git rev-list with object filtering for partial clone' + +. ./test-lib.sh + +# test the omit-all filter + +test_expect_success 'setup' ' + echo "{print \$1}" >print_1.awk && + echo "{print \$2}" >print_2.awk && + + for n in 1 2 3 4 5 + do + echo "This is file: $n" > file.$n + git add file.$n + git commit -m "$n" + done +' + +# Verify the omitted ("~OID") lines match the predicted list of OIDs. +test_expect_success 'omit-all-blobs omitted 5 blobs' ' + git ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + git rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:none \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +# Verify the complete OID list matches the unfiltered OIDs plus the omitted OIDs. +test_expect_success 'omit-all-blobs nothing else changed' ' + git rev-list HEAD --objects \ + | awk -f print_1.awk \ + | sort >expected && + git rev-list HEAD --objects --filter-print-omitted --filter=blob:none \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +# test the size-based filtering. 
+ +test_expect_success 'setup_large' ' + for n in 1000 10000 + do + printf "%"$n"s" X > large.$n + git add large.$n + git commit -m "$n" + done +' + +test_expect_success 'omit-large-blobs omit 2 blobs' ' + git ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=500 \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'omit-large-blobs nothing else changed' ' + git rev-list HEAD --objects \ + | awk -f print_1.awk \ + | sort >expected && + git rev-list HEAD --objects --filter-print-omitted --filter=blob:limit=500 \ + | awk -f print_1.awk \ + | sed "s/~//" \ + | sort >observed && + test_cmp observed expected +' + +# boundary test around the size parameter. +# a blob is kept only when strictly smaller than the limit, so limits of 500 +# and 1000 should have the same results, but 1001 should omit one blob fewer. + +test_expect_success 'omit-large-blobs omit 2 blobs' ' + git ls-files -s large.1000 large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1000 \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'omit-large-blobs omit 1 blob' ' + git ls-files -s large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1001 \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'omit-large-blobs omit 1 blob (1k)' ' + git ls-files -s large.10000 \ + | awk -f print_2.awk \ + | sort >expected && + git rev-list HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1k \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +test_expect_success 'omit-large-blobs omit no blob (1m)' ' + cat expected && + git rev-list
HEAD --quiet --objects --filter-print-omitted --filter=blob:limit=1m \ + | awk -f print_1.awk \ + | sed "s/~//" >observed && + test_cmp observed expected +' + +# Test sparse-pattern filtering (using explicit local patterns). +# We use the same disk format as sparse-checkout to specify the +# filtering, but do not require sparse-checkout to be enabled. + +test_expect_success 'setup using sparse file' ' + mkdir dir1 && + for n in sparse1 sparse2 + do + echo "This is file: $n" > $n + git add $n + echo dir1/$n > dir1/$n + git add dir1/$n + done && + git commit -m "sparse" && + echo dir1/ >pattern1 && + echo sparse1 >pattern2 +' + +# pattern1 should only include the 2 dir1/* files. +# and omit the 5 file.*, 2 large.*, and 2 top-level sparse* files. +test_expect_success 'sparse using path pattern1' ' + git rev-list HEAD --objects --filter-print-omitted --filter=sparse:path=pattern1 >out && + + grep "^~" out >blobs.omitted && + test $(cat blobs.omitted | wc -l) = 9 && + + grep "dir1/sparse" out >blobs.included && + test $(cat blobs.included | wc -l) = 2 +' + +# pattern2 should include the sparse1 and dir1/sparse1. +# and omit the 5 file.*, 2 large.*, and the 2 sparse2 files. +test_expect_success 'sparse using path pattern2' ' + git rev-list HEAD --objects --filter-print-omitted --filter=sparse:path=pattern2 >out && + + grep "^~" out >blobs.omitted && + test $(cat blobs.omitted | wc -l) = 9 && + + grep "sparse1" out >blobs.included && + test $(cat blobs.included | wc -l) = 2 +' + +# Test sparse-pattern filtering (using a blob in the repo). +# This could be used to later let pack-objects do filtering. + +# pattern1 should only include the 2 dir1/* files. +# and omit the 5 file.*, 2 large.*, 2 top-level sparse*, and 1 pattern file. 
+test_expect_success 'sparse using OID for pattern1' ' + git add pattern1 && + git commit -m "pattern1" && + + git rev-list HEAD --objects >normal.output && + grep "pattern1" pattern1.oid && + + git rev-list HEAD --objects --filter-print-omitted --filter=sparse:oid=`cat pattern1.oid` >out && + + grep "^~" out >blobs.omitted && + test $(cat blobs.omitted | wc -l) = 10 && + + grep "dir1/sparse" out >blobs.included && + test $(cat blobs.included | wc -l) = 2 +' + +# repeat previous test but use blob-ish expression rather than OID. +test_expect_success 'sparse using blob-ish to get OID for pattern spec' ' + git rev-list HEAD --objects --filter-print-omitted --filter=sparse:oid=HEAD:pattern1 >out && + + grep "^~" out >blobs.omitted && + test $(cat blobs.omitted | wc -l) = 10 && + + grep "dir1/sparse" out >blobs.included && + test $(cat blobs.included | wc -l) = 2 +' + +# pattern2 should include the sparse1 and dir1/sparse1. +# and omit the 5 file.*, 2 large.*, 2 top-level sparse*, and 2 pattern files. +test_expect_success 'sparse using OID for pattern2' ' + git add pattern2 && + git commit -m "pattern2" && + + git rev-list HEAD --objects >normal.output && + grep "pattern2" pattern2.oid && + + git rev-list HEAD --objects --filter-print-omitted --filter=sparse:oid=`cat pattern2.oid` >out && + + grep "^~" out >blobs.omitted && + test $(cat blobs.omitted | wc -l) = 11 && + + grep "sparse1" out >blobs.included && + test $(cat blobs.included | wc -l) = 2 +' + +# repeat previous test but use blob-ish expression rather than OID. +test_expect_success 'sparse using blob-ish rather than OID for pattern2' ' + git rev-list HEAD --objects --filter-print-omitted --filter=sparse:oid=HEAD:pattern2 >out && + + grep "^~" out >blobs.omitted && + test $(cat blobs.omitted | wc -l) = 11 && + + grep "sparse1" out >blobs.included && + test $(cat blobs.included | wc -l) = 2 +' + +# delete some loose objects and test rev-list printing them as missing. 
+test_expect_success 'print missing objects' ' + git ls-files -s file.1 file.2 file.3 file.4 file.5 \ + | awk -f print_2.awk \ + | sort >expected && + for id in `cat expected | sed "s|..|&/|"` + do + rm .git/objects/$id + done && + git rev-list --quiet HEAD --filter-print-missing --objects \ + | awk -f print_1.awk \ + | sed "s/?//" \ + | sort >observed && + test_cmp observed expected +' + +test_done From 624e5a213f19393b8e3c70b4dbe3cb2280b9ec27 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 16:06:54 +0000 Subject: [PATCH 12/21] pack-objects: add list-objects filtering Teach pack-objects to use the filtering provided by the traverse_commit_list_filtered() interface to omit unwanted objects from the resulting packfile. This feature is intended for partial clone/fetch. Filtering requires the use of the "--stdout" option. Signed-off-by: Jeff Hostetler --- builtin/pack-objects.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index f721137eaf8814..5d6352b72b9588 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -79,6 +79,8 @@ static unsigned long cache_max_small_delta_size = 1000; static unsigned long window_memory_limit = 0; +static struct list_objects_filter_options filter_options; + /* * stats */ @@ -2816,7 +2818,12 @@ static void get_object_list(int ac, const char **av) if (prepare_revision_walk(&revs)) die("revision walk setup failed"); mark_edges_uninteresting(&revs, show_edge); - traverse_commit_list(&revs, show_commit, show_object, NULL); + if (filter_options.choice) + traverse_commit_list_filtered(&filter_options, &revs, + show_commit, show_object, + NULL, NULL); + else + traverse_commit_list(&revs, show_commit, show_object, NULL); if (unpack_unreachable_expiration) { revs.ignore_missing_links = 1; @@ -2952,6 +2959,9 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) N_("use a bitmap index if available to speed up 
counting objects")), OPT_BOOL(0, "write-bitmap-index", &write_bitmap_index, N_("write a bitmap index together with the pack index")), + + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), + OPT_END(), }; @@ -3028,6 +3038,12 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix) if (!rev_list_all || !rev_list_reflog || !rev_list_index) unpack_unreachable_expiration = 0; + if (filter_options.choice) { + if (!pack_to_stdout) + die("cannot use filtering with an indexable pack."); + use_bitmap_index = 0; + } + /* * "soft" reasons not to use bitmaps - for on-disk repack by default we want * From a899dc3f8a4f504054083d942f6e2e22ab035945 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 16:30:35 +0000 Subject: [PATCH 13/21] pack-objects: document list-objects filtering Add help text for list-objects filtering options. Signed-off-by: Jeff Hostetler --- Documentation/git-pack-objects.txt | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/git-pack-objects.txt b/Documentation/git-pack-objects.txt index 473a16135abf86..dffd463eb39c11 100644 --- a/Documentation/git-pack-objects.txt +++ b/Documentation/git-pack-objects.txt @@ -236,6 +236,23 @@ So does `git bundle` (see linkgit:git-bundle[1]) when it creates a bundle. With this option, parents that are hidden by grafts are packed nevertheless. +--filter-omit-all-blobs:: + Requires `--stdout`. Omits all blobs from the packfile. + +--filter-omit-large-blobs=[kmg]:: + Requires `--stdout`. Omits large blobs larger than n bytes from + the packfile. May optionally be followed by 'k', 'm', or 'g' units. + Value may be zero. Special files (matching ".git*") are always + included, regardless of size. + +--filter-use-blob=:: +--filter-use-path=:: + Requires `--stdout`. Use a sparse-checkout specification to + filter the resulting packfile to only contain the blobs that + would be referenced by such a sparse-checkout. `` specifies + a local pathname. 
`` specifies an expression that can + be evaluated to a blob. + SEE ALSO -------- linkgit:git-rev-list[1] From 8d05df88dcbc97b08d7a8c1c29af52503fe3a947 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 18:10:19 +0000 Subject: [PATCH 14/21] pack-protocol: document list-objects filtering Signed-off-by: Jeff Hostetler --- Documentation/technical/pack-protocol.txt | 19 +++++++++++++++++++ .../technical/protocol-capabilities.txt | 7 +++++++ 2 files changed, 26 insertions(+) diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt index ed1eae8b83a651..d1c73136387b9c 100644 --- a/Documentation/technical/pack-protocol.txt +++ b/Documentation/technical/pack-protocol.txt @@ -212,6 +212,7 @@ out of what the server said it could do with the first 'want' line. upload-request = want-list *shallow-line *1depth-request + [filter-request] flush-pkt want-list = first-want @@ -227,6 +228,13 @@ out of what the server said it could do with the first 'want' line. additional-want = PKT-LINE("want" SP obj-id) depth = 1*DIGIT + + filter-request = PKT-LINE("filter-omit-all-blobs") / + PKT-LINE("filter-omit-large-blobs" SP magnitude) / + PKT-LINE("filter-use-blob" SP obj-id) / + PKT-LINE("filter-use-path" SP path) + + magnitude = 1*DIGIT [ "k" / "m" / "g" ] ---- Clients MUST send all the obj-ids it wants from the reference @@ -249,6 +257,17 @@ complete those commits. Commits whose parents are not received as a result are defined as shallow and marked as such in the server. This information is sent back to the client in the next step. +The client can optionally request that pack-objects omit various +objects from the packfile using one of several filtering techniques. +These are intended for use with partial clone/fetch operations. +"filter-omit-all-blobs" requests that all blobs be omitted from the +packfile.
"filter-omit-large-blobs" requests that blobs larger than +the requested size be omitted, unless they have a ".git*" special +filename. "filter-use-blob" requests blob filtering based upon a +sparse-checkout specification in the named blob. "filter-use-path" +requests blob filtering based upon a sparse-checkout specification +file. + Once all the 'want's and 'shallow's (and optional 'deepen') are transferred, clients MUST send a flush-pkt, to tell the server side that it is done sending the list. diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt index 26dcc6f502020d..2ae6d636e68824 100644 --- a/Documentation/technical/protocol-capabilities.txt +++ b/Documentation/technical/protocol-capabilities.txt @@ -309,3 +309,10 @@ to accept a signed push certificate, and asks the to be included in the push certificate. A send-pack client MUST NOT send a push-cert packet unless the receive-pack server advertises this capability. + +filter-objects +-------------- + +If the upload-pack server advertises the 'filter-objects' capability, +fetch-pack may send "filter-*" commands to request a partial clone +or fetch where the server omits various objects from the packfile. From 64e1eb4a96a943765006619397f1623e258f213b Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 19:22:12 +0000 Subject: [PATCH 15/21] upload-pack: add list-objects filtering Teach upload-pack to accept list-objects filtering parameters over the git protocol and pass them to pack-objects.
Signed-off-by: Jeff Hostetler --- upload-pack.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/upload-pack.c b/upload-pack.c index 7efff2fbfd7638..0c351b70518919 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -18,6 +18,7 @@ #include "parse-options.h" #include "argv-array.h" #include "prio-queue.h" +#include "list-objects-filter-options.h" static const char * const upload_pack_usage[] = { N_("git upload-pack [] "), @@ -64,6 +65,9 @@ static int advertise_refs; static int stateless_rpc; static const char *pack_objects_hook; +static int capability_filter_objects_requested; +static struct list_objects_filter_options filter_options; + static void reset_timeout(void) { alarm(timeout); @@ -132,6 +136,14 @@ static void create_pack_file(void) if (use_include_tag) argv_array_push(&pack_objects.args, "--include-tag"); + /* + * TODO Do we need to quote raw_value? + */ + if (filter_options.choice) + argv_array_pushf(&pack_objects.args, "--%s=%s", + CL_ARG__FILTER, + filter_options.raw_value); + pack_objects.in = -1; pack_objects.out = -1; pack_objects.err = -1; @@ -794,6 +806,12 @@ static void receive_needs(void) deepen_rev_list = 1; continue; } + if (skip_prefix(line, (CL_ARG__FILTER " "), &arg)) { + parse_list_objects_filter(&filter_options, arg); + if (filter_options.choice && !capability_filter_objects_requested) + die("git upload-pack: filtering capability not negotiated"); + continue; + } if (!skip_prefix(line, "want ", &arg) || get_oid_hex(arg, &oid_buf)) die("git upload-pack: protocol error, " @@ -821,6 +839,8 @@ static void receive_needs(void) no_progress = 1; if (parse_feature_request(features, "include-tag")) use_include_tag = 1; + if (parse_feature_request(features, CL_ARG__FILTER)) + capability_filter_objects_requested = 1; o = parse_object(&oid_buf); if (!o) { @@ -929,7 +949,8 @@ static int send_ref(const char *refname, const struct object_id *oid, { static const char *capabilities = "multi_ack thin-pack side-band" " 
side-band-64k ofs-delta shallow deepen-since deepen-not" - " deepen-relative no-progress include-tag multi_ack_detailed"; + " deepen-relative no-progress include-tag multi_ack_detailed" + " " CL_ARG__FILTER; const char *refname_nons = strip_namespace(refname); struct object_id peeled; From 0cd2a312148a56b7b6c01dfa2da7ae2386c2953c Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 19:33:45 +0000 Subject: [PATCH 16/21] transport: add list-objects filtering Signed-off-by: Jeff Hostetler --- transport.c | 5 +++++ transport.h | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/transport.c b/transport.c index d75ff0514d8d4d..772c5fe046b556 100644 --- a/transport.c +++ b/transport.c @@ -162,6 +162,10 @@ static int set_git_option(struct git_transport_options *opts, opts->deepen_relative = !!value; return 0; } + else if (!strcmp(name, TRANS_OPT_LIST_OBJECTS_FILTER)) { + parse_list_objects_filter(&opts->filter_options, value); + return 0; + } return 1; } @@ -229,6 +233,7 @@ static int fetch_refs_via_pack(struct transport *transport, data->options.check_self_contained_and_connected; args.cloning = transport->cloning; args.update_shallow = data->options.update_shallow; + args.filter_options = data->options.filter_options; if (!data->got_remote_heads) { connect_setup(transport, 0); diff --git a/transport.h b/transport.h index bc5571574b6780..23e622b318dbfd 100644 --- a/transport.h +++ b/transport.h @@ -4,6 +4,7 @@ #include "cache.h" #include "run-command.h" #include "remote.h" +#include "list-objects-filter-options.h" struct string_list; @@ -21,6 +22,7 @@ struct git_transport_options { const char *uploadpack; const char *receivepack; struct push_cas_option *cas; + struct list_objects_filter_options filter_options; }; enum transport_family { @@ -210,6 +212,9 @@ void transport_check_allowed(const char *type); /* Send push certificates */ #define TRANS_OPT_PUSH_CERT "pushcert" +/* See Documentation/technical/pack-protocol.txt */ +#define 
TRANS_OPT_LIST_OBJECTS_FILTER CL_ARG__FILTER + /** * Returns 0 if the option was used, non-zero otherwise. Prints a * message to stderr if the option is not used. From f55d785f7bb1a88b722990617bd1bf9169447b2e Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 19:50:26 +0000 Subject: [PATCH 17/21] fetch-pack: add list-objects filtering Teach fetch-pack to accept and pass list-objects filtering parameters to upload-pack. Signed-off-by: Jeff Hostetler --- builtin/fetch-pack.c | 5 +++++ fetch-pack.c | 26 ++++++++++++++++++++++++++ fetch-pack.h | 2 ++ 3 files changed, 33 insertions(+) diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c index 366b9d13f929b7..0d0d9611b125cb 100644 --- a/builtin/fetch-pack.c +++ b/builtin/fetch-pack.c @@ -143,6 +143,11 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix) args.update_shallow = 1; continue; } + if (skip_prefix(arg, ("--" CL_ARG__FILTER "="), &arg)) { + parse_list_objects_filter(&args.filter_options, arg); + continue; + } + usage(fetch_pack_usage); } if (deepen_not.nr) diff --git a/fetch-pack.c b/fetch-pack.c index 105506e9aa5e5a..f12d39d5d18601 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -377,6 +377,8 @@ static int find_common(struct fetch_pack_args *args, if (prefer_ofs_delta) strbuf_addstr(&c, " ofs-delta"); if (deepen_since_ok) strbuf_addstr(&c, " deepen-since"); if (deepen_not_ok) strbuf_addstr(&c, " deepen-not"); + if (args->filter_options.choice) + strbuf_addstr(&c, (" " CL_ARG__FILTER)); if (agent_supported) strbuf_addf(&c, " agent=%s", git_user_agent_sanitized()); packet_buf_write(&req_buf, "want %s%s\n", remote_hex, c.buf); @@ -407,6 +409,14 @@ static int find_common(struct fetch_pack_args *args, packet_buf_write(&req_buf, "deepen-not %s", s->string); } } + + /* + * TODO Do we need to quote raw_value? 
+ */ + if (args->filter_options.choice) + packet_buf_write(&req_buf, (CL_ARG__FILTER " %s"), + args->filter_options.raw_value); + packet_buf_flush(&req_buf); state_len = req_buf.len; @@ -850,6 +860,17 @@ static int get_pack(struct fetch_pack_args *args, "--keep=fetch-pack %"PRIuMAX " on %s", (uintmax_t)getpid(), hostname); } + + /* + * Relax consistency checks to allow missing blobs (presumably + * because they are exactly the set that we requested to be + * omitted). + * + * TODO remove this. + */ + if (args->filter_options.choice) + argv_array_push(&cmd.args, "--filter-relax"); + if (args->check_self_contained_and_connected) argv_array_push(&cmd.args, "--check-self-contained-and-connected"); } @@ -963,6 +984,11 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, else prefer_ofs_delta = 0; + if (server_supports(CL_ARG__FILTER)) + print_verbose(args, _("Server supports " CL_ARG__FILTER)); + else if (args->filter_options.choice) + die("Server does not support %s", CL_ARG__FILTER); + if ((agent_feature = server_feature_value("agent", &agent_len))) { agent_supported = 1; if (agent_len) diff --git a/fetch-pack.h b/fetch-pack.h index b6aeb43a8e2143..72690653489eac 100644 --- a/fetch-pack.h +++ b/fetch-pack.h @@ -3,6 +3,7 @@ #include "string-list.h" #include "run-command.h" +#include "list-objects-filter-options.h" struct oid_array; @@ -12,6 +13,7 @@ struct fetch_pack_args { int depth; const char *deepen_since; const struct string_list *deepen_not; + struct list_objects_filter_options filter_options; unsigned deepen_relative:1; unsigned quiet:1; unsigned keep_pack:1; From 9c2a2ac05724e250ab6824a18baf6039b599b29c Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 19:59:21 +0000 Subject: [PATCH 18/21] REMOVE: connected: add filter-relax Signed-off-by: Jeff Hostetler --- connected.c | 3 +++ connected.h | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/connected.c b/connected.c index f416b05051f3b7..1bab3d2e8e4ca2 100644 ---
a/connected.c +++ b/connected.c @@ -63,6 +63,9 @@ int check_connected(sha1_iterate_fn fn, void *cb_data, argv_array_pushf(&rev_list.args, "--progress=%s", _("Checking connectivity")); + if (opt->filter_relax) + argv_array_push(&rev_list.args, "--filter-relax"); + rev_list.git_cmd = 1; rev_list.env = opt->env; rev_list.in = -1; diff --git a/connected.h b/connected.h index 4ca325f79dc5ee..8fd21effa30392 100644 --- a/connected.h +++ b/connected.h @@ -34,6 +34,14 @@ struct check_connected_options { /* If non-zero, show progress as we traverse the objects. */ int progress; + /* + * Relax consistency checks for missing blobs (presumably due to + * earlier use of object filtering). + * + * TODO remove this. + */ + int filter_relax; + /* * Insert these variables into the environment of the child process. */ From 74555d30476af26a778f51afc4521d6b9ffaa681 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 20:03:50 +0000 Subject: [PATCH 19/21] REMOVE: index-pack: add filter-relax Signed-off-by: Jeff Hostetler --- builtin/index-pack.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index f2be145e128d81..0055b5ad87953d 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -13,6 +13,7 @@ #include "streaming.h" #include "thread-utils.h" #include "packfile.h" +#include "list-objects-filter-options.h" static const char index_pack_usage[] = "git index-pack [-v] [-o ] [--keep | --keep=] [--verify] [--strict] ( | --stdin [--fix-thin] [])"; @@ -82,6 +83,7 @@ static int verbose; static int show_resolving_progress; static int show_stat; static int check_self_contained_and_connected; +static int filter_relax; static struct progress *progress; @@ -222,6 +224,17 @@ static unsigned check_object(struct object *obj) if (!(obj->flags & FLAG_CHECKED)) { unsigned long size; int type = sha1_object_info(obj->oid.hash, &size); + + if (type <= 0 && filter_relax) { + /* + * Relax consistency checks to not complain 
about + * missing objects (presumably because of earlier + * object filtering). + */ + obj->flags |= FLAG_CHECKED; + return 0; + } + if (type <= 0) die(_("did not receive expected object %s"), oid_to_hex(&obj->oid)); @@ -1728,6 +1741,8 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix) die(_("bad %s"), arg); } else if (skip_prefix(arg, "--max-input-size=", &arg)) { max_input_size = strtoumax(arg, NULL, 10); + } else if (!strcmp(arg, "--filter-relax")) { + filter_relax = 1; } else usage(index_pack_usage); continue; From 4446072d9e1e0872ceecb9d2f62d3ddd503201e7 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 20:38:50 +0000 Subject: [PATCH 20/21] clone: add list-objects filtering Signed-off-by: Jeff Hostetler --- builtin/clone.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/builtin/clone.c b/builtin/clone.c index dbddd98f80d666..8c9684c1e21ba7 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -26,6 +26,7 @@ #include "run-command.h" #include "connected.h" #include "packfile.h" +#include "list-objects-filter-options.h" /* * Overall FIXMEs: @@ -60,6 +61,7 @@ static struct string_list option_optional_reference = STRING_LIST_INIT_NODUP; static int option_dissociate; static int max_jobs = -1; static struct string_list option_recurse_submodules = STRING_LIST_INIT_NODUP; +static struct list_objects_filter_options filter_options; static int recurse_submodules_cb(const struct option *opt, const char *arg, int unset) @@ -135,6 +137,7 @@ static struct option builtin_clone_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -648,6 +651,8 @@ static void update_remote_refs(const struct ref *refs, if (check_connectivity) { struct check_connected_options opt = CHECK_CONNECTED_INIT; + opt.filter_relax = filter_options.choice != LOFC_DISABLED; + opt.transport = transport; opt.progress = 
transport->progress; @@ -1073,6 +1078,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix) warning(_("--shallow-since is ignored in local clones; use file:// instead.")); if (option_not.nr) warning(_("--shallow-exclude is ignored in local clones; use file:// instead.")); + if (filter_options.choice) + warning(_("Object filtering is ignored in local clones; use file:// instead.")); if (!access(mkpath("%s/shallow", path), F_OK)) { if (option_local > 0) warning(_("source repository is shallow, ignoring --local")); @@ -1104,6 +1111,10 @@ int cmd_clone(int argc, const char **argv, const char *prefix) transport_set_option(transport, TRANS_OPT_UPLOADPACK, option_upload_pack); + if (filter_options.choice) + transport_set_option(transport, TRANS_OPT_LIST_OBJECTS_FILTER, + filter_options.raw_value); + if (transport->smart_options && !deepen) transport->smart_options->check_self_contained_and_connected = 1; From 4ab5a0b44a2a55923509a87127d1ead9de8a56c3 Mon Sep 17 00:00:00 2001 From: Jeff Hostetler Date: Thu, 7 Sep 2017 20:57:24 +0000 Subject: [PATCH 21/21] fetch: add list-objects filtering parameters Signed-off-by: Jeff Hostetler --- builtin/fetch.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/builtin/fetch.c b/builtin/fetch.c index 225c734924f148..329bd9d2497c33 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -18,6 +18,7 @@ #include "argv-array.h" #include "utf8.h" #include "packfile.h" +#include "list-objects-filter-options.h" static const char * const builtin_fetch_usage[] = { N_("git fetch [] [ [...]]"), @@ -55,6 +56,7 @@ static int recurse_submodules_default = RECURSE_SUBMODULES_ON_DEMAND; static int shown_url = 0; static int refmap_alloc, refmap_nr; static const char **refmap_array; +static struct list_objects_filter_options filter_options; static int git_fetch_config(const char *k, const char *v, void *cb) { @@ -160,6 +162,7 @@ static struct option builtin_fetch_options[] = { TRANSPORT_FAMILY_IPV4), OPT_SET_INT('6', 
"ipv6", &family, N_("use IPv6 addresses only"), TRANSPORT_FAMILY_IPV6), + OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_END() }; @@ -754,6 +757,9 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, const char *filename = dry_run ? "/dev/null" : git_path_fetch_head(); int want_status; int summary_width = transport_summary_width(ref_map); + struct check_connected_options opt = CHECK_CONNECTED_INIT; + + opt.filter_relax = filter_options.choice != LOFC_DISABLED; fp = fopen(filename, "a"); if (!fp) @@ -765,7 +771,7 @@ static int store_updated_refs(const char *raw_url, const char *remote_name, url = xstrdup("foreign"); rm = ref_map; - if (check_connected(iterate_ref_map, &rm, NULL)) { + if (check_connected(iterate_ref_map, &rm, &opt)) { rc = error(_("%s did not send all necessary objects\n"), url); goto abort; } @@ -906,6 +912,8 @@ static int quickfetch(struct ref *ref_map) struct ref *rm = ref_map; struct check_connected_options opt = CHECK_CONNECTED_INIT; + opt.filter_relax = filter_options.choice != LOFC_DISABLED; + /* * If we are deepening a shallow clone we already have these * objects reachable. Running rev-list here will return with