From 56b7ca5840784dadb918104264dbb64312a752f6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 25 Jun 2018 13:16:47 -0400 Subject: [PATCH 01/18] commit-reach: move walk methods from commit.c There are several commit walks in the codebase. Group them together into a new commit-reach.c file and corresponding header. After we group these walks into one place, we can reduce duplicate logic by calling equivalent methods. The method declarations in commit.h are not touched by this commit and will be moved in a following commit. Many consumers need to point to commit-reach.h and that would bloat this commit. Signed-off-by: Derrick Stolee --- Makefile | 1 + commit-reach.c | 360 +++++++++++++++++++++++++++++++++++++++++++++++++ commit-reach.h | 42 ++++++ commit.c | 358 ------------------------------------------------ object.h | 2 +- 5 files changed, 404 insertions(+), 359 deletions(-) create mode 100644 commit-reach.c create mode 100644 commit-reach.h diff --git a/Makefile b/Makefile index bb8bd672016ddb..59781f4bc35945 100644 --- a/Makefile +++ b/Makefile @@ -829,6 +829,7 @@ LIB_OBJS += column.o LIB_OBJS += combine-diff.o LIB_OBJS += commit.o LIB_OBJS += commit-graph.o +LIB_OBJS += commit-reach.o LIB_OBJS += compat/obstack.o LIB_OBJS += compat/terminal.o LIB_OBJS += config.o diff --git a/commit-reach.c b/commit-reach.c new file mode 100644 index 00000000000000..8ab6044414a127 --- /dev/null +++ b/commit-reach.c @@ -0,0 +1,360 @@ +#include "cache.h" +#include "prio-queue.h" +#include "commit.h" +#include "commit-reach.h" + +/* Remember to update object flag allocation in object.h */ +#define PARENT1 (1u<<16) +#define PARENT2 (1u<<17) +#define STALE (1u<<18) +#define RESULT (1u<<19) + +static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT); + +static int queue_has_nonstale(struct prio_queue *queue) +{ + int i; + for (i = 0; i < queue->nr; i++) { + struct commit *commit = queue->array[i].data; + if (!(commit->object.flags & STALE)) + return 1; + } + return 0; +} + +/* all input commits in one and twos[] must have been parsed! */ +static struct commit_list *paint_down_to_common(struct commit *one, int n, + struct commit **twos, + int min_generation) +{ + struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; + struct commit_list *result = NULL; + int i; + uint32_t last_gen = GENERATION_NUMBER_INFINITY; + + one->object.flags |= PARENT1; + if (!n) { + commit_list_append(one, &result); + return result; + } + prio_queue_put(&queue, one); + + for (i = 0; i < n; i++) { + twos[i]->object.flags |= PARENT2; + prio_queue_put(&queue, twos[i]); + } + + while (queue_has_nonstale(&queue)) { + struct commit *commit = prio_queue_get(&queue); + struct commit_list *parents; + int flags; + + if (commit->generation > last_gen) + BUG("bad generation skip %8x > %8x at %s", + commit->generation, last_gen, + oid_to_hex(&commit->object.oid)); + last_gen = commit->generation; + + if (commit->generation < min_generation) + break; + + flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); + if (flags == (PARENT1 | PARENT2)) { + if (!(commit->object.flags & RESULT)) { + commit->object.flags |= RESULT; + commit_list_insert_by_date(commit, &result); + } + /* Mark parents of a found merge stale */ + flags |= STALE; + } + parents = commit->parents; + while (parents) { + struct commit *p = parents->item; + parents = parents->next; + if ((p->object.flags & flags) == flags) + continue; + if (parse_commit(p)) + return NULL; + p->object.flags |= flags; + prio_queue_put(&queue, p); + } + } + + clear_prio_queue(&queue); + return result; +} + +static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos) +{ + struct commit_list *list = NULL; + struct commit_list *result = NULL; + int i; + + for (i = 0; i < n; i++) { + if (one == twos[i]) + /* + * We do not mark this even with RESULT so we do not + * have to clean it up. + */ + return commit_list_insert(one, &result); + } + + if (parse_commit(one)) + return NULL; + for (i = 0; i < n; i++) { + if (parse_commit(twos[i])) + return NULL; + } + + list = paint_down_to_common(one, n, twos, 0); + + while (list) { + struct commit *commit = pop_commit(&list); + if (!(commit->object.flags & STALE)) + commit_list_insert_by_date(commit, &result); + } + return result; +} + +struct commit_list *get_octopus_merge_bases(struct commit_list *in) +{ + struct commit_list *i, *j, *k, *ret = NULL; + + if (!in) + return ret; + + commit_list_insert(in->item, &ret); + + for (i = in->next; i; i = i->next) { + struct commit_list *new_commits = NULL, *end = NULL; + + for (j = ret; j; j = j->next) { + struct commit_list *bases; + bases = get_merge_bases(i->item, j->item); + if (!new_commits) + new_commits = bases; + else + end->next = bases; + for (k = bases; k; k = k->next) + end = k; + } + ret = new_commits; + } + return ret; +} + +static int remove_redundant(struct commit **array, int cnt) +{ + /* + * Some commit in the array may be an ancestor of + * another commit. Move such commit to the end of + * the array, and return the number of commits that + * are independent from each other. + */ + struct commit **work; + unsigned char *redundant; + int *filled_index; + int i, j, filled; + + work = xcalloc(cnt, sizeof(*work)); + redundant = xcalloc(cnt, 1); + ALLOC_ARRAY(filled_index, cnt - 1); + + for (i = 0; i < cnt; i++) + parse_commit(array[i]); + for (i = 0; i < cnt; i++) { + struct commit_list *common; + uint32_t min_generation = array[i]->generation; + + if (redundant[i]) + continue; + for (j = filled = 0; j < cnt; j++) { + if (i == j || redundant[j]) + continue; + filled_index[filled] = j; + work[filled++] = array[j]; + + if (array[j]->generation < min_generation) + min_generation = array[j]->generation; + } + common = paint_down_to_common(array[i], filled, work, + min_generation); + if (array[i]->object.flags & PARENT2) + redundant[i] = 1; + for (j = 0; j < filled; j++) + if (work[j]->object.flags & PARENT1) + redundant[filled_index[j]] = 1; + clear_commit_marks(array[i], all_flags); + clear_commit_marks_many(filled, work, all_flags); + free_commit_list(common); + } + + /* Now collect the result */ + COPY_ARRAY(work, array, cnt); + for (i = filled = 0; i < cnt; i++) + if (!redundant[i]) + array[filled++] = work[i]; + for (j = filled, i = 0; i < cnt; i++) + if (redundant[i]) + array[j++] = work[i]; + free(work); + free(redundant); + free(filled_index); + return filled; +} + +static struct commit_list *get_merge_bases_many_0(struct commit *one, + int n, + struct commit **twos, + int cleanup) +{ + struct commit_list *list; + struct commit **rslt; + struct commit_list *result; + int cnt, i; + + result = merge_bases_many(one, n, twos); + for (i = 0; i < n; i++) { + if (one == twos[i]) + return result; + } + if (!result || !result->next) { + if (cleanup) { + clear_commit_marks(one, all_flags); + clear_commit_marks_many(n, twos, all_flags); + } + return result; + } + + /* There are more than one */ + cnt = commit_list_count(result); + rslt = xcalloc(cnt, sizeof(*rslt)); + for (list = result, i = 0; list; list = list->next) + rslt[i++] = list->item; + free_commit_list(result); + + clear_commit_marks(one, all_flags); + clear_commit_marks_many(n, twos, all_flags); + + cnt = remove_redundant(rslt, cnt); + result = NULL; + for (i = 0; i < cnt; i++) + commit_list_insert_by_date(rslt[i], &result); + free(rslt); + return result; +} + +struct commit_list *get_merge_bases_many(struct commit *one, + int n, + struct commit **twos) +{ + return get_merge_bases_many_0(one, n, twos, 1); +} + +struct commit_list *get_merge_bases_many_dirty(struct commit *one, + int n, + struct commit **twos) +{ + return get_merge_bases_many_0(one, n, twos, 0); +} + +struct commit_list *get_merge_bases(struct commit *one, struct commit *two) +{ + return get_merge_bases_many_0(one, 1, &two, 1); +} + +/* + * Is "commit" a descendant of one of the elements on the "with_commit" list? + */ +int is_descendant_of(struct commit *commit, struct commit_list *with_commit) +{ + if (!with_commit) + return 1; + while (with_commit) { + struct commit *other; + + other = with_commit->item; + with_commit = with_commit->next; + if (in_merge_bases(other, commit)) + return 1; + } + return 0; +} + +/* + * Is "commit" an ancestor of one of the "references"? + */ +int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference) +{ + struct commit_list *bases; + int ret = 0, i; + uint32_t min_generation = GENERATION_NUMBER_INFINITY; + + if (parse_commit(commit)) + return ret; + for (i = 0; i < nr_reference; i++) { + if (parse_commit(reference[i])) + return ret; + if (reference[i]->generation < min_generation) + min_generation = reference[i]->generation; + } + + if (commit->generation > min_generation) + return ret; + + bases = paint_down_to_common(commit, nr_reference, reference, commit->generation); + if (commit->object.flags & PARENT2) + ret = 1; + clear_commit_marks(commit, all_flags); + clear_commit_marks_many(nr_reference, reference, all_flags); + free_commit_list(bases); + return ret; +} + +/* + * Is "commit" an ancestor of (i.e. reachable from) the "reference"? + */ +int in_merge_bases(struct commit *commit, struct commit *reference) +{ + return in_merge_bases_many(commit, 1, &reference); +} + +struct commit_list *reduce_heads(struct commit_list *heads) +{ + struct commit_list *p; + struct commit_list *result = NULL, **tail = &result; + struct commit **array; + int num_head, i; + + if (!heads) + return NULL; + + /* Uniquify */ + for (p = heads; p; p = p->next) + p->item->object.flags &= ~STALE; + for (p = heads, num_head = 0; p; p = p->next) { + if (p->item->object.flags & STALE) + continue; + p->item->object.flags |= STALE; + num_head++; + } + array = xcalloc(num_head, sizeof(*array)); + for (p = heads, i = 0; p; p = p->next) { + if (p->item->object.flags & STALE) { + array[i++] = p->item; + p->item->object.flags &= ~STALE; + } + } + num_head = remove_redundant(array, num_head); + for (i = 0; i < num_head; i++) + tail = &commit_list_insert(array[i], tail)->next; + free(array); + return result; +} + +void reduce_heads_replace(struct commit_list **heads) +{ + struct commit_list *result = reduce_heads(*heads); + free_commit_list(*heads); + *heads = result; +} diff --git a/commit-reach.h b/commit-reach.h new file mode 100644 index 00000000000000..1ea2696e40ba3f --- /dev/null +++ b/commit-reach.h @@ -0,0 +1,42 @@ +#ifndef __COMMIT_REACH_H__ +#define __COMMIT_REACH_H__ + +struct commit; +struct commit_list; + +struct commit_list *get_merge_bases_many(struct commit *one, + int n, + struct commit **twos); +struct commit_list *get_merge_bases_many_dirty(struct commit *one, + int n, + struct commit **twos); +struct commit_list *get_merge_bases(struct commit *one, struct commit *two); +struct commit_list *get_octopus_merge_bases(struct commit_list *in); + +/* To be used only when object flags after this call no longer matter */ +struct commit_list *get_merge_bases_many_dirty(struct commit *one, int n, struct commit **twos); + +int is_descendant_of(struct commit *commit, struct commit_list *with_commit); +int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference); +int in_merge_bases(struct commit *commit, struct commit *reference); + + +/* + * Takes a list of commits and returns a new list where those + * have been removed that can be reached from other commits in + * the list. It is useful for, e.g., reducing the commits + * randomly thrown at the git-merge command and removing + * redundant commits that the user shouldn't have given to it. + * + * This function destroys the STALE bit of the commit objects' + * flags. + */ +struct commit_list *reduce_heads(struct commit_list *heads); + +/* + * Like `reduce_heads()`, except it replaces the list. Use this + * instead of `foo = reduce_heads(foo);` to avoid memory leaks. + */ +void reduce_heads_replace(struct commit_list **heads); + +#endif diff --git a/commit.c b/commit.c index 39b80bd21d59ca..32d1234bd745fa 100644 --- a/commit.c +++ b/commit.c @@ -843,364 +843,6 @@ void sort_in_topological_order(struct commit_list **list, enum rev_sort_order so clear_author_date_slab(&author_date); } -/* merge-base stuff */ - -/* Remember to update object flag allocation in object.h */ -#define PARENT1 (1u<<16) -#define PARENT2 (1u<<17) -#define STALE (1u<<18) -#define RESULT (1u<<19) - -static const unsigned all_flags = (PARENT1 | PARENT2 | STALE | RESULT); - -static int queue_has_nonstale(struct prio_queue *queue) -{ - int i; - for (i = 0; i < queue->nr; i++) { - struct commit *commit = queue->array[i].data; - if (!(commit->object.flags & STALE)) - return 1; - } - return 0; -} - -/* all input commits in one and twos[] must have been parsed! */ -static struct commit_list *paint_down_to_common(struct commit *one, int n, - struct commit **twos, - int min_generation) -{ - struct prio_queue queue = { compare_commits_by_gen_then_commit_date }; - struct commit_list *result = NULL; - int i; - uint32_t last_gen = GENERATION_NUMBER_INFINITY; - - one->object.flags |= PARENT1; - if (!n) { - commit_list_append(one, &result); - return result; - } - prio_queue_put(&queue, one); - - for (i = 0; i < n; i++) { - twos[i]->object.flags |= PARENT2; - prio_queue_put(&queue, twos[i]); - } - - while (queue_has_nonstale(&queue)) { - struct commit *commit = prio_queue_get(&queue); - struct commit_list *parents; - int flags; - - if (commit->generation > last_gen) - BUG("bad generation skip %8x > %8x at %s", - commit->generation, last_gen, - oid_to_hex(&commit->object.oid)); - last_gen = commit->generation; - - if (commit->generation < min_generation) - break; - - flags = commit->object.flags & (PARENT1 | PARENT2 | STALE); - if (flags == (PARENT1 | PARENT2)) { - if (!(commit->object.flags & RESULT)) { - commit->object.flags |= RESULT; - commit_list_insert_by_date(commit, &result); - } - /* Mark parents of a found merge stale */ - flags |= STALE; - } - parents = commit->parents; - while (parents) { - struct commit *p = parents->item; - parents = parents->next; - if ((p->object.flags & flags) == flags) - continue; - if (parse_commit(p)) - return NULL; - p->object.flags |= flags; - prio_queue_put(&queue, p); - } - } - - clear_prio_queue(&queue); - return result; -} - -static struct commit_list *merge_bases_many(struct commit *one, int n, struct commit **twos) -{ - struct commit_list *list = NULL; - struct commit_list *result = NULL; - int i; - - for (i = 0; i < n; i++) { - if (one == twos[i]) - /* - * We do not mark this even with RESULT so we do not - * have to clean it up. - */ - return commit_list_insert(one, &result); - } - - if (parse_commit(one)) - return NULL; - for (i = 0; i < n; i++) { - if (parse_commit(twos[i])) - return NULL; - } - - list = paint_down_to_common(one, n, twos, 0); - - while (list) { - struct commit *commit = pop_commit(&list); - if (!(commit->object.flags & STALE)) - commit_list_insert_by_date(commit, &result); - } - return result; -} - -struct commit_list *get_octopus_merge_bases(struct commit_list *in) -{ - struct commit_list *i, *j, *k, *ret = NULL; - - if (!in) - return ret; - - commit_list_insert(in->item, &ret); - - for (i = in->next; i; i = i->next) { - struct commit_list *new_commits = NULL, *end = NULL; - - for (j = ret; j; j = j->next) { - struct commit_list *bases; - bases = get_merge_bases(i->item, j->item); - if (!new_commits) - new_commits = bases; - else - end->next = bases; - for (k = bases; k; k = k->next) - end = k; - } - ret = new_commits; - } - return ret; -} - -static int remove_redundant(struct commit **array, int cnt) -{ - /* - * Some commit in the array may be an ancestor of - * another commit. Move such commit to the end of - * the array, and return the number of commits that - * are independent from each other. - */ - struct commit **work; - unsigned char *redundant; - int *filled_index; - int i, j, filled; - - work = xcalloc(cnt, sizeof(*work)); - redundant = xcalloc(cnt, 1); - ALLOC_ARRAY(filled_index, cnt - 1); - - for (i = 0; i < cnt; i++) - parse_commit(array[i]); - for (i = 0; i < cnt; i++) { - struct commit_list *common; - uint32_t min_generation = array[i]->generation; - - if (redundant[i]) - continue; - for (j = filled = 0; j < cnt; j++) { - if (i == j || redundant[j]) - continue; - filled_index[filled] = j; - work[filled++] = array[j]; - - if (array[j]->generation < min_generation) - min_generation = array[j]->generation; - } - common = paint_down_to_common(array[i], filled, work, - min_generation); - if (array[i]->object.flags & PARENT2) - redundant[i] = 1; - for (j = 0; j < filled; j++) - if (work[j]->object.flags & PARENT1) - redundant[filled_index[j]] = 1; - clear_commit_marks(array[i], all_flags); - clear_commit_marks_many(filled, work, all_flags); - free_commit_list(common); - } - - /* Now collect the result */ - COPY_ARRAY(work, array, cnt); - for (i = filled = 0; i < cnt; i++) - if (!redundant[i]) - array[filled++] = work[i]; - for (j = filled, i = 0; i < cnt; i++) - if (redundant[i]) - array[j++] = work[i]; - free(work); - free(redundant); - free(filled_index); - return filled; -} - -static struct commit_list *get_merge_bases_many_0(struct commit *one, - int n, - struct commit **twos, - int cleanup) -{ - struct commit_list *list; - struct commit **rslt; - struct commit_list *result; - int cnt, i; - - result = merge_bases_many(one, n, twos); - for (i = 0; i < n; i++) { - if (one == twos[i]) - return result; - } - if (!result || !result->next) { - if (cleanup) { - clear_commit_marks(one, all_flags); - clear_commit_marks_many(n, twos, all_flags); - } - return result; - } - - /* There are more than one */ - cnt = commit_list_count(result); - rslt = xcalloc(cnt, sizeof(*rslt)); - for (list = result, i = 0; list; list = list->next) - rslt[i++] = list->item; - free_commit_list(result); - - clear_commit_marks(one, all_flags); - clear_commit_marks_many(n, twos, all_flags); - - cnt = remove_redundant(rslt, cnt); - result = NULL; - for (i = 0; i < cnt; i++) - commit_list_insert_by_date(rslt[i], &result); - free(rslt); - return result; -} - -struct commit_list *get_merge_bases_many(struct commit *one, - int n, - struct commit **twos) -{ - return get_merge_bases_many_0(one, n, twos, 1); -} - -struct commit_list *get_merge_bases_many_dirty(struct commit *one, - int n, - struct commit **twos) -{ - return get_merge_bases_many_0(one, n, twos, 0); -} - -struct commit_list *get_merge_bases(struct commit *one, struct commit *two) -{ - return get_merge_bases_many_0(one, 1, &two, 1); -} - -/* - * Is "commit" a descendant of one of the elements on the "with_commit" list? - */ -int is_descendant_of(struct commit *commit, struct commit_list *with_commit) -{ - if (!with_commit) - return 1; - while (with_commit) { - struct commit *other; - - other = with_commit->item; - with_commit = with_commit->next; - if (in_merge_bases(other, commit)) - return 1; - } - return 0; -} - -/* - * Is "commit" an ancestor of one of the "references"? - */ -int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference) -{ - struct commit_list *bases; - int ret = 0, i; - uint32_t min_generation = GENERATION_NUMBER_INFINITY; - - if (parse_commit(commit)) - return ret; - for (i = 0; i < nr_reference; i++) { - if (parse_commit(reference[i])) - return ret; - if (reference[i]->generation < min_generation) - min_generation = reference[i]->generation; - } - - if (commit->generation > min_generation) - return ret; - - bases = paint_down_to_common(commit, nr_reference, reference, commit->generation); - if (commit->object.flags & PARENT2) - ret = 1; - clear_commit_marks(commit, all_flags); - clear_commit_marks_many(nr_reference, reference, all_flags); - free_commit_list(bases); - return ret; -} - -/* - * Is "commit" an ancestor of (i.e. reachable from) the "reference"? - */ -int in_merge_bases(struct commit *commit, struct commit *reference) -{ - return in_merge_bases_many(commit, 1, &reference); -} - -struct commit_list *reduce_heads(struct commit_list *heads) -{ - struct commit_list *p; - struct commit_list *result = NULL, **tail = &result; - struct commit **array; - int num_head, i; - - if (!heads) - return NULL; - - /* Uniquify */ - for (p = heads; p; p = p->next) - p->item->object.flags &= ~STALE; - for (p = heads, num_head = 0; p; p = p->next) { - if (p->item->object.flags & STALE) - continue; - p->item->object.flags |= STALE; - num_head++; - } - array = xcalloc(num_head, sizeof(*array)); - for (p = heads, i = 0; p; p = p->next) { - if (p->item->object.flags & STALE) { - array[i++] = p->item; - p->item->object.flags &= ~STALE; - } - } - num_head = remove_redundant(array, num_head); - for (i = 0; i < num_head; i++) - tail = &commit_list_insert(array[i], tail)->next; - free(array); - return result; -} - -void reduce_heads_replace(struct commit_list **heads) -{ - struct commit_list *result = reduce_heads(*heads); - free_commit_list(*heads); - *heads = result; -} - static const char gpg_sig_header[] = "gpgsig"; static const int gpg_sig_header_len = sizeof(gpg_sig_header) - 1; diff --git a/object.h b/object.h index fa5ca975678630..18c2b073e34fa8 100644 --- a/object.h +++ b/object.h @@ -65,7 +65,7 @@ struct object_array { * bisect.c: 16 * bundle.c: 16 * http-push.c: 16-----19 - * commit.c: 16-----19 + * commit-reach.c: 16-----19 * sha1-name.c: 20 * list-objects-filter.c: 21 * builtin/fsck.c: 0--3 From ca88ec3ca5413236801f213cbfe8295878bcc5c6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 18 Jul 2018 11:52:37 -0400 Subject: [PATCH 02/18] commit.h: remove method declarations These methods are now declared in commit-reach.h. Remove them from commit.h and add new include statements in all files that require these declarations. Signed-off-by: Derrick Stolee --- bisect.c | 1 + builtin/branch.c | 1 + builtin/commit.c | 1 + builtin/fetch.c | 1 + builtin/fmt-merge-msg.c | 1 + builtin/log.c | 1 + builtin/merge-base.c | 1 + builtin/merge.c | 1 + builtin/pull.c | 1 + builtin/receive-pack.c | 1 + builtin/rev-parse.c | 1 + commit.h | 29 ----------------------------- fast-import.c | 1 + http-push.c | 2 +- merge-recursive.c | 1 + notes-merge.c | 1 + pack-bitmap-write.c | 1 + ref-filter.c | 1 + remote.c | 1 + revision.c | 1 + sequencer.c | 1 + sha1-name.c | 1 + shallow.c | 1 + submodule.c | 1 + 24 files changed, 23 insertions(+), 30 deletions(-) diff --git a/bisect.c b/bisect.c index e1275ba79e8e73..d023543c911d04 100644 --- a/bisect.c +++ b/bisect.c @@ -13,6 +13,7 @@ #include "sha1-array.h" #include "argv-array.h" #include "commit-slab.h" +#include "commit-reach.h" static struct oid_array good_revs; static struct oid_array skipped_revs; diff --git a/builtin/branch.c b/builtin/branch.c index a50632fb23f52e..9a787447f47bac 100644 --- a/builtin/branch.c +++ b/builtin/branch.c @@ -23,6 +23,7 @@ #include "ref-filter.h" #include "worktree.h" #include "help.h" +#include "commit-reach.h" static const char * const builtin_branch_usage[] = { N_("git branch [] [-r | -a] [--merged | --no-merged]"), diff --git a/builtin/commit.c b/builtin/commit.c index 158e3f843afbf8..b5c608458e8a20 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -33,6 +33,7 @@ #include "sequencer.h" #include "mailmap.h" #include "help.h" +#include "commit-reach.h" static const char * const builtin_commit_usage[] = { N_("git commit [] [--] ..."), diff --git a/builtin/fetch.c b/builtin/fetch.c index f5d960baecf6c9..7de234774be038 100644 --- a/builtin/fetch.c +++ b/builtin/fetch.c @@ -22,6 +22,7 @@ #include "utf8.h" #include "packfile.h" #include "list-objects-filter-options.h" +#include "commit-reach.h" static const char * const builtin_fetch_usage[] = { N_("git fetch [] [ [...]]"), diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index ff165c0fcd209f..7277d557b2d197 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -12,6 +12,7 @@ #include "fmt-merge-msg.h" #include "gpg-interface.h" #include "repository.h" +#include "commit-reach.h" static const char * const fmt_merge_msg_usage[] = { N_("git fmt-merge-msg [-m ] [--log[=] | --no-log] [--file ]"), diff --git a/builtin/log.c b/builtin/log.c index 55a6286d7f4a95..333d97c69288ac 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -31,6 +31,7 @@ #include "progress.h" #include "commit-slab.h" #include "repository.h" +#include "commit-reach.h" #define MAIL_DEFAULT_WRAP 72 diff --git a/builtin/merge-base.c b/builtin/merge-base.c index 08d91b1f0c0172..1c920990701381 100644 --- a/builtin/merge-base.c +++ b/builtin/merge-base.c @@ -7,6 +7,7 @@ #include "revision.h" #include "parse-options.h" #include "repository.h" +#include "commit-reach.h" static int show_merge_base(struct commit **rev, int rev_nr, int show_all) { diff --git a/builtin/merge.c b/builtin/merge.c index d1b547d97376d7..4c601c40a2efbb 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -36,6 +36,7 @@ #include "packfile.h" #include "tag.h" #include "alias.h" +#include "commit-reach.h" #define DEFAULT_TWOHEAD (1<<0) #define DEFAULT_OCTOPUS (1<<1) diff --git a/builtin/pull.c b/builtin/pull.c index 4e789353922340..15ad01096817c9 100644 --- a/builtin/pull.c +++ b/builtin/pull.c @@ -22,6 +22,7 @@ #include "tempfile.h" #include "lockfile.h" #include "wt-status.h" +#include "commit-reach.h" enum rebase_type { REBASE_INVALID = -1, diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 400d31c18ccd97..d8467f97343438 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -27,6 +27,7 @@ #include "packfile.h" #include "object-store.h" #include "protocol.h" +#include "commit-reach.h" static const char * const receive_pack_usage[] = { N_("git receive-pack "), diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index 0f09bbbf65a4bc..455f62246d69ad 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -14,6 +14,7 @@ #include "revision.h" #include "split-index.h" #include "submodule.h" +#include "commit-reach.h" #define DO_REVS 1 #define DO_NOREV 2 diff --git a/commit.h b/commit.h index da0db36eba2bf1..e2c99d9b04f6b2 100644 --- a/commit.h +++ b/commit.h @@ -204,13 +204,6 @@ struct commit_graft *read_graft_line(struct strbuf *line); int register_commit_graft(struct repository *r, struct commit_graft *, int); struct commit_graft *lookup_commit_graft(struct repository *r, const struct object_id *oid); -extern struct commit_list *get_merge_bases(struct commit *rev1, struct commit *rev2); -extern struct commit_list *get_merge_bases_many(struct commit *one, int n, struct commit **twos); -extern struct commit_list *get_octopus_merge_bases(struct commit_list *in); - -/* To be used only when object flags after this call no longer matter */ -extern struct commit_list *get_merge_bases_many_dirty(struct commit *one, int n, struct commit **twos); - /* largest positive number a signed 32-bit integer can contain */ #define INFINITE_DEPTH 0x7fffffff @@ -258,32 +251,10 @@ extern int delayed_reachability_test(struct shallow_info *si, int c); extern void prune_shallow(int show_only); extern struct trace_key trace_shallow; -int is_descendant_of(struct commit *, struct commit_list *); -int in_merge_bases(struct commit *, struct commit *); -int in_merge_bases_many(struct commit *, int, struct commit **); - extern int interactive_add(int argc, const char **argv, const char *prefix, int patch); extern int run_add_interactive(const char *revision, const char *patch_mode, const struct pathspec *pathspec); -/* - * Takes a list of commits and returns a new list where those - * have been removed that can be reached from other commits in - * the list. It is useful for, e.g., reducing the commits - * randomly thrown at the git-merge command and removing - * redundant commits that the user shouldn't have given to it. - * - * This function destroys the STALE bit of the commit objects' - * flags. - */ -extern struct commit_list *reduce_heads(struct commit_list *heads); - -/* - * Like `reduce_heads()`, except it replaces the list. Use this - * instead of `foo = reduce_heads(foo);` to avoid memory leaks. - */ -extern void reduce_heads_replace(struct commit_list **heads); - struct commit_extra_header { struct commit_extra_header *next; char *key; diff --git a/fast-import.c b/fast-import.c index 3ea5781029de66..4a93df3839dcb4 100644 --- a/fast-import.c +++ b/fast-import.c @@ -171,6 +171,7 @@ Format of STDIN stream: #include "packfile.h" #include "object-store.h" #include "mem-pool.h" +#include "commit-reach.h" #define PACK_ID_BITS 16 #define MAX_PACK_ID ((1< diff --git a/merge-recursive.c b/merge-recursive.c index 1dd6ec384de132..8155dee9a98cbe 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -27,6 +27,7 @@ #include "dir.h" #include "submodule.h" #include "revision.h" +#include "commit-reach.h" struct path_hashmap_entry { struct hashmap_entry e; diff --git a/notes-merge.c b/notes-merge.c index 76ab19e702423a..12dfdf6c170154 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -12,6 +12,7 @@ #include "notes-merge.h" #include "strbuf.h" #include "notes-utils.h" +#include "commit-reach.h" struct notes_merge_pair { struct object_id obj, base, local, remote; diff --git a/pack-bitmap-write.c b/pack-bitmap-write.c index 953c5dd84dff2d..55bcab907c0ecf 100644 --- a/pack-bitmap-write.c +++ b/pack-bitmap-write.c @@ -11,6 +11,7 @@ #include "pack-bitmap.h" #include "sha1-lookup.h" #include "pack-objects.h" +#include "commit-reach.h" struct bitmapped_commit { struct commit *commit; diff --git a/ref-filter.c b/ref-filter.c index 9b2da883926dce..fca3ad040af6ab 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -19,6 +19,7 @@ #include "wt-status.h" #include "commit-slab.h" #include "commit-graph.h" +#include "commit-reach.h" static struct ref_msg { const char *gone; diff --git a/remote.c b/remote.c index 26b1fbd9a881d2..8e99b9888ab8d9 100644 --- a/remote.c +++ b/remote.c @@ -12,6 +12,7 @@ #include "string-list.h" #include "mergesort.h" #include "argv-array.h" +#include "commit-reach.h" enum map_direction { FROM_SRC, FROM_DST }; diff --git a/revision.c b/revision.c index 4dbe406bed2aa1..3205a3947ad122 100644 --- a/revision.c +++ b/revision.c @@ -24,6 +24,7 @@ #include "packfile.h" #include "worktree.h" #include "argv-array.h" +#include "commit-reach.h" volatile show_early_output_fn_t show_early_output; diff --git a/sequencer.c b/sequencer.c index d1d07bed5bb051..97bdfd48b42c14 100644 --- a/sequencer.c +++ b/sequencer.c @@ -30,6 +30,7 @@ #include "oidset.h" #include "commit-slab.h" #include "alias.h" +#include "commit-reach.h" #define GIT_REFLOG_ACTION "GIT_REFLOG_ACTION" diff --git a/sha1-name.c b/sha1-name.c index 009faab4aefc34..7215b30b8861aa 100644 --- a/sha1-name.c +++ b/sha1-name.c @@ -12,6 +12,7 @@ #include "packfile.h" #include "object-store.h" #include "repository.h" +#include "commit-reach.h" static int get_oid_oneline(const char *, struct object_id *, struct commit_list *); diff --git a/shallow.c b/shallow.c index dbe8a2a2906abf..99fd2d1ba0f70a 100644 --- a/shallow.c +++ b/shallow.c @@ -16,6 +16,7 @@ #include "list-objects.h" #include "commit-slab.h" #include "repository.h" +#include "commit-reach.h" void set_alternate_shallow_file(struct repository *r, const char *path, int override) { diff --git a/submodule.c b/submodule.c index 6688dd5d459495..6650ed7aa02b2b 100644 --- a/submodule.c +++ b/submodule.c @@ -22,6 +22,7 @@ #include "worktree.h" #include "parse-options.h" #include "object-store.h" +#include "commit-reach.h" static int config_update_recurse_submodules = RECURSE_SUBMODULES_OFF; static struct string_list changed_submodule_names = STRING_LIST_INIT_DUP; From 12349f5d4c941a7529a70cf7e3baed2a6d73dd11 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 25 Jun 2018 13:35:03 -0400 Subject: [PATCH 03/18] commit-reach: move ref_newer from remote.c There are several commit walks in the codebase. Group them together into a new commit-reach.c file and corresponding header. After we group these walks into one place, we can reduce duplicate logic by calling equivalent methods. The ref_newer() method is used by 'git push -f' to check if a force-push is necessary. By making the method public, we make it possible to test the method directly without setting up an envieronment where a 'git push' call makes sense. Signed-off-by: Derrick Stolee --- builtin/remote.c | 1 + commit-reach.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++- commit-reach.h | 2 ++ remote.c | 49 ------------------------------------------ remote.h | 1 - 5 files changed, 57 insertions(+), 51 deletions(-) diff --git a/builtin/remote.c b/builtin/remote.c index c74ee886900176..79b03264463795 100644 --- a/builtin/remote.c +++ b/builtin/remote.c @@ -10,6 +10,7 @@ #include "refspec.h" #include "object-store.h" #include "argv-array.h" +#include "commit-reach.h" static const char * const builtin_remote_usage[] = { N_("git remote [-v | --verbose]"), diff --git a/commit-reach.c b/commit-reach.c index 8ab6044414a127..a6bc4781a63d11 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -1,6 +1,10 @@ #include "cache.h" -#include "prio-queue.h" #include "commit.h" +#include "decorate.h" +#include "prio-queue.h" +#include "tree.h" +#include "revision.h" +#include "tag.h" #include "commit-reach.h" /* Remember to update object flag allocation in object.h */ @@ -358,3 +362,52 @@ void reduce_heads_replace(struct commit_list **heads) free_commit_list(*heads); *heads = result; } + +static void unmark_and_free(struct commit_list *list, unsigned int mark) +{ + while (list) { + struct commit *commit = pop_commit(&list); + commit->object.flags &= ~mark; + } +} + +int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) +{ + struct object *o; + struct commit *old_commit, *new_commit; + struct commit_list *list, *used; + int found = 0; + + /* + * Both new_commit and old_commit must be commit-ish and new_commit is descendant of + * old_commit. Otherwise we require --force. + */ + o = deref_tag(the_repository, parse_object(the_repository, old_oid), + NULL, 0); + if (!o || o->type != OBJ_COMMIT) + return 0; + old_commit = (struct commit *) o; + + o = deref_tag(the_repository, parse_object(the_repository, new_oid), + NULL, 0); + if (!o || o->type != OBJ_COMMIT) + return 0; + new_commit = (struct commit *) o; + + if (parse_commit(new_commit) < 0) + return 0; + + used = list = NULL; + commit_list_insert(new_commit, &list); + while (list) { + new_commit = pop_most_recent_commit(&list, TMP_MARK); + commit_list_insert(new_commit, &used); + if (new_commit == old_commit) { + found = 1; + break; + } + } + unmark_and_free(list, TMP_MARK); + unmark_and_free(used, TMP_MARK); + return found; +} diff --git a/commit-reach.h b/commit-reach.h index 1ea2696e40ba3f..f1cf9bfcd8e6cb 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -39,4 +39,6 @@ struct commit_list *reduce_heads(struct commit_list *heads); */ void reduce_heads_replace(struct commit_list **heads); +int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid); + #endif diff --git a/remote.c b/remote.c index 8e99b9888ab8d9..f0c23bae487954 100644 --- a/remote.c +++ b/remote.c @@ -1784,55 +1784,6 @@ int resolve_remote_symref(struct ref *ref, struct ref *list) return 1; } -static void unmark_and_free(struct commit_list *list, unsigned int mark) -{ - while (list) { - struct commit *commit = pop_commit(&list); - commit->object.flags &= ~mark; - } -} - -int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) -{ - struct object *o; - struct commit *old_commit, *new_commit; - struct commit_list *list, *used; - int found = 0; - - /* - * Both new_commit and old_commit must be commit-ish and new_commit is descendant of - * old_commit. Otherwise we require --force. - */ - o = deref_tag(the_repository, parse_object(the_repository, old_oid), - NULL, 0); - if (!o || o->type != OBJ_COMMIT) - return 0; - old_commit = (struct commit *) o; - - o = deref_tag(the_repository, parse_object(the_repository, new_oid), - NULL, 0); - if (!o || o->type != OBJ_COMMIT) - return 0; - new_commit = (struct commit *) o; - - if (parse_commit(new_commit) < 0) - return 0; - - used = list = NULL; - commit_list_insert(new_commit, &list); - while (list) { - new_commit = pop_most_recent_commit(&list, TMP_MARK); - commit_list_insert(new_commit, &used); - if (new_commit == old_commit) { - found = 1; - break; - } - } - unmark_and_free(list, TMP_MARK); - unmark_and_free(used, TMP_MARK); - return found; -} - /* * Lookup the upstream branch for the given branch and if present, optionally * compute the commit ahead/behind values for the pair. diff --git a/remote.h b/remote.h index 45ecc6cefafdea..56fb9cbb27258b 100644 --- a/remote.h +++ b/remote.h @@ -149,7 +149,6 @@ extern struct ref **get_remote_refs(int fd_out, struct packet_reader *reader, const struct string_list *server_options); int resolve_remote_symref(struct ref *ref, struct ref *list); -int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid); /* * Remove and free all but the first of any entries in the input list From f548989484edcff726d3822a1707f4c0979a2e4f Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Mon, 25 Jun 2018 14:01:28 -0400 Subject: [PATCH 04/18] commit-reach: move commit_contains from ref-filter There are several commit walks in the codebase. Group them together into a new commit-reach.c file and corresponding header. After we group these walks into one place, we can reduce duplicate logic by calling equivalent methods. All methods are direct moves, except we also make the commit_contains() method public so its consumers in ref-filter.c can still call it. We can also test this method in a test-tool in a later commit. Signed-off-by: Derrick Stolee --- commit-reach.c | 121 +++++++++++++++++++++++++++++++++++++++++ commit-reach.h | 20 ++++++- ref-filter.c | 145 +++---------------------------------------------- 3 files changed, 147 insertions(+), 139 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index a6bc4781a63d11..01d796f0116163 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -1,8 +1,10 @@ #include "cache.h" #include "commit.h" +#include "commit-graph.h" #include "decorate.h" #include "prio-queue.h" #include "tree.h" +#include "ref-filter.c" #include "revision.h" #include "tag.h" #include "commit-reach.h" @@ -411,3 +413,122 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) unmark_and_free(used, TMP_MARK); return found; } + +/* + * Mimicking the real stack, this stack lives on the heap, avoiding stack + * overflows. + * + * At each recursion step, the stack items points to the commits whose + * ancestors are to be inspected. + */ +struct contains_stack { + int nr, alloc; + struct contains_stack_entry { + struct commit *commit; + struct commit_list *parents; + } *contains_stack; +}; + +static int in_commit_list(const struct commit_list *want, struct commit *c) +{ + for (; want; want = want->next) + if (!oidcmp(&want->item->object.oid, &c->object.oid)) + return 1; + return 0; +} + +/* + * Test whether the candidate is contained in the list. + * Do not recurse to find out, though, but return -1 if inconclusive. + */ +static enum contains_result contains_test(struct commit *candidate, + const struct commit_list *want, + struct contains_cache *cache, + uint32_t cutoff) +{ + enum contains_result *cached = contains_cache_at(cache, candidate); + + /* If we already have the answer cached, return that. */ + if (*cached) + return *cached; + + /* or are we it? */ + if (in_commit_list(want, candidate)) { + *cached = CONTAINS_YES; + return CONTAINS_YES; + } + + /* Otherwise, we don't know; prepare to recurse */ + parse_commit_or_die(candidate); + + if (candidate->generation < cutoff) + return CONTAINS_NO; + + return CONTAINS_UNKNOWN; +} + +static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack) +{ + ALLOC_GROW(contains_stack->contains_stack, contains_stack->nr + 1, contains_stack->alloc); + contains_stack->contains_stack[contains_stack->nr].commit = candidate; + contains_stack->contains_stack[contains_stack->nr++].parents = candidate->parents; +} + +static enum contains_result contains_tag_algo(struct commit *candidate, + const struct commit_list *want, + struct contains_cache *cache) +{ + struct contains_stack contains_stack = { 0, 0, NULL }; + enum contains_result result; + uint32_t cutoff = GENERATION_NUMBER_INFINITY; + const struct commit_list *p; + + for (p = want; p; p = p->next) { + struct commit *c = p->item; + load_commit_graph_info(the_repository, c); + if (c->generation < cutoff) + cutoff = c->generation; + } + + result = contains_test(candidate, want, cache, cutoff); + if (result != CONTAINS_UNKNOWN) + return result; + + push_to_contains_stack(candidate, &contains_stack); + while (contains_stack.nr) { + struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1]; + struct commit *commit = entry->commit; + struct commit_list *parents = entry->parents; + + if (!parents) { + *contains_cache_at(cache, commit) = CONTAINS_NO; + contains_stack.nr--; + } + /* + * If we just popped the stack, parents->item has been marked, + * therefore contains_test will return a meaningful yes/no. + */ + else switch (contains_test(parents->item, want, cache, cutoff)) { + case CONTAINS_YES: + *contains_cache_at(cache, commit) = CONTAINS_YES; + contains_stack.nr--; + break; + case CONTAINS_NO: + entry->parents = parents->next; + break; + case CONTAINS_UNKNOWN: + push_to_contains_stack(parents->item, &contains_stack); + break; + } + } + free(contains_stack.contains_stack); + return contains_test(candidate, want, cache, cutoff); +} + +int commit_contains(struct ref_filter *filter, struct commit *commit, + struct commit_list *list, struct contains_cache *cache) +{ + if (filter->with_commit_tag_algo) + return contains_tag_algo(commit, list, cache) == CONTAINS_YES; + return is_descendant_of(commit, list); +} diff --git a/commit-reach.h b/commit-reach.h index f1cf9bfcd8e6cb..13dec25cee40fa 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -1,8 +1,12 @@ #ifndef __COMMIT_REACH_H__ #define __COMMIT_REACH_H__ +#include "commit-slab.h" + struct commit; struct commit_list; +struct contains_cache; +struct ref_filter; struct commit_list *get_merge_bases_many(struct commit *one, int n, @@ -20,7 +24,6 @@ int is_descendant_of(struct commit *commit, struct commit_list *with_commit); int in_merge_bases_many(struct commit *commit, int nr_reference, struct commit **reference); int in_merge_bases(struct commit *commit, struct commit *reference); - /* * Takes a list of commits and returns a new list where those * have been removed that can be reached from other commits in @@ -41,4 +44,19 @@ void reduce_heads_replace(struct commit_list **heads); int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid); +/* + * Unknown has to be "0" here, because that's the default value for + * contains_cache slab entries that have not yet been assigned. + */ +enum contains_result { + CONTAINS_UNKNOWN = 0, + CONTAINS_NO, + CONTAINS_YES +}; + +define_commit_slab(contains_cache, enum contains_result); + +int commit_contains(struct ref_filter *filter, struct commit *commit, + struct commit_list *list, struct contains_cache *cache); + #endif diff --git a/ref-filter.c b/ref-filter.c index fca3ad040af6ab..495e830fa58d26 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1624,144 +1624,6 @@ static int get_ref_atom_value(struct ref_array_item *ref, int atom, return 0; } -/* - * Unknown has to be "0" here, because that's the default value for - * contains_cache slab entries that have not yet been assigned. - */ -enum contains_result { - CONTAINS_UNKNOWN = 0, - CONTAINS_NO, - CONTAINS_YES -}; - -define_commit_slab(contains_cache, enum contains_result); - -struct ref_filter_cbdata { - struct ref_array *array; - struct ref_filter *filter; - struct contains_cache contains_cache; - struct contains_cache no_contains_cache; -}; - -/* - * Mimicking the real stack, this stack lives on the heap, avoiding stack - * overflows. - * - * At each recursion step, the stack items points to the commits whose - * ancestors are to be inspected. - */ -struct contains_stack { - int nr, alloc; - struct contains_stack_entry { - struct commit *commit; - struct commit_list *parents; - } *contains_stack; -}; - -static int in_commit_list(const struct commit_list *want, struct commit *c) -{ - for (; want; want = want->next) - if (!oidcmp(&want->item->object.oid, &c->object.oid)) - return 1; - return 0; -} - -/* - * Test whether the candidate is contained in the list. - * Do not recurse to find out, though, but return -1 if inconclusive. - */ -static enum contains_result contains_test(struct commit *candidate, - const struct commit_list *want, - struct contains_cache *cache, - uint32_t cutoff) -{ - enum contains_result *cached = contains_cache_at(cache, candidate); - - /* If we already have the answer cached, return that. */ - if (*cached) - return *cached; - - /* or are we it? */ - if (in_commit_list(want, candidate)) { - *cached = CONTAINS_YES; - return CONTAINS_YES; - } - - /* Otherwise, we don't know; prepare to recurse */ - parse_commit_or_die(candidate); - - if (candidate->generation < cutoff) - return CONTAINS_NO; - - return CONTAINS_UNKNOWN; -} - -static void push_to_contains_stack(struct commit *candidate, struct contains_stack *contains_stack) -{ - ALLOC_GROW(contains_stack->contains_stack, contains_stack->nr + 1, contains_stack->alloc); - contains_stack->contains_stack[contains_stack->nr].commit = candidate; - contains_stack->contains_stack[contains_stack->nr++].parents = candidate->parents; -} - -static enum contains_result contains_tag_algo(struct commit *candidate, - const struct commit_list *want, - struct contains_cache *cache) -{ - struct contains_stack contains_stack = { 0, 0, NULL }; - enum contains_result result; - uint32_t cutoff = GENERATION_NUMBER_INFINITY; - const struct commit_list *p; - - for (p = want; p; p = p->next) { - struct commit *c = p->item; - load_commit_graph_info(the_repository, c); - if (c->generation < cutoff) - cutoff = c->generation; - } - - result = contains_test(candidate, want, cache, cutoff); - if (result != CONTAINS_UNKNOWN) - return result; - - push_to_contains_stack(candidate, &contains_stack); - while (contains_stack.nr) { - struct contains_stack_entry *entry = &contains_stack.contains_stack[contains_stack.nr - 1]; - struct commit *commit = entry->commit; - struct commit_list *parents = entry->parents; - - if (!parents) { - *contains_cache_at(cache, commit) = CONTAINS_NO; - contains_stack.nr--; - } - /* - * If we just popped the stack, parents->item has been marked, - * therefore contains_test will return a meaningful yes/no. - */ - else switch (contains_test(parents->item, want, cache, cutoff)) { - case CONTAINS_YES: - *contains_cache_at(cache, commit) = CONTAINS_YES; - contains_stack.nr--; - break; - case CONTAINS_NO: - entry->parents = parents->next; - break; - case CONTAINS_UNKNOWN: - push_to_contains_stack(parents->item, &contains_stack); - break; - } - } - free(contains_stack.contains_stack); - return contains_test(candidate, want, cache, cutoff); -} - -static int commit_contains(struct ref_filter *filter, struct commit *commit, - struct commit_list *list, struct contains_cache *cache) -{ - if (filter->with_commit_tag_algo) - return contains_tag_algo(commit, list, cache) == CONTAINS_YES; - return is_descendant_of(commit, list); -} - /* * Return 1 if the refname matches one of the patterns, otherwise 0. * A pattern can be a literal prefix (e.g. a refname "refs/heads/master" @@ -1988,6 +1850,13 @@ static int filter_ref_kind(struct ref_filter *filter, const char *refname) return ref_kind_from_refname(refname); } +struct ref_filter_cbdata { + struct ref_array *array; + struct ref_filter *filter; + struct contains_cache contains_cache; + struct contains_cache no_contains_cache; +}; + /* * A call-back given to for_each_ref(). Filter refs and keep them for * later object processing. From e822fd8e756ce39464646a6b7d2587db9af23719 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 Jun 2018 16:25:07 -0400 Subject: [PATCH 05/18] upload-pack: make reachable() more generic In anticipation of moving the reachable() method to commit-reach.c, modify the prototype to be more generic to flags known outside of upload-pack.c. Also rename 'want' to 'from' to make the statement more clear outside of the context of haves/wants negotiation. Signed-off-by: Derrick Stolee --- upload-pack.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 4ca052d0b64184..5a639cb47b82bd 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -336,17 +336,18 @@ static int got_oid(const char *hex, struct object_id *oid) return 0; } -static int reachable(struct commit *want) +static int reachable(struct commit *from, unsigned int with_flag, + unsigned int assign_flag) { struct prio_queue work = { compare_commits_by_commit_date }; - prio_queue_put(&work, want); + prio_queue_put(&work, from); while (work.nr) { struct commit_list *list; struct commit *commit = prio_queue_get(&work); - if (commit->object.flags & THEY_HAVE) { - want->object.flags |= COMMON_KNOWN; + if (commit->object.flags & with_flag) { + from->object.flags |= assign_flag; break; } if (!commit->object.parsed) @@ -362,10 +363,10 @@ static int reachable(struct commit *want) prio_queue_put(&work, parent); } } - want->object.flags |= REACHABLE; - clear_commit_marks(want, REACHABLE); + from->object.flags |= REACHABLE; + clear_commit_marks(from, REACHABLE); clear_prio_queue(&work); - return (want->object.flags & COMMON_KNOWN); + return (from->object.flags & assign_flag); } static int ok_to_give_up(void) @@ -390,7 +391,7 @@ static int ok_to_give_up(void) want_obj.objects[i].item->flags |= COMMON_KNOWN; continue; } - if (!reachable((struct commit *)want)) + if (!reachable((struct commit *)want, THEY_HAVE, COMMON_KNOWN)) return 0; } return 1; From 08dcd4e58fe63f65b56503d30ae63559a1afb33a Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Tue, 19 Jun 2018 16:35:36 -0400 Subject: [PATCH 06/18] upload-pack: refactor ok_to_give_up() In anticipation of consolidating all commit reachability algorithms, refactor ok_to_give_up() in order to allow splitting its logic into an external method. Signed-off-by: Derrick Stolee --- upload-pack.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 5a639cb47b82bd..9fe19003c6b7f5 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -369,34 +369,46 @@ static int reachable(struct commit *from, unsigned int with_flag, return (from->object.flags & assign_flag); } -static int ok_to_give_up(void) +/* + * Determine if every commit in 'from' can reach at least one commit + * that is marked with 'with_flag'. As we traverse, use 'assign_flag' + * as a marker for commits that are already visited. + */ +static int can_all_from_reach_with_flag(struct object_array *from, + unsigned int with_flag, + unsigned int assign_flag) { int i; - if (!have_obj.nr) - return 0; - - for (i = 0; i < want_obj.nr; i++) { - struct object *want = want_obj.objects[i].item; + for (i = 0; i < from->nr; i++) { + struct object *from_one = from->objects[i].item; - if (want->flags & COMMON_KNOWN) + if (from_one->flags & assign_flag) continue; - want = deref_tag(the_repository, want, "a want line", 0); - if (!want || want->type != OBJ_COMMIT) { + from_one = deref_tag(the_repository, from_one, "a from object", 0); + if (!from_one || from_one->type != OBJ_COMMIT) { /* no way to tell if this is reachable by * looking at the ancestry chain alone, so * leave a note to ourselves not to worry about * this object anymore. */ - want_obj.objects[i].item->flags |= COMMON_KNOWN; + from->objects[i].item->flags |= assign_flag; continue; } - if (!reachable((struct commit *)want, THEY_HAVE, COMMON_KNOWN)) + if (!reachable((struct commit *)from_one, with_flag, assign_flag)) return 0; } return 1; } +static int ok_to_give_up(void) +{ + if (!have_obj.nr) + return 0; + + return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, COMMON_KNOWN); +} + static int get_common_commits(void) { struct object_id oid; From a71ec7faae049dfadbe3d98e59d827d17165faa6 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 16:47:52 -0400 Subject: [PATCH 07/18] upload-pack: generalize commit date cutoff The ok_to_give_up() method uses the commit date as a cutoff to avoid walking the entire reachble set of commits. Before moving the reachable() method to commit-reach.c, pull out the dependence on the global constant 'oldest_have' with a 'min_commit_date' parameter. Signed-off-by: Derrick Stolee --- upload-pack.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/upload-pack.c b/upload-pack.c index 9fe19003c6b7f5..427de461d85a31 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -337,7 +337,7 @@ static int got_oid(const char *hex, struct object_id *oid) } static int reachable(struct commit *from, unsigned int with_flag, - unsigned int assign_flag) + unsigned int assign_flag, time_t min_commit_date) { struct prio_queue work = { compare_commits_by_commit_date }; @@ -355,7 +355,7 @@ static int reachable(struct commit *from, unsigned int with_flag, if (commit->object.flags & REACHABLE) continue; commit->object.flags |= REACHABLE; - if (commit->date < oldest_have) + if (commit->date < min_commit_date) continue; for (list = commit->parents; list; list = list->next) { struct commit *parent = list->item; @@ -372,11 +372,13 @@ static int reachable(struct commit *from, unsigned int with_flag, /* * Determine if every commit in 'from' can reach at least one commit * that is marked with 'with_flag'. As we traverse, use 'assign_flag' - * as a marker for commits that are already visited. + * as a marker for commits that are already visited. Do not walk + * commits with date below 'min_commit_date'. */ static int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, - unsigned int assign_flag) + unsigned int assign_flag, + time_t min_commit_date) { int i; @@ -395,7 +397,8 @@ static int can_all_from_reach_with_flag(struct object_array *from, from->objects[i].item->flags |= assign_flag; continue; } - if (!reachable((struct commit *)from_one, with_flag, assign_flag)) + if (!reachable((struct commit *)from_one, with_flag, assign_flag, + min_commit_date)) return 0; } return 1; @@ -406,7 +409,8 @@ static int ok_to_give_up(void) if (!have_obj.nr) return 0; - return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, COMMON_KNOWN); + return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, + COMMON_KNOWN, oldest_have); } static int get_common_commits(void) From aa96c807b569dd68db78d132d3b031d38be347e8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 12 Jul 2018 16:52:23 -0400 Subject: [PATCH 08/18] commit-reach: move can_all_from_reach_with_flags There are several commit walks in the codebase. Group them together into a new commit-reach.c file and corresponding header. After we group these walks into one place, we can reduce duplicate logic by calling equivalent methods. The can_all_from_reach_with_flags method is used in a stateful way by upload-pack.c. The parameters are very flexible, so we will be able to use its commit walking logic for many other callers. Signed-off-by: Derrick Stolee --- commit-reach.c | 63 +++++++++++++++++++++++++++++++++++++++++++++ commit-reach.h | 14 ++++++++++ object.h | 4 +-- upload-pack.c | 70 +------------------------------------------------- 4 files changed, 80 insertions(+), 71 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index 01d796f0116163..d806291d5d88aa 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -10,6 +10,7 @@ #include "commit-reach.h" /* Remember to update object flag allocation in object.h */ +#define REACHABLE (1u<<15) #define PARENT1 (1u<<16) #define PARENT2 (1u<<17) #define STALE (1u<<18) @@ -532,3 +533,65 @@ int commit_contains(struct ref_filter *filter, struct commit *commit, return contains_tag_algo(commit, list, cache) == CONTAINS_YES; return is_descendant_of(commit, list); } + +int reachable(struct commit *from, unsigned int with_flag, + unsigned int assign_flag, time_t min_commit_date) +{ + struct prio_queue work = { compare_commits_by_commit_date }; + + prio_queue_put(&work, from); + while (work.nr) { + struct commit_list *list; + struct commit *commit = prio_queue_get(&work); + + if (commit->object.flags & with_flag) { + from->object.flags |= assign_flag; + break; + } + if (!commit->object.parsed) + parse_object(the_repository, &commit->object.oid); + if (commit->object.flags & REACHABLE) + continue; + commit->object.flags |= REACHABLE; + if (commit->date < min_commit_date) + continue; + for (list = commit->parents; list; list = list->next) { + struct commit *parent = list->item; + if (!(parent->object.flags & REACHABLE)) + prio_queue_put(&work, parent); + } + } + from->object.flags |= REACHABLE; + clear_commit_marks(from, REACHABLE); + clear_prio_queue(&work); + return (from->object.flags & assign_flag); +} + +int can_all_from_reach_with_flag(struct object_array *from, + unsigned int with_flag, + unsigned int assign_flag, + time_t min_commit_date) +{ + int i; + + for (i = 0; i < from->nr; i++) { + struct object *from_one = from->objects[i].item; + + if (from_one->flags & assign_flag) + continue; + from_one = deref_tag(the_repository, from_one, "a from object", 0); + if (!from_one || from_one->type != OBJ_COMMIT) { + /* no way to tell if this is reachable by + * looking at the ancestry chain alone, so + * leave a note to ourselves not to worry about + * this object anymore. + */ + from->objects[i].item->flags |= assign_flag; + continue; + } + if (!reachable((struct commit *)from_one, with_flag, assign_flag, + min_commit_date)) + return 0; + } + return 1; +} diff --git a/commit-reach.h b/commit-reach.h index 13dec25cee40fa..b28bc22fcd4b42 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -59,4 +59,18 @@ define_commit_slab(contains_cache, enum contains_result); int commit_contains(struct ref_filter *filter, struct commit *commit, struct commit_list *list, struct contains_cache *cache); +int reachable(struct commit *from, unsigned int with_flag, + unsigned int assign_flag, time_t min_commit_date); + +/* + * Determine if every commit in 'from' can reach at least one commit + * that is marked with 'with_flag'. As we traverse, use 'assign_flag' + * as a marker for commits that are already visited. Do not walk + * commits with date below 'min_commit_date'. + */ +int can_all_from_reach_with_flag(struct object_array *from, + unsigned int with_flag, + unsigned int assign_flag, + time_t min_commit_date); + #endif diff --git a/object.h b/object.h index 18c2b073e34fa8..b132944c51d968 100644 --- a/object.h +++ b/object.h @@ -60,12 +60,12 @@ struct object_array { * revision.h: 0---------10 26 * fetch-pack.c: 0----5 * walker.c: 0-2 - * upload-pack.c: 4 11----------------19 + * upload-pack.c: 4 11-----14 16-----19 * builtin/blame.c: 12-13 * bisect.c: 16 * bundle.c: 16 * http-push.c: 16-----19 - * commit-reach.c: 16-----19 + * commit-reach.c: 15-------19 * sha1-name.c: 20 * list-objects-filter.c: 21 * builtin/fsck.c: 0--3 diff --git a/upload-pack.c b/upload-pack.c index 427de461d85a31..11c426685d2f2b 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -24,13 +24,13 @@ #include "quote.h" #include "upload-pack.h" #include "serve.h" +#include "commit-reach.h" /* Remember to update object flag allocation in object.h */ #define THEY_HAVE (1u << 11) #define OUR_REF (1u << 12) #define WANTED (1u << 13) #define COMMON_KNOWN (1u << 14) -#define REACHABLE (1u << 15) #define SHALLOW (1u << 16) #define NOT_SHALLOW (1u << 17) @@ -336,74 +336,6 @@ static int got_oid(const char *hex, struct object_id *oid) return 0; } -static int reachable(struct commit *from, unsigned int with_flag, - unsigned int assign_flag, time_t min_commit_date) -{ - struct prio_queue work = { compare_commits_by_commit_date }; - - prio_queue_put(&work, from); - while (work.nr) { - struct commit_list *list; - struct commit *commit = prio_queue_get(&work); - - if (commit->object.flags & with_flag) { - from->object.flags |= assign_flag; - break; - } - if (!commit->object.parsed) - parse_object(the_repository, &commit->object.oid); - if (commit->object.flags & REACHABLE) - continue; - commit->object.flags |= REACHABLE; - if (commit->date < min_commit_date) - continue; - for (list = commit->parents; list; list = list->next) { - struct commit *parent = list->item; - if (!(parent->object.flags & REACHABLE)) - prio_queue_put(&work, parent); - } - } - from->object.flags |= REACHABLE; - clear_commit_marks(from, REACHABLE); - clear_prio_queue(&work); - return (from->object.flags & assign_flag); -} - -/* - * Determine if every commit in 'from' can reach at least one commit - * that is marked with 'with_flag'. As we traverse, use 'assign_flag' - * as a marker for commits that are already visited. Do not walk - * commits with date below 'min_commit_date'. - */ -static int can_all_from_reach_with_flag(struct object_array *from, - unsigned int with_flag, - unsigned int assign_flag, - time_t min_commit_date) -{ - int i; - - for (i = 0; i < from->nr; i++) { - struct object *from_one = from->objects[i].item; - - if (from_one->flags & assign_flag) - continue; - from_one = deref_tag(the_repository, from_one, "a from object", 0); - if (!from_one || from_one->type != OBJ_COMMIT) { - /* no way to tell if this is reachable by - * looking at the ancestry chain alone, so - * leave a note to ourselves not to worry about - * this object anymore. - */ - from->objects[i].item->flags |= assign_flag; - continue; - } - if (!reachable((struct commit *)from_one, with_flag, assign_flag, - min_commit_date)) - return 0; - } - return 1; -} - static int ok_to_give_up(void) { if (!have_obj.nr) From 2da4fe80b2d661341a689a16f24772fa4df0fedf Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 10:06:29 -0400 Subject: [PATCH 09/18] test-reach: create new test tool for ref_newer As we prepare to change the behavior of the algorithms in commit-reach.c, create a new test-tool subcommand 'reach' to test these methods on interesting commit-graph shapes. To use the new test-tool, use 'test-tool reach ' and provide input to stdin that describes the inputs to the method. Currently, we only implement the ref_newer method, which requires two commits. Use lines "A:" and "B:" for the two inputs. We will expand this input later to accommodate methods that take lists of commits. The test t6600-test-reach.sh creates a repo whose commits form a two-dimensional grid. This grid makes it easy for us to determine reachability because commit-A-B can reach commit-X-Y if and only if A is at least X and B is at least Y. This helps create interesting test cases for each result of the methods in commit-reach.c. We test all methods in three different states of the commit-graph file: Non-existent (no generation numbers), fully computed, and mixed (some commits have generation numbers and others do not). Signed-off-by: Derrick Stolee --- Makefile | 1 + t/helper/test-reach.c | 63 +++++++++++++++++++++++++++++++ t/helper/test-tool.c | 1 + t/helper/test-tool.h | 1 + t/t6600-test-reach.sh | 86 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+) create mode 100644 t/helper/test-reach.c create mode 100755 t/t6600-test-reach.sh diff --git a/Makefile b/Makefile index 59781f4bc35945..d69f9d415d294e 100644 --- a/Makefile +++ b/Makefile @@ -716,6 +716,7 @@ TEST_BUILTINS_OBJS += test-mktemp.o TEST_BUILTINS_OBJS += test-online-cpus.o TEST_BUILTINS_OBJS += test-path-utils.o TEST_BUILTINS_OBJS += test-prio-queue.o +TEST_BUILTINS_OBJS += test-reach.o TEST_BUILTINS_OBJS += test-read-cache.o TEST_BUILTINS_OBJS += test-ref-store.o TEST_BUILTINS_OBJS += test-regex.o diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c new file mode 100644 index 00000000000000..620bb460414eb5 --- /dev/null +++ b/t/helper/test-reach.c @@ -0,0 +1,63 @@ +#include "test-tool.h" +#include "cache.h" +#include "commit.h" +#include "commit-reach.h" +#include "config.h" +#include "parse-options.h" +#include "tag.h" + +int cmd__reach(int ac, const char **av) +{ + struct object_id oid_A, oid_B; + struct strbuf buf = STRBUF_INIT; + struct repository *r = the_repository; + + setup_git_directory(); + + if (ac < 2) + exit(1); + + + while (strbuf_getline(&buf, stdin) != EOF) { + struct object_id oid; + struct object *o; + struct commit *c; + if (buf.len < 3) + continue; + + if (get_oid_committish(buf.buf + 2, &oid)) + die("failed to resolve %s", buf.buf + 2); + + o = parse_object(r, &oid); + o = deref_tag_noverify(o); + + if (!o) + die("failed to load commit for input %s resulting in oid %s\n", + buf.buf, oid_to_hex(&oid)); + + c = object_as_type(r, o, OBJ_COMMIT, 0); + + if (!c) + die("failed to load commit for input %s resulting in oid %s\n", + buf.buf, oid_to_hex(&oid)); + + switch (buf.buf[0]) { + case 'A': + oidcpy(&oid_A, &oid); + break; + + case 'B': + oidcpy(&oid_B, &oid); + break; + + default: + die("unexpected start of line: %c", buf.buf[0]); + } + } + strbuf_release(&buf); + + if (!strcmp(av[1], "ref_newer")) + printf("%s(A,B):%d\n", av[1], ref_newer(&oid_A, &oid_B)); + + exit(0); +} diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c index dafc91c240a440..582d02adfd93ef 100644 --- a/t/helper/test-tool.c +++ b/t/helper/test-tool.c @@ -26,6 +26,7 @@ static struct test_cmd cmds[] = { { "online-cpus", cmd__online_cpus }, { "path-utils", cmd__path_utils }, { "prio-queue", cmd__prio_queue }, + { "reach", cmd__reach }, { "read-cache", cmd__read_cache }, { "ref-store", cmd__ref_store }, { "regex", cmd__regex }, diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h index 80cbcf08575d5e..a7e53c420e1f44 100644 --- a/t/helper/test-tool.h +++ b/t/helper/test-tool.h @@ -20,6 +20,7 @@ int cmd__mktemp(int argc, const char **argv); int cmd__online_cpus(int argc, const char **argv); int cmd__path_utils(int argc, const char **argv); int cmd__prio_queue(int argc, const char **argv); +int cmd__reach(int argc, const char **argv); int cmd__read_cache(int argc, const char **argv); int cmd__ref_store(int argc, const char **argv); int cmd__regex(int argc, const char **argv); diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh new file mode 100755 index 00000000000000..966309c6cfb2ec --- /dev/null +++ b/t/t6600-test-reach.sh @@ -0,0 +1,86 @@ +#!/bin/sh + +test_description='basic commit reachability tests' + +. ./test-lib.sh + +# Construct a grid-like commit graph with points (x,y) +# with 1 <= x <= 10, 1 <= y <= 10, where (x,y) has +# parents (x-1, y) and (x, y-1), keeping in mind that +# we drop a parent if a coordinate is nonpositive. +# +# (10,10) +# / \ +# (10,9) (9,10) +# / \ / \ +# (10,8) (9,9) (8,10) +# / \ / \ / \ +# ( continued...) +# \ / \ / \ / +# (3,1) (2,2) (1,3) +# \ / \ / +# (2,1) (2,1) +# \ / +# (1,1) +# +# We use branch 'commit-x-y' to refer to (x,y). +# This grid allows interesting reachability and +# non-reachability queries: (x,y) can reach (x',y') +# if and only if x' <= x and y' <= y. +test_expect_success 'setup' ' + for i in $(test_seq 1 10) + do + test_commit "1-$i" && + git branch -f commit-1-$i + done && + for j in $(test_seq 1 9) + do + git reset --hard commit-$j-1 && + x=$(($j + 1)) && + test_commit "$x-1" && + git branch -f commit-$x-1 && + + for i in $(test_seq 2 10) + do + git merge commit-$j-$i -m "$x-$i" && + git branch -f commit-$x-$i + done + done && + git commit-graph write --reachable && + mv .git/objects/info/commit-graph commit-graph-full && + git show-ref -s commit-5-5 | git commit-graph write --stdin-commits && + mv .git/objects/info/commit-graph commit-graph-half && + git config core.commitGraph true +' + +test_three_modes () { + test_when_finished rm -rf .git/objects/info/commit-graph && + test-tool reach $1 actual && + test_cmp expect actual && + cp commit-graph-full .git/objects/info/commit-graph && + test-tool reach $1 actual && + test_cmp expect actual && + cp commit-graph-half .git/objects/info/commit-graph && + test-tool reach $1 actual && + test_cmp expect actual +} + +test_expect_success 'ref_newer:miss' ' + cat >input <<-\EOF && + A:commit-5-7 + B:commit-4-9 + EOF + echo "ref_newer(A,B):0" >expect && + test_three_modes ref_newer +' + +test_expect_success 'ref_newer:hit' ' + cat >input <<-\EOF && + A:commit-5-7 + B:commit-2-3 + EOF + echo "ref_newer(A,B):1" >expect && + test_three_modes ref_newer +' + +test_done From 10bda4970fc7c6f012061d8d2466266ab89866fe Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 10:28:05 -0400 Subject: [PATCH 10/18] test-reach: test in_merge_bases Signed-off-by: Derrick Stolee --- t/helper/test-reach.c | 6 ++++++ t/t6600-test-reach.sh | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 620bb460414eb5..f93ad5084d5754 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -9,6 +9,7 @@ int cmd__reach(int ac, const char **av) { struct object_id oid_A, oid_B; + struct commit *A, *B; struct strbuf buf = STRBUF_INIT; struct repository *r = the_repository; @@ -17,6 +18,7 @@ int cmd__reach(int ac, const char **av) if (ac < 2) exit(1); + A = B = NULL; while (strbuf_getline(&buf, stdin) != EOF) { struct object_id oid; @@ -44,10 +46,12 @@ int cmd__reach(int ac, const char **av) switch (buf.buf[0]) { case 'A': oidcpy(&oid_A, &oid); + A = c; break; case 'B': oidcpy(&oid_B, &oid); + B = c; break; default: @@ -58,6 +62,8 @@ int cmd__reach(int ac, const char **av) if (!strcmp(av[1], "ref_newer")) printf("%s(A,B):%d\n", av[1], ref_newer(&oid_A, &oid_B)); + else if (!strcmp(av[1], "in_merge_bases")) + printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B)); exit(0); } diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index 966309c6cfb2ec..5cd6b14c69854b 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -83,4 +83,22 @@ test_expect_success 'ref_newer:hit' ' test_three_modes ref_newer ' +test_expect_success 'in_merge_bases:hit' ' + cat >input <<-\EOF && + A:commit-5-7 + B:commit-8-8 + EOF + echo "in_merge_bases(A,B):1" >expect && + test_three_modes in_merge_bases +' + +test_expect_success 'in_merge_bases:miss' ' + cat >input <<-\EOF && + A:commit-6-8 + B:commit-5-9 + EOF + echo "in_merge_bases(A,B):0" >expect && + test_three_modes in_merge_bases +' + test_done From 9ab9511bcfafd8679012b3dc7f7ffef743bf51b3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 10:38:44 -0400 Subject: [PATCH 11/18] test-reach: test is_descendant_of The is_descendant_of method takes a single commit as its first parameter and a list of commits as its second parameter. Extend the input of the 'test-tool reach' command to take multiple lines of the form "X:" to construct a list of commits. Pass these to is_descendant_of and create tests that check each result. Signed-off-by: Derrick Stolee --- t/helper/test-reach.c | 8 ++++++++ t/t6600-test-reach.sh | 22 ++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index f93ad5084d5754..dccbd48178416d 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -10,6 +10,7 @@ int cmd__reach(int ac, const char **av) { struct object_id oid_A, oid_B; struct commit *A, *B; + struct commit_list *X; struct strbuf buf = STRBUF_INIT; struct repository *r = the_repository; @@ -19,6 +20,7 @@ int cmd__reach(int ac, const char **av) exit(1); A = B = NULL; + X = NULL; while (strbuf_getline(&buf, stdin) != EOF) { struct object_id oid; @@ -54,6 +56,10 @@ int cmd__reach(int ac, const char **av) B = c; break; + case 'X': + commit_list_insert(c, &X); + break; + default: die("unexpected start of line: %c", buf.buf[0]); } @@ -64,6 +70,8 @@ int cmd__reach(int ac, const char **av) printf("%s(A,B):%d\n", av[1], ref_newer(&oid_A, &oid_B)); else if (!strcmp(av[1], "in_merge_bases")) printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B)); + else if (!strcmp(av[1], "is_descendant_of")) + printf("%s(A,X):%d\n", av[1], is_descendant_of(A, X)); exit(0); } diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index 5cd6b14c69854b..98bcb17960ee5c 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -101,4 +101,26 @@ test_expect_success 'in_merge_bases:miss' ' test_three_modes in_merge_bases ' +test_expect_success 'is_descendant_of:hit' ' + cat >input <<-\EOF && + A:commit-5-7 + X:commit-4-8 + X:commit-6-6 + X:commit-1-1 + EOF + echo "is_descendant_of(A,X):1" >expect && + test_three_modes is_descendant_of +' + +test_expect_success 'is_descendant_of:miss' ' + cat >input <<-\EOF && + A:commit-6-8 + X:commit-5-9 + X:commit-4-10 + X:commit-7-6 + EOF + echo "is_descendant_of(A,X):0" >expect && + test_three_modes is_descendant_of +' + test_done From d3dfbfcf87d3934076d729d56955d078cb2aac15 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 10:51:05 -0400 Subject: [PATCH 12/18] test-reach: test get_merge_bases_many The get_merge_bases_many method returns a list of merge bases for a single commit (A) against a list of commits (X). Some care is needed in constructing the expected behavior because the result is not the expected merge-base for an octopus merge with those parents but instead the set of maximal commits that are reachable from A and at least one of the commits in X. Add get_merge_bases_many to 'test-tool reach' and create a test that demonstrates that this output returns multiple results. Specifically, we select a list of three commits such that we output two commits that are reachable from one of the first two, respectively, and none are reachable from the third. Signed-off-by: Derrick Stolee --- t/helper/test-reach.c | 31 +++++++++++++++++++++++++++++++ t/t6600-test-reach.sh | 15 +++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index dccbd48178416d..4df01187c91720 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -4,13 +4,34 @@ #include "commit-reach.h" #include "config.h" #include "parse-options.h" +#include "string-list.h" #include "tag.h" +static void print_sorted_commit_ids(struct commit_list *list) +{ + int i; + struct string_list s = STRING_LIST_INIT_DUP; + + while (list) { + string_list_append(&s, oid_to_hex(&list->item->object.oid)); + list = list->next; + } + + string_list_sort(&s); + + for (i = 0; i < s.nr; i++) + printf("%s\n", s.items[i].string); + + string_list_clear(&s, 0); +} + int cmd__reach(int ac, const char **av) { struct object_id oid_A, oid_B; struct commit *A, *B; struct commit_list *X; + struct commit **X_array; + int X_nr, X_alloc; struct strbuf buf = STRBUF_INIT; struct repository *r = the_repository; @@ -21,6 +42,9 @@ int cmd__reach(int ac, const char **av) A = B = NULL; X = NULL; + X_nr = 0; + X_alloc = 16; + ALLOC_ARRAY(X_array, X_alloc); while (strbuf_getline(&buf, stdin) != EOF) { struct object_id oid; @@ -58,6 +82,8 @@ int cmd__reach(int ac, const char **av) case 'X': commit_list_insert(c, &X); + ALLOC_GROW(X_array, X_nr + 1, X_alloc); + X_array[X_nr++] = c; break; default: @@ -72,6 +98,11 @@ int cmd__reach(int ac, const char **av) printf("%s(A,B):%d\n", av[1], in_merge_bases(A, B)); else if (!strcmp(av[1], "is_descendant_of")) printf("%s(A,X):%d\n", av[1], is_descendant_of(A, X)); + else if (!strcmp(av[1], "get_merge_bases_many")) { + struct commit_list *list = get_merge_bases_many(A, X_nr, X_array); + printf("%s(A,X):\n", av[1]); + print_sorted_commit_ids(list); + } exit(0); } diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index 98bcb17960ee5c..d43e1a61d55f0e 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -123,4 +123,19 @@ test_expect_success 'is_descendant_of:miss' ' test_three_modes is_descendant_of ' +test_expect_success 'get_merge_bases_many' ' + cat >input <<-\EOF && + A:commit-5-7 + X:commit-4-8 + X:commit-6-6 + X:commit-8-3 + EOF + { + echo "get_merge_bases_many(A,X):" && + git rev-parse commit-5-6 \ + commit-4-7 | sort + } >expect && + test_three_modes get_merge_bases_many +' + test_done From 91a6297aa1992f207ba9dfc2ed5af0e0b216e858 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 12:51:23 -0400 Subject: [PATCH 13/18] test-reach: test reduce_heads Signed-off-by: Derrick Stolee --- t/helper/test-reach.c | 4 ++++ t/t6600-test-reach.sh | 22 ++++++++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index 4df01187c91720..e32e193b70e1da 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -102,6 +102,10 @@ int cmd__reach(int ac, const char **av) struct commit_list *list = get_merge_bases_many(A, X_nr, X_array); printf("%s(A,X):\n", av[1]); print_sorted_commit_ids(list); + } else if (!strcmp(av[1], "reduce_heads")) { + struct commit_list *list = reduce_heads(X); + printf("%s(X):\n", av[1]); + print_sorted_commit_ids(list); } exit(0); diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index d43e1a61d55f0e..17c64679888913 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -138,4 +138,26 @@ test_expect_success 'get_merge_bases_many' ' test_three_modes get_merge_bases_many ' +test_expect_success 'reduce_heads' ' + cat >input <<-\EOF && + X:commit-1-10 + X:commit-2-8 + X:commit-3-6 + X:commit-4-4 + X:commit-1-7 + X:commit-2-5 + X:commit-3-3 + X:commit-5-1 + EOF + { + echo "reduce_heads(X):" && + git rev-parse commit-5-1 \ + commit-4-4 \ + commit-3-6 \ + commit-2-8 \ + commit-1-10 | sort + } >expect && + test_three_modes reduce_heads +' + test_done From 0557fed19de7d5cad556ca92a92dcb1de85a9911 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 13:22:57 -0400 Subject: [PATCH 14/18] test-reach: test can_all_from_reach_with_flags The can_all_from_reach_with_flags method is used by ok_to_give_up in upload-pack.c to see if we have done enough negotiation during a fetch. This method is intentionally created to preserve state between calls to assist with stateful negotiation, such as over SSH. To make this method testable, add a new can_all_from_reach method that does the initial setup and final tear-down. We will later use this method in production code. Call the method from 'test-tool reach' for now. Since this is a many-to-many reachability query, add a new type of input to the 'test-tool reach' input format. Lines "Y:" create a list of commits to be the reachability targets from the commits in the 'X' list. In the context of fetch negotiation, the 'X' commits are the 'want' commits and the 'Y' commits are the 'have' commits. Signed-off-by: Derrick Stolee --- commit-reach.c | 47 +++++++++++++++++++++++++++++++++++++++++++ commit-reach.h | 2 ++ t/helper/test-reach.c | 10 +++++++-- t/t6600-test-reach.sh | 45 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 102 insertions(+), 2 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index d806291d5d88aa..940fbf2e177c14 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -595,3 +595,50 @@ int can_all_from_reach_with_flag(struct object_array *from, } return 1; } + +int can_all_from_reach(struct commit_list *from, struct commit_list *to, + int cutoff_by_min_date) +{ + struct object_array from_objs = OBJECT_ARRAY_INIT; + time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0; + struct commit_list *from_iter = from, *to_iter = to; + int result; + + while (from_iter) { + add_object_array(&from_iter->item->object, NULL, &from_objs); + + if (!parse_commit(from_iter->item)) { + if (from_iter->item->date < min_commit_date) + min_commit_date = from_iter->item->date; + } + + from_iter = from_iter->next; + } + + while (to_iter) { + if (!parse_commit(to_iter->item)) { + if (to_iter->item->date < min_commit_date) + min_commit_date = to_iter->item->date; + } + + to_iter->item->object.flags |= PARENT2; + + to_iter = to_iter->next; + } + + result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1, + min_commit_date); + + while (from) { + clear_commit_marks(from->item, PARENT1); + from = from->next; + } + + while (to) { + clear_commit_marks(to->item, PARENT2); + to = to->next; + } + + object_array_clear(&from_objs); + return result; +} diff --git a/commit-reach.h b/commit-reach.h index b28bc22fcd4b42..aa202c97038e76 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -72,5 +72,7 @@ int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, time_t min_commit_date); +int can_all_from_reach(struct commit_list *from, struct commit_list *to, + int commit_date_cutoff); #endif diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index e32e193b70e1da..c79729cac02102 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -29,7 +29,7 @@ int cmd__reach(int ac, const char **av) { struct object_id oid_A, oid_B; struct commit *A, *B; - struct commit_list *X; + struct commit_list *X, *Y; struct commit **X_array; int X_nr, X_alloc; struct strbuf buf = STRBUF_INIT; @@ -41,7 +41,7 @@ int cmd__reach(int ac, const char **av) exit(1); A = B = NULL; - X = NULL; + X = Y = NULL; X_nr = 0; X_alloc = 16; ALLOC_ARRAY(X_array, X_alloc); @@ -86,6 +86,10 @@ int cmd__reach(int ac, const char **av) X_array[X_nr++] = c; break; + case 'Y': + commit_list_insert(c, &Y); + break; + default: die("unexpected start of line: %c", buf.buf[0]); } @@ -106,6 +110,8 @@ int cmd__reach(int ac, const char **av) struct commit_list *list = reduce_heads(X); printf("%s(X):\n", av[1]); print_sorted_commit_ids(list); + } else if (!strcmp(av[1], "can_all_from_reach")) { + printf("%s(X,Y):%d\n", av[1], can_all_from_reach(X, Y, 1)); } exit(0); diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index 17c64679888913..e41eb397a77a8f 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -160,4 +160,49 @@ test_expect_success 'reduce_heads' ' test_three_modes reduce_heads ' +test_expect_success 'can_all_from_reach:hit' ' + cat >input <<-\EOF && + X:commit-2-10 + X:commit-3-9 + X:commit-4-8 + X:commit-5-7 + X:commit-6-6 + X:commit-7-5 + X:commit-8-4 + X:commit-9-3 + Y:commit-1-9 + Y:commit-2-8 + Y:commit-3-7 + Y:commit-4-6 + Y:commit-5-5 + Y:commit-6-4 + Y:commit-7-3 + Y:commit-8-1 + EOF + echo "can_all_from_reach(X,Y):1" >expect && + test_three_modes can_all_from_reach +' + +test_expect_success 'can_all_from_reach:miss' ' + cat >input <<-\EOF && + X:commit-2-10 + X:commit-3-9 + X:commit-4-8 + X:commit-5-7 + X:commit-6-6 + X:commit-7-5 + X:commit-8-4 + X:commit-9-3 + Y:commit-1-9 + Y:commit-2-8 + Y:commit-3-7 + Y:commit-4-6 + Y:commit-5-5 + Y:commit-6-4 + Y:commit-8-5 + EOF + echo "can_all_from_reach(X,Y):0" >expect && + test_three_modes can_all_from_reach +' + test_done From 02d03862658a65d8d490ccdb94db4902aab199c7 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Wed, 18 Jul 2018 14:40:37 -0400 Subject: [PATCH 15/18] test-reach: test commit_contains The commit_contains method has two modes which depend on the given ref_filter struct. We have the "normal" algorithm (which is also the typically-slow operation) and the "tag" algorithm. This difference is essentially what changes performance for 'git branch --contains' versus 'git tag --contains'. There are thoughts that the data shapes used by these two applications justify the different implementations. Create tests using 'test-tool reach commit_contains [--tag]' to cover both methods. Signed-off-by: Derrick Stolee --- t/helper/test-reach.c | 12 ++++++++++++ t/t6600-test-reach.sh | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/t/helper/test-reach.c b/t/helper/test-reach.c index c79729cac02102..eb2110399829d3 100644 --- a/t/helper/test-reach.c +++ b/t/helper/test-reach.c @@ -4,6 +4,7 @@ #include "commit-reach.h" #include "config.h" #include "parse-options.h" +#include "ref-filter.h" #include "string-list.h" #include "tag.h" @@ -112,6 +113,17 @@ int cmd__reach(int ac, const char **av) print_sorted_commit_ids(list); } else if (!strcmp(av[1], "can_all_from_reach")) { printf("%s(X,Y):%d\n", av[1], can_all_from_reach(X, Y, 1)); + } else if (!strcmp(av[1], "commit_contains")) { + struct ref_filter filter; + struct contains_cache cache; + init_contains_cache(&cache); + + if (ac > 2 && !strcmp(av[2], "--tag")) + filter.with_commit_tag_algo = 1; + else + filter.with_commit_tag_algo = 0; + + printf("%s(_,A,X,_):%d\n", av[1], commit_contains(&filter, A, X, &cache)); } exit(0); diff --git a/t/t6600-test-reach.sh b/t/t6600-test-reach.sh index e41eb397a77a8f..d139a00d1dd2bb 100755 --- a/t/t6600-test-reach.sh +++ b/t/t6600-test-reach.sh @@ -205,4 +205,38 @@ test_expect_success 'can_all_from_reach:miss' ' test_three_modes can_all_from_reach ' +test_expect_success 'commit_contains:hit' ' + cat >input <<-\EOF && + A:commit-7-7 + X:commit-2-10 + X:commit-3-9 + X:commit-4-8 + X:commit-5-7 + X:commit-6-6 + X:commit-7-5 + X:commit-8-4 + X:commit-9-3 + EOF + echo "commit_contains(_,A,X,_):1" >expect && + test_three_modes commit_contains && + test_three_modes commit_contains --tag +' + +test_expect_success 'commit_contains:miss' ' + cat >input <<-\EOF && + A:commit-6-5 + X:commit-2-10 + X:commit-3-9 + X:commit-4-8 + X:commit-5-7 + X:commit-6-6 + X:commit-7-5 + X:commit-8-4 + X:commit-9-3 + EOF + echo "commit_contains(_,A,X,_):0" >expect && + test_three_modes commit_contains && + test_three_modes commit_contains --tag +' + test_done From 3b4404433b0310fe9d869e60348e0c0b030b1e57 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 14:37:32 -0400 Subject: [PATCH 16/18] commit-reach: replace ref_newer logic The ref_newer method is used by 'git push' to check if a force-push is required. This method does not use any kind of cutoff when walking, so in the case of a force-push will walk all reachable commits. The is_descendant_of method already uses paint_down_to_common along with cutoffs. By translating the ref_newer arguments into the commit and commit_list required by is_descendant_of, we can have one fewer commit walk and also improve our performance! For a copy of the Linux repository, 'test-tool reach ref_newer' presents the following improvements with the specified input. In the case that ref_newer returns 1, there is no improvement. The improvement is in the second case where ref_newer returns 0. Input: A:v4.9 B:v3.19 Before: 0.09 s After: 0.09 s To test the negative case, add a new commit with parent v3.19, regenerate the commit-graph, and then run with B pointing at that commit. Before: 0.43 s After: 0.09 s Signed-off-by: Derrick Stolee --- commit-reach.c | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index 940fbf2e177c14..f5858944fda9fa 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -366,20 +366,11 @@ void reduce_heads_replace(struct commit_list **heads) *heads = result; } -static void unmark_and_free(struct commit_list *list, unsigned int mark) -{ - while (list) { - struct commit *commit = pop_commit(&list); - commit->object.flags &= ~mark; - } -} - int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) { struct object *o; struct commit *old_commit, *new_commit; - struct commit_list *list, *used; - int found = 0; + struct commit_list *old_commit_list = NULL; /* * Both new_commit and old_commit must be commit-ish and new_commit is descendant of @@ -400,19 +391,8 @@ int ref_newer(const struct object_id *new_oid, const struct object_id *old_oid) if (parse_commit(new_commit) < 0) return 0; - used = list = NULL; - commit_list_insert(new_commit, &list); - while (list) { - new_commit = pop_most_recent_commit(&list, TMP_MARK); - commit_list_insert(new_commit, &used); - if (new_commit == old_commit) { - found = 1; - break; - } - } - unmark_and_free(list, TMP_MARK); - unmark_and_free(used, TMP_MARK); - return found; + commit_list_insert(old_commit, &old_commit_list); + return is_descendant_of(new_commit, old_commit_list); } /* From ae6fc62e865fb41ab073fd199f4ab702e9433314 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Thu, 28 Jun 2018 08:31:05 -0400 Subject: [PATCH 17/18] commit-reach: make can_all_from_reach... linear The can_all_from_reach_with_flags() algorithm is currently quadratic in the worst case, because it calls the reachable() method for every 'from' without tracking which commits have already been walked or which can already reach a commit in 'to'. Rewrite the algorithm to walk each commit a constant number of times. We also add some optimizations that should work for the main consumer of this method: fetch negotitation (haves/wants). The first step includes using a depth-first-search (DFS) from each 'from' commit, sorted by ascending generation number. We do not walk beyond the minimum generation number or the minimum commit date. This DFS is likely to be faster than the existing reachable() method because we expect previous ref values to be along the first-parent history. If we find a target commit, then we mark everything in the DFS stack as a RESULT. This expands the set of targets for the other 'from' commits. We also mark the visited commits using 'assign_flag' to prevent re- walking the same commits. We still need to clear our flags at the end, which is why we will have a total of three visits to each commit. Performance was measured on the Linux repository using 'test-tool reach can_all_from_reach'. The input included rows seeded by tag values. The "small" case included X-rows as v4.[0-9]* and Y-rows as v3.[0-9]*. This mimics a (very large) fetch that says "I have all major v3 releases and want all major v4 releases." The "large" case included X-rows as "v4.*" and Y-rows as "v3.*". This adds all release-candidate tags to the set, which does not greatly increase the number of objects that are considered, but does increase the number of 'from' commits, demonstrating the quadratic nature of the previous code. Small Case: Before: 1.52 s After: 0.26 s Large Case: Before: 3.50 s After: 0.27 s Note how the time increases between the two cases in the two versions. The new code increases relative to the number of commits that need to be walked, but not directly relative to the number of 'from' commits. Signed-off-by: Derrick Stolee --- commit-reach.c | 122 ++++++++++++++++++++++++++++++------------------- commit-reach.h | 9 ++-- upload-pack.c | 5 +- 3 files changed, 83 insertions(+), 53 deletions(-) diff --git a/commit-reach.c b/commit-reach.c index f5858944fda9fa..bc522d6840f3c9 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -514,66 +514,87 @@ int commit_contains(struct ref_filter *filter, struct commit *commit, return is_descendant_of(commit, list); } -int reachable(struct commit *from, unsigned int with_flag, - unsigned int assign_flag, time_t min_commit_date) +static int compare_commits_by_gen(const void *_a, const void *_b) { - struct prio_queue work = { compare_commits_by_commit_date }; + const struct commit *a = (const struct commit *)_a; + const struct commit *b = (const struct commit *)_b; - prio_queue_put(&work, from); - while (work.nr) { - struct commit_list *list; - struct commit *commit = prio_queue_get(&work); - - if (commit->object.flags & with_flag) { - from->object.flags |= assign_flag; - break; - } - if (!commit->object.parsed) - parse_object(the_repository, &commit->object.oid); - if (commit->object.flags & REACHABLE) - continue; - commit->object.flags |= REACHABLE; - if (commit->date < min_commit_date) - continue; - for (list = commit->parents; list; list = list->next) { - struct commit *parent = list->item; - if (!(parent->object.flags & REACHABLE)) - prio_queue_put(&work, parent); - } - } - from->object.flags |= REACHABLE; - clear_commit_marks(from, REACHABLE); - clear_prio_queue(&work); - return (from->object.flags & assign_flag); + if (a->generation < b->generation) + return -1; + if (a->generation > b->generation) + return 1; + return 0; } int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, - time_t min_commit_date) + time_t min_commit_date, + uint32_t min_generation) { + struct commit **list = NULL; int i; + int result = 1; + ALLOC_ARRAY(list, from->nr); for (i = 0; i < from->nr; i++) { - struct object *from_one = from->objects[i].item; + list[i] = (struct commit *)from->objects[i].item; - if (from_one->flags & assign_flag) - continue; - from_one = deref_tag(the_repository, from_one, "a from object", 0); - if (!from_one || from_one->type != OBJ_COMMIT) { - /* no way to tell if this is reachable by - * looking at the ancestry chain alone, so - * leave a note to ourselves not to worry about - * this object anymore. - */ - from->objects[i].item->flags |= assign_flag; - continue; - } - if (!reachable((struct commit *)from_one, with_flag, assign_flag, - min_commit_date)) + if (parse_commit(list[i]) || + list[i]->generation < min_generation) return 0; } - return 1; + + QSORT(list, from->nr, compare_commits_by_gen); + + for (i = 0; i < from->nr; i++) { + /* DFS from list[i] */ + struct commit_list *stack = NULL; + + list[i]->object.flags |= assign_flag; + commit_list_insert(list[i], &stack); + + while (stack) { + struct commit_list *parent; + + if (stack->item->object.flags & with_flag) { + pop_commit(&stack); + continue; + } + + for (parent = stack->item->parents; parent; parent = parent->next) { + if (parent->item->object.flags & (with_flag | RESULT)) + stack->item->object.flags |= RESULT; + + if (!(parent->item->object.flags & assign_flag)) { + parent->item->object.flags |= assign_flag; + + if (parse_commit(parent->item) || + parent->item->date < min_commit_date || + parent->item->generation < min_generation) + continue; + + commit_list_insert(parent->item, &stack); + break; + } + } + + if (!parent) + pop_commit(&stack); + } + + if (!(list[i]->object.flags & (with_flag | RESULT))) { + result = 0; + goto cleanup; + } + } + +cleanup: + for (i = 0; i < from->nr; i++) { + clear_commit_marks(list[i], RESULT); + clear_commit_marks(list[i], assign_flag); + } + return result; } int can_all_from_reach(struct commit_list *from, struct commit_list *to, @@ -583,6 +604,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, time_t min_commit_date = cutoff_by_min_date ? from->item->date : 0; struct commit_list *from_iter = from, *to_iter = to; int result; + uint32_t min_generation = GENERATION_NUMBER_INFINITY; while (from_iter) { add_object_array(&from_iter->item->object, NULL, &from_objs); @@ -590,6 +612,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, if (!parse_commit(from_iter->item)) { if (from_iter->item->date < min_commit_date) min_commit_date = from_iter->item->date; + + if (from_iter->item->generation < min_generation) + min_generation = from_iter->item->generation; } from_iter = from_iter->next; @@ -599,6 +624,9 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, if (!parse_commit(to_iter->item)) { if (to_iter->item->date < min_commit_date) min_commit_date = to_iter->item->date; + + if (to_iter->item->generation < min_generation) + min_generation = to_iter->item->generation; } to_iter->item->object.flags |= PARENT2; @@ -607,7 +635,7 @@ int can_all_from_reach(struct commit_list *from, struct commit_list *to, } result = can_all_from_reach_with_flag(&from_objs, PARENT2, PARENT1, - min_commit_date); + min_commit_date, min_generation); while (from) { clear_commit_marks(from->item, PARENT1); diff --git a/commit-reach.h b/commit-reach.h index aa202c97038e76..7d313e2975a28e 100644 --- a/commit-reach.h +++ b/commit-reach.h @@ -59,19 +59,18 @@ define_commit_slab(contains_cache, enum contains_result); int commit_contains(struct ref_filter *filter, struct commit *commit, struct commit_list *list, struct contains_cache *cache); -int reachable(struct commit *from, unsigned int with_flag, - unsigned int assign_flag, time_t min_commit_date); - /* * Determine if every commit in 'from' can reach at least one commit * that is marked with 'with_flag'. As we traverse, use 'assign_flag' * as a marker for commits that are already visited. Do not walk - * commits with date below 'min_commit_date'. + * commits with date below 'min_commit_date' or generation below + * 'min_generation'. */ int can_all_from_reach_with_flag(struct object_array *from, unsigned int with_flag, unsigned int assign_flag, - time_t min_commit_date); + time_t min_commit_date, + uint32_t min_generation); int can_all_from_reach(struct commit_list *from, struct commit_list *to, int commit_date_cutoff); diff --git a/upload-pack.c b/upload-pack.c index 11c426685d2f2b..1e498f1188c90e 100644 --- a/upload-pack.c +++ b/upload-pack.c @@ -338,11 +338,14 @@ static int got_oid(const char *hex, struct object_id *oid) static int ok_to_give_up(void) { + uint32_t min_generation = GENERATION_NUMBER_ZERO; + if (!have_obj.nr) return 0; return can_all_from_reach_with_flag(&want_obj, THEY_HAVE, - COMMON_KNOWN, oldest_have); + COMMON_KNOWN, oldest_have, + min_generation); } static int get_common_commits(void) From f40f6f482075ffbe10881d1d015f95c4bcc10ce3 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 13 Jul 2018 15:25:08 -0400 Subject: [PATCH 18/18] commit-reach: use can_all_from_reach The is_descendant_of method previously used in_merge_bases() to check if the commit can reach any of the commits in the provided list. This had two performance problems: 1. The performance is quadratic in worst-case. 2. A single in_merge_bases() call requires walking beyond the target commit in order to find the full set of boundary commits that may be merge-bases. The can_all_from_reach method avoids this quadratic behavior and can limit the search beyond the target commits using generation numbers. It requires a small prototype adjustment to stop using commit-date as a cutoff, as that optimization is no longer appropriate here. Since in_merge_bases() uses paint_down_to_common(), is_descendant_of() naturally found cutoffs to avoid walking the entire commit graph. Since we want to always return the correct result, we cannot use the min_commit_date cutoff in can_all_from_reach. We then rely on generation numbers to provide the cutoff. Since not all repos will have a commit-graph file, nor will we always have generation numbers computed for a commit-graph file, create a new method, generation_numbers_enabled(), that checks for a commit-graph file and sees if the first commit in the file has a non-zero generation number. In the case that we do not have generation numbers, use the old logic for is_descendant_of(). Performance was meausured on a copy of the Linux repository using the 'test-tool reach is_descendant_of' command using this input: A:v4.9 X:v4.10 X:v4.11 X:v4.12 X:v4.13 X:v4.14 X:v4.15 X:v4.16 X:v4.17 X.v3.0 Note that this input is tailored to demonstrate the quadratic nature of the previous method, as it will compute merge-bases for v4.9 versus all of the later versions before checking against v4.1. Before: 0.26 s After: 0.21 s Since we previously used the is_descendant_of method in the ref_newer method, we also measured performance there using 'test-tool reach ref_newer' with this input: A:v4.9 B:v3.19 Before: 0.10 s After: 0.08 s By adding a new commit with parent v3.19, we test the non-reachable case of ref_newer: Before: 0.09 s After: 0.08 s Signed-off-by: Derrick Stolee --- commit-graph.c | 18 ++++++++++++++++++ commit-graph.h | 6 ++++++ commit-reach.c | 24 +++++++++++++++++------- 3 files changed, 41 insertions(+), 7 deletions(-) diff --git a/commit-graph.c b/commit-graph.c index b0a55ad128fbca..e9786fa8640ec4 100644 --- a/commit-graph.c +++ b/commit-graph.c @@ -233,6 +233,24 @@ static int prepare_commit_graph(struct repository *r) return !!r->objects->commit_graph; } +int generation_numbers_enabled(struct repository *r) +{ + uint32_t first_generation; + struct commit_graph *g; + if (!prepare_commit_graph(r)) + return 0; + + g = r->objects->commit_graph; + + if (!g->num_commits) + return 0; + + first_generation = get_be32(g->chunk_commit_data + + g->hash_len + 8) >> 2; + + return !!first_generation; +} + static void close_commit_graph(void) { free_commit_graph(the_repository->objects->commit_graph); diff --git a/commit-graph.h b/commit-graph.h index 76e098934a7f67..0de8f88316fd46 100644 --- a/commit-graph.h +++ b/commit-graph.h @@ -51,6 +51,12 @@ struct commit_graph { struct commit_graph *load_commit_graph_one(const char *graph_file); +/* + * Return 1 if and only if the repository has a commit-graph + * file and generation numbers are computed in that file. + */ +int generation_numbers_enabled(struct repository *r); + void write_commit_graph_reachable(const char *obj_dir, int append); void write_commit_graph(const char *obj_dir, struct string_list *pack_indexes, diff --git a/commit-reach.c b/commit-reach.c index bc522d6840f3c9..c9965240323633 100644 --- a/commit-reach.c +++ b/commit-reach.c @@ -277,15 +277,25 @@ int is_descendant_of(struct commit *commit, struct commit_list *with_commit) { if (!with_commit) return 1; - while (with_commit) { - struct commit *other; - other = with_commit->item; - with_commit = with_commit->next; - if (in_merge_bases(other, commit)) - return 1; + if (generation_numbers_enabled(the_repository)) { + struct commit_list *from_list = NULL; + int result; + commit_list_insert(commit, &from_list); + result = can_all_from_reach(from_list, with_commit, 0); + free_commit_list(from_list); + return result; + } else { + while (with_commit) { + struct commit *other; + + other = with_commit->item; + with_commit = with_commit->next; + if (in_merge_bases(other, commit)) + return 1; + } + return 0; } - return 0; } /*