From 13b4777c37e0384e3515e97be924a47f5766c11a Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Tue, 6 Dec 2016 16:33:39 -0500
Subject: [PATCH 1/2] read-cache: speed up add_index_entry during checkout

Teach add_index_entry_with_check() and has_dir_name()
to see if the path of the new item is greater than the
last path in the index array before attempting to search
for it.

This is a performance optimization.

During checkout, merge_working_tree() populates the new
index in sorted order, so this change saves at least 2
lookups per file.

Signed-off-by: Jeff Hostetler
---
 cache.h      |  1 +
 read-cache.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 75e6d4e7b20029..384585b947b9c4 100644
--- a/cache.h
+++ b/cache.h
@@ -571,6 +571,7 @@ extern int write_locked_index(struct index_state *, struct lock_file *lock, unsi
 extern int discard_index(struct index_state *);
 extern int unmerged_index(const struct index_state *);
 extern int verify_path(const char *path);
+extern int strcmp_offset(const char *s1_in, const char *s2_in, int *first_change);
 extern int index_dir_exists(struct index_state *istate, const char *name, int namelen);
 extern void adjust_dirname_case(struct index_state *istate, char *name);
 extern struct cache_entry *index_file_exists(struct index_state *istate, const char *name, int namelen, int igncase);
diff --git a/read-cache.c b/read-cache.c
index 9ab4c3cead3a75..3397a03fe30ff9 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -888,6 +888,36 @@ static int has_file_name(struct index_state *istate,
 	return retval;
 }
 
+/*
+ * Like strcmp(), but also report the offset of the first change.
+ *
+ * The strings are compared as unsigned bytes.  The return value is
+ * -1, 0 or 1 when s1_in sorts before, equal to or after s2_in.
+ *
+ * On a mismatch *first_change is set to the offset of the first byte
+ * that differs (the length of the shorter string when one string is a
+ * prefix of the other).  For equal strings it is set to 0.
+ */
+int strcmp_offset(const char *s1_in, const char *s2_in, int *first_change)
+{
+	const unsigned char *s1 = (const unsigned char *)s1_in;
+	const unsigned char *s2 = (const unsigned char *)s2_in;
+	int k;
+
+	/* Skip the common prefix; stop at the end of s1 at the latest. */
+	for (k = 0; s1[k] && s1[k] == s2[k]; k++)
+		; /* nothing */
+
+	if (s1[k] == s2[k]) {
+		/* Both strings ended at the same offset: they are equal. */
+		*first_change = 0;
+		return 0;
+	}
+
+	*first_change = k;
+	return s1[k] < s2[k] ? -1 : 1;
+}
+
 /*
  * Do we have another file with a pathname that is a proper
  * subset of the name we're trying to add?
@@ -899,6 +929,21 @@ static int has_dir_name(struct index_state *istate,
 	int stage = ce_stage(ce);
 	const char *name = ce->name;
 	const char *slash = name + ce_namelen(ce);
+	int len_eq_last = 0;
+	int cmp_last = 0;
+
+	if (istate->cache_nr > 0) {
+		/*
+		 * Compare the entry's full path with the last path in the index.
+		 * If it sorts AFTER the last entry in the index and they have no
+		 * common prefix, then there cannot be any F/D name conflicts.
+		 */
+		cmp_last = strcmp_offset(name,
+			istate->cache[istate->cache_nr-1]->name,
+			&len_eq_last);
+		if (cmp_last > 0 && len_eq_last == 0)
+			return retval;
+	}
 
 	for (;;) {
 		int len;
@@ -911,6 +956,24 @@ static int has_dir_name(struct index_state *istate,
 		}
 		len = slash - name;
 
+		if (cmp_last > 0) {
+			/*
+			 * If this part of the directory prefix (including the trailing
+			 * slash) already appears in the path of the last entry in the
+			 * index, then we cannot also have a file with this prefix (or
+			 * any parent directory prefix).
+			 */
+			if (len + 1 <= len_eq_last)
+				return retval;
+			/*
+			 * If this part of the directory prefix (excluding the trailing
+			 * slash) is longer than the known equal portions, then this part
+			 * of the prefix cannot collide with a file. Go on to the parent.
+			 */
+			if (len > len_eq_last)
+				continue;
+		}
+
 		pos = index_name_stage_pos(istate, name, len, stage);
 		if (pos >= 0) {
 			/*
@@ -1002,7 +1065,16 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 
 	if (!(option & ADD_CACHE_KEEP_CACHE_TREE))
 		cache_tree_invalidate_path(istate, ce->name);
-	pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
+
+	/*
+	 * If this entry's path sorts after the last entry in the index,
+	 * we can avoid searching for it.
+	 */
+	if (istate->cache_nr > 0 &&
+		strcmp(ce->name, istate->cache[istate->cache_nr - 1]->name) > 0)
+		pos = -istate->cache_nr - 1;
+	else
+		pos = index_name_stage_pos(istate, ce->name, ce_namelen(ce), ce_stage(ce));
 
 	/* existing match? Just replace it. */
 	if (pos >= 0) {

From 34ed48ecf73290cd74b61b5113595a3ddd2d1bde Mon Sep 17 00:00:00 2001
From: Jeff Hostetler
Date: Thu, 8 Dec 2016 10:12:15 -0500
Subject: [PATCH 2/2] test-strcmp-offset: created test for strcmp_offset

Signed-off-by: Jeff Hostetler
---
 Makefile                      |  1 +
 t/helper/test-strcmp-offset.c | 59 +++++++++++++++++++++++++++++++++++
 t/t0065-strcmp-offset.sh      | 11 +++++++
 3 files changed, 71 insertions(+)
 create mode 100644 t/helper/test-strcmp-offset.c
 create mode 100755 t/t0065-strcmp-offset.sh

diff --git a/Makefile b/Makefile
index 0e0a6c5c42728a..2d3282b2100695 100644
--- a/Makefile
+++ b/Makefile
@@ -623,6 +623,7 @@ TEST_PROGRAMS_NEED_X += test-scrap-cache-tree
 TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sha1-array
 TEST_PROGRAMS_NEED_X += test-sigchain
+TEST_PROGRAMS_NEED_X += test-strcmp-offset
 TEST_PROGRAMS_NEED_X += test-string-list
 TEST_PROGRAMS_NEED_X += test-submodule-config
 TEST_PROGRAMS_NEED_X += test-subprocess
diff --git a/t/helper/test-strcmp-offset.c b/t/helper/test-strcmp-offset.c
new file mode 100644
index 00000000000000..56fd8c1e41b5eb
--- /dev/null
+++ b/t/helper/test-strcmp-offset.c
@@ -0,0 +1,59 @@
+#include "cache.h"
+
+struct test_data {
+	const char *s1;
+	const char *s2;
+	int first_change;
+};
+
+static const struct test_data data[] = {
+	{ "abc", "abc", 0 },
+	{ "abc", "def", 0 },
+	/* the strings differ only in their last byte */
+	{ "abc", "abz", 2 },
+	/* one string is a proper prefix of the other */
+	{ "abc", "abcdef", 3 },
+	/* the first differing bytes both have the high bit set */
+	{ "abc\xF0zzz", "abc\xFFzzz", 3 },
+	/* a NULL s1 terminates the table */
+	{ NULL, NULL, 0 }
+};
+
+static int try_pair(const char *sa, const char *sb, int first_change)
+{
+	int failed = 0;
+	int offset, r_exp, r_tst;
+
+	r_exp = strcmp(sa, sb);
+	r_tst = strcmp_offset(sa, sb, &offset);
+	if (r_tst != r_exp) {
+		if ((r_tst < 0 && r_exp < 0) || (r_tst > 0 && r_exp > 0))
+			warning("'%s' vs '%s', imprecise result: %d != %d",
+				sa, sb, r_exp, r_tst);
+		else {
+			error("'%s' vs '%s', result expect %d, observed %d",
+			      sa, sb, r_exp, r_tst);
+			failed = 1;
+		}
+	}
+	if (offset != first_change) {
+		error("'%s' vs '%s', offset expect %d, observed %d",
+		      sa, sb, first_change, offset);
+		failed = 1;
+	}
+
+	return failed;
+}
+
+int cmd_main(int argc, const char **argv)
+{
+	int failed = 0;
+	int k;
+
+	for (k = 0; data[k].s1; k++) {
+		failed += try_pair(data[k].s1, data[k].s2, data[k].first_change);
+		failed += try_pair(data[k].s2, data[k].s1, data[k].first_change);
+	}
+
+	return failed;
+}
diff --git a/t/t0065-strcmp-offset.sh b/t/t0065-strcmp-offset.sh
new file mode 100755
index 00000000000000..0176c8c9243e09
--- /dev/null
+++ b/t/t0065-strcmp-offset.sh
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+test_description='Test strcmp_offset functionality'
+
+. ./test-lib.sh
+
+test_expect_success run_helper '
+	test-strcmp-offset
+'
+
+test_done