From 024268a63a4e3b6f2279d754cdef041364ec6663 Mon Sep 17 00:00:00 2001 From: Patrick McCarty Date: Wed, 7 Jun 2017 15:30:46 -0700 Subject: [PATCH] Improve binary search performance for diff creation When a section of old_data and new_data is found to be equal in the course of searching for matches, short-circuit at that point and return the match. Signed-off-by: Patrick McCarty --- src/diff.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/diff.c b/src/diff.c index 2891022..b8230a0 100644 --- a/src/diff.c +++ b/src/diff.c @@ -276,10 +276,20 @@ static int64_t search(int64_t *I, u_char *old, int64_t oldsize, } x = st + (en - st) / 2; - if (memcmp(old + I[x], new, MIN(oldsize - I[x], newsize)) < 0) { + + int64_t length = MIN(oldsize - I[x], newsize); + int result = memcmp(old + I[x], new, length); + + if (result < 0) { return search(I, old, oldsize, new, newsize, x, en, pos); - } else { + } else if (result > 0) { return search(I, old, oldsize, new, newsize, st, x, pos); + } else { + /* As a special case, short circuit for the first exact match + * between old_data and new_data, since future exact matches + * will have shorter length. */ + *pos = I[en]; + return length; } }