From: Junio C Hamano Date: Wed, 1 Mar 2006 04:16:41 +0000 (-0800) Subject: diffcore-delta: stop using deltifier for packing. X-Git-Tag: v1.3.0-rc1~58^2~1 X-Git-Url: https://git.verplant.org/?a=commitdiff_plain;h=e29e1147e485654d90a0ea0fd5fb7151bb194265;p=git.git diffcore-delta: stop using deltifier for packing. This switches the change estimation logic used by break, rename and copy detection from delta packing code to a more line oriented one. This way, thee performance-density tradeoff by delta packing code can be made without worrying about breaking the rename detection. Signed-off-by: Junio C Hamano --- diff --git a/diffcore-delta.c b/diffcore-delta.c index 1e6a6911..d03787be 100644 --- a/diffcore-delta.c +++ b/diffcore-delta.c @@ -1,43 +1,128 @@ #include "cache.h" #include "diff.h" #include "diffcore.h" -#include "delta.h" -#include "count-delta.h" - -static int diffcore_count_changes_1(void *src, unsigned long src_size, - void *dst, unsigned long dst_size, - unsigned long delta_limit, - unsigned long *src_copied, - unsigned long *literal_added) -{ - void *delta; - unsigned long delta_size; - - delta = diff_delta(src, src_size, - dst, dst_size, - &delta_size, delta_limit); - if (!delta) - /* If delta_limit is exceeded, we have too much differences */ - return -1; - /* Estimate the edit size by interpreting delta. */ - if (count_delta(delta, delta_size, src_copied, literal_added)) { - free(delta); - return -1; +struct linehash { + unsigned long bytes; + unsigned long hash; +}; + +static unsigned long hash_extended_line(const unsigned char **buf_p, + unsigned long left) +{ + /* An extended line is zero or more whitespace letters (including LF) + * followed by one non whitespace letter followed by zero or more + * non LF, and terminated with by a LF (or EOF). + */ + const unsigned char *bol = *buf_p; + const unsigned char *buf = bol; + unsigned long hashval = 0; + while (left) { + unsigned c = *buf++; + if (!c) + goto binary; + left--; + if (' ' < c) { + hashval = c; + break; + } + } + while (left) { + unsigned c = *buf++; + if (!c) + goto binary; + left--; + if (c == '\n') + break; + if (' ' < c) + hashval = hashval * 11 + c; } - free(delta); + *buf_p = buf; + return hashval; + + binary: + *buf_p = NULL; + return 0; +} + +static int linehash_compare(const void *a_, const void *b_) +{ + struct linehash *a = (struct linehash *) a_; + struct linehash *b = (struct linehash *) b_; + if (a->hash < b->hash) return -1; + if (a->hash > b->hash) return 1; return 0; } +static struct linehash *hash_lines(const unsigned char *buf, + unsigned long size) +{ + const unsigned char *eobuf = buf + size; + struct linehash *line = NULL; + int alloc = 0, used = 0; + + while (buf < eobuf) { + const unsigned char *ptr = buf; + unsigned long hash = hash_extended_line(&buf, eobuf-ptr); + if (!buf) { + free(line); + return NULL; + } + if (alloc <= used) { + alloc = alloc_nr(alloc); + line = xrealloc(line, sizeof(*line) * alloc); + } + line[used].bytes = buf - ptr; + line[used].hash = hash; + used++; + } + qsort(line, used, sizeof(*line), linehash_compare); + + /* Terminate the list */ + if (alloc <= used) + line = xrealloc(line, sizeof(*line) * (used+1)); + line[used].bytes = line[used].hash = 0; + return line; +} + int diffcore_count_changes(void *src, unsigned long src_size, void *dst, unsigned long dst_size, unsigned long delta_limit, unsigned long *src_copied, unsigned long *literal_added) { - return diffcore_count_changes_1(src, src_size, - dst, dst_size, - delta_limit, - src_copied, - literal_added); + struct linehash *src_lines, *dst_lines; + unsigned long sc, la; + + src_lines = hash_lines(src, src_size); + if (!src_lines) + return -1; + dst_lines = hash_lines(dst, dst_size); + if (!dst_lines) { + free(src_lines); + return -1; + } + sc = la = 0; + while (src_lines->bytes && dst_lines->bytes) { + int cmp = linehash_compare(src_lines, dst_lines); + if (!cmp) { + sc += src_lines->bytes; + src_lines++; + dst_lines++; + continue; + } + if (cmp < 0) { + src_lines++; + continue; + } + la += dst_lines->bytes; + dst_lines++; + } + while (dst_lines->bytes) { + la += dst_lines->bytes; + dst_lines++; + } + *src_copied = sc; + *literal_added = la; + return 0; }