From: Linus Torvalds Date: Sun, 26 Jun 2005 02:30:20 +0000 (-0700) Subject: Add a "max_size" parameter to diff_delta() X-Git-Tag: v0.99~175 X-Git-Url: https://git.verplant.org/?a=commitdiff_plain;h=75c42d8cc3b42e4b82946848b8ba902b4bbcc38d;p=git.git Add a "max_size" parameter to diff_delta() Anything that generates a delta to see if two objects are close usually isn't interested in the delta if it ends up being bigger than some specified size, and this allows us to stop delta generation early when that happens. --- diff --git a/delta.h b/delta.h index df97ff84..ccc0c9ec 100644 --- a/delta.h +++ b/delta.h @@ -4,7 +4,7 @@ /* handling of delta buffers */ extern void *diff_delta(void *from_buf, unsigned long from_size, void *to_buf, unsigned long to_size, - unsigned long *delta_size); + unsigned long *delta_size, unsigned long max_size); extern void *patch_delta(void *src_buf, unsigned long src_size, void *delta_buf, unsigned long delta_size, unsigned long *dst_size); diff --git a/diff-delta.c b/diff-delta.c index 480f03cd..fd9b37f4 100644 --- a/diff-delta.c +++ b/diff-delta.c @@ -203,7 +203,8 @@ static void delta_cleanup(bdfile_t *bdf) void *diff_delta(void *from_buf, unsigned long from_size, void *to_buf, unsigned long to_size, - unsigned long *delta_size) + unsigned long *delta_size, + unsigned long max_size) { int i, outpos, outsize, inscnt, csize, msize, moff; unsigned int fp; @@ -312,6 +313,11 @@ void *diff_delta(void *from_buf, unsigned long from_size, } /* next time around the largest possible output is 1 + 4 + 3 */ + if (max_size && outpos > max_size) { + free(out); + delta_cleanup(&bdf); + return NULL; + } if (outpos > outsize - 8) { void *tmp = out; outsize = outsize * 3 / 2; diff --git a/diffcore-break.c b/diffcore-break.c index 920062bf..9852f971 100644 --- a/diffcore-break.c +++ b/diffcore-break.c @@ -65,7 +65,7 @@ static int should_break(struct diff_filespec *src, delta = diff_delta(src->data, src->size, dst->data, dst->size, - &delta_size); + &delta_size, 
~0UL); /* Estimate the edit size by interpreting delta. */ if (count_delta(delta, delta_size, diff --git a/diffcore-rename.c b/diffcore-rename.c index 8fb45f0b..29609c74 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -165,7 +165,7 @@ static int estimate_similarity(struct diff_filespec *src, delta = diff_delta(src->data, src->size, dst->data, dst->size, - &delta_size); + &delta_size, ~0UL); /* A delta that has a lot of literal additions would have * big delta_size no matter what else it does. diff --git a/mkdelta.c b/mkdelta.c index 6470a94e..d4c5f3b4 100644 --- a/mkdelta.c +++ b/mkdelta.c @@ -278,7 +278,8 @@ int main(int argc, char **argv) continue; } delta_buf = diff_delta(ref[r].buf, ref[r].size, - trg.buf, trg.size, &delta_size); + trg.buf, trg.size, + &delta_size, ~0UL); if (!delta_buf) die("out of memory"); if (trg.depth < max_depth && diff --git a/pack-objects.c b/pack-objects.c index dfa9d44a..d9328a98 100644 --- a/pack-objects.c +++ b/pack-objects.c @@ -83,8 +83,8 @@ static void *delta_against(void *buf, unsigned long size, struct object_entry *e if (!otherbuf) die("unable to read %s", sha1_to_hex(entry->delta->sha1)); - delta_buf = diff_delta(buf, size, otherbuf, othersize, &delta_size); - if (delta_size != entry->delta_size) + delta_buf = diff_delta(buf, size, otherbuf, othersize, &delta_size, ~0UL); + if (!delta_buf || delta_size != entry->delta_size) die("delta size changed"); free(buf); free(otherbuf); @@ -292,6 +292,7 @@ static int try_delta(struct unpacked *cur, struct unpacked *old) struct object_entry *cur_entry = cur->entry; struct object_entry *old_entry = old->entry; unsigned long size, oldsize, delta_size; + long max_size; void *delta_buf; /* Don't bother doing diffs between different types */ @@ -300,6 +301,8 @@ static int try_delta(struct unpacked *cur, struct unpacked *old) /* Size is guaranteed to be larger than or equal to oldsize */ size = cur_entry->size; + if (size < 50) + return -1; oldsize = old_entry->size; if (size - 
oldsize > oldsize / 4) return -1; @@ -311,15 +314,14 @@ static int try_delta(struct unpacked *cur, struct unpacked *old) * more space-efficient (deletes don't have to say _what_ they * delete). */ - delta_buf = diff_delta(cur->data, size, old->data, oldsize, &delta_size); + max_size = size / 2 - 20; + if (cur_entry->delta) + max_size = cur_entry->delta_size-1; + delta_buf = diff_delta(cur->data, size, old->data, oldsize, &delta_size, max_size); if (!delta_buf) - die("unable to create delta"); - if (delta_size + 20 < size / 2) { - if (!cur_entry->delta || cur_entry->delta_size > delta_size) { - cur_entry->delta = old_entry; - cur_entry->delta_size = delta_size; - } - } + return 0; + cur_entry->delta = old_entry; + cur_entry->delta_size = delta_size; free(delta_buf); return 0; } diff --git a/test-delta.c b/test-delta.c index 8751e27c..da51efc2 100644 --- a/test-delta.c +++ b/test-delta.c @@ -60,10 +60,12 @@ int main(int argc, char *argv[]) if (argv[1][1] == 'd') out_buf = diff_delta(from_buf, from_size, - data_buf, data_size, &out_size); + data_buf, data_size, + &out_size, ~0UL); else out_buf = patch_delta(from_buf, from_size, - data_buf, data_size, &out_size); + data_buf, data_size, + &out_size); if (!out_buf) { fprintf(stderr, "delta operation failed (returned NULL)\n"); return 1;