+ /* src points at a deleted file and dst points at a created
+ * file. They may be quite similar, in which case we want to
+ * say src is renamed to dst.
+ *
+ * Compare them and return how similar they are, representing
+ * the score as an integer between 0 and 10000, except
+ * where they match exactly it is considered better than anything
+ * else.
+ */
+ void *delta;
+ unsigned long delta_size;
+ int score;
+
+ delta_size = ((src->size < dst->size) ?
+ (dst->size - src->size) : (src->size - dst->size));
+
+ /* We would not consider rename followed by more than
+ * minimum_score/MAX_SCORE edits; that is, delta_size must be smaller
+ * than (src->size + dst->size)/2 * minimum_score/MAX_SCORE,
+ * which means...
+ */
+
+ if ((src->size+dst->size)*minimum_score < delta_size*MAX_SCORE*2)
+ return 0;
+
+ delta = diff_delta(src->data, src->size,
+ dst->data, dst->size,
+ &delta_size);
+ free(delta);
+
+ /* This "delta" is really xdiff with adler32 and all the
+ * overheads but it is a quick and dirty approximation.
+ *
+ * Now we will give some score to it. 100% edit gets
+ * 0 points and 0% edit gets MAX_SCORE points. That is, every
+ * 1/MAX_SCORE edit gets 1 point penalty. The amount of penalty is:
+ *
+ * (delta_size * 2 / (src->size + dst->size)) * MAX_SCORE
+ *
+ */
+ score = MAX_SCORE-(MAX_SCORE*2*delta_size/(src->size+dst->size));
+ if (score < 0) return 0;
+ if (MAX_SCORE < score) return MAX_SCORE;
+ return score;
+}
+
+struct diff_score {
+ struct diff_spec_hold *src;
+ struct diff_spec_hold *dst;
+ int score;
+};
+
+static int score_compare(const void *a_, const void *b_)
+{
+ const struct diff_score *a = a_, *b = b_;
+ return b->score - a->score;
+}
+
+static void flush_rename_pair(struct diff_spec_hold *src,
+ struct diff_spec_hold *dst,
+ int rename_score)
+{
+ src->flags |= MATCHED;
+ dst->flags |= MATCHED;
+ free_data(src);
+ free_data(dst);
+ run_external_diff(src->path, dst->path,
+ &src->it, &dst->it, rename_score);
+}
+
+static void free_held_diff(struct diff_spec_hold *list)
+{
+ struct diff_spec_hold *h;
+ for (h = list; list; list = h) {
+ h = list->next;
+ free_data(list);
+ free(list);
+ }
+}
+
+void diff_flush(void)
+{
+ int num_create, num_delete, c, d;
+ struct diff_spec_hold *elem, *src, *dst;
+ struct diff_score *mx;
+
+ /* We really want to cull the candidates list early
+ * with cheap tests in order to avoid doing deltas.
+ *
+ * With the current callers, we should not have already
+ * matched entries at this point, but it is nonetheless
+ * checked for sanity.
+ */
+ for (dst = createdfile; dst; dst = dst->next) {
+ if (dst->flags & MATCHED)
+ continue;
+ for (src = deletedfile; src; src = src->next) {
+ if (src->flags & MATCHED)
+ continue;
+ if (! is_exact_match(src, dst))
+ continue;
+ flush_rename_pair(src, dst, MAX_SCORE);
+ break;
+ }
+ }
+
+ /* Count surviving candidates */
+ for (num_create = 0, elem = createdfile; elem; elem = elem->next)
+ if (!(elem->flags & MATCHED))
+ num_create++;
+
+ for (num_delete = 0, elem = deletedfile; elem; elem = elem->next)
+ if (!(elem->flags & MATCHED))
+ num_delete++;
+
+ if (num_create == 0 || num_delete == 0)
+ goto exit_path;
+
+ mx = xmalloc(sizeof(*mx) * num_create * num_delete);
+ for (c = 0, dst = createdfile; dst; dst = dst->next) {
+ int base = c * num_delete;
+ if (dst->flags & MATCHED)
+ continue;
+ for (d = 0, src = deletedfile; src; src = src->next) {
+ struct diff_score *m = &mx[base+d];
+ if (src->flags & MATCHED)
+ continue;
+ m->src = src;
+ m->dst = dst;
+ m->score = estimate_similarity(src, dst);
+ d++;
+ }
+ c++;
+ }
+ qsort(mx, num_create*num_delete, sizeof(*mx), score_compare);
+
+#if 0
+ for (c = 0; c < num_create * num_delete; c++) {
+ src = mx[c].src;
+ dst = mx[c].dst;
+ if ((src->flags & MATCHED) || (dst->flags & MATCHED))
+ continue;
+ fprintf(stderr,
+ "**score ** %d %s %s\n",
+ mx[c].score, src->path, dst->path);
+ }
+#endif
+
+ for (c = 0; c < num_create * num_delete; c++) {
+ src = mx[c].src;
+ dst = mx[c].dst;
+ if ((src->flags & MATCHED) || (dst->flags & MATCHED))
+ continue;
+ if (mx[c].score < minimum_score)
+ break;
+ flush_rename_pair(src, dst, mx[c].score);
+ }
+ free(mx);
+
+ exit_path:
+ flush_remaining_diff(createdfile, 1);
+ flush_remaining_diff(deletedfile, 0);
+ free_held_diff(createdfile);
+ free_held_diff(deletedfile);
+ createdfile = deletedfile = NULL;
+}
+
+int diff_scoreopt_parse(const char *opt)
+{
+ int diglen, num, scale, i;
+ if (opt[0] != '-' || opt[1] != 'M')
+ return -1; /* that is not -M option */
+ diglen = strspn(opt+2, "0123456789");
+ if (diglen == 0 || strlen(opt+2) != diglen)
+ return 0; /* use default */
+ sscanf(opt+2, "%d", &num);
+ for (i = 0, scale = 1; i < diglen; i++)
+ scale *= 10;
+
+ /* user says num divided by scale and we say internally that
+ * is MAX_SCORE * num / scale.
+ */
+ return MAX_SCORE * num / scale;
+}
+
+void diff_setup(int detect_rename_, int minimum_score_, int reverse_diff_,
+ int diff_raw_output_,
+ const char **pathspec_, int speccnt_)
+{
+ free_held_diff(createdfile);
+ free_held_diff(deletedfile);
+ createdfile = deletedfile = NULL;
+
+ detect_rename = detect_rename_;
+ reverse_diff = reverse_diff_;
+ pathspec = pathspec_;
+ diff_raw_output = diff_raw_output_;
+ speccnt = speccnt_;
+ minimum_score = minimum_score_ ? : DEFAULT_MINIMUM_SCORE;
+}
+
+static const char *git_object_type(unsigned mode)
+{
+ return S_ISDIR(mode) ? "tree" : "blob";
+}
+
+void diff_addremove(int addremove, unsigned mode,
+ const unsigned char *sha1,
+ const char *base, const char *path)
+{
+ char concatpath[PATH_MAX];