[PATCH] Diff-helper update
author Junio C Hamano <junkio@cox.net>
Wed, 18 May 2005 06:29:49 +0000 (23:29 -0700)
committer Linus Torvalds <torvalds@ppc970.osdl.org>
Wed, 18 May 2005 18:16:24 +0000 (11:16 -0700)
This patch adds a framework and a stub implementation of rename
detection to the diff-helper program.

The current stub code is just enough to detect pure renames in
diff-tree output and nothing fancier.  The plan is perhaps to use
the same delta code for similarity evaluation purposes once Nico's
delta storage patch is merged.

Signed-off-by: Junio C Hamano <junkio@cox.net>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Documentation/git-diff-helper.txt
diff-helper.c
diff.c
diff.h

index f879ed9..e930f97 100644 (file)
@@ -9,7 +9,7 @@ git-diff-helper - Generates patch format output for git-diff-*
 
 SYNOPSIS
 --------
-'git-diff-helper' [-z] [-R]
+'git-diff-helper' [-z] [-R] [-r]
 
 DESCRIPTION
 -----------
@@ -28,7 +28,12 @@ OPTIONS
 
                git-diff-cache <tree> | git-diff-helper -R file.c
 
-would show a diff to bring the working file back to what is in the <tree>.
+       would show a diff to bring the working file back to what
+       is in the <tree>.
+
+-r::
+       Detect renames.
+
 
 See Also
 --------
@@ -37,7 +42,8 @@ The section on generating patches in link:git-diff-cache.html[git-diff-cache]
 
 Author
 ------
-Written by Linus Torvalds <torvalds@osdl.org>
+Written by Junio C Hamano <junkio@cox.net>
+
 
 Documentation
 --------------
index cd0bad0..3ef5706 100644 (file)
@@ -21,6 +21,129 @@ static int matches_pathspec(const char *name, const char **spec, int cnt)
        return 0;
 }
 
+static int detect_rename = 0;
+
+/*
+ * We do not detect circular renames.  Just hold created and deleted
+ * entries and later attempt to match them up.  If they do not match,
+ * then spit them out as deletes or creates as original.
+ */
+
+static struct diff_spec_hold {
+       struct diff_spec_hold *next;
+       struct diff_spec_hold *matched;
+       struct diff_spec old, new;
+       char path[1];
+} *createdfile, *deletedfile;
+
+static void hold_spec(const char *path,
+                     struct diff_spec *old, struct diff_spec *new)
+{
+       struct diff_spec_hold **list, *elem;
+       list = (! old->file_valid) ? &createdfile : &deletedfile;
+       elem = xmalloc(sizeof(*elem) + strlen(path));
+       strcpy(elem->path, path);
+       elem->next = *list;
+       *list = elem;
+       elem->old = *old;
+       elem->new = *new;
+       elem->matched = 0;
+}
+
+#define MINIMUM_SCORE 7000
+int estimate_similarity(struct diff_spec *one, struct diff_spec *two)
+{
+       /* Return how similar they are, representing the score as an
+        * integer between 0 and 10000.
+        *
+        * This version is very dumb and detects exact matches only.
+        * When Nico's delta stuff gets in, I'll use the delta
+        * algorithm to estimate the similarity score in core.
+        */
+
+       if (one->sha1_valid && two->sha1_valid &&
+           !memcmp(one->blob_sha1, two->blob_sha1, 20))
+               return 10000;
+       return 0;
+}
+
+static void flush_renames(const char **spec, int cnt, int reverse)
+{
+       struct diff_spec_hold *rename_src, *rename_dst, *elem;
+       struct diff_spec_hold *leftover = NULL;
+       int score, best_score;
+
+       while (createdfile) {
+               rename_dst = createdfile;
+               createdfile = rename_dst->next;
+               best_score = MINIMUM_SCORE;
+               rename_src = NULL;
+               for (elem = deletedfile;
+                    elem;
+                    elem = elem->next) {
+                       if (elem->matched)
+                               continue;
+                       score = estimate_similarity(&elem->old,
+                                                   &rename_dst->new);
+                       if (best_score < score) {
+                               rename_src = elem;
+                               best_score = score;
+                       }
+               }
+               if (rename_src) {
+                       rename_src->matched = rename_dst;
+                       rename_dst->matched = rename_src;
+
+                       if (!cnt ||
+                           matches_pathspec(rename_src->path, spec, cnt) ||
+                           matches_pathspec(rename_dst->path, spec, cnt)) {
+                               if (reverse)
+                                       run_external_diff(rename_dst->path,
+                                                         rename_src->path,
+                                                         &rename_dst->new,
+                                                         &rename_src->old);
+                               else
+                                       run_external_diff(rename_src->path,
+                                                         rename_dst->path,
+                                                         &rename_src->old,
+                                                         &rename_dst->new);
+                       }
+               }
+               else {
+                       rename_dst->next = leftover;
+                       leftover = rename_dst;
+               }
+       }
+
+       /* unmatched deletes */
+       for (elem = deletedfile; elem; elem = elem->next) {
+               if (elem->matched)
+                       continue;
+               if (!cnt ||
+                   matches_pathspec(elem->path, spec, cnt)) {
+                       if (reverse)
+                               run_external_diff(elem->path, NULL,
+                                                 &elem->new, &elem->old);
+                       else
+                               run_external_diff(elem->path, NULL,
+                                                 &elem->old, &elem->new);
+               }
+       }
+
+       /* unmatched creates */
+       for (elem = leftover; elem; elem = elem->next) {
+               if (!cnt ||
+                   matches_pathspec(elem->path, spec, cnt)) {
+                       if (reverse)
+                               run_external_diff(elem->path, NULL,
+                                                 &elem->new, &elem->old);
+                       else
+                               run_external_diff(elem->path, NULL,
+                                                 &elem->old, &elem->new);
+               }
+       }
+}
+
 static int parse_oneside_change(const char *cp, struct diff_spec *one,
                                char *path)
 {
@@ -100,17 +223,24 @@ static int parse_diff_raw_output(const char *buf,
        default:
                return -1;
        }
+
+       if (detect_rename && old.file_valid != new.file_valid) {
+               /* hold these */
+               hold_spec(path, &old, &new);
+               return 0;
+       }
+
        if (!cnt || matches_pathspec(path, spec, cnt)) {
                if (reverse)
-                       run_external_diff(path, &new, &old);
+                       run_external_diff(path, NULL, &new, &old);
                else
-                       run_external_diff(path, &old, &new);
+                       run_external_diff(path, NULL, &old, &new);
        }
        return 0;
 }
 
 static const char *diff_helper_usage =
-"git-diff-helper [-R] [-z] paths...";
+       "git-diff-helper [-r] [-R] [-z] paths...";
 
 int main(int ac, const char **av) {
        struct strbuf sb;
@@ -124,6 +254,8 @@ int main(int ac, const char **av) {
                        reverse = 1;
                else if (av[1][1] == 'z')
                        line_termination = 0;
+               else if (av[1][1] == 'r')
+                       detect_rename = 1;
                else
                        usage(diff_helper_usage);
                ac--; av++;
@@ -139,5 +271,8 @@ int main(int ac, const char **av) {
                if (status)
                        fprintf(stderr, "cannot parse %s\n", sb.buf);
        }
+
+       if (detect_rename)
+               flush_renames(av+1, ac-1, reverse);
        return 0;
 }
diff --git a/diff.c b/diff.c
index 36e8bd4..74004e5 100644 (file)
--- a/diff.c
+++ b/diff.c
@@ -79,7 +79,8 @@ static struct diff_tempfile {
        char tmp_path[50];
 } diff_temp[2];
 
-static void builtin_diff(const char *name,
+static void builtin_diff(const char *name_a,
+                        const char *name_b,
                         struct diff_tempfile *temp)
 {
        int i, next_at;
@@ -88,9 +89,12 @@ static void builtin_diff(const char *name,
        const char *input_name_sq[2];
        const char *path0[2];
        const char *path1[2];
-       const char *name_sq = sq_expand(name);
+       const char *name_sq[2];
        char *cmd;
-       
+
+       name_sq[0] = sq_expand(name_a);
+       name_sq[1] = sq_expand(name_b);
+
        /* diff_cmd and diff_arg have 6 %s in total which makes
         * the sum of these strings 12 bytes larger than required.
         * we use 2 spaces around diff-opts, and we need to count
@@ -105,7 +109,7 @@ static void builtin_diff(const char *name,
                        path1[i] = "";
                } else {
                        path0[i] = i ? "b/" : "a/";
-                       path1[i] = name_sq;
+                       path1[i] = name_sq[i];
                }
                cmd_size += (strlen(path0[i]) + strlen(path1[i]) +
                             strlen(input_name_sq[i]));
@@ -122,7 +126,7 @@ static void builtin_diff(const char *name,
        next_at += snprintf(cmd+next_at, cmd_size-next_at,
                            diff_arg, input_name_sq[0], input_name_sq[1]);
 
-       printf("diff --git a/%s b/%s\n", name, name);
+       printf("diff --git a/%s b/%s\n", name_a, name_b);
        if (!path1[0][0])
                printf("new file mode %s\n", temp[1].mode);
        else if (!path1[1][0])
@@ -132,6 +136,10 @@ static void builtin_diff(const char *name,
                        printf("old mode %s\n", temp[0].mode);
                        printf("new mode %s\n", temp[1].mode);
                }
+               if (strcmp(name_a, name_b)) {
+                       printf("rename old %s\n", name_a);
+                       printf("rename new %s\n", name_b);
+               }
                if (strncmp(temp[0].mode, temp[1].mode, 3))
                        /* we do not run diff between different kind
                         * of objects.
@@ -157,7 +165,7 @@ static int work_tree_matches(const char *name, const unsigned char *sha1)
         * benchmark with my previous version that always reads cache
         * shows that it makes things worse for diff-tree comparing
         * two linux-2.6 kernel trees in an already checked out work
-        * tree.  This is because most diff-tree comparison deals with
+        * tree.  This is because most diff-tree comparisons deal with
         * only a small number of files, while reading the cache is
         * expensive for a large project, and its cost outweighs the
         * savings we get by not inflating the object to a temporary
@@ -294,6 +302,7 @@ static void remove_tempfile_on_signal(int signo)
  *
  */
 void run_external_diff(const char *name,
+                      const char *other,
                       struct diff_spec *one,
                       struct diff_spec *two)
 {
@@ -304,7 +313,7 @@ void run_external_diff(const char *name,
 
        if (one && two) {
                prepare_temp_file(name, &temp[0], one);
-               prepare_temp_file(name, &temp[1], two);
+               prepare_temp_file(other ? : name, &temp[1], two);
                if (! atexit_asked &&
                    (temp[0].name == temp[0].tmp_path ||
                     temp[1].name == temp[1].tmp_path)) {
@@ -320,7 +329,8 @@ void run_external_diff(const char *name,
                die("unable to fork");
        if (!pid) {
                const char *pgm = external_diff();
-               if (pgm) {
+               /* not passing rename patch to external ones */
+               if (!other && pgm) {
                        if (one && two)
                                execlp(pgm, pgm,
                                       name,
@@ -334,7 +344,7 @@ void run_external_diff(const char *name,
                 * otherwise we use the built-in one.
                 */
                if (one && two)
-                       builtin_diff(name, temp);
+                       builtin_diff(name, other ? : name, temp);
                else
                        printf("* Unmerged path %s\n", name);
                exit(0);
@@ -379,7 +389,7 @@ void diff_addremove(int addremove, unsigned mode,
                strcpy(concatpath, base);
                strcat(concatpath, path);
        }
-       run_external_diff(path ? concatpath : base, one, two);
+       run_external_diff(path ? concatpath : base, NULL, one, two);
 }
 
 void diff_change(unsigned old_mode, unsigned new_mode,
@@ -400,10 +410,10 @@ void diff_change(unsigned old_mode, unsigned new_mode,
                strcpy(concatpath, base);
                strcat(concatpath, path);
        }
-       run_external_diff(path ? concatpath : base, &spec[0], &spec[1]);
+       run_external_diff(path ? concatpath : base, NULL, &spec[0], &spec[1]);
 }
 
 void diff_unmerge(const char *path)
 {
-       run_external_diff(path, NULL, NULL);
+       run_external_diff(path, NULL, NULL, NULL);
 }
diff --git a/diff.h b/diff.h
index c146d8a..815987d 100644 (file)
--- a/diff.h
+++ b/diff.h
@@ -31,7 +31,7 @@ struct diff_spec {
        unsigned file_valid : 1; /* if false the file does not even exist */
 };
 
-extern void run_external_diff(const char *name,
+extern void run_external_diff(const char *name, const char *other,
                              struct diff_spec *, struct diff_spec *);
 
 #endif /* DIFF_H */