diff-helper: pass unrecognized lines through unmodified
[git.git] / diff-helper.c
1 /*
2  * Copyright (C) 2005 Junio C Hamano
3  */
#include <limits.h>
#include <stdlib.h>
#include "cache.h"
#include "strbuf.h"
#include "diff.h"
8
/*
 * Tell whether NAME is selected by any of the CNT entries of SPEC.
 *
 * An entry selects NAME when it is identical to NAME, or when it is a
 * leading directory of NAME (NAME continues with '/' immediately after
 * the entry).  Returns 1 on the first entry that selects NAME and 0
 * when none does.
 */
static int matches_pathspec(const char *name, const char **spec, int cnt)
{
        int name_len = strlen(name);
        int idx = 0;

        while (idx < cnt) {
                const char *prefix = spec[idx++];
                int prefix_len = strlen(prefix);
                char tail;

                /* an entry longer than the name can never select it */
                if (name_len < prefix_len)
                        continue;
                if (strncmp(prefix, name, prefix_len) != 0)
                        continue;

                /* the match must end on a path component boundary */
                tail = name[prefix_len];
                if (tail == '\0' || tail == '/')
                        return 1;
        }
        return 0;
}
23
/*
 * Set by the -r command line option.  While non-zero, one-sided changes
 * (creations and deletions) are held back instead of being shown right
 * away, so that they can later be paired up as renames.
 */
static int detect_rename = 0;
25
26 /*
27  * We do not detect circular renames.  Just hold created and deleted
28  * entries and later attempt to match them up.  If they do not match,
29  * then spit them out as deletes or creates as original.
30  */
31
/*
 * One held creation or deletion.  hold_spec() allocates each element
 * with extra room after the struct so the whole path fits in path[].
 */
static struct diff_spec_hold {
        struct diff_spec_hold *next;    /* next element on the same list */
        struct diff_spec_hold *matched; /* rename partner; NULL while unpaired */
        struct diff_spec old, new;      /* both sides of the change as parsed */
        char path[1];                   /* NUL-terminated path, over-allocated */
} *createdfile, *deletedfile;           /* heads of the held creation / deletion lists */
38
39 static void hold_spec(const char *path,
40                       struct diff_spec *old, struct diff_spec *new)
41 {
42         struct diff_spec_hold **list, *elem;
43         list = (! old->file_valid) ? &createdfile : &deletedfile;
44         elem = xmalloc(sizeof(*elem) + strlen(path));
45         strcpy(elem->path, path);
46         elem->next = *list;
47         *list = elem;
48         elem->old = *old;
49         elem->new = *new;
50         elem->matched = 0;
51 }
52
53 #define MINIMUM_SCORE 7000
54 int estimate_similarity(struct diff_spec *one, struct diff_spec *two)
55 {
56         /* Return how similar they are, representing the score as an
57          * integer between 0 and 10000.
58          *
59          * This version is very dumb and detects exact matches only.
60          * Wnen Nico's delta stuff gets in, I'll use the delta
61          * algorithm to estimate the similarity score in core.
62          */
63
64         if (one->sha1_valid && two->sha1_valid &&
65             !memcmp(one->blob_sha1, two->blob_sha1, 20))
66                 return 10000;
67         return 0;
68 }
69
70 static void flush_renames(const char **spec, int cnt, int reverse)
71 {
72         struct diff_spec_hold *rename_src, *rename_dst, *elem;
73         struct diff_spec_hold *leftover = NULL;
74         int score, best_score;
75
76         while (createdfile) {
77                 rename_dst = createdfile;
78                 createdfile = rename_dst->next;
79                 best_score = MINIMUM_SCORE;
80                 rename_src = NULL;
81                 for (elem = deletedfile;
82                      elem;
83                      elem = elem->next) {
84                         if (elem->matched)
85                                 continue;
86                         score = estimate_similarity(&elem->old,
87                                                     &rename_dst->new);
88                         if (best_score < score) {
89                                 rename_src = elem;
90                                 best_score = score;
91                         }
92                 }
93                 if (rename_src) {
94                         rename_src->matched = rename_dst;
95                         rename_dst->matched = rename_src;
96
97                         if (!cnt ||
98                             matches_pathspec(rename_src->path, spec, cnt) ||
99                             matches_pathspec(rename_dst->path, spec, cnt)) {
100                                 if (reverse)
101                                         run_external_diff(rename_dst->path,
102                                                           rename_src->path,
103                                                           &rename_dst->new,
104                                                           &rename_src->old);
105                                 else
106                                         run_external_diff(rename_src->path,
107                                                           rename_dst->path,
108                                                           &rename_src->old,
109                                                           &rename_dst->new);
110                         }
111                 }
112                 else {
113                         rename_dst->next = leftover;
114                         leftover = rename_dst;
115                 }
116         }
117
118         /* unmatched deletes */
119         for (elem = deletedfile; elem; elem = elem->next) {
120                 if (elem->matched)
121                         continue;
122                 if (!cnt ||
123                     matches_pathspec(elem->path, spec, cnt)) {
124                         if (reverse)
125                                 run_external_diff(elem->path, NULL,
126                                                   &elem->new, &elem->old);
127                         else
128                                 run_external_diff(elem->path, NULL,
129                                                   &elem->old, &elem->new);
130                 }
131         }
132
133         /* unmatched creates */
134         for (elem = leftover; elem; elem = elem->next) {
135                 if (!cnt ||
136                     matches_pathspec(elem->path, spec, cnt)) {
137                         if (reverse)
138                                 run_external_diff(elem->path, NULL,
139                                                   &elem->new, &elem->old);
140                         else
141                                 run_external_diff(elem->path, NULL,
142                                                   &elem->old, &elem->new);
143                 }
144         }
145 }
146
147 static int parse_oneside_change(const char *cp, struct diff_spec *one,
148                                 char *path)
149 {
150         int ch;
151
152         one->file_valid = one->sha1_valid = 1;
153         one->mode = 0;
154         while ((ch = *cp) && '0' <= ch && ch <= '7') {
155                 one->mode = (one->mode << 3) | (ch - '0');
156                 cp++;
157         }
158
159         if (strncmp(cp, "\tblob\t", 6))
160                 return -1;
161         cp += 6;
162         if (get_sha1_hex(cp, one->blob_sha1))
163                 return -1;
164         cp += 40;
165         if (*cp++ != '\t')
166                 return -1;
167         strcpy(path, cp);
168         return 0;
169 }
170
171 static int parse_diff_raw_output(const char *buf,
172                                  const char **spec, int cnt, int reverse)
173 {
174         struct diff_spec old, new;
175         char path[PATH_MAX];
176         const char *cp = buf;
177         int ch;
178
179         switch (*cp++) {
180         case 'U':
181                 if (!cnt || matches_pathspec(cp + 1, spec, cnt))
182                         diff_unmerge(cp + 1);
183                 return 0;
184         case '+':
185                 old.file_valid = 0;
186                 parse_oneside_change(cp, &new, path);
187                 break;
188         case '-':
189                 new.file_valid = 0;
190                 parse_oneside_change(cp, &old, path);
191                 break;
192         case '*':
193                 old.file_valid = old.sha1_valid =
194                         new.file_valid = new.sha1_valid = 1;
195                 old.mode = new.mode = 0;
196                 while ((ch = *cp) && ('0' <= ch && ch <= '7')) {
197                         old.mode = (old.mode << 3) | (ch - '0');
198                         cp++;
199                 }
200                 if (strncmp(cp, "->", 2))
201                         return -1;
202                 cp += 2;
203                 while ((ch = *cp) && ('0' <= ch && ch <= '7')) {
204                         new.mode = (new.mode << 3) | (ch - '0');
205                         cp++;
206                 }
207                 if (strncmp(cp, "\tblob\t", 6))
208                         return -1;
209                 cp += 6;
210                 if (get_sha1_hex(cp, old.blob_sha1))
211                         return -1;
212                 cp += 40;
213                 if (strncmp(cp, "->", 2))
214                         return -1;
215                 cp += 2;
216                 if (get_sha1_hex(cp, new.blob_sha1))
217                         return -1;
218                 cp += 40;
219                 if (*cp++ != '\t')
220                         return -1;
221                 strcpy(path, cp);
222                 break;
223         default:
224                 return -1;
225         }
226
227         if (detect_rename && old.file_valid != new.file_valid) {
228                 /* hold these */
229                 hold_spec(path, &old, &new);
230                 return 0;
231         }
232
233         if (!cnt || matches_pathspec(path, spec, cnt)) {
234                 if (reverse)
235                         run_external_diff(path, NULL, &new, &old);
236                 else
237                         run_external_diff(path, NULL, &old, &new);
238         }
239         return 0;
240 }
241
242 static const char *diff_helper_usage =
243         "git-diff-helper [-r] [-R] [-z] paths...";
244
245 int main(int ac, const char **av) {
246         struct strbuf sb;
247         int reverse = 0;
248         int line_termination = '\n';
249
250         strbuf_init(&sb);
251
252         while (1 < ac && av[1][0] == '-') {
253                 if (av[1][1] == 'R')
254                         reverse = 1;
255                 else if (av[1][1] == 'z')
256                         line_termination = 0;
257                 else if (av[1][1] == 'r')
258                         detect_rename = 1;
259                 else
260                         usage(diff_helper_usage);
261                 ac--; av++;
262         }
263         /* the remaining parameters are paths patterns */
264
265         while (1) {
266                 int status;
267                 read_line(&sb, stdin, line_termination);
268                 if (sb.eof)
269                         break;
270                 status = parse_diff_raw_output(sb.buf, av+1, ac-1, reverse);
271                 if (status) {
272                         flush_renames(av+1, ac-1, reverse);
273                         printf("%s%c", sb.buf, line_termination);
274                 }
275         }
276
277         flush_renames(av+1, ac-1, reverse);
278         return 0;
279 }