Handling large files with GIT
[git.git] / merge-tree.c
1 #include "cache.h"
2 #include "diff.h"
3
/* Synopsis passed to usage() when the command line is not acceptable. */
static const char merge_tree_usage[] =
        "git-merge-tree <base-tree> <branch1> <branch2>";

/*
 * When zero, unresolved_directory() never descends into sub-trees and
 * directory entries are reported by unresolved() like any other entry.
 */
static int resolve_directories = 1;
6
7 static void merge_trees(struct tree_desc t[3], const char *base);
8
9 static void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1)
10 {
11         unsigned long size = 0;
12         void *buf = NULL;
13
14         if (sha1) {
15                 buf = read_object_with_reference(sha1, "tree", &size, NULL);
16                 if (!buf)
17                         die("unable to read tree %s", sha1_to_hex(sha1));
18         }
19         desc->size = size;
20         desc->buf = buf;
21         return buf;
22 }
23
/*
 * One entry pulled out of a tree while walking it.  After entry_clear()
 * every field is zero, which the rest of this file treats as "this tree
 * has no entry at the current name" (sha1 and mode are tested for zero).
 */
struct name_entry {
        const unsigned char *sha1;      /* object name; memcmp'd over 20 bytes */
        const char *path;               /* entry name from tree_entry_extract() */
        unsigned int mode;              /* entry mode; tested with S_ISDIR() */
        int pathlen;                    /* strlen(path), cached by entry_extract() */
};
30
31 static void entry_clear(struct name_entry *a)
32 {
33         memset(a, 0, sizeof(*a));
34 }
35
36 static int entry_compare(struct name_entry *a, struct name_entry *b)
37 {
38         return base_name_compare(
39                         a->path, a->pathlen, a->mode,
40                         b->path, b->pathlen, b->mode);
41 }
42
43 static void entry_extract(struct tree_desc *t, struct name_entry *a)
44 {
45         a->sha1 = tree_entry_extract(t, &a->path, &a->mode);
46         a->pathlen = strlen(a->path);
47 }
48
49 /* An empty entry never compares same, not even to another empty entry */
50 static int same_entry(struct name_entry *a, struct name_entry *b)
51 {
52         return  a->sha1 &&
53                 b->sha1 &&
54                 !memcmp(a->sha1, b->sha1, 20) &&
55                 a->mode == b->mode;
56 }
57
/*
 * Like sha1_to_hex(), but a NULL sha1 is rendered as forty '0' characters
 * instead of being passed on.
 */
static const char *sha1_to_hex_zero(const unsigned char *sha1)
{
        return sha1 ? sha1_to_hex(sha1)
                    : "0000000000000000000000000000000000000000";
}
64
65 static void resolve(const char *base, struct name_entry *branch1, struct name_entry *result)
66 {
67         char branch1_sha1[50];
68
69         /* If it's already branch1, don't bother showing it */
70         if (!branch1)
71                 return;
72         memcpy(branch1_sha1, sha1_to_hex_zero(branch1->sha1), 41);
73
74         printf("0 %06o->%06o %s->%s %s%s\n",
75                 branch1->mode, result->mode,
76                 branch1_sha1, sha1_to_hex_zero(result->sha1),
77                 base, result->path);
78 }
79
80 static int unresolved_directory(const char *base, struct name_entry n[3])
81 {
82         int baselen;
83         char *newbase;
84         struct name_entry *p;
85         struct tree_desc t[3];
86         void *buf0, *buf1, *buf2;
87
88         if (!resolve_directories)
89                 return 0;
90         p = n;
91         if (!p->mode) {
92                 p++;
93                 if (!p->mode)
94                         p++;
95         }
96         if (!S_ISDIR(p->mode))
97                 return 0;
98         baselen = strlen(base);
99         newbase = xmalloc(baselen + p->pathlen + 2);
100         memcpy(newbase, base, baselen);
101         memcpy(newbase + baselen, p->path, p->pathlen);
102         memcpy(newbase + baselen + p->pathlen, "/", 2);
103
104         buf0 = fill_tree_descriptor(t+0, n[0].sha1);
105         buf1 = fill_tree_descriptor(t+1, n[1].sha1);
106         buf2 = fill_tree_descriptor(t+2, n[2].sha1);
107         merge_trees(t, newbase);
108
109         free(buf0);
110         free(buf1);
111         free(buf2);
112         free(newbase);
113         return 1;
114 }
115
116 static void unresolved(const char *base, struct name_entry n[3])
117 {
118         if (unresolved_directory(base, n))
119                 return;
120         if (n[0].sha1)
121                 printf("1 %06o %s %s%s\n", n[0].mode, sha1_to_hex(n[0].sha1), base, n[0].path);
122         if (n[1].sha1)
123                 printf("2 %06o %s %s%s\n", n[1].mode, sha1_to_hex(n[1].sha1), base, n[1].path);
124         if (n[2].sha1)
125                 printf("3 %06o %s %s%s\n", n[2].mode, sha1_to_hex(n[2].sha1), base, n[2].path);
126 }
127
128 /*
129  * Merge two trees together (t[1] and t[2]), using a common base (t[0])
130  * as the origin.
131  *
132  * This walks the (sorted) trees in lock-step, checking every possible
133  * name. Note that directories automatically sort differently from other
134  * files (see "base_name_compare"), so you'll never see file/directory
135  * conflicts, because they won't ever compare the same.
136  *
137  * IOW, if a directory changes to a filename, it will automatically be
138  * seen as the directory going away, and the filename being created.
139  *
140  * Think of this as a three-way diff.
141  *
142  * The output will be either:
143  *  - successful merge
144  *       "0 mode sha1 filename"
145  *    NOTE NOTE NOTE! FIXME! We really really need to walk the index
146  *    in parallel with this too!
147  * 
148  *  - conflict:
149  *      "1 mode sha1 filename"
150  *      "2 mode sha1 filename"
151  *      "3 mode sha1 filename"
152  *    where not all of the 1/2/3 lines may exist, of course.
153  *
154  * The successful merge rules are the same as for the three-way merge
155  * in git-read-tree.
156  */
157 static void merge_trees(struct tree_desc t[3], const char *base)
158 {
159         for (;;) {
160                 struct name_entry entry[3];
161                 unsigned int mask = 0;
162                 int i, last;
163
164                 last = -1;
165                 for (i = 0; i < 3; i++) {
166                         if (!t[i].size)
167                                 continue;
168                         entry_extract(t+i, entry+i);
169                         if (last >= 0) {
170                                 int cmp = entry_compare(entry+i, entry+last);
171
172                                 /*
173                                  * Is the new name bigger than the old one?
174                                  * Ignore it
175                                  */
176                                 if (cmp > 0)
177                                         continue;
178                                 /*
179                                  * Is the new name smaller than the old one?
180                                  * Ignore all old ones
181                                  */
182                                 if (cmp < 0)
183                                         mask = 0;
184                         }
185                         mask |= 1u << i;
186                         last = i;
187                 }
188                 if (!mask)
189                         break;
190
191                 /*
192                  * Update the tree entries we've walked, and clear
193                  * all the unused name-entries.
194                  */
195                 for (i = 0; i < 3; i++) {
196                         if (mask & (1u << i)) {
197                                 update_tree_entry(t+i);
198                                 continue;
199                         }
200                         entry_clear(entry + i);
201                 }
202
203                 /* Same in both? */
204                 if (same_entry(entry+1, entry+2)) {
205                         if (entry[0].sha1) {
206                                 resolve(base, NULL, entry+1);
207                                 continue;
208                         }
209                 }
210
211                 if (same_entry(entry+0, entry+1)) {
212                         if (entry[2].sha1 && !S_ISDIR(entry[2].mode)) {
213                                 resolve(base, entry+1, entry+2);
214                                 continue;
215                         }
216                 }
217
218                 if (same_entry(entry+0, entry+2)) {
219                         if (entry[1].sha1 && !S_ISDIR(entry[1].mode)) {
220                                 resolve(base, NULL, entry+1);
221                                 continue;
222                         }
223                 }
224
225                 unresolved(base, entry);
226         }
227 }
228
/*
 * Resolve the revision string rev with get_sha1() and load the tree it
 * names into desc.  Dies when rev cannot be resolved or when no tree buffer
 * comes back.  Returns the tree buffer, which the caller free()s.
 */
static void *get_tree_descriptor(struct tree_desc *desc, const char *rev)
{
        unsigned char sha1[20];
        void *tree;

        if (get_sha1(rev, sha1) < 0)
                die("unknown rev %s", rev);

        tree = fill_tree_descriptor(desc, sha1);
        if (tree == NULL)
                die("%s is not a tree", rev);
        return tree;
}
241
242 int main(int argc, char **argv)
243 {
244         struct tree_desc t[3];
245         void *buf1, *buf2, *buf3;
246
247         if (argc < 4)
248                 usage(merge_tree_usage);
249
250         buf1 = get_tree_descriptor(t+0, argv[1]);
251         buf2 = get_tree_descriptor(t+1, argv[2]);
252         buf3 = get_tree_descriptor(t+2, argv[3]);
253         merge_trees(t, "");
254         free(buf1);
255         free(buf2);
256         free(buf3);
257         return 0;
258 }