6ac122b71cf9a5eec298a003032c5724d47f306d
[git.git] / fsck-cache.c
1 #include <sys/types.h>
2 #include <dirent.h>
3
4 #include "cache.h"
5 #include "commit.h"
6 #include "tree.h"
7 #include "blob.h"
8 #include "tag.h"
9 #include "delta.h"
10
11 #define REACHABLE 0x0001
12
13 static int show_root = 0;
14 static int show_tags = 0;
15 static int show_unreachable = 0;
16 static int show_max_delta_depth = 0;
17 static int keep_cache_objects = 0; 
18 static unsigned char head_sha1[20];
19
20 static void expand_deltas(void)
21 {
22         int i, max_depth = 0;
23
24         /*
25          * To be as efficient as possible we look for delta heads and
26          * recursively process them going backward, and parsing
27          * resulting objects along the way.  This allows for processing
28          * each delta objects only once regardless of the delta depth.
29          */
30         for (i = 0; i < nr_objs; i++) {
31                 struct object *obj = objs[i];
32                 if (obj->parsed && !obj->delta && obj->attached_deltas) {
33                         int depth = 0;
34                         char type[10];
35                         unsigned long size;
36                         void *buf = read_sha1_file(obj->sha1, type, &size);
37                         if (!buf)
38                                 continue;
39                         depth = process_deltas(buf, size, obj->type,
40                                                obj->attached_deltas);
41                         if (max_depth < depth)
42                                 max_depth = depth;
43                 }
44         }
45         if (show_max_delta_depth)
46                 printf("maximum delta depth = %d\n", max_depth);
47 }
48                                                                                                                         
49 static void check_connectivity(void)
50 {
51         int i;
52
53         /* Look up all the requirements, warn about missing objects.. */
54         for (i = 0; i < nr_objs; i++) {
55                 struct object *obj = objs[i];
56                 struct object_list *refs;
57
58                 if (!obj->parsed) {
59                         if (obj->delta)
60                                 printf("unresolved delta %s\n",
61                                        sha1_to_hex(obj->sha1));
62                         else
63                                 printf("missing %s %s\n",
64                                        obj->type, sha1_to_hex(obj->sha1));
65                         continue;
66                 }
67
68                 for (refs = obj->refs; refs; refs = refs->next) {
69                         if (refs->item->parsed)
70                                 continue;
71                         printf("broken link from %7s %s\n",
72                                obj->type, sha1_to_hex(obj->sha1));
73                         printf("              to %7s %s\n",
74                                refs->item->type, sha1_to_hex(refs->item->sha1));
75                 }
76
77                 /* Don't bother with tag reachability. */
78                 if (obj->type == tag_type)
79                         continue;
80
81                 if (show_unreachable && !(obj->flags & REACHABLE)) {
82                         if (obj->attached_deltas)
83                                 printf("foreign delta reference %s\n", 
84                                        sha1_to_hex(obj->sha1));
85                         else
86                                 printf("unreachable %s %s\n",
87                                        obj->type, sha1_to_hex(obj->sha1));
88                         continue;
89                 }
90
91                 if (!obj->used) {
92                         printf("dangling %s %s\n", obj->type, 
93                                sha1_to_hex(obj->sha1));
94                 }
95         }
96 }
97
98 /*
99  * The entries in a tree are ordered in the _path_ order,
100  * which means that a directory entry is ordered by adding
101  * a slash to the end of it.
102  *
103  * So a directory called "a" is ordered _after_ a file
104  * called "a.c", because "a/" sorts after "a.c".
105  */
106 #define TREE_UNORDERED (-1)
107 #define TREE_HAS_DUPS  (-2)
108
109 static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
110 {
111         int len1 = strlen(a->name);
112         int len2 = strlen(b->name);
113         int len = len1 < len2 ? len1 : len2;
114         unsigned char c1, c2;
115         int cmp;
116
117         cmp = memcmp(a->name, b->name, len);
118         if (cmp < 0)
119                 return 0;
120         if (cmp > 0)
121                 return TREE_UNORDERED;
122
123         /*
124          * Ok, the first <len> characters are the same.
125          * Now we need to order the next one, but turn
126          * a '\0' into a '/' for a directory entry.
127          */
128         c1 = a->name[len];
129         c2 = b->name[len];
130         if (!c1 && !c2)
131                 /*
132                  * git-write-tree used to write out a nonsense tree that has
133                  * entries with the same name, one blob and one tree.  Make
134                  * sure we do not have duplicate entries.
135                  */
136                 return TREE_HAS_DUPS;
137         if (!c1 && a->directory)
138                 c1 = '/';
139         if (!c2 && b->directory)
140                 c2 = '/';
141         return c1 < c2 ? 0 : TREE_UNORDERED;
142 }
143
144 static int fsck_tree(struct tree *item)
145 {
146         int has_full_path = 0;
147         struct tree_entry_list *entry, *last;
148
149         last = NULL;
150         for (entry = item->entries; entry; entry = entry->next) {
151                 if (strchr(entry->name, '/'))
152                         has_full_path = 1;
153
154                 switch (entry->mode) {
155                 /*
156                  * Standard modes.. 
157                  */
158                 case S_IFREG | 0755:
159                 case S_IFREG | 0644:
160                 case S_IFLNK:
161                 case S_IFDIR:
162                         break;
163                 /*
164                  * This is nonstandard, but we had a few of these
165                  * early on when we honored the full set of mode
166                  * bits..
167                  */
168                 case S_IFREG | 0664:
169                         break;
170                 default:
171                         printf("tree %s has entry %o %s\n",
172                                 sha1_to_hex(item->object.sha1),
173                                 entry->mode, entry->name);
174                 }
175
176                 if (last) {
177                         switch (verify_ordered(last, entry)) {
178                         case TREE_UNORDERED:
179                                 fprintf(stderr, "tree %s not ordered\n",
180                                         sha1_to_hex(item->object.sha1));
181                                 return -1;
182                         case TREE_HAS_DUPS:
183                                 fprintf(stderr, "tree %s has duplicate entries for '%s'\n",
184                                         sha1_to_hex(item->object.sha1),
185                                         entry->name);
186                                 return -1;
187                         default:
188                                 break;
189                         }
190                 }
191
192                 last = entry;
193         }
194
195         if (has_full_path) {
196                 fprintf(stderr, "warning: git-fsck-cache: tree %s "
197                         "has full pathnames in it\n", 
198                         sha1_to_hex(item->object.sha1));
199         }
200
201         return 0;
202 }
203
204 static int fsck_commit(struct commit *commit)
205 {
206         if (!commit->tree)
207                 return -1;
208         if (!commit->parents && show_root)
209                 printf("root %s\n", sha1_to_hex(commit->object.sha1));
210         if (!commit->date)
211                 printf("bad commit date in %s\n", 
212                        sha1_to_hex(commit->object.sha1));
213         return 0;
214 }
215
216 static int fsck_tag(struct tag *tag)
217 {
218         struct object *tagged = tag->tagged;
219
220         if (!tagged) {
221                 printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
222                 return -1;
223         }
224         if (!show_tags)
225                 return 0;
226
227         printf("tagged %s %s", tagged->type, sha1_to_hex(tagged->sha1));
228         printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
229         return 0;
230 }
231
232 static int fsck_sha1(unsigned char *sha1)
233 {
234         struct object *obj = parse_object(sha1);
235         if (!obj)
236                 return -1;
237         if (obj->type == blob_type)
238                 return 0;
239         if (obj->type == tree_type)
240                 return fsck_tree((struct tree *) obj);
241         if (obj->type == commit_type)
242                 return fsck_commit((struct commit *) obj);
243         if (obj->type == tag_type)
244                 return fsck_tag((struct tag *) obj);
245         if (!obj->type && obj->delta)
246                 return 0;
247         return -1;
248 }
249
250 /*
251  * This is the sorting chunk size: make it reasonably
252  * big so that we can sort well..
253  */
254 #define MAX_SHA1_ENTRIES (1024)
255
256 struct sha1_entry {
257         unsigned long ino;
258         unsigned char sha1[20];
259 };
260
261 static struct {
262         unsigned long nr;
263         struct sha1_entry *entry[MAX_SHA1_ENTRIES];
264 } sha1_list;
265
266 static int ino_compare(const void *_a, const void *_b)
267 {
268         const struct sha1_entry *a = _a, *b = _b;
269         unsigned long ino1 = a->ino, ino2 = b->ino;
270         return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
271 }
272
273 static void fsck_sha1_list(void)
274 {
275         int i, nr = sha1_list.nr;
276
277         qsort(sha1_list.entry, nr, sizeof(struct sha1_entry *), ino_compare);
278         for (i = 0; i < nr; i++) {
279                 struct sha1_entry *entry = sha1_list.entry[i];
280                 unsigned char *sha1 = entry->sha1;
281
282                 sha1_list.entry[i] = NULL;
283                 if (fsck_sha1(sha1) < 0)
284                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
285                 free(entry);
286         }
287         sha1_list.nr = 0;
288 }
289
290 static void add_sha1_list(unsigned char *sha1, unsigned long ino)
291 {
292         struct sha1_entry *entry = xmalloc(sizeof(*entry));
293         int nr;
294
295         entry->ino = ino;
296         memcpy(entry->sha1, sha1, 20);
297         nr = sha1_list.nr;
298         if (nr == MAX_SHA1_ENTRIES) {
299                 fsck_sha1_list();
300                 nr = 0;
301         }
302         sha1_list.entry[nr] = entry;
303         sha1_list.nr = ++nr;
304 }
305
306 static int fsck_dir(int i, char *path)
307 {
308         DIR *dir = opendir(path);
309         struct dirent *de;
310
311         if (!dir) {
312                 return error("missing sha1 directory '%s'", path);
313         }
314
315         while ((de = readdir(dir)) != NULL) {
316                 char name[100];
317                 unsigned char sha1[20];
318                 int len = strlen(de->d_name);
319
320                 switch (len) {
321                 case 2:
322                         if (de->d_name[1] != '.')
323                                 break;
324                 case 1:
325                         if (de->d_name[0] != '.')
326                                 break;
327                         continue;
328                 case 38:
329                         sprintf(name, "%02x", i);
330                         memcpy(name+2, de->d_name, len+1);
331                         if (get_sha1_hex(name, sha1) < 0)
332                                 break;
333                         add_sha1_list(sha1, de->d_ino);
334                         continue;
335                 }
336                 fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
337         }
338         closedir(dir);
339         return 0;
340 }
341
342 static int read_sha1_reference(const char *path)
343 {
344         char hexname[60];
345         unsigned char sha1[20];
346         int fd = open(path, O_RDONLY), len;
347         struct object *obj;
348
349         if (fd < 0)
350                 return -1;
351
352         len = read(fd, hexname, sizeof(hexname));
353         close(fd);
354         if (len < 40)
355                 return -1;
356
357         if (get_sha1_hex(hexname, sha1) < 0)
358                 return -1;
359
360         obj = lookup_object(sha1);
361         if (!obj)
362                 return error("%s: invalid sha1 pointer %.40s", path, hexname);
363
364         obj->used = 1;
365         mark_reachable(obj, REACHABLE);
366         return 0;
367 }
368
369 static void find_file_objects(const char *base, const char *name)
370 {
371         int baselen = strlen(base);
372         int namelen = strlen(name);
373         char *path = xmalloc(baselen + namelen + 2);
374         struct stat st;
375
376         memcpy(path, base, baselen);
377         path[baselen] = '/';
378         memcpy(path + baselen + 1, name, namelen+1);
379         if (stat(path, &st) < 0)
380                 return;
381
382         /*
383          * Recurse into directories
384          */
385         if (S_ISDIR(st.st_mode)) {
386                 DIR *dir = opendir(path);
387                 if (dir) {
388                         struct dirent *de;
389                         while ((de = readdir(dir)) != NULL) {
390                                 if (de->d_name[0] == '.')
391                                         continue;
392                                 find_file_objects(path, de->d_name);
393                         }
394                         closedir(dir);
395                 }
396                 return;
397         }
398         if (S_ISREG(st.st_mode)) {
399                 read_sha1_reference(path);
400                 return;
401         }
402 }
403
404 static void get_default_heads(void)
405 {
406         char *git_dir = gitenv(GIT_DIR_ENVIRONMENT) ? : DEFAULT_GIT_DIR_ENVIRONMENT;
407         find_file_objects(git_dir, "refs");
408 }
409
410 int main(int argc, char **argv)
411 {
412         int i, heads;
413         char *sha1_dir;
414
415         for (i = 1; i < argc; i++) {
416                 const char *arg = argv[i];
417
418                 if (!strcmp(arg, "--unreachable")) {
419                         show_unreachable = 1;
420                         continue;
421                 }
422                 if (!strcmp(arg, "--tags")) {
423                         show_tags = 1;
424                         continue;
425                 }
426                 if (!strcmp(arg, "--root")) {
427                         show_root = 1;
428                         continue;
429                 }
430                 if (!strcmp(arg, "--delta-depth")) {
431                         show_max_delta_depth = 1;
432                         continue;
433                 }
434                 if (!strcmp(arg, "--cache")) {
435                         keep_cache_objects = 1;
436                         continue;
437                 }
438                 if (*arg == '-')
439                         usage("git-fsck-cache [--tags] [[--unreachable] [--cache] <head-sha1>*]");
440         }
441
442         sha1_dir = get_object_directory();
443         for (i = 0; i < 256; i++) {
444                 static char dir[4096];
445                 sprintf(dir, "%s/%02x", sha1_dir, i);
446                 fsck_dir(i, dir);
447         }
448         fsck_sha1_list();
449
450         expand_deltas();
451
452         heads = 0;
453         for (i = 1; i < argc; i++) {
454                 const char *arg = argv[i]; 
455
456                 if (*arg == '-')
457                         continue;
458
459                 if (!get_sha1(arg, head_sha1)) {
460                         struct object *obj = lookup_object(head_sha1);
461
462                         /* Error is printed by lookup_object(). */
463                         if (!obj)
464                                 continue;
465
466                         obj->used = 1;
467                         mark_reachable(obj, REACHABLE);
468                         heads++;
469                         continue;
470                 }
471                 error("expected sha1, got %s", arg);
472         }
473
474         /*
475          * If we've not been given any explicit head information, do the
476          * default ones from .git/refs. We also consider the index file
477          * in this case (ie this implies --cache).
478          */
479         if (!heads) {
480                 get_default_heads();
481                 keep_cache_objects = 1;
482         }
483
484         if (keep_cache_objects) {
485                 int i;
486                 read_cache();
487                 for (i = 0; i < active_nr; i++) {
488                         struct blob *blob = lookup_blob(active_cache[i]->sha1);
489                         struct object *obj;
490                         if (!blob)
491                                 continue;
492                         obj = &blob->object;
493                         obj->used = 1;
494                         mark_reachable(obj, REACHABLE);
495                 }
496         }
497
498         check_connectivity();
499         return 0;
500 }