c6603919cb9d7bdd686e10298b9e871a6898930e
[git.git] / fsck-cache.c
1 #include <sys/types.h>
2 #include <dirent.h>
3
4 #include "cache.h"
5 #include "commit.h"
6 #include "tree.h"
7 #include "blob.h"
8 #include "tag.h"
9 #include "refs.h"
10 #include "pack.h"
11
/* Object flag handed to mark_reachable() for everything reachable from a head. */
#define REACHABLE 0x0001

/*
 * Command-line switches.  File-scope statics start out as zero, so
 * every option is off unless main() turns it on.
 */
static int show_root;           /* --root: print parentless commits */
static int show_tags;           /* --tags: print what each tag points at */
static int show_unreachable;    /* --unreachable: report unreachable objects */
static int standalone;          /* --standalone: do not accept objects found via has_sha1_file() */
static int check_full;          /* --full: also walk alternate object dirs and packs */
static int check_strict;        /* --strict: complain about S_IFREG|0664 tree entries */
static int keep_cache_objects;  /* --cache: treat index entries as reachable */

/* Scratch buffer for the head sha1 currently being parsed from argv. */
static unsigned char head_sha1[20];
22
23 static void check_connectivity(void)
24 {
25         int i;
26
27         /* Look up all the requirements, warn about missing objects.. */
28         for (i = 0; i < nr_objs; i++) {
29                 struct object *obj = objs[i];
30                 struct object_list *refs;
31
32                 if (!obj->parsed) {
33                         if (!standalone && has_sha1_file(obj->sha1))
34                                 ; /* it is in pack */
35                         else
36                                 printf("missing %s %s\n",
37                                        obj->type, sha1_to_hex(obj->sha1));
38                         continue;
39                 }
40
41                 for (refs = obj->refs; refs; refs = refs->next) {
42                         if (refs->item->parsed ||
43                             (!standalone && has_sha1_file(refs->item->sha1)))
44                                 continue;
45                         printf("broken link from %7s %s\n",
46                                obj->type, sha1_to_hex(obj->sha1));
47                         printf("              to %7s %s\n",
48                                refs->item->type, sha1_to_hex(refs->item->sha1));
49                 }
50
51                 if (show_unreachable && !(obj->flags & REACHABLE)) {
52                         printf("unreachable %s %s\n",
53                                obj->type, sha1_to_hex(obj->sha1));
54                         continue;
55                 }
56
57                 if (!obj->used) {
58                         printf("dangling %s %s\n", obj->type, 
59                                sha1_to_hex(obj->sha1));
60                 }
61         }
62 }
63
64 /*
65  * The entries in a tree are ordered in the _path_ order,
66  * which means that a directory entry is ordered by adding
67  * a slash to the end of it.
68  *
69  * So a directory called "a" is ordered _after_ a file
70  * called "a.c", because "a/" sorts after "a.c".
71  */
72 #define TREE_UNORDERED (-1)
73 #define TREE_HAS_DUPS  (-2)
74
75 static int verify_ordered(struct tree_entry_list *a, struct tree_entry_list *b)
76 {
77         int len1 = strlen(a->name);
78         int len2 = strlen(b->name);
79         int len = len1 < len2 ? len1 : len2;
80         unsigned char c1, c2;
81         int cmp;
82
83         cmp = memcmp(a->name, b->name, len);
84         if (cmp < 0)
85                 return 0;
86         if (cmp > 0)
87                 return TREE_UNORDERED;
88
89         /*
90          * Ok, the first <len> characters are the same.
91          * Now we need to order the next one, but turn
92          * a '\0' into a '/' for a directory entry.
93          */
94         c1 = a->name[len];
95         c2 = b->name[len];
96         if (!c1 && !c2)
97                 /*
98                  * git-write-tree used to write out a nonsense tree that has
99                  * entries with the same name, one blob and one tree.  Make
100                  * sure we do not have duplicate entries.
101                  */
102                 return TREE_HAS_DUPS;
103         if (!c1 && a->directory)
104                 c1 = '/';
105         if (!c2 && b->directory)
106                 c2 = '/';
107         return c1 < c2 ? 0 : TREE_UNORDERED;
108 }
109
110 static int fsck_tree(struct tree *item)
111 {
112         int has_full_path = 0;
113         struct tree_entry_list *entry, *last;
114
115         last = NULL;
116         for (entry = item->entries; entry; entry = entry->next) {
117                 if (strchr(entry->name, '/'))
118                         has_full_path = 1;
119
120                 switch (entry->mode) {
121                 /*
122                  * Standard modes.. 
123                  */
124                 case S_IFREG | 0755:
125                 case S_IFREG | 0644:
126                 case S_IFLNK:
127                 case S_IFDIR:
128                         break;
129                 /*
130                  * This is nonstandard, but we had a few of these
131                  * early on when we honored the full set of mode
132                  * bits..
133                  */
134                 case S_IFREG | 0664:
135                         if (!check_strict)
136                                 break;
137                 default:
138                         printf("tree %s has entry %o %s\n",
139                                 sha1_to_hex(item->object.sha1),
140                                 entry->mode, entry->name);
141                 }
142
143                 if (last) {
144                         switch (verify_ordered(last, entry)) {
145                         case TREE_UNORDERED:
146                                 fprintf(stderr, "tree %s not ordered\n",
147                                         sha1_to_hex(item->object.sha1));
148                                 return -1;
149                         case TREE_HAS_DUPS:
150                                 fprintf(stderr, "tree %s has duplicate entries for '%s'\n",
151                                         sha1_to_hex(item->object.sha1),
152                                         entry->name);
153                                 return -1;
154                         default:
155                                 break;
156                         }
157                 }
158
159                 last = entry;
160         }
161
162         if (has_full_path) {
163                 fprintf(stderr, "warning: git-fsck-cache: tree %s "
164                         "has full pathnames in it\n", 
165                         sha1_to_hex(item->object.sha1));
166         }
167
168         return 0;
169 }
170
171 static int fsck_commit(struct commit *commit)
172 {
173         char *buffer = commit->buffer;
174         unsigned char sha1[20];
175
176         if (memcmp(buffer, "tree ", 5))
177                 return -1;
178         if (get_sha1_hex(buffer+5, sha1) || buffer[45] != '\n')
179                 return -1;
180         buffer += 46;
181         while (!memcmp(buffer, "parent ", 7)) {
182                 if (get_sha1_hex(buffer+7, sha1) || buffer[47] != '\n')
183                         return -1;
184                 buffer += 48;
185         }
186         if (memcmp(buffer, "author ", 7))
187                 return -1;
188         free(commit->buffer);
189         commit->buffer = NULL;
190         if (!commit->tree)
191                 return -1;
192         if (!commit->parents && show_root)
193                 printf("root %s\n", sha1_to_hex(commit->object.sha1));
194         if (!commit->date)
195                 printf("bad commit date in %s\n", 
196                        sha1_to_hex(commit->object.sha1));
197         return 0;
198 }
199
200 static int fsck_tag(struct tag *tag)
201 {
202         struct object *tagged = tag->tagged;
203
204         if (!tagged) {
205                 printf("bad object in tag %s\n", sha1_to_hex(tag->object.sha1));
206                 return -1;
207         }
208         if (!show_tags)
209                 return 0;
210
211         printf("tagged %s %s", tagged->type, sha1_to_hex(tagged->sha1));
212         printf(" (%s) in %s\n", tag->tag, sha1_to_hex(tag->object.sha1));
213         return 0;
214 }
215
216 static int fsck_sha1(unsigned char *sha1)
217 {
218         struct object *obj = parse_object(sha1);
219         if (!obj)
220                 return -1;
221         if (obj->type == blob_type)
222                 return 0;
223         if (obj->type == tree_type)
224                 return fsck_tree((struct tree *) obj);
225         if (obj->type == commit_type)
226                 return fsck_commit((struct commit *) obj);
227         if (obj->type == tag_type)
228                 return fsck_tag((struct tag *) obj);
229         return -1;
230 }
231
/*
 * This is the sorting chunk size: make it reasonably
 * big so that we can sort well..
 */
#define MAX_SHA1_ENTRIES (1024)

/* One loose object found on disk: its inode number and its sha1. */
struct sha1_entry {
        unsigned long ino;
        unsigned char sha1[20];
};

/* Batch of entries waiting to be sorted and checked. */
static struct {
        unsigned long nr;
        struct sha1_entry *entry[MAX_SHA1_ENTRIES];
} sha1_list;

/*
 * qsort() comparator ordering sha1_list.entry[] by inode number.
 *
 * The array being sorted holds POINTERS to struct sha1_entry, so
 * qsort() hands us pointers to those array slots (struct sha1_entry **).
 * Each argument therefore has to be dereferenced once to reach the
 * entry; treating the slot itself as an entry would compare the
 * pointer values instead of the inode numbers.
 *
 * Returns <0, 0 or >0 as a's inode is less than, equal to or greater
 * than b's.
 */
static int ino_compare(const void *_a, const void *_b)
{
        const struct sha1_entry *a = *(const struct sha1_entry *const *)_a;
        const struct sha1_entry *b = *(const struct sha1_entry *const *)_b;
        unsigned long ino1 = a->ino, ino2 = b->ino;

        return ino1 < ino2 ? -1 : ino1 > ino2 ? 1 : 0;
}
254
255 static void fsck_sha1_list(void)
256 {
257         int i, nr = sha1_list.nr;
258
259         qsort(sha1_list.entry, nr, sizeof(struct sha1_entry *), ino_compare);
260         for (i = 0; i < nr; i++) {
261                 struct sha1_entry *entry = sha1_list.entry[i];
262                 unsigned char *sha1 = entry->sha1;
263
264                 sha1_list.entry[i] = NULL;
265                 if (fsck_sha1(sha1) < 0)
266                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
267                 free(entry);
268         }
269         sha1_list.nr = 0;
270 }
271
272 static void add_sha1_list(unsigned char *sha1, unsigned long ino)
273 {
274         struct sha1_entry *entry = xmalloc(sizeof(*entry));
275         int nr;
276
277         entry->ino = ino;
278         memcpy(entry->sha1, sha1, 20);
279         nr = sha1_list.nr;
280         if (nr == MAX_SHA1_ENTRIES) {
281                 fsck_sha1_list();
282                 nr = 0;
283         }
284         sha1_list.entry[nr] = entry;
285         sha1_list.nr = ++nr;
286 }
287
/*
 * Scan one fan-out directory of the object store.
 *
 * `i` is the value of the directory's two-hex-digit name and `path`
 * the directory itself.  Every 38-character file name is combined
 * with the directory's two hex digits into a 40-digit sha1 and queued
 * through add_sha1_list().  "." and ".." are skipped; anything else
 * is reported on stderr as a bad sha1 file.
 *
 * Returns 0, or the result of error() when the directory cannot be
 * opened.
 */
static int fsck_dir(int i, char *path)
{
        DIR *dir = opendir(path);
        struct dirent *de;

        if (dir == NULL)
                return error("missing sha1 directory '%s'", path);

        while ((de = readdir(dir)) != NULL) {
                char name[100];
                unsigned char sha1[20];
                int len = strlen(de->d_name);

                /* Ignore the "." and ".." entries. */
                if (len == 1 && de->d_name[0] == '.')
                        continue;
                if (len == 2 && de->d_name[1] == '.' && de->d_name[0] == '.')
                        continue;

                if (len == 38) {
                        /* Directory byte plus file name gives the full hex sha1. */
                        sprintf(name, "%02x", i);
                        memcpy(name+2, de->d_name, len+1);
                        if (get_sha1_hex(name, sha1) >= 0) {
                                add_sha1_list(sha1, de->d_ino);
                                continue;
                        }
                }

                fprintf(stderr, "bad sha1 file: %s/%s\n", path, de->d_name);
        }
        closedir(dir);
        return 0;
}
323
324 static int default_refs = 0;
325
326 static int fsck_handle_ref(const char *refname, const unsigned char *sha1)
327 {
328         struct object *obj;
329
330         obj = lookup_object(sha1);
331         if (!obj) {
332                 if (!standalone && has_sha1_file(sha1)) {
333                         default_refs++;
334                         return 0; /* it is in a pack */
335                 }
336                 error("%s: invalid sha1 pointer %s", refname, sha1_to_hex(sha1));
337                 /* We'll continue with the rest despite the error.. */
338                 return 0;
339         }
340         default_refs++;
341         obj->used = 1;
342         mark_reachable(obj, REACHABLE);
343         return 0;
344 }
345
346 static void get_default_heads(void)
347 {
348         for_each_ref(fsck_handle_ref);
349         if (!default_refs)
350                 die("No default references");
351 }
352
/*
 * Check every loose object below the object directory `path` by
 * scanning its 256 fan-out subdirectories ("00" .. "ff") and then
 * flushing whatever is still queued in the sha1 list.
 *
 * A `path` too long for the internal buffer is reported on stderr and
 * its subdirectories are not scanned, instead of overflowing the
 * buffer.
 */
static void fsck_object_dir(const char *path)
{
        int i;
        for (i = 0; i < 256; i++) {
                static char dir[4096];
                int n = snprintf(dir, sizeof(dir), "%s/%02x", path, i);

                /*
                 * The formatted length is the same for every i, so once
                 * it does not fit there is no point in trying the rest.
                 */
                if (n < 0 || (size_t)n >= sizeof(dir)) {
                        fprintf(stderr, "object directory path too long: %s\n", path);
                        break;
                }
                fsck_dir(i, dir);
        }
        fsck_sha1_list();
}
363
364 static int fsck_head_link(void)
365 {
366         int fd, count;
367         char hex[40];
368         unsigned char sha1[20];
369         static char path[PATH_MAX], link[PATH_MAX];
370         const char *git_dir = gitenv(GIT_DIR_ENVIRONMENT) ? : DEFAULT_GIT_DIR_ENVIRONMENT;
371
372         snprintf(path, sizeof(path), "%s/HEAD", git_dir);
373         if (readlink(path, link, sizeof(link)) < 0)
374                 return error("HEAD is not a symlink");
375         if (strncmp("refs/heads/", link, 11))
376                 return error("HEAD points to something strange (%s)", link);
377         fd = open(path, O_RDONLY);
378         if (fd < 0)
379                 return error("HEAD: %s", strerror(errno));
380         count = read(fd, hex, sizeof(hex));
381         close(fd);
382         if (count < 0)
383                 return error("HEAD: %s", strerror(errno));
384         if (count < 40 || get_sha1_hex(hex, sha1))
385                 return error("HEAD: not a valid git pointer");
386         return 0;
387 }
388
389 int main(int argc, char **argv)
390 {
391         int i, heads;
392
393         for (i = 1; i < argc; i++) {
394                 const char *arg = argv[i];
395
396                 if (!strcmp(arg, "--unreachable")) {
397                         show_unreachable = 1;
398                         continue;
399                 }
400                 if (!strcmp(arg, "--tags")) {
401                         show_tags = 1;
402                         continue;
403                 }
404                 if (!strcmp(arg, "--root")) {
405                         show_root = 1;
406                         continue;
407                 }
408                 if (!strcmp(arg, "--cache")) {
409                         keep_cache_objects = 1;
410                         continue;
411                 }
412                 if (!strcmp(arg, "--standalone")) {
413                         standalone = 1;
414                         continue;
415                 }
416                 if (!strcmp(arg, "--full")) {
417                         check_full = 1;
418                         continue;
419                 }
420                 if (!strcmp(arg, "--strict")) {
421                         check_strict = 1;
422                         continue;
423                 }
424                 if (*arg == '-')
425                         usage("git-fsck-cache [--tags] [[--unreachable] [--cache] [--standalone | --full] <head-sha1>*]");
426         }
427
428         if (standalone && check_full)
429                 die("Only one of --standalone or --full can be used.");
430         if (standalone)
431                 unsetenv("GIT_ALTERNATE_OBJECT_DIRECTORIES");
432
433         fsck_head_link();
434         fsck_object_dir(get_object_directory());
435         if (check_full) {
436                 int j;
437                 struct packed_git *p;
438                 prepare_alt_odb();
439                 for (j = 0; alt_odb[j].base; j++) {
440                         char namebuf[PATH_MAX];
441                         int namelen = alt_odb[j].name - alt_odb[j].base;
442                         memcpy(namebuf, alt_odb[j].base, namelen);
443                         namebuf[namelen - 1] = 0;
444                         fsck_object_dir(namebuf);
445                 }
446                 prepare_packed_git();
447                 for (p = packed_git; p; p = p->next)
448                         /* verify gives error messages itself */
449                         verify_pack(p, 0);
450
451                 for (p = packed_git; p; p = p->next) {
452                         int num = num_packed_objects(p);
453                         for (i = 0; i < num; i++) {
454                                 unsigned char sha1[20];
455                                 nth_packed_object_sha1(p, i, sha1);
456                                 if (fsck_sha1(sha1) < 0)
457                                         fprintf(stderr, "bad sha1 entry '%s'\n", sha1_to_hex(sha1));
458
459                         }
460                 }
461         }
462
463         heads = 0;
464         for (i = 1; i < argc; i++) {
465                 const char *arg = argv[i]; 
466
467                 if (*arg == '-')
468                         continue;
469
470                 if (!get_sha1(arg, head_sha1)) {
471                         struct object *obj = lookup_object(head_sha1);
472
473                         /* Error is printed by lookup_object(). */
474                         if (!obj)
475                                 continue;
476
477                         obj->used = 1;
478                         mark_reachable(obj, REACHABLE);
479                         heads++;
480                         continue;
481                 }
482                 error("expected sha1, got %s", arg);
483         }
484
485         /*
486          * If we've not been given any explicit head information, do the
487          * default ones from .git/refs. We also consider the index file
488          * in this case (ie this implies --cache).
489          */
490         if (!heads) {
491                 get_default_heads();
492                 keep_cache_objects = 1;
493         }
494
495         if (keep_cache_objects) {
496                 int i;
497                 read_cache();
498                 for (i = 0; i < active_nr; i++) {
499                         struct blob *blob = lookup_blob(active_cache[i]->sha1);
500                         struct object *obj;
501                         if (!blob)
502                                 continue;
503                         obj = &blob->object;
504                         obj->used = 1;
505                         mark_reachable(obj, REACHABLE);
506                 }
507         }
508
509         check_connectivity();
510         return 0;
511 }