/* Bytes read per chunk when replaying a previously saved partial download. */
7 #define PREV_BUF_SIZE 4096
/* Capacity of the char arrays that receive a formatted Range request header. */
8 #define RANGE_HEADER_SIZE 30
/* Alternates fetch state. It starts at -1; fetch_alternates waits while it
 * is 0 and treats 1 as already fetched. */
10 static int got_alternates = -1;
/* Bumped in fetch_object when the zlib stream of a fetched loose object did
 * not end cleanly; main prints a hint when it is non-zero. */
11 static int corrupt_object_found = 0;
/* Extra request header list for curl; main appends a bare Pragma: entry. */
13 static struct curl_slist *no_pragma_header;
/* NOTE(review): the embedded numbering skips lines here, so the struct
 * header is not visible. These look like members of struct alt_base: the
 * packs known for a repository and the link to the next repository. */
19 struct packed_git *packs;
20 struct alt_base *next;
/* Head of the repository list. main allocates the first entry and fetch
 * walks the list through the next pointer. */
23 static struct alt_base *alt = NULL;
/* Life cycle of an object request. The enumerators are not visible in this
 * listing; the code uses WAITING, ACTIVE, ABORTED and COMPLETE. */
25 enum object_request_state {
/* Presumably the members of struct object_request (header not visible). */
/* Id of the object being requested. */
34 unsigned char sha1[20];
/* Repository currently being tried for this object. */
35 struct alt_base *repo;
/* Final object path and the temporary path used while downloading. */
37 char filename[PATH_MAX];
38 char tmpfile[PATH_MAX];
40 enum object_request_state state;
/* Handed to curl as CURLOPT_ERRORBUFFER. */
42 char errorstr[CURL_ERROR_SIZE];
/* SHA-1 computed over the inflated data; filled by SHA1_Final in
 * finish_object_request and compared against sha1. */
44 unsigned char real_sha1[20];
/* Slot running the transfer, or NULL when none is attached. */
49 struct active_request_slot *slot;
/* Next request in the queue headed by object_queue_head. */
50 struct object_request *next;
/* State shared between fetch_alternates and its completion callback. */
53 struct alternates_request {
/* Response body buffer and the slot that performs the request. */
56 struct buffer *buffer;
57 struct active_request_slot *slot;
/* Singly linked queue of object requests; prefetch appends to it. */
61 static struct object_request *object_queue_head = NULL;
/*
 * curl write callback for loose-object downloads (installed with
 * CURLOPT_WRITEFUNCTION in start_object_request).
 *
 * ptr, eltsize and nmemb describe the received bytes; data is the
 * struct object_request that was registered through CURLOPT_FILE.
 * The bytes are appended to the temporary file obj_req->local and are
 * also inflated so that the SHA-1 of the uncompressed object accumulates
 * in obj_req->c. The last inflate status is left in obj_req->zret.
 *
 * NOTE(review): the return statement and the handling of a failed
 * write are on lines this listing does not show.
 */
63 static size_t fwrite_sha1_file(void *ptr, size_t eltsize, size_t nmemb,
/* Scratch output buffer for one inflate pass. */
66 unsigned char expn[4096];
67 size_t size = eltsize * nmemb;
69 struct object_request *obj_req = (struct object_request *)data;
/* Store the raw (still deflated) bytes, repeating until posn reaches size.
 * NOTE(review): ptr is void *, so ptr + posn relies on the GNU extension
 * that allows arithmetic on void pointers. */
71 ssize_t retval = write(obj_req->local,
72 ptr + posn, size - posn);
76 } while (posn < size);
/* Feed the same bytes to zlib. Each pass inflates into expn and hashes
 * exactly the bytes that were produced. */
78 obj_req->stream.avail_in = size;
79 obj_req->stream.next_in = ptr;
81 obj_req->stream.next_out = expn;
82 obj_req->stream.avail_out = sizeof(expn);
83 obj_req->zret = inflate(&obj_req->stream, Z_SYNC_FLUSH);
84 SHA1_Update(&obj_req->c, expn,
85 sizeof(expn) - obj_req->stream.avail_out);
/* Stop once the input is consumed or inflate reports anything but Z_OK. */
86 } while (obj_req->stream.avail_in && obj_req->zret == Z_OK);
/* Forward declarations: both functions are used before their definitions
 * (process_object_response is installed as a slot callback in
 * start_object_request and itself calls fetch_alternates). */
91 static void fetch_alternates(char *base);
93 static void process_object_response(void *callback_data);
/*
 * Start downloading one loose object over HTTP.
 *
 * Visible steps, in order:
 *  - move any earlier temporary file aside as <filename>.prev and create
 *    a fresh temporary file (retrying once after a mkdir when the open
 *    fails with ENOENT); on failure the request becomes ABORTED;
 *  - reset the zlib stream and the SHA-1 context;
 *  - build the object URL from repo->base, the objects/ prefix and the
 *    hex object name (its first two characters are copied separately);
 *  - replay the .prev file through fwrite_sha1_file in chunks of
 *    PREV_BUF_SIZE, counting the replayed bytes in prev_posn;
 *  - configure a curl slot, optionally with a Range header, and start it.
 *
 * NOTE(review): this listing omits source lines (the embedded numbering
 * has gaps), including the terminators of two block comments, so some
 * control flow cannot be confirmed from here.
 * NOTE(review): two buffers of identical size are allocated for the URL
 * (url and obj_req->url) and one is copied into the other; no free of
 * url is visible.
 */
95 static void start_object_request(struct object_request *obj_req)
97 char *hex = sha1_to_hex(obj_req->sha1);
98 char prevfile[PATH_MAX];
102 unsigned char prev_buf[PREV_BUF_SIZE];
103 ssize_t prev_read = 0;
105 char range[RANGE_HEADER_SIZE];
106 struct curl_slist *range_header = NULL;
107 struct active_request_slot *slot;
/* Keep what an earlier attempt left behind as <filename>.prev. The unlink
 * covers the case where the rename failed, so that the exclusive create
 * below can succeed. */
109 snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
111 rename(obj_req->tmpfile, prevfile);
112 unlink(obj_req->tmpfile);
/* local is expected to be -1 here; any other value means an earlier
 * descriptor was never closed. It is reported but then overwritten. */
114 if (obj_req->local != -1)
115 error("fd leakage in start: %d", obj_req->local);
116 obj_req->local = open(obj_req->tmpfile,
117 O_WRONLY | O_CREAT | O_EXCL, 0666);
118 /* This could have failed due to the "lazy directory creation";
119 * try to mkdir the last path component.
121 if (obj_req->local < 0 && errno == ENOENT) {
122 char *dir = strrchr(obj_req->tmpfile, '/');
125 mkdir(obj_req->tmpfile, 0777);
128 obj_req->local = open(obj_req->tmpfile,
129 O_WRONLY | O_CREAT | O_EXCL, 0666);
132 if (obj_req->local < 0) {
133 obj_req->state = ABORTED;
134 error("Couldn't create temporary file %s for %s: %s",
135 obj_req->tmpfile, obj_req->filename, strerror(errno));
139 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
141 inflateInit(&obj_req->stream);
143 SHA1_Init(&obj_req->c);
145 url = xmalloc(strlen(obj_req->repo->base) + 50);
146 obj_req->url = xmalloc(strlen(obj_req->repo->base) + 50);
147 strcpy(url, obj_req->repo->base);
148 posn = url + strlen(obj_req->repo->base);
149 strcpy(posn, "objects/");
151 memcpy(posn, hex, 2);
154 strcpy(posn, hex + 2);
155 strcpy(obj_req->url, url);
157 /* If a previous temp file is present, process what was already
159 prevlocal = open(prevfile, O_RDONLY);
160 if (prevlocal != -1) {
162 prev_read = read(prevlocal, prev_buf, PREV_BUF_SIZE);
164 if (fwrite_sha1_file(prev_buf,
167 obj_req) == prev_read) {
168 prev_posn += prev_read;
173 } while (prev_read > 0);
178 /* Reset inflate/SHA1 if there was an error reading the previous temp
179 file; also rewind to the beginning of the local file. */
180 if (prev_read == -1) {
181 memset(&obj_req->stream, 0, sizeof(obj_req->stream));
182 inflateInit(&obj_req->stream);
183 SHA1_Init(&obj_req->c);
/* NOTE(review): lseek takes (fd, offset, whence) but the call below passes
 * (fd, SEEK_SET, 0). It only rewinds correctly where SEEK_SET equals 0;
 * the last two arguments should be swapped. */
186 lseek(obj_req->local, SEEK_SET, 0);
187 ftruncate(obj_req->local, 0);
/* Attach a slot and register process_object_response as its callback
 * (presumably invoked once the transfer on that slot has finished). */
191 slot = get_active_slot();
192 slot->callback_func = process_object_response;
193 slot->callback_data = obj_req;
194 obj_req->slot = slot;
/* CURLOPT_FILE supplies the opaque pointer that fwrite_sha1_file receives
 * as its data argument. */
196 curl_easy_setopt(slot->curl, CURLOPT_FILE, obj_req);
197 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
198 curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, obj_req->errorstr);
199 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
200 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
202 /* If we have successfully processed data from a previous fetch
203 attempt, only fetch the data we don't already have. */
207 "Resuming fetch of object %s at byte %ld\n",
/* NOTE(review): range holds RANGE_HEADER_SIZE (30) bytes. The fixed text,
 * the trailing dash and the terminator use 15 of them, leaving room for
 * 15 digits; sprintf does not enforce that bound. */
209 sprintf(range, "Range: bytes=%ld-", prev_posn);
/* This header list replaces no_pragma_header for this request.
 * NOTE(review): no curl_slist_free_all for range_header is visible. */
210 range_header = curl_slist_append(range_header, range);
211 curl_easy_setopt(slot->curl,
212 CURLOPT_HTTPHEADER, range_header);
215 /* Try to get the request started, abort the request on error */
216 obj_req->state = ACTIVE;
217 if (!start_active_slot(slot)) {
218 obj_req->state = ABORTED;
219 obj_req->slot = NULL;
220 close(obj_req->local); obj_req->local = -1;
/*
 * Finish a transfer that has completed: seal and close the temporary
 * file, then either discard it or move it to its final name.
 *
 * The temporary file is unlinked when the zlib stream did not reach
 * Z_STREAM_END or when the computed SHA-1 differs from the requested
 * one; otherwise move_temp_to_file installs it as obj_req->filename.
 *
 * NOTE(review): the listing omits lines, for example whatever follows
 * each unlink and the store into obj_req->rename that the final test
 * relies on; confirm against the full file.
 */
226 static void finish_object_request(struct object_request *obj_req)
/* Make the downloaded file read-only before closing it. */
230 fchmod(obj_req->local, 0444);
231 close(obj_req->local); obj_req->local = -1;
/* HTTP 416 answers a Range request the server could not satisfy. Only a
 * warning is printed in that branch. */
233 if (obj_req->http_code == 416) {
234 fprintf(stderr, "Warning: requested range invalid; we may already have all the data.\n");
235 } else if (obj_req->curl_result != CURLE_OK) {
/* NOTE(review): a condition between the stat and the unlink is not
 * visible in this listing. */
236 if (stat(obj_req->tmpfile, &st) == 0)
238 unlink(obj_req->tmpfile);
/* Close the zlib stream and produce the digest of the inflated data. */
242 inflateEnd(&obj_req->stream);
243 SHA1_Final(obj_req->real_sha1, &obj_req->c);
/* An incomplete or broken zlib stream means the object is unusable. */
244 if (obj_req->zret != Z_STREAM_END) {
245 unlink(obj_req->tmpfile);
/* The content must hash to the id that was asked for. */
248 if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
249 unlink(obj_req->tmpfile);
253 move_temp_to_file(obj_req->tmpfile, obj_req->filename);
/* Report success only when obj_req->rename is 0. */
255 if (obj_req->rename == 0)
256 pull_say("got %s\n", sha1_to_hex(obj_req->sha1));
/*
 * Slot callback registered by start_object_request; callback_data is the
 * struct object_request of the transfer.
 *
 * Copies the curl result and HTTP status from the slot into the request,
 * detaches the slot and marks the request COMPLETE. When the object was
 * not found (HTTP 404 or CURLE_FILE_COULDNT_READ_FILE) the alternates
 * are loaded and, if another repository follows the current one, the
 * request is restarted; otherwise finish_object_request validates and
 * installs the download.
 *
 * NOTE(review): the lines that advance obj_req->repo and reset
 * obj_req->local before the restart are not visible in this listing.
 */
259 static void process_object_response(void *callback_data)
261 struct object_request *obj_req =
262 (struct object_request *)callback_data;
/* Snapshot the results before letting go of the slot. */
264 obj_req->curl_result = obj_req->slot->curl_result;
265 obj_req->http_code = obj_req->slot->http_code;
266 obj_req->slot = NULL;
267 obj_req->state = COMPLETE;
269 /* Use alternates if necessary */
270 if (obj_req->http_code == 404 ||
271 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE) {
/* Alternates are always looked up relative to the primary repository. */
272 fetch_alternates(alt->base);
273 if (obj_req->repo->next != NULL) {
/* Close the current temporary file; start_object_request opens a new one. */
276 close(obj_req->local);
278 start_object_request(obj_req);
283 finish_object_request(obj_req);
/*
 * Remove obj_req from the queue headed by object_queue_head.
 *
 * A descriptor that is still open at this point is reported with error()
 * but is not closed by the visible code.
 * NOTE(review): the release of the request memory, if any, is on lines
 * this listing does not show.
 */
286 static void release_object_request(struct object_request *obj_req)
288 struct object_request *entry = object_queue_head;
290 if (obj_req->local != -1)
291 error("fd leakage in release: %d", obj_req->local);
/* Head removal is a plain pointer update. */
292 if (obj_req == object_queue_head) {
293 object_queue_head = obj_req->next;
/* Otherwise find the predecessor and splice obj_req out, if it is queued. */
295 while (entry->next != NULL && entry->next != obj_req)
297 if (entry->next == obj_req)
298 entry->next = entry->next->next;
/*
 * Only built when USE_CURL_MULTI is defined.
 *
 * Walks the request queue and starts WAITING requests for as long as
 * active_requests stays below max_requests. A request whose object is
 * already present locally is marked COMPLETE instead of being started.
 * A second pass releases the curl handle of every slot that is not in
 * use.
 */
305 #ifdef USE_CURL_MULTI
306 void fill_active_slots(void)
308 struct object_request *obj_req = object_queue_head;
309 struct active_request_slot *slot = active_queue_head;
312 while (active_requests < max_requests && obj_req != NULL) {
313 if (obj_req->state == WAITING) {
/* Nothing to download when the object already exists locally. */
314 if (has_sha1_file(obj_req->sha1))
315 obj_req->state = COMPLETE;
317 start_object_request(obj_req);
/* Give curl a chance to make progress on the transfers. */
318 curl_multi_perform(curlm, &num_transfers);
320 obj_req = obj_req->next;
/* Idle slots do not need to keep a curl easy handle around. */
323 while (slot != NULL) {
324 if (!slot->in_use && slot->curl != NULL) {
325 curl_easy_cleanup(slot->curl);
/*
 * Queue a request for the loose object sha1.
 *
 * A new struct object_request is allocated, put in the WAITING state and
 * linked at the tail of the queue. Its final path comes from
 * sha1_file_name and its temporary path is that name with a .temp suffix.
 *
 * NOTE(review): the initialisation of the remaining fields and the code
 * inside the trailing USE_CURL_MULTI section are not visible in this
 * listing.
 */
333 void prefetch(unsigned char *sha1)
335 struct object_request *newreq;
336 struct object_request *tail;
337 char *filename = sha1_file_name(sha1);
339 newreq = xmalloc(sizeof(*newreq));
340 memcpy(newreq->sha1, sha1, 20);
344 newreq->state = WAITING;
/* snprintf bounds both paths to the size of their PATH_MAX arrays. */
345 snprintf(newreq->filename, sizeof(newreq->filename), "%s", filename);
346 snprintf(newreq->tmpfile, sizeof(newreq->tmpfile),
347 "%s.temp", filename);
/* Append: either become the head or walk to the current tail. */
350 if (object_queue_head == NULL) {
351 object_queue_head = newreq;
353 tail = object_queue_head;
354 while (tail->next != NULL) {
360 #ifdef USE_CURL_MULTI
/*
 * Download the index of pack sha1 from repo.
 *
 * The data goes to a temporary file (the final index name with a .temp
 * suffix) that is opened in append mode so that an interrupted transfer
 * can be resumed with a Range header. On success the temporary file is
 * moved into place and the result of move_temp_to_file is returned; the
 * visible failure paths return the value of error().
 *
 * NOTE(review): the early exit taken when has_pack_index succeeds, and
 * the cleanup around the error returns, are on lines not shown here.
 */
366 static int fetch_index(struct alt_base *repo, unsigned char *sha1)
368 char *hex = sha1_to_hex(sha1);
371 char tmpfile[PATH_MAX];
373 char range[RANGE_HEADER_SIZE];
374 struct curl_slist *range_header = NULL;
377 struct active_request_slot *slot;
378 struct slot_results results;
380 if (has_pack_index(sha1))
384 fprintf(stderr, "Getting index for pack %s\n", hex);
/* 64 covers the fixed path text (19), the name in hex (40 characters for
 * a SHA-1, as sha1_to_hex presumably yields), .idx (4) and the
 * terminator. */
386 url = xmalloc(strlen(repo->base) + 64);
387 sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
389 filename = sha1_pack_index_name(sha1);
390 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
391 indexfile = fopen(tmpfile, "a");
/* NOTE(review): no free of url is visible before this return. */
393 return error("Unable to open local file %s for pack index",
/* results lives on this stack frame; the slot fills it in while
 * run_active_slot drives the transfer. */
396 slot = get_active_slot();
397 slot->results = &results;
398 curl_easy_setopt(slot->curl, CURLOPT_FILE, indexfile);
399 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
400 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
401 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
402 slot->local = indexfile;
404 /* If there is data present from a previous transfer attempt,
405 resume where it left off */
/* NOTE(review): for a stream opened in append mode the C standard leaves
 * the initial file position implementation-defined (start or end), so
 * ftell may not report the existing size on every platform. */
406 prev_posn = ftell(indexfile);
410 "Resuming fetch of index for pack %s at byte %ld\n",
/* NOTE(review): unbounded sprintf into a RANGE_HEADER_SIZE buffer; there
 * is room for at most 15 digits of prev_posn. */
412 sprintf(range, "Range: bytes=%ld-", prev_posn);
413 range_header = curl_slist_append(range_header, range);
414 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
417 if (start_active_slot(slot)) {
418 run_active_slot(slot);
419 if (results.curl_result != CURLE_OK) {
421 return error("Unable to get pack index %s\n%s", url,
426 return error("Unable to start request");
431 return move_temp_to_file(tmpfile, filename);
/*
 * Make the pack identified by sha1 known for repo.
 *
 * Packs whose pack file is already present locally are skipped with a
 * return value of 0. Otherwise the index is fetched, parsed, and the
 * resulting struct packed_git is pushed onto the front of repo->packs.
 *
 * NOTE(review): the result of parse_pack_index is dereferenced on the
 * very next source line without a check. If that helper can return NULL
 * (for example on a corrupt index) this crashes; confirm and add a guard.
 */
434 static int setup_index(struct alt_base *repo, unsigned char *sha1)
436 struct packed_git *new_pack;
437 if (has_pack_file(sha1))
438 return 0; // don't list this as something we can get
/* A non-zero result from fetch_index means the index could not be had. */
440 if (fetch_index(repo, sha1))
443 new_pack = parse_pack_index(sha1);
444 new_pack->next = repo->packs;
445 repo->packs = new_pack;
/*
 * Slot callback registered by fetch_alternates; callback_data is the
 * struct alternates_request of that call.
 *
 * If the http-specific alternates request failed or returned no data,
 * the same slot is reused to request objects/info/alternates instead.
 * Otherwise the response buffer is scanned line by line. Each line is
 * treated as an absolute path (leading slash), a relative path (leading
 * ../ components) or, for the http-specific list only, a full URL. For
 * every accepted line a target base is built and a new struct alt_base
 * is created for it.
 *
 * NOTE(review): many lines of this function are missing from the
 * listing, so loop bookkeeping and the final list append cannot be
 * confirmed from here.
 */
449 static void process_alternates_response(void *callback_data)
451 struct alternates_request *alt_req =
452 (struct alternates_request *)callback_data;
453 struct active_request_slot *slot = alt_req->slot;
454 struct alt_base *tail = alt;
455 char *base = alt_req->base;
456 static const char null_byte = '\0';
460 if (alt_req->http_specific) {
461 if (slot->curl_result != CURLE_OK ||
462 !alt_req->buffer->posn) {
464 /* Try reusing the slot to get non-http alternates */
465 alt_req->http_specific = 0;
466 sprintf(alt_req->url, "%s/objects/info/alternates",
468 curl_easy_setopt(slot->curl, CURLOPT_URL,
/* Mark the slot as unfinished again while the second request runs, and
 * as finished if that request cannot be started. */
472 if (slot->finished != NULL)
473 (*slot->finished) = 0;
474 if (!start_active_slot(slot)) {
477 if (slot->finished != NULL)
478 (*slot->finished) = 1;
482 } else if (slot->curl_result != CURLE_OK) {
483 if (slot->http_code != 404 &&
484 slot->curl_result != CURLE_FILE_COULDNT_READ_FILE) {
/* Terminate the buffer so the string functions below can be used on it,
 * then step posn back so the terminator is not counted as data. */
490 fwrite_buffer(&null_byte, 1, 1, alt_req->buffer);
491 alt_req->buffer->posn--;
492 data = alt_req->buffer->buffer;
/* i marks the start of the current line; posn scans to its end. */
494 while (i < alt_req->buffer->posn) {
496 while (posn < alt_req->buffer->posn && data[posn] != '\n')
498 if (data[posn] == '\n') {
501 struct alt_base *newalt;
/* Absolute path: keep only the scheme and host part of base.
 * NOTE(review): base + 8 assumes base is at least 8 characters long, and
 * a NULL result from strchr would make the subtraction undefined. */
503 if (data[i] == '/') {
504 serverlen = strchr(base + 8, '/') - base;
506 } else if (!memcmp(data + i, "../", 3)) {
/* Relative path: drop one trailing component of base per leading ../ */
508 serverlen = strlen(base);
509 while (i + 2 < posn &&
510 !memcmp(data + i, "../", 3)) {
513 } while (serverlen &&
514 base[serverlen - 1] != '/');
517 // If the server got removed, give up.
/* NOTE(review): strchr may return NULL when base has no colon. */
518 okay = strchr(base, ':') - base + 3 <
520 } else if (alt_req->http_specific) {
/* Full URL: accept only when a colon precedes the first slash and both
 * lie within the current line. */
521 char *colon = strchr(data + i, ':');
522 char *slash = strchr(data + i, '/');
523 if (colon && slash && colon < data + posn &&
524 slash < data + posn && colon < slash) {
528 // skip 'objects' at end
/* The terminator is stored 7 characters before the end of the line,
 * which is the length of the trailing objects component.
 * NOTE(review): strncpy does not terminate by itself; the explicit
 * terminator below is what makes target a valid string. */
530 target = xmalloc(serverlen + posn - i - 6);
531 strncpy(target, base, serverlen);
532 strncpy(target + serverlen, data + i,
534 target[serverlen + posn - i - 7] = '\0';
537 "Also look at %s\n", target);
538 newalt = xmalloc(sizeof(*newalt));
540 newalt->base = target;
541 newalt->got_indices = 0;
542 newalt->packs = NULL;
/* Walk to the end of the repository list. */
543 while (tail->next != NULL)
/*
 * Load the list of alternate repositories for base.
 *
 * The http-specific list (objects/info/http-alternates) is requested
 * first; process_alternates_response handles the reply and may fall back
 * to the plain alternates file. got_alternates guards against doing the
 * work twice: 0 is waited on (with USE_CURL_MULTI) and 1 is treated as
 * already fetched.
 *
 * NOTE(review): buffer and alt_req are locals of this function whose
 * addresses are handed to the slot, so the request must be complete
 * before this function returns. That presumably holds because
 * run_active_slot is called here; confirm.
 * NOTE(review): some lines, including the end of the first block comment
 * below, are missing from this listing.
 */
554 static void fetch_alternates(char *base)
556 struct buffer buffer;
559 struct active_request_slot *slot;
560 struct alternates_request alt_req;
562 /* If another request has already started fetching alternates,
563 wait for them to arrive and return to processing this request's
565 #ifdef USE_CURL_MULTI
566 while (got_alternates == 0) {
571 /* Nothing to do if they've already been fetched */
572 if (got_alternates == 1)
575 /* Start the fetch */
/* Initial storage for the response body. */
578 data = xmalloc(4096);
581 buffer.buffer = data;
584 fprintf(stderr, "Getting alternates list for %s\n", base);
/* 31 covers the 29 characters of fixed path text plus the terminator. */
586 url = xmalloc(strlen(base) + 31);
587 sprintf(url, "%s/objects/info/http-alternates", base);
589 /* Use a callback to process the result, since another request
590 may fail and need to have alternates loaded before continuing */
591 slot = get_active_slot();
592 slot->callback_func = process_alternates_response;
593 slot->callback_data = &alt_req;
595 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
596 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
597 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
/* http_specific = 1 tells the callback that this is the first attempt. */
601 alt_req.buffer = &buffer;
602 alt_req.http_specific = 1;
605 if (start_active_slot(slot))
606 run_active_slot(slot);
/*
 * Fetch objects/info/packs from repo and set up an index for every pack
 * it lists.
 *
 * repo->got_indices records the outcome. It is set to 1 when the list
 * was processed, and also when the server has no such file (HTTP 404 or
 * CURLE_FILE_COULDNT_READ_FILE); it is reset to 0 on other failures,
 * which are reported through error().
 *
 * NOTE(review): the early exit for an already loaded repository and the
 * switch on the first character of each line are on lines not shown in
 * this listing.
 */
614 static int fetch_indices(struct alt_base *repo)
616 unsigned char sha1[20];
618 struct buffer buffer;
622 struct active_request_slot *slot;
623 struct slot_results results;
625 if (repo->got_indices)
628 data = xmalloc(4096);
631 buffer.buffer = data;
634 fprintf(stderr, "Getting pack list for %s\n", repo->base);
/* 21 covers the 19 characters of fixed path text plus the terminator. */
636 url = xmalloc(strlen(repo->base) + 21);
637 sprintf(url, "%s/objects/info/packs", repo->base);
639 slot = get_active_slot();
640 slot->results = &results;
641 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
642 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
643 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
/* Clear any header list left on the handle by an earlier request. */
644 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
645 if (start_active_slot(slot)) {
646 run_active_slot(slot);
647 if (results.curl_result != CURLE_OK) {
/* A missing pack list is not an error: the repository just has no packs. */
648 if (results.http_code == 404 ||
649 results.curl_result == CURLE_FILE_COULDNT_READ_FILE) {
650 repo->got_indices = 1;
654 repo->got_indices = 0;
656 return error("%s", curl_errorstr);
660 repo->got_indices = 0;
662 return error("Unable to start request");
665 data = buffer.buffer;
666 while (i < buffer.posn) {
/* A pack entry needs 52 bytes from i: 6 for the prefix, 40 hex digits of
 * the pack name and 6 for the .pack suffix with its newline. */
670 if (i + 52 <= buffer.posn &&
671 !strncmp(data + i, " pack-", 6) &&
672 !strncmp(data + i + 46, ".pack\n", 6)) {
673 get_sha1_hex(data + i + 6, sha1);
/* The result of setup_index is ignored here. */
674 setup_index(repo, sha1);
/* Skip the rest of the current line. */
679 while (i < buffer.posn && data[i] != '\n')
686 repo->got_indices = 1;
/*
 * Fetch the pack from repo that contains object sha1.
 *
 * The pack indices of repo are loaded first, then find_sha1_pack picks
 * the pack. The pack is downloaded into a temporary file opened in
 * append mode (so a partial download can be resumed with a Range
 * header), moved into place, verified and installed.
 *
 * NOTE(review): several lines, including the handling of a missing
 * target and the initialisation of lst, are not visible in this listing.
 */
690 static int fetch_pack(struct alt_base *repo, unsigned char *sha1)
693 struct packed_git *target;
694 struct packed_git **lst;
697 char tmpfile[PATH_MAX];
700 char range[RANGE_HEADER_SIZE];
701 struct curl_slist *range_header = NULL;
703 struct active_request_slot *slot;
704 struct slot_results results;
706 if (fetch_indices(repo))
708 target = find_sha1_pack(sha1, repo->packs);
713 fprintf(stderr, "Getting pack %s\n",
714 sha1_to_hex(target->sha1));
715 fprintf(stderr, " which contains %s\n",
/* 65 covers the fixed path text (19), the 40 hex digits, .pack (5) and
 * the terminator. */
719 url = xmalloc(strlen(repo->base) + 65);
720 sprintf(url, "%s/objects/pack/pack-%s.pack",
721 repo->base, sha1_to_hex(target->sha1));
723 filename = sha1_pack_name(target->sha1);
724 snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
725 packfile = fopen(tmpfile, "a");
727 return error("Unable to open local file %s for pack",
730 slot = get_active_slot();
731 slot->results = &results;
732 curl_easy_setopt(slot->curl, CURLOPT_FILE, packfile);
733 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
734 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
735 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, no_pragma_header);
736 slot->local = packfile;
738 /* If there is data present from a previous transfer attempt,
739 resume where it left off */
/* NOTE(review): the initial position of an append-mode stream is
 * implementation-defined, so ftell may not equal the existing size on
 * every platform. */
740 prev_posn = ftell(packfile);
744 "Resuming fetch of pack %s at byte %ld\n",
745 sha1_to_hex(target->sha1), prev_posn);
/* NOTE(review): unbounded sprintf into a RANGE_HEADER_SIZE buffer; there
 * is room for at most 15 digits of prev_posn. */
746 sprintf(range, "Range: bytes=%ld-", prev_posn);
747 range_header = curl_slist_append(range_header, range);
748 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, range_header);
751 if (start_active_slot(slot)) {
752 run_active_slot(slot);
753 if (results.curl_result != CURLE_OK) {
755 return error("Unable to get pack file %s\n%s", url,
760 return error("Unable to start request");
765 ret = move_temp_to_file(tmpfile, filename);
/* Locate the link that points at target. The loop has no NULL check and
 * relies on target being a member of the list being walked. */
770 while (*lst != target)
771 lst = &((*lst)->next);
/* Only a pack that passes verification is installed. */
774 if (verify_pack(target, 0))
776 install_packed_git(target);
/*
 * Abandon a request: close its temporary file if one is open, remove the
 * temporary file, give the slot back and drop the request from the queue.
 *
 * NOTE(review): the guard around the slot release, if any, is on a line
 * not shown in this listing.
 */
781 static void abort_object_request(struct object_request *obj_req)
783 if (obj_req->local >= 0) {
784 close(obj_req->local);
787 unlink(obj_req->tmpfile);
789 release_active_slot(obj_req->slot);
790 obj_req->slot = NULL;
792 release_object_request(obj_req);
/*
 * Obtain the loose object sha1 through the request that prefetch queued
 * for it.
 *
 * The request is looked up in the queue by id. If the object is already
 * present locally the request is aborted. Otherwise the request is
 * started if needed and its slot is run until it leaves the ACTIVE
 * state. The outcome is then mapped to a return value, with an error
 * message for every failure except a plain not-found, and the request
 * is released.
 *
 * NOTE(review): the repo parameter is not referenced on any visible
 * line. The success path and some branch bodies are missing from this
 * listing.
 */
795 static int fetch_object(struct alt_base *repo, unsigned char *sha1)
797 char *hex = sha1_to_hex(sha1);
799 struct object_request *obj_req = object_queue_head;
/* Linear search for the request with a matching id. */
801 while (obj_req != NULL && memcmp(obj_req->sha1, sha1, 20))
802 obj_req = obj_req->next;
804 return error("Couldn't find request for %s in the queue", hex);
806 if (has_sha1_file(obj_req->sha1)) {
807 abort_object_request(obj_req);
811 #ifdef USE_CURL_MULTI
812 while (obj_req->state == WAITING) {
816 start_object_request(obj_req);
/* Drive the transfer until the completion callback changes the state. */
819 while (obj_req->state == ACTIVE) {
820 run_active_slot(obj_req->slot);
822 if (obj_req->local != -1) {
823 close(obj_req->local); obj_req->local = -1;
826 if (obj_req->state == ABORTED) {
827 ret = error("Request for %s aborted", hex);
/* HTTP 416 is excluded here; finish_object_request only warns about it. */
828 } else if (obj_req->curl_result != CURLE_OK &&
829 obj_req->http_code != 416) {
830 if (obj_req->http_code == 404 ||
831 obj_req->curl_result == CURLE_FILE_COULDNT_READ_FILE)
832 ret = -1; /* Be silent, it is probably in a pack. */
834 ret = error("%s (curl_result = %d, http_code = %ld, sha1 = %s)",
835 obj_req->errorstr, obj_req->curl_result,
836 obj_req->http_code, hex);
/* The zlib stream never reached its end marker. */
837 } else if (obj_req->zret != Z_STREAM_END) {
838 corrupt_object_found++;
839 ret = error("File %s (%s) corrupt", hex, obj_req->url);
840 } else if (memcmp(obj_req->sha1, obj_req->real_sha1, 20)) {
841 ret = error("File %s has bad hash", hex);
842 } else if (obj_req->rename < 0) {
843 ret = error("unable to write sha1 filename %s",
847 release_object_request(obj_req);
/*
 * Fetch object sha1 from the known repositories.
 *
 * Starting at the head of the repository list, each repository is asked
 * for the loose object first and for a pack containing it second. After
 * a miss the alternates of the primary repository are loaded, so the
 * list may grow before the walk moves to the next entry. When every
 * repository has been tried the failure is reported through error().
 *
 * NOTE(review): the loop header and the success returns are on lines not
 * shown in this listing.
 */
851 int fetch(unsigned char *sha1)
853 struct alt_base *altbase = alt;
855 if (!fetch_object(altbase, sha1))
858 if (!fetch_pack(altbase, sha1))
860 fetch_alternates(alt->base);
861 altbase = altbase->next;
863 return error("Unable to find %s under %s", sha1_to_hex(sha1),
/*
 * Classify a character for the URL quoting done in quote_ref_url.
 *
 * The case labels single out slash, dash, dot and the ASCII letters and
 * digits. The letter and digit labels use case ranges, which are a GNU C
 * extension rather than standard C.
 *
 * NOTE(review): the switch header and the return statements are not
 * visible in this listing; presumably the listed characters yield 0 and
 * everything else a non-zero value. Confirm against the full file.
 */
867 static inline int needs_quote(int ch)
870 case '/': case '-': case '.':
871 case 'A'...'Z': case 'a'...'z': case '0'...'9':
/*
 * Return the ASCII hex digit for v: a decimal digit when v is below 10,
 * otherwise an upper-case letter counted from A at 10.
 *
 * quote_ref_url only passes values masked with 0xF. No range check is
 * made, so other inputs give characters outside the hex digits.
 */
878 static inline int hex(int v)
880 if (v < 10) return '0' + v;
881 else return 'A' + v - 10;
/*
 * Build the URL of a ref: base, followed by refs/, followed by ref with
 * every character that needs_quote flags replaced by a three character
 * escape (a marker plus two hex digits from hex()).
 *
 * The first loop sizes the result and the second one fills it in.
 *
 * NOTE(review): the allocation of qref, the copy of unquoted characters,
 * the termination and the return are on lines not shown in this listing.
 */
884 static char *quote_ref_url(const char *base, const char *ref)
888 int len, baselen, ch;
890 baselen = strlen(base);
891 len = baselen + 6; /* "refs/" + NUL */
/* One byte per character of ref, plus two more for each quoted one. */
892 for (cp = ref; (ch = *cp) != 0; cp++, len++)
894 len += 2; /* extra two hex plus replacement % */
896 memcpy(qref, base, baselen);
897 memcpy(qref + baselen, "refs/", 5);
898 for (cp = ref, dp = qref + baselen + 5; (ch = *cp) != 0; cp++) {
899 if (needs_quote(ch)) {
/* High nibble first, then low nibble. */
901 *dp++ = hex((ch >> 4) & 0xF);
902 *dp++ = hex(ch & 0xF);
/*
 * Read the ref named ref from the primary repository (alt->base) and
 * parse its hex content into sha1.
 *
 * The URL comes from quote_ref_url and the response body is collected
 * in a struct buffer through fwrite_buffer. Transfer failures are
 * reported through error().
 *
 * NOTE(review): the setup of buffer, the copy into the local hex string
 * and the final return are on lines not shown in this listing. No free
 * of url is visible before the error returns.
 */
912 int fetch_ref(char *ref, unsigned char *sha1)
916 struct buffer buffer;
917 char *base = alt->base;
918 struct active_request_slot *slot;
919 struct slot_results results;
925 url = quote_ref_url(base, ref);
926 slot = get_active_slot();
927 slot->results = &results;
928 curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
929 curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
/* Clear any header list left on the handle by an earlier request. */
930 curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, NULL);
931 curl_easy_setopt(slot->curl, CURLOPT_URL, url);
932 if (start_active_slot(slot)) {
933 run_active_slot(slot);
934 if (results.curl_result != CURLE_OK)
935 return error("Couldn't get %s for %s\n%s",
936 url, ref, curl_errorstr);
938 return error("Unable to start request");
942 get_sha1_hex(hex, sha1);
/*
 * Entry point of git-http-fetch.
 *
 * Parses the leading options, takes the commit id from the command line
 * (a url argument is also required, per the usage text), sets up the
 * Pragma header list and the first repository entry, and prints a hint
 * at the end when corrupt loose objects were seen.
 *
 * NOTE(review): the single-letter options are recognised by their
 * second character only, so any argument that starts with a dash and
 * that letter matches.
 * NOTE(review): the -w branch reads argv[arg + 1] and no bounds check is
 * visible before it.
 * NOTE(review): the usage text advertises -d but no branch for it is
 * visible in this listing.
 * NOTE(review): the option bodies and the actual fetch call are on lines
 * not shown here.
 */
946 int main(int argc, char **argv)
953 setup_git_directory();
955 while (arg < argc && argv[arg][0] == '-') {
956 if (argv[arg][1] == 't') {
958 } else if (argv[arg][1] == 'c') {
960 } else if (argv[arg][1] == 'a') {
964 } else if (argv[arg][1] == 'v') {
966 } else if (argv[arg][1] == 'w') {
967 write_ref = argv[arg + 1];
969 } else if (!strcmp(argv[arg], "--recover")) {
/* Two positional arguments must remain after the options. */
974 if (argc < arg + 2) {
975 usage("git-http-fetch [-c] [-t] [-a] [-d] [-v] [--recover] [-w ref] commit-id url");
978 commit_id = argv[arg];
/* A Pragma header with no value is added to the shared header list. */
983 no_pragma_header = curl_slist_append(no_pragma_header, "Pragma:");
/* First entry of the repository list. */
985 alt = xmalloc(sizeof(*alt));
987 alt->got_indices = 0;
994 curl_slist_free_all(no_pragma_header);
/* NOTE(review): the message below has a grammar slip (object should be
 * plural). It is runtime text, so it is left untouched here. */
998 if (corrupt_object_found) {
1000 "Some loose object were found to be corrupt, but they might be just\n"
1001 "a false '404 Not Found' error message sent with incorrect HTTP\n"
1002 "status code. Suggest running git fsck-objects.\n");