--- /dev/null
+/*
+ * Another stupid program, this one parsing the headers of an
+ * email to figure out authorship and subject
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+/*
+ * Output streams, opened for writing by main() from its two arguments:
+ * handle_rest() writes body lines to cmitmsg until it meets a line
+ * starting with "---", and to patchfile from that line onwards.
+ */
+static FILE *cmitmsg, *patchfile;
+
+/* Current input line, filled by fgets() from stdin. */
+static char line[1000];
+/* Author name, filled in by handle_from(). */
+static char name[1000];
+/* Author email address, filled in by handle_from(). */
+static char email[1000];
+/* Subject text, set by handle_subject() and extended by add_subject_line(). */
+static char subject[1000];
+
+/*
+ * Choose the string to use as the author name.
+ *
+ * "name" is accepted when it is between 3 and 60 characters long and
+ * contains none of '@', '<' or '>'; otherwise "email" is returned as
+ * the fallback.
+ */
+static char *sanity_check(char *name, char *email)
+{
+	int length = strlen(name);
+
+	if (length < 3 || length > 60)
+		return email;
+	return strpbrk(name, "@<>") ? email : name;
+}
+
+/*
+ * Parse the text that follows "From:" and fill in the global "email"
+ * and "name" buffers.
+ *
+ * The word containing the first '@' is taken as the address: it is
+ * copied into "email" and blanked out (overwritten with spaces) in
+ * "line".  What remains of the line, with leading and trailing
+ * non-alphanumeric characters dropped, is the candidate name; when
+ * sanity_check() rejects it, the address is used as the name instead.
+ *
+ * The line is modified in place.
+ *
+ * Returns 1 when an address was extracted, 0 when the line was ignored
+ * (no '@', or a second '@' while an address is already known).
+ */
+static int handle_from(char *line)
+{
+	char *at = strchr(line, '@');
+	char *dst;
+
+	if (!at)
+		return 0;
+
+	/*
+	 * If we already have one email, don't take any confusing lines
+	 */
+	if (*email && strchr(at+1, '@'))
+		return 0;
+
+	/* Walk back to the start of the address: whitespace or '<' ends it. */
+	while (at > line) {
+		/* unsigned char: <ctype.h> is undefined for negative values */
+		unsigned char c = at[-1];
+		if (isspace(c) || c == '<')
+			break;
+		at--;
+	}
+
+	/* Move the address into "email", leaving spaces behind in the line. */
+	dst = email;
+	for (;;) {
+		unsigned char c = *at;
+		if (!c || c == '>' || isspace(c))
+			break;
+		*at++ = ' ';
+		*dst++ = c;
+	}
+	*dst++ = 0;
+
+	/* Chop everything after the last alphanumeric character. */
+	at = line + strlen(line);
+	while (at > line) {
+		unsigned char c = *--at;
+		if (isalnum(c))
+			break;
+		*at = 0;
+	}
+
+	/* Skip everything before the first alphanumeric character. */
+	at = line;
+	for (;;) {
+		unsigned char c = *at;
+		if (!c)
+			break;
+		if (isalnum(c))
+			break;
+		at++;
+	}
+
+	at = sanity_check(at, email);
+
+	strcpy(name, at);
+	return 1;
+}
+
+/*
+ * Store the text following "Subject:" in the global subject buffer,
+ * replacing whatever was there before.
+ */
+static void handle_subject(char *line)
+{
+	size_t bytes = strlen(line) + 1;	/* include the terminator */
+
+	memcpy(subject, line, bytes);
+}
+
+/*
+ * Append a header continuation line to the global subject buffer.
+ *
+ * Leading whitespace of "line" is replaced by a single space.  The
+ * append is bounded by the size of the subject buffer: text that does
+ * not fit is silently dropped, so any number of continuation lines
+ * can no longer overflow it.  "line" itself is not modified.
+ */
+static void add_subject_line(char *line)
+{
+	size_t used = strlen(subject);
+	size_t room = sizeof(subject) - 1 - used;	/* keep one byte for NUL */
+	size_t n;
+
+	/* unsigned char: <ctype.h> is undefined for negative values */
+	while (isspace((unsigned char)*line))
+		line++;
+
+	if (!room)
+		return;
+	subject[used++] = ' ';
+	room--;
+
+	n = strlen(line);
+	if (n > room)
+		n = room;
+	memcpy(subject + used, line, n);
+	subject[used + n] = 0;
+}
+
+/*
+ * Examine one header line (trailing whitespace already removed).
+ *
+ * "From:" and "Subject:" headers are handed to their handlers.  A line
+ * that starts with whitespace is a continuation of the previous
+ * header: it is appended to the subject when that header was
+ * "Subject:", and rejected with a message on stderr when it was
+ * "From:".  Every other line is ignored.
+ *
+ * The static "cont" remembers the previous header across calls:
+ * 0 for "From:", 1 for "Subject:", -1 for anything else.
+ *
+ * "len" is accepted for the caller's convenience and is not used.
+ */
+static void check_line(char *line, int len)
+{
+	static int cont = -1;
+
+	/*
+	 * strncmp stops at the terminator, so short lines are never
+	 * compared against bytes left over from an earlier, longer line.
+	 * The casts keep <ctype.h> defined for bytes >= 0x80.
+	 */
+	if (!strncmp(line, "From:", 5) && isspace((unsigned char)line[5])) {
+		handle_from(line+6);
+		cont = 0;
+		return;
+	}
+	if (!strncmp(line, "Subject:", 8) && isspace((unsigned char)line[8])) {
+		handle_subject(line+9);
+		cont = 1;
+		return;
+	}
+	if (isspace((unsigned char)*line)) {
+		switch (cont) {
+		case 0:
+			fprintf(stderr, "I don't do 'From:' line continuations\n");
+			break;
+		case 1:
+			add_subject_line(line);
+			return;
+		default:
+			break;
+		}
+	}
+	cont = -1;
+}
+
+/*
+ * Skip the noise at the front of a subject line and return a pointer
+ * to where the interesting text starts (inside the same buffer).
+ *
+ * Skipped, repeatedly and in any order:
+ *  - "re:" / "Re:" reply markers,
+ *  - spaces, tabs and colons,
+ *  - a leading "[...]" tag, as long as the tag is not more than twice
+ *    as long as what follows it; a '[' with no matching ']' is
+ *    skipped on its own.
+ *
+ * The buffer itself is not modified.
+ */
+static char * cleanup_subject(char *subject)
+{
+	for (;;) {
+		char *p;
+		int len, skipped;
+		switch (*subject) {
+		case 'r': case 'R':
+			/*
+			 * strncmp stops at the terminator, so a subject
+			 * that is just "r" is not read past its end.
+			 */
+			if (!strncmp(subject+1, "e:", 2)) {
+				subject += 3;
+				continue;
+			}
+			break;
+		case ' ': case '\t': case ':':
+			subject++;
+			continue;
+
+		case '[':
+			p = strchr(subject, ']');
+			if (!p) {
+				subject++;
+				continue;
+			}
+			/* len counts from ']' to the end, skipped from '[' up to ']' */
+			len = strlen(p);
+			skipped = p - subject;
+			if (skipped <= len * 2) {
+				subject = p+1;
+				continue;
+			}
+			break;
+		}
+		return subject;
+	}
+}
+
+/*
+ * Normalise whitespace in place: every run of whitespace characters
+ * (including a leading or trailing run) becomes a single ' '.
+ *
+ * Done in one pass with a read cursor and a write cursor, so the cost
+ * is linear in the length of the string.
+ */
+static void cleanup_space(char *buf)
+{
+	char *dst = buf;
+	unsigned char c;
+
+	while ((c = *buf++) != 0) {
+		if (!isspace(c)) {
+			*dst++ = c;
+			continue;
+		}
+		/* First character of a run: emit one space, drop the rest. */
+		*dst++ = ' ';
+		while (isspace((unsigned char)*buf))
+			buf++;
+	}
+	*dst = 0;
+}
+
+/*
+ * Hacky hacky. This depends not only on -p1, but on
+ * filenames not having some special characters in them,
+ * like tilde.
+ *
+ * "line" is the text after the "---" or "+++" marker of a patch
+ * header.  It must start with whitespace and contain a '/'; the
+ * filename is what follows the first '/', up to the next space, tab,
+ * newline or '~'.  A trailing ".orig" is dropped.  The result is
+ * printed to stdout as "filename: <name>"; nothing is printed when the
+ * line does not match or the name ends up empty.
+ */
+static void show_filename(char *line)
+{
+	int len;
+	char *name = strchr(line, '/');
+
+	/* unsigned char: <ctype.h> is undefined for negative values */
+	if (!name || !isspace((unsigned char)*line))
+		return;
+	name++;
+
+	/* Length of the name: stop at whitespace, '~' or end of string. */
+	len = strcspn(name, " \t\n~");
+
+	/* remove ".orig" from the end - common patch behaviour */
+	if (len > 5 && !memcmp(name+len-5, ".orig", 5))
+		len -= 5;
+	if (!len)
+		return;
+	printf("filename: %.*s\n", len, name);
+}
+
+/*
+ * Emit the collected header information and split the remaining input.
+ *
+ * The subject, name and email are cleaned up and printed to stdout as
+ * "Author:", "Email:" and "Subject:" lines.  Then the current contents
+ * of the global line buffer and every further line of stdin are copied
+ * to cmitmsg, switching to patchfile at the first line that starts
+ * with "---".  Lines starting with "---" or "+++" are also passed to
+ * show_filename().
+ *
+ * Exits with status 1 when no "---" line was seen, or when writing or
+ * closing either output file fails.
+ */
+static void handle_rest(void)
+{
+	char *sub = cleanup_subject(subject);
+	FILE *out = cmitmsg;
+
+	cleanup_space(name);
+	cleanup_space(email);
+	cleanup_space(sub);
+	printf("Author: %s\nEmail: %s\nSubject: %s\n\n", name, email, sub);
+
+	do {
+		/* Track filename information from the patch.. */
+		if (!memcmp("---", line, 3)) {
+			out = patchfile;
+			show_filename(line+3);
+		}
+
+		if (!memcmp("+++", line, 3))
+			show_filename(line+3);
+
+		if (fputs(line, out) == EOF) {
+			perror("write");
+			exit(1);
+		}
+	} while (fgets(line, sizeof(line), stdin) != NULL);
+
+	if (out == cmitmsg) {
+		fprintf(stderr, "No patch found\n");
+		exit(1);
+	}
+
+	/* fclose flushes: a failure here means the output is incomplete. */
+	if (fclose(cmitmsg) == EOF) {
+		perror("close");
+		exit(1);
+	}
+	if (fclose(patchfile) == EOF) {
+		perror("close");
+		exit(1);
+	}
+}
+
+/*
+ * Strip trailing whitespace from "line" in place by overwriting it
+ * with NUL bytes.
+ *
+ * Returns the length of what is left (0 for an empty or all-blank
+ * line).
+ */
+static int eatspace(char *line)
+{
+	int len = strlen(line);
+
+	/* unsigned char: <ctype.h> is undefined for negative values */
+	while (len > 0 && isspace((unsigned char)line[len-1]))
+		line[--len] = 0;
+	return len;
+}
+
+/*
+ * Process the message body, read from stdin into the global line
+ * buffer.
+ *
+ * Blank lines at the start are skipped.  One leading "From:" line is
+ * accepted as an author override when handle_from() can parse it.
+ * The first line that is neither is handed, with its newline put
+ * back, to handle_rest(), which consumes the remainder of the input.
+ */
+static void handle_body(void)
+{
+	int has_from = 0;
+
+	/* First line of body can be a From: */
+	while (fgets(line, sizeof(line), stdin) != NULL) {
+		int len = eatspace(line);
+		if (!len)
+			continue;
+		if (!memcmp("From:", line, 5) && isspace((unsigned char)line[5])) {
+			if (!has_from && handle_from(line+6)) {
+				has_from = 1;
+				continue;
+			}
+		}
+		/*
+		 * Put back the newline eatspace() removed.  The terminator
+		 * is rewritten explicitly: when the line had no trailing
+		 * whitespace, line[len] was the only NUL.  A line that fills
+		 * the whole buffer has no room for a newline and is left
+		 * untouched.
+		 */
+		if (len < (int)sizeof(line) - 1) {
+			line[len] = '\n';
+			line[len+1] = 0;
+		}
+		handle_rest();
+		break;
+	}
+}
+
+/* Print the command-line synopsis to stderr and exit with status 1. */
+static void usage(void)
+{
+	fputs("mailinfo msg-file path-file < email\n", stderr);
+	exit(1);
+}
+
+/*
+ * Entry point: "mailinfo msg-file path-file < email".
+ *
+ * Opens both output files for writing, then reads header lines from
+ * stdin and passes each to check_line().  The first blank line ends
+ * the headers and hands control to handle_body().
+ */
+int main(int argc, char ** argv)
+{
+	if (argc != 3)
+		usage();
+
+	if ((cmitmsg = fopen(argv[1], "w")) == NULL) {
+		perror(argv[1]);
+		exit(1);
+	}
+	if ((patchfile = fopen(argv[2], "w")) == NULL) {
+		perror(argv[2]);
+		exit(1);
+	}
+
+	for (;;) {
+		int len;
+
+		if (!fgets(line, sizeof(line), stdin))
+			break;
+		len = eatspace(line);
+		if (len) {
+			check_line(line, len);
+			continue;
+		}
+		/* Blank line: the headers are over. */
+		handle_body();
+		break;
+	}
+	return 0;
+}
--- /dev/null
+/*
+ * Totally braindamaged mbox splitter program.
+ *
+ * It just splits a mbox into a list of files: "0001" "0002" ..
+ * so you can process them further from there.
+ */
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+
+/*
+ * Print the command-line synopsis to stderr and exit with status 1.
+ * Never returns, despite the int return type.
+ */
+static int usage(void)
+{
+	fputs("mailsplit <mbox> <directory>\n", stderr);
+	exit(1);
+}
+
+/*
+ * Length of the first line in the "size" bytes at "map".
+ *
+ * The count includes the terminating '\n' when there is one; when
+ * the data runs out first, all "size" bytes make up the line.
+ * Returns 0 for an empty buffer, without touching "map".
+ */
+static int linelen(const char *map, unsigned long size)
+{
+	const char *newline;
+
+	if (!size)
+		return 0;
+	newline = memchr(map, '\n', size);
+	if (!newline)
+		return (int)size;
+	return (int)(newline - map) + 1;
+}
+
+/*
+ * Decide whether the "len" bytes at "line" look like an mbox
+ * "From " separator line.
+ *
+ * The line must be at least 20 bytes long, start with "From ", and
+ * its last ':' (not counting the final two bytes) must sit inside
+ * something shaped like a time: digits at offsets -4, -2, -1, +1 and
+ * +2 around it.  What follows the time must be a number greater than
+ * 90, taken as the year.
+ *
+ * The buffer need not be NUL-terminated: nothing at or beyond
+ * line + len is read.
+ *
+ * Returns 1 for a match, 0 otherwise.
+ */
+static int is_from_line(const char *line, int len)
+{
+	const char *end = line + len;
+	const char *colon;
+	const char *p;
+	long year;
+
+	if (len < 20 || memcmp("From ", line, 5))
+		return 0;
+
+	colon = line + len - 2;
+	line += 5;
+	for (;;) {
+		if (colon < line)
+			return 0;
+		if (*--colon == ':')
+			break;
+	}
+
+	/* unsigned char: <ctype.h> is undefined for negative values */
+	if (!isdigit((unsigned char)colon[-4]) ||
+	    !isdigit((unsigned char)colon[-2]) ||
+	    !isdigit((unsigned char)colon[-1]) ||
+	    !isdigit((unsigned char)colon[ 1]) ||
+	    !isdigit((unsigned char)colon[ 2]))
+		return 0;
+
+	/*
+	 * year: optional whitespace, optional '+', then digits.  Parsed
+	 * by hand because strtol() needs a terminated string and would
+	 * happily run on into the next line or past the end of the data.
+	 */
+	p = colon + 3;
+	while (p < end && isspace((unsigned char)*p))
+		p++;
+	if (p < end && *p == '+')
+		p++;
+	year = 0;
+	while (p < end && isdigit((unsigned char)*p)) {
+		/* stop accumulating once the value is clearly large enough */
+		if (year < 100000)
+			year = year * 10 + (*p - '0');
+		p++;
+	}
+	if (year <= 90)
+		return 0;
+
+	/* Ok, close enough */
+	return 1;
+}
+
+/*
+ * Find the extent of the first message in the "size" bytes at "map".
+ *
+ * The data must begin with "From " and be at least 6 bytes long;
+ * otherwise "corrupt mailbox" is printed to stderr and the program
+ * exits with status 1.
+ *
+ * Returns the offset of the next line accepted by is_from_line(),
+ * i.e. the length of the first message, or the offset reached when
+ * the data ends without another separator.
+ */
+static int parse_email(const void *map, unsigned long size)
+{
+	/* Byte cursor: pointer arithmetic on void * is not standard C. */
+	const char *cur = map;
+	unsigned long offset;
+
+	if (size < 6 || memcmp("From ", cur, 5))
+		goto corrupt;
+
+	/* Make sure we don't trigger on this first line */
+	cur++; size--; offset = 1;
+
+	/*
+	 * Search for a line beginning with "From ", and
+	 * having something that looks like a date format.
+	 */
+	do {
+		int len = linelen(cur, size);
+		if (is_from_line(cur, len))
+			return offset;
+		cur += len;
+		size -= len;
+		offset += len;
+	} while (size);
+	return offset;
+
+corrupt:
+	fprintf(stderr, "corrupt mailbox\n");
+	exit(1);
+}
+
+/*
+ * Entry point: "mailsplit <mbox> <directory>".
+ *
+ * Maps the mbox file read-only, changes into the target directory,
+ * and writes each message found by parse_email() to a newly created
+ * file named "0001", "0002", ...  Existing files are never
+ * overwritten (O_EXCL).  Any failure is reported on stderr and ends
+ * the program with status 1.
+ */
+int main(int argc, char **argv)
+{
+	int fd, nr;
+	struct stat st;
+	unsigned long size;
+	void *map;
+	const char *cur;
+
+	if (argc != 3)
+		usage();
+	fd = open(argv[1], O_RDONLY);
+	if (fd < 0) {
+		perror(argv[1]);
+		exit(1);
+	}
+	if (chdir(argv[2]) < 0)
+		usage();
+	if (fstat(fd, &st) < 0) {
+		perror("stat");
+		exit(1);
+	}
+	size = st.st_size;
+	map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+	if (map == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+	/* The mapping stays valid after the descriptor is closed. */
+	close(fd);
+
+	/* Byte cursor: pointer arithmetic on void * is not standard C. */
+	cur = map;
+	nr = 0;
+	do {
+		char name[16];	/* room for any int plus the terminator */
+		unsigned long len = parse_email(cur, size);
+		unsigned long done = 0;
+
+		assert(len <= size);
+		snprintf(name, sizeof(name), "%04d", ++nr);
+		fd = open(name, O_WRONLY | O_CREAT | O_EXCL, 0600);
+		if (fd < 0) {
+			perror(name);
+			exit(1);
+		}
+		/* write() may accept fewer bytes than asked: keep going. */
+		while (done < len) {
+			ssize_t n = write(fd, cur + done, len - done);
+			if (n <= 0) {
+				perror("write");
+				exit(1);
+			}
+			done += n;
+		}
+		if (close(fd) < 0) {
+			perror(name);
+			exit(1);
+		}
+		cur += len;
+		size -= len;
+	} while (size > 0);
+	return 0;
+}