X-Git-Url: https://git.verplant.org/?a=blobdiff_plain;f=builtin-grep.c;h=e87b5cb48d6eda5ada9a3cd9bb505d8abe36fd41;hb=7ed36f56e33bd838d06521a37a916516397e9e8b;hp=c3e6701aa0d29e13421fb47bdcb7bdfcad7d5438;hpb=df0e7aa8644eef5ebc018bf838739e25f3494b6c;p=git.git diff --git a/builtin-grep.c b/builtin-grep.c index c3e6701a..e87b5cb4 100644 --- a/builtin-grep.c +++ b/builtin-grep.c @@ -8,8 +8,7 @@ #include "tree.h" #include "commit.h" #include "tag.h" -#include "diff.h" -#include "revision.h" +#include "tree-walk.h" #include "builtin.h" #include #include @@ -18,16 +17,16 @@ * git grep pathspecs are somewhat different from diff-tree pathspecs; * pathname wildcards are allowed. */ -static int pathspec_matches(struct diff_options *opt, const char *name) +static int pathspec_matches(const char **paths, const char *name) { int namelen, i; - if (!opt->nr_paths) + if (!paths || !*paths) return 1; namelen = strlen(name); - for (i = 0; i < opt->nr_paths; i++) { - const char *match = opt->paths[i]; - int matchlen = opt->pathlens[i]; - const char *slash, *cp; + for (i = 0; paths[i]; i++) { + const char *match = paths[i]; + int matchlen = strlen(match); + const char *cp, *meta; if ((matchlen <= namelen) && !strncmp(name, match, matchlen) && @@ -39,54 +38,91 @@ static int pathspec_matches(struct diff_options *opt, const char *name) if (name[namelen-1] != '/') continue; - /* We are being asked if the name directory is worth + /* We are being asked if the directory ("name") is worth * descending into. * * Find the longest leading directory name that does * not have metacharacter in the pathspec; the name * we are looking at must overlap with that directory. */ - for (cp = match, slash = NULL; cp - match < matchlen; cp++) { + for (cp = match, meta = NULL; cp - match < matchlen; cp++) { char ch = *cp; - if (ch == '/') - slash = cp; - if (ch == '*' || ch == '[') + if (ch == '*' || ch == '[' || ch == '?') { + meta = cp; break; + } } - if (!slash) - slash = match; /* toplevel */ - else - slash++; - if (namelen <= slash - match) { + if (!meta) + meta = cp; /* fully literal */ + + if (namelen <= meta - match) { /* Looking at "Documentation/" and * the pattern says "Documentation/howto/", or - * "Documentation/diff*.txt". + * "Documentation/diff*.txt". The name we + * have should match prefix. */ if (!memcmp(match, name, namelen)) return 1; + continue; } - else { + + if (meta - match < namelen) { /* Looking at "Documentation/howto/" and - * the pattern says "Documentation/h*". + * the pattern says "Documentation/h*"; + * match up to "Do.../h"; this avoids descending + * into "Documentation/technical/". */ - if (!memcmp(match, name, slash - match)) + if (!memcmp(match, name, meta - match)) return 1; + continue; } } return 0; } -struct grep_opt { +struct grep_pat { + struct grep_pat *next; const char *pattern; regex_t regexp; +}; + +struct grep_opt { + struct grep_pat *pattern_list; + struct grep_pat **pattern_tail; + regex_t regexp; unsigned linenum:1; unsigned invert:1; unsigned name_only:1; + unsigned count:1; + unsigned word_regexp:1; int regflags; unsigned pre_context; unsigned post_context; }; +static void add_pattern(struct grep_opt *opt, const char *pat) +{ + struct grep_pat *p = xcalloc(1, sizeof(*p)); + p->pattern = pat; + *opt->pattern_tail = p; + opt->pattern_tail = &p->next; + p->next = NULL; +} + +static void compile_patterns(struct grep_opt *opt) +{ + struct grep_pat *p; + for (p = opt->pattern_list; p; p = p->next) { + int err = regcomp(&p->regexp, p->pattern, opt->regflags); + if (err) { + char errbuf[1024]; + regerror(err, &p->regexp, errbuf, 1024); + regfree(&p->regexp); + die("'%s': %s", p->pattern, errbuf); + } + } +} + static char *end_of_line(char *cp, unsigned long *left) { unsigned long l = *left; @@ -98,13 +134,18 @@ static char *end_of_line(char *cp, unsigned long *left) return cp; } +static int word_char(char ch) +{ + return isalnum(ch) || ch == '_'; +} + static void show_line(struct grep_opt *opt, const char *bol, const char *eol, const char *name, unsigned lno, char sign) { printf("%s%c", name, sign); if (opt->linenum) printf("%d%c", lno, sign); - printf("%.*s\n", eol-bol, bol); + printf("%.*s\n", (int)(eol-bol), bol); } static int grep_buffer(struct grep_opt *opt, const char *name, @@ -120,6 +161,7 @@ static int grep_buffer(struct grep_opt *opt, const char *name, unsigned last_hit = 0; unsigned last_shown = 0; const char *hunk_mark = ""; + unsigned count = 0; if (opt->pre_context) prev = xcalloc(opt->pre_context, sizeof(*prev)); @@ -129,23 +171,56 @@ static int grep_buffer(struct grep_opt *opt, const char *name, while (left) { regmatch_t pmatch[10]; char *eol, ch; - int hit; + int hit = 0; + struct grep_pat *p; eol = end_of_line(bol, &left); ch = *eol; *eol = 0; - hit = !regexec(&opt->regexp, bol, ARRAY_SIZE(pmatch), - pmatch, 0); + for (p = opt->pattern_list; p; p = p->next) { + regex_t *exp = &p->regexp; + hit = !regexec(exp, bol, ARRAY_SIZE(pmatch), + pmatch, 0); + + if (hit && opt->word_regexp) { + /* Match beginning must be either + * beginning of the line, or at word + * boundary (i.e. the last char must + * not be alnum or underscore). + */ + if ((pmatch[0].rm_so < 0) || + (eol - bol) <= pmatch[0].rm_so || + (pmatch[0].rm_eo < 0) || + (eol - bol) < pmatch[0].rm_eo) + die("regexp returned nonsense"); + if (pmatch[0].rm_so != 0 && + word_char(bol[pmatch[0].rm_so-1])) + continue; /* not a word boundary */ + if ((eol-bol) < pmatch[0].rm_eo && + word_char(bol[pmatch[0].rm_eo])) + continue; /* not a word boundary */ + } + if (hit) + break; + } + /* "grep -v -e foo -e bla" should list lines + * that do not have either, so inversion should + * be done outside. + */ if (opt->invert) hit = !hit; if (hit) { + count++; if (opt->name_only) { printf("%s\n", name); return 1; } /* Hit at this line. If we haven't shown the * pre-context lines, we would need to show them. + * When asked to do "count", this still show + * the context which is nonsense, but the user + * deserves to get that ;-). */ if (opt->pre_context) { unsigned from; @@ -167,7 +242,8 @@ static int grep_buffer(struct grep_opt *opt, const char *name, } if (last_shown && lno != last_shown + 1) printf(hunk_mark); - show_line(opt, bol, eol, name, lno, ':'); + if (!opt->count) + show_line(opt, bol, eol, name, lno, ':'); last_shown = last_hit = lno; } else if (last_hit && @@ -188,9 +264,18 @@ static int grep_buffer(struct grep_opt *opt, const char *name, } *eol = ch; bol = eol + 1; + if (!left) + break; left--; lno++; } + /* NEEDSWORK: + * The real "grep -c foo *.c" gives many "bar.c:0" lines, + * which feels mostly useless but sometimes useful. Maybe + * make it another option? For now suppress them. + */ + if (opt->count && count) + printf("%s:%u\n", name, count); return !!last_hit; } @@ -241,7 +326,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) return i; } -static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) +static int grep_cache(struct grep_opt *opt, const char **paths, int cached) { int hit = 0; int nr; @@ -251,7 +336,7 @@ static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) struct cache_entry *ce = active_cache[nr]; if (ce_stage(ce) || !S_ISREG(ntohl(ce->ce_mode))) continue; - if (!pathspec_matches(&revs->diffopt, ce->name)) + if (!pathspec_matches(paths, ce->name)) continue; if (cached) hit |= grep_sha1(opt, ce->sha1, ce->name); @@ -261,7 +346,7 @@ static int grep_cache(struct grep_opt *opt, struct rev_info *revs, int cached) return hit; } -static int grep_tree(struct grep_opt *opt, struct rev_info *revs, +static int grep_tree(struct grep_opt *opt, const char **paths, struct tree_desc *tree, const char *tree_name, const char *base) { @@ -297,7 +382,7 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, */ strcpy(path_buf + len + pathlen, "/"); - if (!pathspec_matches(&revs->diffopt, down)) + if (!pathspec_matches(paths, down)) ; else if (S_ISREG(mode)) hit |= grep_sha1(opt, sha1, path_buf); @@ -310,7 +395,7 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, die("unable to read tree (%s)", sha1_to_hex(sha1)); sub.buf = data; - hit |= grep_tree(opt, revs, &sub, tree_name, down); + hit |= grep_tree(opt, paths, &sub, tree_name, down); free(data); } update_tree_entry(tree); @@ -318,7 +403,7 @@ static int grep_tree(struct grep_opt *opt, struct rev_info *revs, return hit; } -static int grep_object(struct grep_opt *opt, struct rev_info *revs, +static int grep_object(struct grep_opt *opt, const char **paths, struct object *obj, const char *name) { if (!strcmp(obj->type, blob_type)) @@ -333,7 +418,7 @@ static int grep_object(struct grep_opt *opt, struct rev_info *revs, if (!data) die("unable to read tree (%s)", sha1_to_hex(obj->sha1)); tree.buf = data; - hit = grep_tree(opt, revs, &tree, name, ""); + hit = grep_tree(opt, paths, &tree, name, ""); free(data); return hit; } @@ -345,114 +430,168 @@ static const char builtin_grep_usage[] = int cmd_grep(int argc, const char **argv, char **envp) { - struct rev_info rev; - const char **dst, **src; - int err; int hit = 0; - int no_more_arg = 0; - int seen_range = 0; + int no_more_flags = 0; int seen_noncommit = 0; int cached = 0; struct grep_opt opt; - struct object_list *list; + struct object_list *list, **tail, *object_list = NULL; + const char *prefix = setup_git_directory(); + const char **paths = NULL; memset(&opt, 0, sizeof(opt)); + opt.pattern_tail = &opt.pattern_list; opt.regflags = REG_NEWLINE; /* - * Interpret and remove the grep options upfront. Sigh... + * No point using rev_info, really. */ - for (dst = src = &argv[1]; src < argc + argv; ) { - const char *arg = *src++; - if (!no_more_arg) { - if (!strcmp("--", arg)) { - no_more_arg = 1; - *dst++ = arg; - continue; - } - if (!strcmp("--cached", arg)) { - cached = 1; - continue; - } - if (!strcmp("-i", arg) || - !strcmp("--ignore-case", arg)) { - opt.regflags |= REG_ICASE; - continue; - } - if (!strcmp("-v", arg) || - !strcmp("--invert-match", arg)) { - opt.invert = 1; - continue; - } - if (!strcmp("-E", arg) || - !strcmp("--extended-regexp", arg)) { - opt.regflags |= REG_EXTENDED; - continue; - } - if (!strcmp("-G", arg) || - !strcmp("--basic-regexp", arg)) { - opt.regflags &= ~REG_EXTENDED; - continue; - } - if (!strcmp("-e", arg)) { - if (src < argc + argv) { - opt.pattern = *src++; - continue; + while (1 < argc) { + const char *arg = argv[1]; + argc--; argv++; + if (!strcmp("--cached", arg)) { + cached = 1; + continue; + } + if (!strcmp("-i", arg) || + !strcmp("--ignore-case", arg)) { + opt.regflags |= REG_ICASE; + continue; + } + if (!strcmp("-v", arg) || + !strcmp("--invert-match", arg)) { + opt.invert = 1; + continue; + } + if (!strcmp("-E", arg) || + !strcmp("--extended-regexp", arg)) { + opt.regflags |= REG_EXTENDED; + continue; + } + if (!strcmp("-G", arg) || + !strcmp("--basic-regexp", arg)) { + opt.regflags &= ~REG_EXTENDED; + continue; + } + if (!strcmp("-n", arg)) { + opt.linenum = 1; + continue; + } + if (!strcmp("-H", arg)) { + /* We always show the pathname, so this + * is a noop. + */ + continue; + } + if (!strcmp("-l", arg) || + !strcmp("--files-with-matches", arg)) { + opt.name_only = 1; + continue; + } + if (!strcmp("-c", arg) || + !strcmp("--count", arg)) { + opt.count = 1; + continue; + } + if (!strcmp("-w", arg) || + !strcmp("--word-regexp", arg)) { + opt.word_regexp = 1; + continue; + } + if (!strncmp("-A", arg, 2) || + !strncmp("-B", arg, 2) || + !strncmp("-C", arg, 2) || + (arg[0] == '-' && '1' <= arg[1] && arg[1] <= '9')) { + unsigned num; + const char *scan; + switch (arg[1]) { + case 'A': case 'B': case 'C': + if (!arg[2]) { + if (argc <= 1) + usage(builtin_grep_usage); + scan = *++argv; + argc--; } - usage(builtin_grep_usage); - } - if (!strcmp("-n", arg)) { - opt.linenum = 1; - continue; - } - if (!strcmp("-H", arg)) { - /* We always show the pathname, so this - * is a noop. - */ - continue; + else + scan = arg + 2; + break; + default: + scan = arg + 1; + break; } - if (!strcmp("-l", arg) || - !strcmp("--files-with-matches", arg)) { - opt.name_only = 1; - continue; + if (sscanf(scan, "%u", &num) != 1) + usage(builtin_grep_usage); + switch (arg[1]) { + case 'A': + opt.post_context = num; + break; + default: + case 'C': + opt.post_context = num; + case 'B': + opt.pre_context = num; + break; } - if (!strcmp("-A", arg) || - !strcmp("-B", arg) || - !strcmp("-C", arg)) { - unsigned num; - if ((argc + argv <= src) || - sscanf(*src++, "%u", &num) != 1) - usage(builtin_grep_usage); - switch (arg[1]) { - case 'A': - opt.post_context = num; - break; - case 'C': - opt.post_context = num; - case 'B': - opt.pre_context = num; - break; - } + continue; + } + if (!strcmp("-e", arg)) { + if (1 < argc) { + add_pattern(&opt, argv[1]); + argv++; + argc--; continue; } + usage(builtin_grep_usage); + } + if (!strcmp("--", arg)) { + no_more_flags = 1; + continue; + } + /* Either unrecognized option or a single pattern */ + if (!no_more_flags && *arg == '-') + usage(builtin_grep_usage); + if (!opt.pattern_list) { + add_pattern(&opt, arg); + break; + } + else { + /* We are looking at the first path or rev; + * it is found at argv[0] after leaving the + * loop. + */ + argc++; argv--; + break; } - *dst++ = arg; } - if (!opt.pattern) + if (!opt.pattern_list) die("no pattern given."); - - err = regcomp(&opt.regexp, opt.pattern, opt.regflags); - if (err) { - char errbuf[1024]; - regerror(err, &opt.regexp, errbuf, 1024); - regfree(&opt.regexp); - die("'%s': %s", opt.pattern, errbuf); + compile_patterns(&opt); + tail = &object_list; + while (1 < argc) { + struct object *object; + struct object_list *elem; + const char *arg = argv[1]; + unsigned char sha1[20]; + if (get_sha1(arg, sha1) < 0) + break; + object = parse_object(sha1); + if (!object) + die("bad object %s", arg); + elem = object_list_insert(object, tail); + elem->name = arg; + tail = &elem->next; + argc--; argv++; + } + if (1 < argc) + paths = get_pathspec(prefix, argv + 1); + else if (prefix) { + paths = xcalloc(2, sizeof(const char *)); + paths[0] = prefix; + paths[1] = NULL; } - init_revisions(&rev); - *dst = NULL; - argc = setup_revisions(dst - argv, argv, &rev, NULL); - + if (!object_list) + return !grep_cache(&opt, paths, cached); /* * Do not walk "grep -e foo master next pu -- Documentation/" * but do walk "grep -e foo master..next -- Documentation/". @@ -460,43 +599,19 @@ int cmd_grep(int argc, const char **argv, char **envp) * "grep -e foo v1.0.0:Documentation/ master..next" * so detect that and complain. */ - for (list = rev.pending_objects; list; list = list->next) { + for (list = object_list; list; list = list->next) { struct object *real_obj; - if (list->item->flags & UNINTERESTING) - seen_range = 1; real_obj = deref_tag(list->item, NULL, 0); if (strcmp(real_obj->type, commit_type)) seen_noncommit = 1; } - if (!rev.pending_objects) - return !grep_cache(&opt, &rev, cached); if (cached) die("both --cached and revisions given."); - if (seen_range && seen_noncommit) - die("both A..B and non commit are given."); - if (seen_range) { - struct commit *commit; - prepare_revision_walk(&rev); - while ((commit = get_revision(&rev)) != NULL) { - unsigned char *sha1 = commit->object.sha1; - const char *n = find_unique_abbrev(sha1, rev.abbrev); - char rev_name[41]; - strcpy(rev_name, n); - if (grep_object(&opt, &rev, &commit->object, rev_name)) - hit = 1; - commit->buffer = NULL; - } - return !hit; - } - - /* all of them are non-commit; do not walk, and - * do not lose their names. - */ - for (list = rev.pending_objects; list; list = list->next) { + for (list = object_list; list; list = list->next) { struct object *real_obj; real_obj = deref_tag(list->item, NULL, 0); - if (grep_object(&opt, &rev, real_obj, list->name)) + if (grep_object(&opt, paths, real_obj, list->name)) hit = 1; } return !hit;