1 /* vi: set sw=4 ts=4: */
2 /*
3  * Mini grep implementation for busybox using libc regex.
4  *
5  * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6  * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7  *
8  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
9  */
/* BB_AUDIT SUSv3 defects - (stale note) option -x "match whole line only" used to be unsupported; it is now implemented, see OPT_x. */
11 /* BB_AUDIT GNU defects - always acts as -a.  */
12 /* http://www.opengroup.org/onlinepubs/007904975/utilities/grep.html */
13 /*
14  * 2004,2006 (C) Vladimir Oleynik <dzo@simtreas.ru> -
15  * correction "-e pattern1 -e pattern2" logic and more optimizations.
16  * precompiled regex
17  *
18  * (C) 2006 Jac Goudsmit added -o option
19  */
20 //config:config GREP
21 //config:	bool "grep (8.6 kb)"
22 //config:	default y
23 //config:	help
24 //config:	grep is used to search files for a specified pattern.
25 //config:
26 //config:config EGREP
27 //config:	bool "egrep (7.8 kb)"
28 //config:	default y
29 //config:	help
30 //config:	Alias to "grep -E".
31 //config:
32 //config:config FGREP
33 //config:	bool "fgrep (7.8 kb)"
34 //config:	default y
35 //config:	help
36 //config:	Alias to "grep -F".
37 //config:
38 //config:config FEATURE_GREP_CONTEXT
39 //config:	bool "Enable before and after context flags (-A, -B and -C)"
40 //config:	default y
41 //config:	depends on GREP || EGREP || FGREP
42 //config:	help
43 //config:	Print the specified number of leading (-B) and/or trailing (-A)
44 //config:	context surrounding our matching lines.
45 //config:	Print the specified number of context lines (-C).
46 
47 //applet:IF_GREP(APPLET(grep, BB_DIR_BIN, BB_SUID_DROP))
48 //                APPLET_ODDNAME:name   main  location    suid_type     help
49 //applet:IF_EGREP(APPLET_ODDNAME(egrep, grep, BB_DIR_BIN, BB_SUID_DROP, egrep))
50 //applet:IF_FGREP(APPLET_ODDNAME(fgrep, grep, BB_DIR_BIN, BB_SUID_DROP, fgrep))
51 
52 //kbuild:lib-$(CONFIG_GREP) += grep.o
53 //kbuild:lib-$(CONFIG_EGREP) += grep.o
54 //kbuild:lib-$(CONFIG_FGREP) += grep.o
55 
56 #include "libbb.h"
57 #include "common_bufsiz.h"
58 #include "xregex.h"
59 
60 //usage:#define grep_trivial_usage
61 //usage:       "[-HhnlLoqvsrRiwFE"
62 //usage:	IF_EXTRA_COMPAT("z")
63 //usage:       "] [-m N] "
64 //usage:	IF_FEATURE_GREP_CONTEXT("[-A|B|C N] ")
65 //usage:       "{ PATTERN | -e PATTERN... | -f FILE... } [FILE]..."
66 //usage:#define grep_full_usage "\n\n"
67 //usage:       "Search for PATTERN in FILEs (or stdin)\n"
68 //usage:     "\n	-H	Add 'filename:' prefix"
69 //usage:     "\n	-h	Do not add 'filename:' prefix"
70 //usage:     "\n	-n	Add 'line_no:' prefix"
71 //usage:     "\n	-l	Show only names of files that match"
72 //usage:     "\n	-L	Show only names of files that don't match"
73 //usage:     "\n	-c	Show only count of matching lines"
74 //usage:     "\n	-o	Show only the matching part of line"
75 //usage:     "\n	-q	Quiet. Return 0 if PATTERN is found, 1 otherwise"
76 //usage:     "\n	-v	Select non-matching lines"
77 //usage:     "\n	-s	Suppress open and read errors"
78 //usage:     "\n	-r	Recurse"
79 //usage:     "\n	-R	Recurse and dereference symlinks"
80 //usage:     "\n	-i	Ignore case"
81 //usage:     "\n	-w	Match whole words only"
82 //usage:     "\n	-x	Match whole lines only"
83 //usage:     "\n	-F	PATTERN is a literal (not regexp)"
84 //usage:     "\n	-E	PATTERN is an extended regexp"
85 //usage:	IF_EXTRA_COMPAT(
86 //usage:     "\n	-z	NUL terminated input"
87 //usage:	)
88 //usage:     "\n	-m N	Match up to N times per file"
89 //usage:	IF_FEATURE_GREP_CONTEXT(
90 //usage:     "\n	-A N	Print N lines of trailing context"
91 //usage:     "\n	-B N	Print N lines of leading context"
92 //usage:     "\n	-C N	Same as '-A N -B N'"
93 //usage:	)
94 //usage:     "\n	-e PTRN	Pattern to match"
95 //usage:     "\n	-f FILE	Read pattern from file"
96 //usage:
97 //usage:#define grep_example_usage
98 //usage:       "$ grep root /etc/passwd\n"
99 //usage:       "root:x:0:0:root:/root:/bin/bash\n"
100 //usage:       "$ grep ^[rR]oo. /etc/passwd\n"
101 //usage:       "root:x:0:0:root:/root:/bin/bash\n"
102 //usage:
103 //usage:#define egrep_trivial_usage NOUSAGE_STR
104 //usage:#define egrep_full_usage ""
105 //usage:#define fgrep_trivial_usage NOUSAGE_STR
106 //usage:#define fgrep_full_usage ""
107 
/*
 * Option string handed to getopt32()/getopt32long() in grep_main().
 * -e,-f are lists; -m,-A,-B,-C have numeric param.
 * The letters appear in the same order as the OPTBIT_* enumerators below;
 * NOTE(review): presumably getopt32 assigns mask bits by position in this
 * string, so the two must be kept in sync - confirm against libbb.
 */
#define OPTSTR_GREP \
	"lnqvscFiHhe:*f:*LorRm:+wx" \
	IF_FEATURE_GREP_CONTEXT("A:+B:+C:+") \
	"E" \
	IF_EXTRA_COMPAT("z") \
	"aI"
/* ignored: -a "assume all files to be text" */
/* ignored: -I "assume binary files have no matches" */
/*
 * Option bit numbers (OPTBIT_x) and the corresponding masks (OPT_x) that are
 * tested against option_mask32. The OPTBIT_* order mirrors the order of the
 * letters in OPTSTR_GREP. -a and -I have no enumerators: they are accepted
 * but ignored.
 */
enum {
	OPTBIT_l, /* list matched file names only */
	OPTBIT_n, /* print line# */
	OPTBIT_q, /* quiet - exit(EXIT_SUCCESS) on first match */
	OPTBIT_v, /* invert the match, to select non-matching lines */
	OPTBIT_s, /* suppress errors about file open errors */
	OPTBIT_c, /* count matches per file (suppresses normal output) */
	OPTBIT_F, /* literal match */
	OPTBIT_i, /* case-insensitive */
	OPTBIT_H, /* force filename display */
	OPTBIT_h, /* inhibit filename display */
	OPTBIT_e, /* -e PATTERN */
	OPTBIT_f, /* -f FILE_WITH_PATTERNS */
	OPTBIT_L, /* list unmatched file names only */
	OPTBIT_o, /* show only matching parts of lines */
	OPTBIT_r, /* recurse dirs */
	OPTBIT_R, /* recurse dirs and symlinks to dirs */
	OPTBIT_m, /* -m MAX_MATCHES */
	OPTBIT_w, /* -w whole word match */
	OPTBIT_x, /* -x whole line match */
	IF_FEATURE_GREP_CONTEXT(    OPTBIT_A ,) /* -A NUM: after-match context */
	IF_FEATURE_GREP_CONTEXT(    OPTBIT_B ,) /* -B NUM: before-match context */
	IF_FEATURE_GREP_CONTEXT(    OPTBIT_C ,) /* -C NUM: -A and -B combined */
	OPTBIT_E, /* extended regexp */
	IF_EXTRA_COMPAT(            OPTBIT_z ,) /* input is NUL terminated */
	OPT_l = 1 << OPTBIT_l,
	OPT_n = 1 << OPTBIT_n,
	OPT_q = 1 << OPTBIT_q,
	OPT_v = 1 << OPTBIT_v,
	OPT_s = 1 << OPTBIT_s,
	OPT_c = 1 << OPTBIT_c,
	OPT_F = 1 << OPTBIT_F,
	OPT_i = 1 << OPTBIT_i,
	OPT_H = 1 << OPTBIT_H,
	OPT_h = 1 << OPTBIT_h,
	OPT_e = 1 << OPTBIT_e,
	OPT_f = 1 << OPTBIT_f,
	OPT_L = 1 << OPTBIT_L,
	OPT_o = 1 << OPTBIT_o,
	OPT_r = 1 << OPTBIT_r,
	OPT_R = 1 << OPTBIT_R,
	OPT_m = 1 << OPTBIT_m,
	OPT_w = 1 << OPTBIT_w,
	OPT_x = 1 << OPTBIT_x,
	/* The trailing "+ 0" keeps the initializer valid when the IF_xxx()
	 * wrapper contributes nothing (presumably when the feature is
	 * configured out), so that the mask is then simply 0. */
	OPT_A = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_A)) + 0,
	OPT_B = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_B)) + 0,
	OPT_C = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_C)) + 0,
	OPT_E = 1 << OPTBIT_E,
	OPT_z = IF_EXTRA_COMPAT(            (1 << OPTBIT_z)) + 0,
};
167 
/* Readable shorthands for testing individual bits of option_mask32.
 * Each evaluates to non-zero when the corresponding option is set. */
#define PRINT_LINE_NUM              (option_mask32 & OPT_n)
#define BE_QUIET                    (option_mask32 & OPT_q)
#define SUPPRESS_ERR_MSGS           (option_mask32 & OPT_s)
#define PRINT_MATCH_COUNTS          (option_mask32 & OPT_c)
#define FGREP_FLAG                  (option_mask32 & OPT_F)
#define NUL_DELIMITED               (option_mask32 & OPT_z)
174 
/* Applet-wide state. It is accessed through the G macro, which overlays
 * this struct on bb_common_bufsiz1. */
struct globals {
	int max_matches;          /* argument of -m; compared with the per-file match count */
#if !ENABLE_EXTRA_COMPAT
	int reflags;              /* flags passed to xregcomp() for every pattern */
#else
	RE_TRANSLATE_TYPE case_fold; /* RE_TRANSLATE_TYPE is [[un]signed] char* */
#endif
	smalluint invert_search;  /* 0 or 1: set by -v, toggled for "-f EMPTY_FILE" */
	smalluint print_filename; /* non-zero: prefix output with cur_file */
	smalluint open_errors;    /* set when a file could not be opened; grep_main then returns 2 */
#if ENABLE_FEATURE_GREP_CONTEXT
	smalluint did_print_line; /* set once print_line() has printed; gates the "--" separator */
	int lines_before;         /* -B (or -C) count */
	int lines_after;          /* -A (or -C) count */
	char **before_buf;        /* circular buffer of lines_before saved lines */
	IF_EXTRA_COMPAT(size_t *before_buf_size;) /* lengths of the saved lines */
	int last_line_printed;    /* line number of the most recent print_line() */
#endif
	/* globals used internally */
	llist_t *pattern_head;   /* growable list of patterns to match */
	const char *cur_file;    /* the current file we are reading */
} FIX_ALIASING;
/* The globals live in the shared bb_common_bufsiz1 buffer; INIT_G() sets that
 * buffer up and fails the build if struct globals does not fit into it. */
#define G (*(struct globals*)bb_common_bufsiz1)
#define INIT_G() do { \
	setup_common_bufsiz(); \
	BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
} while (0)
/* Accessor macros: the rest of the file uses the bare names below
 * instead of spelling out G.member. */
#define max_matches       (G.max_matches         )
#if !ENABLE_EXTRA_COMPAT
# define reflags          (G.reflags             )
#else
# define case_fold        (G.case_fold           )
/* http://www.delorie.com/gnu/docs/regex/regex_46.html */
/* With EXTRA_COMPAT, "reflags" is the GNU regex syntax word
 * re_syntax_options, and the POSIX REG_xxx names are redefined: REG_NOSUB
 * and REG_ICASE expand to a deliberate syntax error so that any use is
 * caught at compile time. */
# define reflags           re_syntax_options
# undef REG_NOSUB
# undef REG_EXTENDED
# undef REG_ICASE
# define REG_NOSUB    bug:is:here /* should not be used */
/* Just RE_SYNTAX_EGREP is not enough, need to enable {n[,[m]]} too */
# define REG_EXTENDED (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
# define REG_ICASE    bug:is:here /* should not be used */
#endif
#define invert_search     (G.invert_search       )
#define print_filename    (G.print_filename      )
#define open_errors       (G.open_errors         )
#define did_print_line    (G.did_print_line      )
#define lines_before      (G.lines_before        )
#define lines_after       (G.lines_after         )
#define before_buf        (G.before_buf          )
#define before_buf_size   (G.before_buf_size     )
#define last_line_printed (G.last_line_printed   )
#define pattern_head      (G.pattern_head        )
#define cur_file          (G.cur_file            )
228 
229 
/* One entry of the pattern list (stored as the data of a pattern_head node):
 * the pattern text, its lazily compiled regex, and the range of the most
 * recent match. */
typedef struct grep_list_data_t {
	char *pattern; /* pattern text; freed at cleanup only if ALLOCATED is set */
/* for GNU regex, matched_range must be persistent across grep_file() calls */
#if !ENABLE_EXTRA_COMPAT
	regex_t compiled_regex;
	regmatch_t matched_range;
#else
	struct re_pattern_buffer compiled_regex;
	struct re_registers matched_range;
#endif
/* Bits of flg_mem_allocated_compiled:
 * ALLOCATED - pattern was malloc'ed (read from a -f file) and must be freed;
 * COMPILED  - compiled_regex has been compiled (done on first use in grep_file) */
#define ALLOCATED 1
#define COMPILED 2
	int flg_mem_allocated_compiled;
} grep_list_data_t;
244 
245 #if !ENABLE_EXTRA_COMPAT
246 #define print_line(line, line_len, linenum, decoration) \
247 	print_line(line, linenum, decoration)
248 #endif
print_line(const char * line,size_t line_len,int linenum,char decoration)249 static void print_line(const char *line, size_t line_len, int linenum, char decoration)
250 {
251 #if ENABLE_FEATURE_GREP_CONTEXT
252 	/* Happens when we go to next file, immediately hit match
253 	 * and try to print prev context... from prev file! Don't do it */
254 	if (linenum < 1)
255 		return;
256 	/* possibly print the little '--' separator */
257 	if ((lines_before || lines_after) && did_print_line
258 	 && last_line_printed != linenum - 1
259 	) {
260 		puts("--");
261 	}
262 	/* guard against printing "--" before first line of first file */
263 	did_print_line = 1;
264 	last_line_printed = linenum;
265 #endif
266 	if (print_filename)
267 		printf("%s%c", cur_file, decoration);
268 	if (PRINT_LINE_NUM)
269 		printf("%i%c", linenum, decoration);
270 	/* Emulate weird GNU grep behavior with -ov */
271 	if ((option_mask32 & (OPT_v|OPT_o)) != (OPT_v|OPT_o)) {
272 #if !ENABLE_EXTRA_COMPAT
273 		puts(line);
274 #else
275 		fwrite(line, 1, line_len, stdout);
276 		putchar(NUL_DELIMITED ? '\0' : '\n');
277 #endif
278 	}
279 }
280 
#if ENABLE_EXTRA_COMPAT
/*
 * Read one record from 'file' with getdelim(), using '\0' as the delimiter
 * when -z is in effect and '\n' otherwise.
 * Unlike getline, this one removes the trailing delimiter.
 *
 * line_ptr, line_alloc_len: buffer pointer and capacity, as for getdelim().
 *
 * Returns the record length without the delimiter, or getdelim()'s
 * non-positive result when nothing was read. In that case the buffer is
 * freed here and *line_ptr is reset to NULL (and *line_alloc_len to 0), so
 * the caller is never left holding a dangling pointer and may safely call
 * free() on it or pass it to this function again.
 */
static ssize_t FAST_FUNC bb_getline(char **line_ptr, size_t *line_alloc_len, FILE *file)
{
	const int delim = (NUL_DELIMITED ? '\0' : '\n');
	ssize_t res_sz;
	char *line;

	res_sz = getdelim(line_ptr, line_alloc_len, delim, file);
	line = *line_ptr;

	if (res_sz > 0) {
		/* the last record of a file may lack the delimiter */
		if (line[res_sz - 1] == delim)
			line[--res_sz] = '\0';
		return res_sz;
	}

	free(line); /* uclibc allocates a buffer even on EOF. WTF? */
	*line_ptr = NULL;
	*line_alloc_len = 0;
	return res_sz;
}
#endif
301 
/*
 * Scan one already opened stream line by line, testing each line against
 * the patterns in pattern_head, and print whatever the options ask for
 * (selected lines, context lines, -o fragments, -c count, -l/-L file name).
 * cur_file must name the stream; it is used for all file name output.
 *
 * A line is "selected" when (some pattern matched) XOR invert_search.
 *
 * Returns:
 *  - with -l: 1 right after the first selected line (file name printed);
 *  - with -L: 0 right after the first selected line (nothing printed),
 *    or 1 at end of input if no line was selected (file name printed);
 *  - otherwise: 1 if at least one line was selected, else 0.
 * With -q the process exits with EXIT_SUCCESS on the first selected line.
 */
static int grep_file(FILE *file)
{
	smalluint found;
	int linenum = 0;
	int nmatches = 0;
#if !ENABLE_EXTRA_COMPAT
	char *line;
#else
	char *line = NULL;
	ssize_t line_len;
	size_t line_alloc_len;
	/* let the POSIX-style member names below work on struct re_registers */
# define rm_so start[0]
# define rm_eo end[0]
#endif
#if ENABLE_FEATURE_GREP_CONTEXT
	int print_n_lines_after = 0;
	int curpos = 0; /* track where we are in the circular 'before' buffer */
	int idx = 0; /* used for iteration through the circular buffer */
#else
	enum { print_n_lines_after = 0 };
#endif

	while (
#if !ENABLE_EXTRA_COMPAT
		(line = xmalloc_fgetline(file)) != NULL
#else
		(line_len = bb_getline(&line, &line_alloc_len, file)) >= 0
#endif
	) {
		llist_t *pattern_ptr = pattern_head;
		grep_list_data_t *gl = gl; /* for gcc */

		linenum++;
		found = 0;
		/* try the patterns one by one until one of them matches */
		while (pattern_ptr) {
			gl = (grep_list_data_t *)pattern_ptr->data;
			if (FGREP_FLAG) {
				/* -F: plain substring search */
				char *match;
				char *str = line;
 opt_f_again:
				match = ((option_mask32 & OPT_i)
					? strcasestr(str, gl->pattern)
					: strstr(str, gl->pattern)
					);
				if (match) {
					if (option_mask32 & OPT_x) {
						/* -x: the pattern must cover the whole line */
						if (match != str)
							goto opt_f_not_found;
						if (str[strlen(gl->pattern)] != '\0')
							goto opt_f_not_found;
					} else
					if (option_mask32 & OPT_w) {
						/* -w: the chars before and after the match
						 * must not be word chars (alnum or '_') */
						char c = (match != line) ? match[-1] : ' ';
						if (!isalnum(c) && c != '_') {
							c = match[strlen(gl->pattern)];
							if (!c || (!isalnum(c) && c != '_'))
								goto opt_f_found;
						}
						/* not a whole word: retry one char further */
						str = match + 1;
						goto opt_f_again;
					}
 opt_f_found:
					found = 1;
 opt_f_not_found: ;
				}
			} else {
#if ENABLE_EXTRA_COMPAT
				unsigned start_pos;
#else
				int match_flg;
#endif
				char *match_at;

				/* compile the regex on first use */
				if (!(gl->flg_mem_allocated_compiled & COMPILED)) {
					gl->flg_mem_allocated_compiled |= COMPILED;
#if !ENABLE_EXTRA_COMPAT
					xregcomp(&gl->compiled_regex, gl->pattern, reflags);
#else
					memset(&gl->compiled_regex, 0, sizeof(gl->compiled_regex));
					gl->compiled_regex.translate = case_fold; /* for -i */
					if (re_compile_pattern(gl->pattern, strlen(gl->pattern), &gl->compiled_regex))
						bb_error_msg_and_die("bad regex '%s'", gl->pattern);
#endif
				}
#if !ENABLE_EXTRA_COMPAT
				gl->matched_range.rm_so = 0;
				gl->matched_range.rm_eo = 0;
				match_flg = 0;
#else
				start_pos = 0;
#endif
				match_at = line;
 opt_w_again:
//bb_error_msg("'%s' start_pos:%d line_len:%d", match_at, start_pos, line_len);
				if (
#if !ENABLE_EXTRA_COMPAT
					regexec(&gl->compiled_regex, match_at, 1, &gl->matched_range, match_flg) == 0
#else
					re_search(&gl->compiled_regex, match_at, line_len,
							start_pos, /*range:*/ line_len,
							&gl->matched_range) >= 0
#endif
				) {
					if (option_mask32 & OPT_x) {
						/* -x: the match must span the whole line */
						found |= (gl->matched_range.rm_so == 0
						         && match_at[gl->matched_range.rm_eo] == '\0');
					} else
					if (!(option_mask32 & OPT_w)) {
						found = 1;
					} else {
						/* -w: check the chars around the match;
						 * start of line counts as a non-word char */
						char c = ' ';
						if (match_at > line || gl->matched_range.rm_so != 0) {
							c = match_at[gl->matched_range.rm_so - 1];
						}
						if (!isalnum(c) && c != '_') {
							c = match_at[gl->matched_range.rm_eo];
						}
						if (!isalnum(c) && c != '_') {
							found = 1;
						} else {
			/*
			 * Why check gl->matched_range.rm_eo?
			 * Zero-length match makes -w skip the line:
			 * "echo foo | grep ^" prints "foo",
			 * "echo foo | grep -w ^" prints nothing.
			 * Without such check, we can loop forever.
			 */
#if !ENABLE_EXTRA_COMPAT
							if (gl->matched_range.rm_eo != 0) {
								match_at += gl->matched_range.rm_eo;
								match_flg |= REG_NOTBOL;
								goto opt_w_again;
							}
#else
							if (gl->matched_range.rm_eo > start_pos) {
								start_pos = gl->matched_range.rm_eo;
								goto opt_w_again;
							}
#endif
						}
					}
				}
			}
			/* If it's a non-inverted search, we can stop
			 * at first match and report it.
			 * If it's an inverted search, we can move on
			 * to the next line of input, ignoring the
			 * rest of the patterns.
			 */
			if (found) {
				//if (invert_search)
				//	goto do_not_found;
				//goto do_found;
				break; // this accomplishes both
			}
			pattern_ptr = pattern_ptr->link;
		} /* while (pattern_ptr) */

		if (found ^ invert_search) {
 //do_found:
			/* keep track of matches */
			nmatches++;

			/* quiet/print (non)matching file names only? */
			if (option_mask32 & (OPT_q|OPT_l|OPT_L)) {
				free(line); /* we don't need line anymore */
				if (BE_QUIET) {
					/* manpage says about -q:
					 * "exit immediately with zero status
					 * if any match is found,
					 * even if errors were detected" */
					exit(EXIT_SUCCESS);
				}
				/* -l "print filenames with matches": stop after the first match */
				if (option_mask32 & OPT_l) {
					puts(cur_file);
					return 1;
				}
				/* -L "print filenames without matches": return early too */
				return 0; /* 0: we do not print fname, hence it's "not a match" */
			}

#if ENABLE_FEATURE_GREP_CONTEXT
			/* Were we printing context and saw next (unwanted) match? */
			if ((option_mask32 & OPT_m) && nmatches > max_matches)
				break;
#endif

			/* print the matched line */
			if (PRINT_MATCH_COUNTS == 0) {
#if ENABLE_FEATURE_GREP_CONTEXT
				/* slot holding the most recently saved 'before' line */
				int prevpos = (curpos == 0) ? lines_before - 1 : curpos - 1;

				/* if we were told to print 'before' lines and there is at least
				 * one line in the circular buffer, print them */
				if (lines_before && before_buf[prevpos] != NULL) {
					int first_buf_entry_line_num = linenum - lines_before;

					/* advance to the first entry in the circular buffer, and
					 * figure out the line number of the first line in the
					 * buffer */
					idx = curpos;
					while (before_buf[idx] == NULL) {
						idx = (idx + 1) % lines_before;
						first_buf_entry_line_num++;
					}

					/* now print each line in the buffer, clearing them as we go */
					while (before_buf[idx] != NULL) {
						print_line(before_buf[idx], before_buf_size[idx], first_buf_entry_line_num, '-');
						free(before_buf[idx]);
						before_buf[idx] = NULL;
						idx = (idx + 1) % lines_before;
						first_buf_entry_line_num++;
					}
				}

				/* make a note that we need to print 'after' lines */
				print_n_lines_after = lines_after;
#endif
				if (option_mask32 & OPT_o) {
					if (FGREP_FLAG) {
						/* -Fo just prints the pattern
						 * (unless -v: -Fov doesn't print anything at all) */
						if (found)
							print_line(gl->pattern, strlen(gl->pattern), linenum, ':');
					} else while (1) {
						/* print every match of 'gl' in this line, one per
						 * output line; the line is temporarily cut at the
						 * end of each match */
						unsigned start = gl->matched_range.rm_so;
						unsigned end = gl->matched_range.rm_eo;
						unsigned len = end - start;
						char old = line[end];
						line[end] = '\0';
						/* Empty match is not printed: try "echo test | grep -o ''" */
						if (len != 0)
							print_line(line + start, len, linenum, ':');
						if (old == '\0')
							break;
						line[end] = old;
						/* step over one char after an empty match,
						 * otherwise the search would not advance */
						if (len == 0)
							end++;
#if !ENABLE_EXTRA_COMPAT
						if (regexec(&gl->compiled_regex, line + end,
								1, &gl->matched_range, REG_NOTBOL) != 0)
							break;
						/* offsets are relative to line + end: rebase them */
						gl->matched_range.rm_so += end;
						gl->matched_range.rm_eo += end;
#else
						if (re_search(&gl->compiled_regex, line, line_len,
								end, line_len - end,
								&gl->matched_range) < 0)
							break;
#endif
					}
				} else {
					print_line(line, line_len, linenum, ':');
				}
			}
		}
#if ENABLE_FEATURE_GREP_CONTEXT
		else { /* no match */
 //do_not_found:
			/* if we need to print some context lines after the last match, do so */
			if (print_n_lines_after) {
				print_line(line, strlen(line), linenum, '-');
				print_n_lines_after--;
			} else if (lines_before) {
				/* Add the line to the circular 'before' buffer */
				free(before_buf[curpos]);
				before_buf[curpos] = line;
				IF_EXTRA_COMPAT(before_buf_size[curpos] = line_len;)
				curpos = (curpos + 1) % lines_before;
				/* avoid free(line) - we took the line */
				line = NULL;
			}
		}

#endif /* ENABLE_FEATURE_GREP_CONTEXT */
#if !ENABLE_EXTRA_COMPAT
		/* xmalloc_fgetline() returned a fresh buffer for this line */
		free(line);
#endif
		/* Did we print all context after last requested match? */
		if ((option_mask32 & OPT_m)
		 && !print_n_lines_after
		 && nmatches == max_matches
		) {
			break;
		}
	} /* while (read line) */

	/* special-case file post-processing for options where we don't print line
	 * matches, just filenames and possibly match counts */

	/* grep -c: print [filename:]count, even if count is zero */
	if (PRINT_MATCH_COUNTS) {
		if (print_filename)
			printf("%s:", cur_file);
		printf("%d\n", nmatches);
	}

	/* grep -L: "print filenames without matches" */
	if (option_mask32 & OPT_L) {
		/* nmatches is zero, no need to check it:
		 * we return 0 early if -L and we detect a match
		 */
		puts(cur_file);
		return 1; /* 1: we printed fname, hence it's "a match" */
	}

	return nmatches != 0; /* we return not a count, but a boolean */
}
612 
613 #if ENABLE_FEATURE_CLEAN_UP
614 #define new_grep_list_data(p, m) add_grep_list_data(p, m)
add_grep_list_data(char * pattern,int flg_used_mem)615 static char *add_grep_list_data(char *pattern, int flg_used_mem)
616 #else
617 #define new_grep_list_data(p, m) add_grep_list_data(p)
618 static char *add_grep_list_data(char *pattern)
619 #endif
620 {
621 	grep_list_data_t *gl = xzalloc(sizeof(*gl));
622 	gl->pattern = pattern;
623 #if ENABLE_FEATURE_CLEAN_UP
624 	gl->flg_mem_allocated_compiled = flg_used_mem;
625 #else
626 	/*gl->flg_mem_allocated_compiled = 0;*/
627 #endif
628 	return (char *)gl;
629 }
630 
/*
 * Consume the list of -f FILE arguments: every line of every file becomes
 * one pattern added to pattern_head. The pattern strings are the malloc'ed
 * lines themselves, hence they are flagged ALLOCATED.
 * The list nodes of 'fopt' are freed along the way (their data is not).
 * The files are opened with xfopen_stdin().
 */
static void load_regexes_from_file(llist_t *fopt)
{
	llist_t *node = fopt;

	while (node != NULL) {
		llist_t *next = node->link;
		char *fname = node->data;
		FILE *pat_stream;

		/* link and data were saved above, the node can go */
		free(node);

		pat_stream = xfopen_stdin(fname);
		for (;;) {
			char *pat = xmalloc_fgetline(pat_stream);
			if (pat == NULL)
				break;
			llist_add_to(&pattern_head,
				new_grep_list_data(pat, ALLOCATED));
		}
		fclose_if_not_stdin(pat_stream);

		node = next;
	}
}
649 
/*
 * Split 'pattern' at newlines and add each piece to *lst as a separate
 * pattern entry. The string is cut in place by strsep(); the entries point
 * into it and are therefore created with flag 0 (not ALLOCATED).
 */
static void load_pattern_list(llist_t **lst, char *pattern)
{
	char *rest = pattern;

	for (;;) {
		char *piece = strsep(&rest, "\n");
		if (piece == NULL)
			break;
		llist_add_to(lst, new_grep_list_data(piece, 0));
	}
}
656 
file_action_grep(struct recursive_state * state UNUSED_PARAM,const char * filename,struct stat * statbuf)657 static int FAST_FUNC file_action_grep(struct recursive_state *state UNUSED_PARAM,
658 		const char *filename,
659 		struct stat *statbuf)
660 {
661 	FILE *file;
662 
663 	/* If we are given a link to a directory, we should bail out now, rather
664 	 * than trying to open the "file" and hoping getline gives us nothing,
665 	 * since that is not portable across operating systems (FreeBSD for
666 	 * example will return the raw directory contents). */
667 	if (S_ISLNK(statbuf->st_mode)) {
668 		struct stat sb;
669 		if (stat(filename, &sb) != 0) {
670 			if (!SUPPRESS_ERR_MSGS)
671 				bb_simple_perror_msg(filename);
672 			return 0;
673 		}
674 		if (S_ISDIR(sb.st_mode))
675 			return 1;
676 	}
677 
678 	file = fopen_for_read(filename);
679 	if (file == NULL) {
680 		if (!SUPPRESS_ERR_MSGS)
681 			bb_simple_perror_msg(filename);
682 		open_errors = 1;
683 		return 0;
684 	}
685 	cur_file = filename;
686 	*(int*)state->userData |= grep_file(file);
687 	fclose(file);
688 	return 1;
689 }
690 
grep_dir(const char * dir)691 static int grep_dir(const char *dir)
692 {
693 	int matched = 0;
694 	recursive_action(dir, 0
695 		| ACTION_RECURSE
696 		| ((option_mask32 & OPT_R) ? ACTION_FOLLOWLINKS : 0)
697 		| ACTION_FOLLOWLINKS_L0 /* grep -r ... SYMLINK follows it */
698 		| ACTION_DEPTHFIRST
699 		| 0,
700 		/* fileAction= */ file_action_grep,
701 		/* dirAction= */ NULL,
702 		/* userData= */ &matched
703 	);
704 	return matched;
705 }
706 
/*
 * Entry point of the grep/egrep/fgrep applets.
 *
 * Parses the options, builds the pattern list (from -e, -f, or the first
 * non-option argument), then greps each named file - or stdin when no file
 * or "-" is given - recursing into directories with -r/-R.
 *
 * Returns 0 if anything matched, 1 if nothing matched, and 2 if some file
 * could not be opened. xfunc_error_retval is set to 2 as well.
 */
int grep_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
int grep_main(int argc UNUSED_PARAM, char **argv)
{
	FILE *file;
	int matched;
	llist_t *fopt = NULL; /* list of -f FILE arguments */
#if ENABLE_FEATURE_GREP_CONTEXT
	int Copt, opts;
#endif
	INIT_G();

	/* For grep, exitcode of 1 is "not found". Other errors are 2: */
	xfunc_error_retval = 2;

	/* do normal option parsing */
#if ENABLE_FEATURE_GREP_CONTEXT
	/* -H unsets -h; -C unsets -A,-B */
	opts = getopt32long(argv, "^"
		OPTSTR_GREP
			"\0"
			"H-h:C-AB",
		"color\0" Optional_argument "\xff",
		&pattern_head, &fopt, &max_matches,
		&lines_after, &lines_before, &Copt
		, NULL
	);

	if (opts & OPT_C) {
		/* -C unsets prev -A and -B, but following -A or -B
		 * may override it */
		if (!(opts & OPT_A)) /* not overridden */
			lines_after = Copt;
		if (!(opts & OPT_B)) /* not overridden */
			lines_before = Copt;
	}
	/* sanity checks */
	if (opts & (OPT_c|OPT_q|OPT_l|OPT_L)) {
		/* no lines are printed in these modes:
		 * line numbers and context make no sense */
		option_mask32 &= ~OPT_n;
		lines_before = 0;
		lines_after = 0;
	} else if (lines_before > 0) {
		if (lines_before > INT_MAX / sizeof(long long))
			lines_before = INT_MAX / sizeof(long long);
		/* overflow in (lines_before * sizeof(x)) is prevented (above) */
		before_buf = xzalloc(lines_before * sizeof(before_buf[0]));
		IF_EXTRA_COMPAT(before_buf_size = xzalloc(lines_before * sizeof(before_buf_size[0]));)
	}
#else
	/* with auto sanity checks */
	getopt32(argv, "^" OPTSTR_GREP "\0" "H-h:c-n:q-n:l-n:", // why trailing ":"?
		&pattern_head, &fopt, &max_matches);
#endif
	invert_search = ((option_mask32 & OPT_v) != 0); /* 0 | 1 */

	{	/* convert char **argv to pattern_list */
		/* each -e argument may hold several newline-separated patterns */
		llist_t *cur, *new = NULL;
		for (cur = pattern_head; cur; cur = cur->link)
			load_pattern_list(&new, cur->data);
		llist_free(pattern_head, NULL);
		pattern_head = new;
	}
	if (option_mask32 & OPT_f) {
		load_regexes_from_file(fopt);
		if (!pattern_head) { /* -f EMPTY_FILE? */
			/* GNU grep treats it as "nothing matches" except when -x */
			/* done by using a pattern which matches every line
			 * and flipping the sense of the search */
			const char *data = (option_mask32 & OPT_x) ? ".*" : "";
			llist_add_to(&pattern_head, new_grep_list_data((char*)data, 0));
			invert_search ^= 1;
		}
	}

	/* invoked as "fgrep": same as -F */
	if (ENABLE_FGREP && applet_name[0] == 'f')
		option_mask32 |= OPT_F;

#if !ENABLE_EXTRA_COMPAT
	/* match offsets are only needed for -o, -w and -x */
	if (!(option_mask32 & (OPT_o | OPT_w | OPT_x)))
		reflags = REG_NOSUB;
#endif

	/* invoked as "egrep", or -E given: extended regexps */
	if ((ENABLE_EGREP && applet_name[0] == 'e')
	 || (option_mask32 & OPT_E)
	) {
		reflags |= REG_EXTENDED;
	}
#if ENABLE_EXTRA_COMPAT
	else {
		reflags = RE_SYNTAX_GREP;
	}
#endif

	if (option_mask32 & OPT_i) {
#if !ENABLE_EXTRA_COMPAT
		reflags |= REG_ICASE;
#else
		/* build a translate table that maps 'a'..'z' to 'A'..'Z'
		 * and every other byte to itself */
		int i;
		case_fold = xmalloc(256);
		for (i = 0; i < 256; i++)
			case_fold[i] = (unsigned char)i;
		for (i = 'a'; i <= 'z'; i++)
			case_fold[i] = (unsigned char)(i - ('a' - 'A'));
#endif
	}

	argv += optind;

	/* if we didn't get a pattern from -e and no command file was specified,
	 * first parameter should be the pattern. no pattern, no worky */
	if (pattern_head == NULL) {
		if (*argv == NULL)
			bb_show_usage();
		load_pattern_list(&pattern_head, *argv++);
	}

	/* argv[0..(argc-1)] should be names of file to grep through. If
	 * there is more than one file to grep, we will print the filenames. */
	if (argv[0] && argv[1])
		print_filename = 1;
	/* -H / -h of course override */
	if (option_mask32 & OPT_H)
		print_filename = 1;
	if (option_mask32 & OPT_h)
		print_filename = 0;

	/* If no files were specified, or '-' was specified, take input from
	 * stdin. Otherwise, we grep through all the files specified. */
	matched = 0;
	/* do-while: the body must run once even when no file was named */
	do {
		cur_file = *argv;
		file = stdin;
		if (!cur_file || LONE_DASH(cur_file)) {
			cur_file = "(standard input)";
		} else {
			if (option_mask32 & (OPT_r|OPT_R)) {
				struct stat st;
				if (stat(cur_file, &st) == 0 && S_ISDIR(st.st_mode)) {
					/* recursing into a directory:
					 * print file names unless -h */
					if (!(option_mask32 & OPT_h))
						print_filename = 1;
					matched |= grep_dir(cur_file);
					goto grep_done;
				}
			}
			/* else: fopen(dir) will succeed, but reading won't */
			file = fopen_for_read(cur_file);
			if (file == NULL) {
				if (!SUPPRESS_ERR_MSGS)
					bb_simple_perror_msg(cur_file);
				open_errors = 1;
				/* "continue" jumps to the loop condition below,
				 * i.e. proceeds with the next file */
				continue;
			}
		}
		matched |= grep_file(file);
		fclose_if_not_stdin(file);
 grep_done: ;
	} while (*argv && *++argv);

	/* destroy all the elements in the pattern list */
	if (ENABLE_FEATURE_CLEAN_UP) {
		while (pattern_head) {
			llist_t *pattern_head_ptr = pattern_head;
			grep_list_data_t *gl = (grep_list_data_t *)pattern_head_ptr->data;

			pattern_head = pattern_head->link;
			/* free only what this entry is flagged as owning */
			if (gl->flg_mem_allocated_compiled & ALLOCATED)
				free(gl->pattern);
			if (gl->flg_mem_allocated_compiled & COMPILED)
				regfree(&gl->compiled_regex);
			free(gl);
			free(pattern_head_ptr);
		}
	}
	/* 0 = success, 1 = failed, 2 = error */
	if (open_errors)
		return 2;
	return !matched; /* invert return value: 0 = success, 1 = failed */
}
882