1 /* vi: set sw=4 ts=4: */
2 /*
3  * awk implementation for busybox
4  *
5  * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
6  *
7  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
8  */
9 //config:config AWK
10 //config:	bool "awk (23 kb)"
11 //config:	default y
12 //config:	help
13 //config:	Awk is used as a pattern scanning and processing language.
14 //config:
15 //config:config FEATURE_AWK_LIBM
16 //config:	bool "Enable math functions (requires libm)"
17 //config:	default y
18 //config:	depends on AWK
19 //config:	help
20 //config:	Enable math functions of the Awk programming language.
21 //config:	NOTE: This requires libm to be present for linking.
22 //config:
23 //config:config FEATURE_AWK_GNU_EXTENSIONS
24 //config:	bool "Enable a few GNU extensions"
25 //config:	default y
26 //config:	depends on AWK
27 //config:	help
28 //config:	Enable a few features from gawk:
29 //config:	* command line option -e AWK_PROGRAM
30 //config:	* simultaneous use of -f and -e on the command line.
31 //config:	This enables the use of awk library files.
32 //config:	Example: awk -f mylib.awk -e '{print myfunction($1);}' ...
33 
34 //applet:IF_AWK(APPLET_NOEXEC(awk, awk, BB_DIR_USR_BIN, BB_SUID_DROP, awk))
35 
36 //kbuild:lib-$(CONFIG_AWK) += awk.o
37 
38 //usage:#define awk_trivial_usage
39 //usage:       "[OPTIONS] [AWK_PROGRAM] [FILE]..."
40 //usage:#define awk_full_usage "\n\n"
41 //usage:       "	-v VAR=VAL	Set variable"
42 //usage:     "\n	-F SEP		Use SEP as field separator"
43 //usage:     "\n	-f FILE		Read program from FILE"
44 //usage:	IF_FEATURE_AWK_GNU_EXTENSIONS(
45 //usage:     "\n	-e AWK_PROGRAM"
46 //usage:	)
47 
48 #include "libbb.h"
49 #include "xregex.h"
50 #include <math.h>
51 
52 /* This is a NOEXEC applet. Be very careful! */
53 
54 
55 /* If you comment out one of these below, it will be #defined later
56  * to perform debug printfs to stderr: */
57 #define debug_printf_walker(...)  do {} while (0)
58 #define debug_printf_eval(...)  do {} while (0)
59 #define debug_printf_parse(...)  do {} while (0)
60 
61 #ifndef debug_printf_walker
62 # define debug_printf_walker(...) (fprintf(stderr, __VA_ARGS__))
63 #endif
64 #ifndef debug_printf_eval
65 # define debug_printf_eval(...) (fprintf(stderr, __VA_ARGS__))
66 #endif
67 #ifndef debug_printf_parse
68 # define debug_printf_parse(...) (fprintf(stderr, __VA_ARGS__))
69 #else
70 # define debug_parse_print_tc(...) ((void)0)
71 #endif
72 
73 
74 /* "+": stop on first non-option:
75  * $ awk 'BEGIN { for(i=1; i<ARGC; ++i) { print i ": " ARGV[i] }}' -argz
76  * 1: -argz
77  */
78 #define OPTSTR_AWK "+" \
79 	"F:v:*f:*" \
80 	IF_FEATURE_AWK_GNU_EXTENSIONS("e:*") \
81 	"W:"
82 enum {
83 	OPTBIT_F,	/* define field separator */
84 	OPTBIT_v,	/* define variable */
85 	OPTBIT_f,	/* pull in awk program from file */
86 	IF_FEATURE_AWK_GNU_EXTENSIONS(OPTBIT_e,) /* -e AWK_PROGRAM */
87 	OPTBIT_W,	/* -W ignored */
88 	OPT_F = 1 << OPTBIT_F,
89 	OPT_v = 1 << OPTBIT_v,
90 	OPT_f = 1 << OPTBIT_f,
91 	OPT_e = IF_FEATURE_AWK_GNU_EXTENSIONS((1 << OPTBIT_e)) + 0,
92 	OPT_W = 1 << OPTBIT_W
93 };
94 
95 #define	MAXVARFMT       240
96 
97 /* variable flags */
98 #define	VF_NUMBER       0x0001	/* 1 = primary type is number */
99 #define	VF_ARRAY        0x0002	/* 1 = it's an array */
100 
101 #define	VF_CACHED       0x0100	/* 1 = num/str value has cached str/num eq */
102 #define	VF_USER         0x0200	/* 1 = user input (may be numeric string) */
103 #define	VF_SPECIAL      0x0400	/* 1 = requires extra handling when changed */
104 #define	VF_WALK         0x0800	/* 1 = variable has alloc'd x.walker list */
105 #define	VF_FSTR         0x1000	/* 1 = don't free() var::string (not malloced, or is owned by something else) */
106 #define	VF_CHILD        0x2000	/* 1 = function arg; x.parent points to source */
107 #define	VF_DIRTY        0x4000	/* 1 = variable was set explicitly */
108 
109 /* these flags are static, don't change them when value is changed */
110 #define	VF_DONTTOUCH    (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
111 
112 typedef struct walker_list {
113 	char *end;
114 	char *cur;
115 	struct walker_list *prev;
116 	char wbuf[1];
117 } walker_list;
118 
119 /* Variable */
120 typedef struct var_s {
121 	unsigned type;            /* flags */
122 	char *string;
123 	double number;
124 	union {
125 		int aidx;               /* func arg idx (for compilation stage) */
126 		struct xhash_s *array;  /* array ptr */
127 		struct var_s *parent;   /* for func args, ptr to actual parameter */
128 		walker_list *walker;    /* list of array elements (for..in) */
129 	} x;
130 } var;
131 
132 /* Node chain (pattern-action chain, BEGIN, END, function bodies) */
133 typedef struct chain_s {
134 	struct node_s *first;
135 	struct node_s *last;
136 	const char *programname;
137 } chain;
138 
139 /* Function */
140 typedef struct func_s {
141 	unsigned nargs;
142 	smallint defined;
143 	struct chain_s body;
144 } func;
145 
146 /* I/O stream */
147 typedef struct rstream_s {
148 	FILE *F;
149 	char *buffer;
150 	int adv;
151 	int size;
152 	int pos;
153 	smallint is_pipe;
154 } rstream;
155 
156 typedef struct hash_item_s {
157 	union {
158 		struct var_s v;         /* variable/array hash */
159 		struct rstream_s rs;    /* redirect streams hash */
160 		struct func_s f;        /* functions hash */
161 	} data;
162 	struct hash_item_s *next;       /* next in chain */
163 	char name[1];                   /* really it's longer */
164 } hash_item;
165 
166 typedef struct xhash_s {
167 	unsigned nel;           /* num of elements */
168 	unsigned csize;         /* current hash size */
169 	unsigned nprime;        /* next hash size in PRIMES[] */
170 	unsigned glen;          /* summary length of item names */
171 	struct hash_item_s **items;
172 } xhash;
173 
174 /* Tree node */
175 typedef struct node_s {
176 	uint32_t info;
177 	unsigned lineno;
178 	union {
179 		struct node_s *n;
180 		var *v;
181 		int aidx;
182 		const char *new_progname;
183 		regex_t *re;
184 	} l;
185 	union {
186 		struct node_s *n;
187 		regex_t *ire;
188 		func *f;
189 	} r;
190 	union {
191 		struct node_s *n;
192 	} a;
193 } node;
194 
195 typedef struct tsplitter_s {
196 	node n;
197 	regex_t re[2];
198 } tsplitter;
199 
200 /* simple token classes */
201 /* order and hex values are very important!!!  See next_token() */
202 #define TC_LPAREN       (1 << 0)        /* ( */
203 #define TC_RPAREN       (1 << 1)        /* ) */
204 #define TC_REGEXP       (1 << 2)        /* /.../ */
205 #define TC_OUTRDR       (1 << 3)        /* | > >> */
206 #define TC_UOPPOST      (1 << 4)        /* unary postfix operator ++ -- */
207 #define TC_UOPPRE1      (1 << 5)        /* unary prefix operator ++ -- $ */
208 #define TC_BINOPX       (1 << 6)        /* two-opnd operator */
209 #define TC_IN           (1 << 7)        /* 'in' */
210 #define TC_COMMA        (1 << 8)        /* , */
211 #define TC_PIPE         (1 << 9)        /* input redirection pipe | */
212 #define TC_UOPPRE2      (1 << 10)       /* unary prefix operator + - ! */
213 #define TC_ARRTERM      (1 << 11)       /* ] */
214 #define TC_LBRACE       (1 << 12)       /* { */
215 #define TC_RBRACE       (1 << 13)       /* } */
216 #define TC_SEMICOL      (1 << 14)       /* ; */
217 #define TC_NEWLINE      (1 << 15)
218 #define TC_STATX        (1 << 16)       /* ctl statement (for, next...) */
219 #define TC_WHILE        (1 << 17)       /* 'while' */
220 #define TC_ELSE         (1 << 18)       /* 'else' */
221 #define TC_BUILTIN      (1 << 19)
222 /* This costs ~50 bytes of code.
223  * A separate class to support deprecated "length" form. If we don't need that
224  * (i.e. if we demand that only "length()" with () is valid), then TC_LENGTH
225  * can be merged with TC_BUILTIN:
226  */
227 #define TC_LENGTH       (1 << 20)       /* 'length' */
228 #define TC_GETLINE      (1 << 21)       /* 'getline' */
229 #define TC_FUNCDECL     (1 << 22)       /* 'function' 'func' */
230 #define TC_BEGIN        (1 << 23)       /* 'BEGIN' */
231 #define TC_END          (1 << 24)       /* 'END' */
232 #define TC_EOF          (1 << 25)
233 #define TC_VARIABLE     (1 << 26)       /* name */
234 #define TC_ARRAY        (1 << 27)       /* name[ */
235 #define TC_FUNCTION     (1 << 28)       /* name( */
236 #define TC_STRING       (1 << 29)       /* "..." */
237 #define TC_NUMBER       (1 << 30)
238 
239 #ifndef debug_parse_print_tc
debug_parse_print_tc(uint32_t n)240 static void debug_parse_print_tc(uint32_t n)
241 {
242 	if (n & TC_LPAREN  ) debug_printf_parse(" LPAREN"  );
243 	if (n & TC_RPAREN  ) debug_printf_parse(" RPAREN"  );
244 	if (n & TC_REGEXP  ) debug_printf_parse(" REGEXP"  );
245 	if (n & TC_OUTRDR  ) debug_printf_parse(" OUTRDR"  );
246 	if (n & TC_UOPPOST ) debug_printf_parse(" UOPPOST" );
247 	if (n & TC_UOPPRE1 ) debug_printf_parse(" UOPPRE1" );
248 	if (n & TC_BINOPX  ) debug_printf_parse(" BINOPX"  );
249 	if (n & TC_IN      ) debug_printf_parse(" IN"      );
250 	if (n & TC_COMMA   ) debug_printf_parse(" COMMA"   );
251 	if (n & TC_PIPE    ) debug_printf_parse(" PIPE"    );
252 	if (n & TC_UOPPRE2 ) debug_printf_parse(" UOPPRE2" );
253 	if (n & TC_ARRTERM ) debug_printf_parse(" ARRTERM" );
254 	if (n & TC_LBRACE  ) debug_printf_parse(" LBRACE"  );
255 	if (n & TC_RBRACE  ) debug_printf_parse(" RBRACE"  );
256 	if (n & TC_SEMICOL ) debug_printf_parse(" SEMICOL" );
257 	if (n & TC_NEWLINE ) debug_printf_parse(" NEWLINE" );
258 	if (n & TC_STATX   ) debug_printf_parse(" STATX"   );
259 	if (n & TC_WHILE   ) debug_printf_parse(" WHILE"   );
260 	if (n & TC_ELSE    ) debug_printf_parse(" ELSE"    );
261 	if (n & TC_BUILTIN ) debug_printf_parse(" BUILTIN" );
262 	if (n & TC_LENGTH  ) debug_printf_parse(" LENGTH"  );
263 	if (n & TC_GETLINE ) debug_printf_parse(" GETLINE" );
264 	if (n & TC_FUNCDECL) debug_printf_parse(" FUNCDECL");
265 	if (n & TC_BEGIN   ) debug_printf_parse(" BEGIN"   );
266 	if (n & TC_END     ) debug_printf_parse(" END"     );
267 	if (n & TC_EOF     ) debug_printf_parse(" EOF"     );
268 	if (n & TC_VARIABLE) debug_printf_parse(" VARIABLE");
269 	if (n & TC_ARRAY   ) debug_printf_parse(" ARRAY"   );
270 	if (n & TC_FUNCTION) debug_printf_parse(" FUNCTION");
271 	if (n & TC_STRING  ) debug_printf_parse(" STRING"  );
272 	if (n & TC_NUMBER  ) debug_printf_parse(" NUMBER"  );
273 }
274 #endif
275 
276 /* combined token classes ("token [class] sets") */
277 #define	TS_UOPPRE   (TC_UOPPRE1 | TC_UOPPRE2)
278 
279 #define	TS_BINOP    (TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
280 //#define TS_UNARYOP (TS_UOPPRE | TC_UOPPOST)
281 #define	TS_OPERAND  (TC_VARIABLE | TC_ARRAY | TC_FUNCTION \
282                     | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
283                     | TC_LPAREN | TC_STRING | TC_NUMBER)
284 
285 #define	TS_LVALUE   (TC_VARIABLE | TC_ARRAY)
286 #define	TS_STATEMNT (TC_STATX | TC_WHILE)
287 
288 /* word tokens, cannot mean something else if not expected */
289 #define	TS_WORD     (TC_IN | TS_STATEMNT | TC_ELSE \
290                     | TC_BUILTIN | TC_LENGTH | TC_GETLINE \
291                     | TC_FUNCDECL | TC_BEGIN | TC_END)
292 
293 /* discard newlines after these */
294 #define	TS_NOTERM   (TS_BINOP | TC_COMMA | TC_LBRACE | TC_RBRACE \
295                     | TC_SEMICOL | TC_NEWLINE)
296 
297 /* what can expression begin with */
298 #define	TS_OPSEQ    (TS_OPERAND | TS_UOPPRE | TC_REGEXP)
299 /* what can group begin with */
300 #define	TS_GRPSEQ   (TS_OPSEQ | TS_STATEMNT \
301                     | TC_SEMICOL | TC_NEWLINE | TC_LBRACE)
302 
303 /* if previous token class is CONCAT_L and next is CONCAT_R, concatenation */
304 /* operator is inserted between them */
305 #define	TS_CONCAT_L (TC_VARIABLE | TC_ARRTERM | TC_RPAREN \
306                    | TC_STRING | TC_NUMBER | TC_UOPPOST \
307                    | TC_LENGTH)
308 #define	TS_CONCAT_R (TS_OPERAND | TS_UOPPRE)
309 
310 #define	OF_RES1     0x010000
311 #define	OF_RES2     0x020000
312 #define	OF_STR1     0x040000
313 #define	OF_STR2     0x080000
314 #define	OF_NUM1     0x100000
315 #define	OF_CHECKED  0x200000
316 #define	OF_REQUIRED 0x400000
317 
318 /* combined operator flags */
319 #define	xx	0
320 #define	xV	OF_RES2
321 #define	xS	(OF_RES2 | OF_STR2)
322 #define	Vx	OF_RES1
323 #define	Rx	OF_REQUIRED
324 #define	VV	(OF_RES1 | OF_RES2)
325 #define	Nx	(OF_RES1 | OF_NUM1)
326 #define	NV	(OF_RES1 | OF_NUM1 | OF_RES2)
327 #define	Sx	(OF_RES1 | OF_STR1)
328 #define	SV	(OF_RES1 | OF_STR1 | OF_RES2)
329 #define	SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)
330 
331 #define	OPCLSMASK 0xFF00
332 #define	OPNMASK   0x007F
333 
334 /* operator priority is stored in the highest byte (even: r->l, odd: l->r grouping)
335  * (for builtins it has a different meaning)
336  */
337 #undef P
338 #undef PRIMASK
339 #undef PRIMASK2
340 #define P(x)      (x << 24)
341 #define PRIMASK   0x7F000000
342 #define PRIMASK2  0x7E000000
343 
344 /* Operation classes */
345 #define	SHIFT_TIL_THIS	0x0600
346 #define	RECUR_FROM_THIS	0x1000
347 enum {
348 	OC_DELETE = 0x0100,     OC_EXEC = 0x0200,       OC_NEWSOURCE = 0x0300,
349 	OC_PRINT = 0x0400,      OC_PRINTF = 0x0500,     OC_WALKINIT = 0x0600,
350 
351 	OC_BR = 0x0700,         OC_BREAK = 0x0800,      OC_CONTINUE = 0x0900,
352 	OC_EXIT = 0x0a00,       OC_NEXT = 0x0b00,       OC_NEXTFILE = 0x0c00,
353 	OC_TEST = 0x0d00,       OC_WALKNEXT = 0x0e00,
354 
355 	OC_BINARY = 0x1000,     OC_BUILTIN = 0x1100,    OC_COLON = 0x1200,
356 	OC_COMMA = 0x1300,      OC_COMPARE = 0x1400,    OC_CONCAT = 0x1500,
357 	OC_FBLTIN = 0x1600,     OC_FIELD = 0x1700,      OC_FNARG = 0x1800,
358 	OC_FUNC = 0x1900,       OC_GETLINE = 0x1a00,    OC_IN = 0x1b00,
359 	OC_LAND = 0x1c00,       OC_LOR = 0x1d00,        OC_MATCH = 0x1e00,
360 	OC_MOVE = 0x1f00,       OC_PGETLINE = 0x2000,   OC_REGEXP = 0x2100,
361 	OC_REPLACE = 0x2200,    OC_RETURN = 0x2300,     OC_SPRINTF = 0x2400,
362 	OC_TERNARY = 0x2500,    OC_UNARY = 0x2600,      OC_VAR = 0x2700,
363 	OC_DONE = 0x2800,
364 
365 	ST_IF = 0x3000,         ST_DO = 0x3100,         ST_FOR = 0x3200,
366 	ST_WHILE = 0x3300
367 };
368 
369 /* simple builtins */
370 enum {
371 	F_in,	F_rn,	F_co,	F_ex,	F_lg,	F_si,	F_sq,	F_sr,
372 	F_ti,	F_le,	F_sy,	F_ff,	F_cl
373 };
374 
375 /* builtins */
376 enum {
377 	B_a2,	B_ix,	B_ma,	B_sp,	B_ss,	B_ti,   B_mt,	B_lo,	B_up,
378 	B_ge,	B_gs,	B_su,
379 	B_an,	B_co,	B_ls,	B_or,	B_rs,	B_xo,
380 };
381 
382 /* tokens and their corresponding info values */
383 
384 #define NTC     "\377"  /* switch to next token class (tc<<1) */
385 #define NTCC    '\377'
386 
387 static const char tokenlist[] ALIGN1 =
388 	"\1("         NTC                                   /* TC_LPAREN */
389 	"\1)"         NTC                                   /* TC_RPAREN */
390 	"\1/"         NTC                                   /* TC_REGEXP */
391 	"\2>>"        "\1>"         "\1|"       NTC         /* TC_OUTRDR */
392 	"\2++"        "\2--"        NTC                     /* TC_UOPPOST */
393 	"\2++"        "\2--"        "\1$"       NTC         /* TC_UOPPRE1 */
394 	"\2=="        "\1="         "\2+="      "\2-="      /* TC_BINOPX */
395 	"\2*="        "\2/="        "\2%="      "\2^="
396 	"\1+"         "\1-"         "\3**="     "\2**"
397 	"\1/"         "\1%"         "\1^"       "\1*"
398 	"\2!="        "\2>="        "\2<="      "\1>"
399 	"\1<"         "\2!~"        "\1~"       "\2&&"
400 	"\2||"        "\1?"         "\1:"       NTC
401 	"\2in"        NTC                                   /* TC_IN */
402 	"\1,"         NTC                                   /* TC_COMMA */
403 	"\1|"         NTC                                   /* TC_PIPE */
404 	"\1+"         "\1-"         "\1!"       NTC         /* TC_UOPPRE2 */
405 	"\1]"         NTC                                   /* TC_ARRTERM */
406 	"\1{"         NTC                                   /* TC_LBRACE */
407 	"\1}"         NTC                                   /* TC_RBRACE */
408 	"\1;"         NTC                                   /* TC_SEMICOL */
409 	"\1\n"        NTC                                   /* TC_NEWLINE */
410 	"\2if"        "\2do"        "\3for"     "\5break"   /* TC_STATX */
411 	"\10continue" "\6delete"    "\5print"
412 	"\6printf"    "\4next"      "\10nextfile"
413 	"\6return"    "\4exit"      NTC
414 	"\5while"     NTC                                   /* TC_WHILE */
415 	"\4else"      NTC                                   /* TC_ELSE */
416 	"\3and"       "\5compl"     "\6lshift"  "\2or"      /* TC_BUILTIN */
417 	"\6rshift"    "\3xor"
418 	"\5close"     "\6system"    "\6fflush"  "\5atan2"
419 	"\3cos"       "\3exp"       "\3int"     "\3log"
420 	"\4rand"      "\3sin"       "\4sqrt"    "\5srand"
421 	"\6gensub"    "\4gsub"      "\5index"   /* "\6length" was here */
422 	"\5match"     "\5split"     "\7sprintf" "\3sub"
423 	"\6substr"    "\7systime"   "\10strftime" "\6mktime"
424 	"\7tolower"   "\7toupper"   NTC
425 	"\6length"    NTC                                   /* TC_LENGTH */
426 	"\7getline"   NTC                                   /* TC_GETLINE */
427 	"\4func"      "\10function" NTC                     /* TC_FUNCDECL */
428 	"\5BEGIN"     NTC                                   /* TC_BEGIN */
429 	"\3END"                                             /* TC_END */
430 	/* compiler adds trailing "\0" */
431 	;
432 
433 static const uint32_t tokeninfo[] ALIGN4 = {
434 	0,
435 	0,
436 #define TI_REGEXP OC_REGEXP
437 	TI_REGEXP,
438 	xS|'a',                  xS|'w',                  xS|'|',
439 	OC_UNARY|xV|P(9)|'p',    OC_UNARY|xV|P(9)|'m',
440 #define TI_PREINC (OC_UNARY|xV|P(9)|'P')
441 #define TI_PREDEC (OC_UNARY|xV|P(9)|'M')
442 	TI_PREINC,               TI_PREDEC,               OC_FIELD|xV|P(5),
443 	OC_COMPARE|VV|P(39)|5,   OC_MOVE|VV|P(74),        OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
444 	OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/', OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
445 	OC_BINARY|NV|P(29)|'+',  OC_BINARY|NV|P(29)|'-',  OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
446 	OC_BINARY|NV|P(25)|'/',  OC_BINARY|NV|P(25)|'%',  OC_BINARY|NV|P(15)|'&',  OC_BINARY|NV|P(25)|'*',
447 	OC_COMPARE|VV|P(39)|4,   OC_COMPARE|VV|P(39)|3,   OC_COMPARE|VV|P(39)|0,   OC_COMPARE|VV|P(39)|1,
448 #define TI_LESS     (OC_COMPARE|VV|P(39)|2)
449 	TI_LESS,                 OC_MATCH|Sx|P(45)|'!',   OC_MATCH|Sx|P(45)|'~',   OC_LAND|Vx|P(55),
450 #define TI_TERNARY  (OC_TERNARY|Vx|P(64)|'?')
451 #define TI_COLON    (OC_COLON|xx|P(67)|':')
452 	OC_LOR|Vx|P(59),         TI_TERNARY,              TI_COLON,
453 #define TI_IN       (OC_IN|SV|P(49))
454 	TI_IN,
455 #define TI_COMMA    (OC_COMMA|SS|P(80))
456 	TI_COMMA,
457 #define TI_PGETLINE (OC_PGETLINE|SV|P(37))
458 	TI_PGETLINE,
459 	OC_UNARY|xV|P(19)|'+',   OC_UNARY|xV|P(19)|'-',   OC_UNARY|xV|P(19)|'!',
460 	0, /* ] */
461 	0,
462 	0,
463 	0,
464 	0, /* \n */
465 	ST_IF,        ST_DO,        ST_FOR,      OC_BREAK,
466 	OC_CONTINUE,  OC_DELETE|Rx, OC_PRINT,
467 	OC_PRINTF,    OC_NEXT,      OC_NEXTFILE,
468 	OC_RETURN|Vx, OC_EXIT|Nx,
469 	ST_WHILE,
470 	0, /* else */
471 // OC_B's are builtins with enforced minimum number of arguments (two upper bits).
472 //  Highest byte bit pattern: nn s3s2s1 v3v2v1
473 //  nn - min. number of args, sN - resolve Nth arg to string, vN - resolve to var
474 // OC_F's are builtins with zero or one argument.
475 //  |Rx| enforces that arg is present for: system, close, cos, sin, exp, int, log, sqrt
476 //  Check for no args is present in builtins' code (not in this table): rand, systime
477 //  Have one _optional_ arg: fflush, srand, length
478 #define OC_B   OC_BUILTIN
479 #define OC_F   OC_FBLTIN
480 #define A1     P(0x40) /*one arg*/
481 #define A2     P(0x80) /*two args*/
482 #define A3     P(0xc0) /*three args*/
483 #define __v    P(1)
484 #define _vv    P(3)
485 #define __s__v P(9)
486 #define __s_vv P(0x0b)
487 #define __svvv P(0x0f)
488 #define _ss_vv P(0x1b)
489 #define _s_vv_ P(0x16)
490 #define ss_vv_ P(0x36)
491 	OC_B|B_an|_vv|A2,   OC_B|B_co|__v|A1,   OC_B|B_ls|_vv|A2,   OC_B|B_or|_vv|A2,   // and    compl   lshift   or
492 	OC_B|B_rs|_vv|A2,   OC_B|B_xo|_vv|A2,                                           // rshift xor
493 	OC_F|F_cl|Sx|Rx,    OC_F|F_sy|Sx|Rx,    OC_F|F_ff|Sx,       OC_B|B_a2|_vv|A2,   // close  system  fflush   atan2
494 	OC_F|F_co|Nx|Rx,    OC_F|F_ex|Nx|Rx,    OC_F|F_in|Nx|Rx,    OC_F|F_lg|Nx|Rx,    // cos    exp     int      log
495 	OC_F|F_rn,          OC_F|F_si|Nx|Rx,    OC_F|F_sq|Nx|Rx,    OC_F|F_sr|Nx,       // rand   sin     sqrt     srand
496 	OC_B|B_ge|_s_vv_|A3,OC_B|B_gs|ss_vv_|A2,OC_B|B_ix|_ss_vv|A2,                    // gensub gsub    index  /*length was here*/
497 	OC_B|B_ma|__s__v|A2,OC_B|B_sp|__s_vv|A2,OC_SPRINTF,         OC_B|B_su|ss_vv_|A2,// match  split   sprintf  sub
498 	OC_B|B_ss|__svvv|A2,OC_F|F_ti,          OC_B|B_ti|__s_vv,   OC_B|B_mt|__s_vv,   // substr systime strftime mktime
499 	OC_B|B_lo|__s__v|A1,OC_B|B_up|__s__v|A1,                                        // tolower toupper
500 	OC_F|F_le|Sx,   // length
501 	OC_GETLINE|SV,  // getline
502 	0, 0, // func function
503 	0, // BEGIN
504 	0  // END
505 #undef A1
506 #undef A2
507 #undef A3
508 #undef OC_B
509 #undef OC_F
510 };
511 
512 /* internal variable names and their initial values       */
513 /* asterisk marks SPECIAL vars; $ is just no-named Field0 */
514 enum {
515 	CONVFMT,    OFMT,       FS,         OFS,
516 	ORS,        RS,         RT,         FILENAME,
517 	SUBSEP,     F0,         ARGIND,     ARGC,
518 	ARGV,       ERRNO,      FNR,        NR,
519 	NF,         IGNORECASE, ENVIRON,    NUM_INTERNAL_VARS
520 };
521 
522 static const char vNames[] ALIGN1 =
523 	"CONVFMT\0" "OFMT\0"    "FS\0*"     "OFS\0"
524 	"ORS\0"     "RS\0*"     "RT\0"      "FILENAME\0"
525 	"SUBSEP\0"  "$\0*"      "ARGIND\0"  "ARGC\0"
526 	"ARGV\0"    "ERRNO\0"   "FNR\0"     "NR\0"
527 	"NF\0*"     "IGNORECASE\0*" "ENVIRON\0" "\0";
528 
529 static const char vValues[] ALIGN1 =
530 	"%.6g\0"    "%.6g\0"    " \0"       " \0"
531 	"\n\0"      "\n\0"      "\0"        "\0"
532 	"\034\0"    "\0"        "\377";
533 
534 /* hash size may grow to these values */
535 #define FIRST_PRIME 61
536 static const uint16_t PRIMES[] ALIGN2 = { 251, 1021, 4093, 16381, 65521 };
537 
538 
539 /* Globals. Split in two parts so that first one is addressed
540  * with (mostly short) negative offsets.
541  * NB: it's unsafe to put members of type "double"
542  * into globals2 (gcc may fail to align them).
543  */
544 struct globals {
545 	double t_double;
546 	chain beginseq, mainseq, endseq;
547 	chain *seq;
548 	node *break_ptr, *continue_ptr;
549 	rstream *iF;
550 	xhash *ahash;  /* argument names, used only while parsing function bodies */
551 	xhash *fnhash; /* function names, used only in parsing stage */
552 	xhash *vhash;  /* variables and arrays */
553 	//xhash *fdhash; /* file objects, used only in execution stage */
554 	//we are reusing ahash as fdhash, via define (see later)
555 	const char *g_progname;
556 	int g_lineno;
557 	int nfields;
558 	int maxfields; /* used in fsrealloc() only */
559 	var *Fields;
560 	char *g_pos;
561 	char g_saved_ch;
562 	smallint icase;
563 	smallint exiting;
564 	smallint nextrec;
565 	smallint nextfile;
566 	smallint is_f0_split;
567 	smallint t_rollback;
568 
569 	/* former statics from various functions */
570 	smallint next_token__concat_inserted;
571 	uint32_t next_token__save_tclass;
572 	uint32_t next_token__save_info;
573 };
574 struct globals2 {
575 	uint32_t t_info; /* often used */
576 	uint32_t t_tclass;
577 	char *t_string;
578 	int t_lineno;
579 
580 	var *intvar[NUM_INTERNAL_VARS]; /* often used */
581 
582 	/* former statics from various functions */
583 	char *split_f0__fstrings;
584 
585 	rstream next_input_file__rsm;
586 	smallint next_input_file__files_happen;
587 
588 	smalluint exitcode;
589 
590 	unsigned evaluate__seed;
591 	var *evaluate__fnargs;
592 	regex_t evaluate__sreg;
593 
594 	var ptest__tmpvar;
595 	var awk_printf__tmpvar;
596 	var as_regex__tmpvar;
597 	var exit__tmpvar;
598 	var main__tmpvar;
599 
600 	tsplitter exec_builtin__tspl;
601 
602 	/* biggest and least used members go last */
603 	tsplitter fsplitter, rsplitter;
604 
605 	char g_buf[MAXVARFMT + 1];
606 };
607 #define G1 (ptr_to_globals[-1])
608 #define G (*(struct globals2 *)ptr_to_globals)
609 /* For debug. nm --size-sort awk.o | grep -vi ' [tr] ' */
610 //char G1size[sizeof(G1)]; // 0x70
611 //char Gsize[sizeof(G)]; // 0x2f8
612 /* Trying to keep most of members accessible with short offsets: */
613 //char Gofs_seed[offsetof(struct globals2, evaluate__seed)]; // 0x7c
614 #define t_double     (G1.t_double    )
615 #define beginseq     (G1.beginseq    )
616 #define mainseq      (G1.mainseq     )
617 #define endseq       (G1.endseq      )
618 #define seq          (G1.seq         )
619 #define break_ptr    (G1.break_ptr   )
620 #define continue_ptr (G1.continue_ptr)
621 #define iF           (G1.iF          )
622 #define ahash        (G1.ahash       )
623 #define fnhash       (G1.fnhash      )
624 #define vhash        (G1.vhash       )
625 #define fdhash       ahash
626 //^^^^^^^^^^^^^^^^^^ ahash is cleared after every function parsing,
627 // and ends up empty after parsing phase. Thus, we can simply reuse it
628 // for fdhash in execution stage.
629 #define g_progname   (G1.g_progname  )
630 #define g_lineno     (G1.g_lineno    )
631 #define nfields      (G1.nfields     )
632 #define maxfields    (G1.maxfields   )
633 #define Fields       (G1.Fields      )
634 #define g_pos        (G1.g_pos       )
635 #define g_saved_ch   (G1.g_saved_ch  )
636 #define icase        (G1.icase       )
637 #define exiting      (G1.exiting     )
638 #define nextrec      (G1.nextrec     )
639 #define nextfile     (G1.nextfile    )
640 #define is_f0_split  (G1.is_f0_split )
641 #define t_rollback   (G1.t_rollback  )
642 #define t_info       (G.t_info      )
643 #define t_tclass     (G.t_tclass    )
644 #define t_string     (G.t_string    )
645 #define t_lineno     (G.t_lineno    )
646 #define intvar       (G.intvar      )
647 #define fsplitter    (G.fsplitter   )
648 #define rsplitter    (G.rsplitter   )
649 #define g_buf        (G.g_buf       )
650 #define INIT_G() do { \
651 	SET_PTR_TO_GLOBALS((char*)xzalloc(sizeof(G1)+sizeof(G)) + sizeof(G1)); \
652 	t_tclass = TC_NEWLINE; \
653 	G.evaluate__seed = 1; \
654 } while (0)
655 
656 static const char EMSG_UNEXP_EOS[] ALIGN1 = "Unexpected end of string";
657 static const char EMSG_UNEXP_TOKEN[] ALIGN1 = "Unexpected token";
658 static const char EMSG_DIV_BY_ZERO[] ALIGN1 = "Division by zero";
659 static const char EMSG_INV_FMT[] ALIGN1 = "Invalid format specifier";
660 static const char EMSG_TOO_FEW_ARGS[] ALIGN1 = "Too few arguments";
661 static const char EMSG_NOT_ARRAY[] ALIGN1 = "Not an array";
662 static const char EMSG_POSSIBLE_ERROR[] ALIGN1 = "Possible syntax error";
663 static const char EMSG_UNDEF_FUNC[] ALIGN1 = "Call to undefined function";
664 static const char EMSG_NO_MATH[] ALIGN1 = "Math support is not compiled in";
665 static const char EMSG_NEGATIVE_FIELD[] ALIGN1 = "Access to negative field";
666 
667 static int awk_exit(void) NORETURN;
668 
/*
 * Report a fatal error and terminate: passes "PROGNAME:LINENO: MESSAGE"
 * (built from the current g_progname and g_lineno) to
 * bb_error_msg_and_die(). Declared NORETURN, so callers need no code
 * after the call.
 */
669 static void syntax_error(const char *message) NORETURN;
syntax_error(const char * message)670 static void syntax_error(const char *message)
671 {
672 	bb_error_msg_and_die("%s:%i: %s", g_progname, g_lineno, message);
673 }
674 
675 /* ---- hash stuff ---- */
676 
/*
 * Hash a NUL-terminated name: starting from 0, each character updates
 * the value as h = h * 63 + ch (unsigned arithmetic, wraps on overflow).
 * The result is not reduced; callers take it modulo the table size.
 */
static unsigned hashidx(const char *name)
{
	unsigned h = 0;
	const char *p;

	for (p = name; *p != '\0'; p++)
		h = h * 63 + *p;
	return h;
}
685 
686 /* create new hash */
hash_init(void)687 static xhash *hash_init(void)
688 {
689 	xhash *newhash;
690 
691 	newhash = xzalloc(sizeof(*newhash));
692 	newhash->csize = FIRST_PRIME;
693 	newhash->items = xzalloc(FIRST_PRIME * sizeof(newhash->items[0]));
694 
695 	return newhash;
696 }
697 
/*
 * Remove and free every item of the hash, leaving an empty hash with
 * the same number of buckets. Element count and name-length total are
 * reset to zero; the bucket array itself is kept.
 */
static void hash_clear(xhash *hash)
{
	unsigned bucket;

	for (bucket = 0; bucket < hash->csize; bucket++) {
		hash_item *cur = hash->items[bucket];

		while (cur != NULL) {
			hash_item *next = cur->next;

//FIXME: this assumes that it's a hash of *variables*:
			free(cur->data.v.string);
			free(cur);
			cur = next;
		}
		hash->items[bucket] = NULL;
	}
	hash->nel = 0;
	hash->glen = 0;
}
716 
717 #if 0 //UNUSED
718 static void hash_free(xhash *hash)
719 {
720 	hash_clear(hash);
721 	free(hash->items);
722 	free(hash);
723 }
724 #endif
725 
726 /* find item in hash, return ptr to data, NULL if not found */
hash_search3(xhash * hash,const char * name,unsigned idx)727 static NOINLINE void *hash_search3(xhash *hash, const char *name, unsigned idx)
728 {
729 	hash_item *hi;
730 
731 	hi = hash->items[idx % hash->csize];
732 	while (hi) {
733 		if (strcmp(hi->name, name) == 0)
734 			return &hi->data;
735 		hi = hi->next;
736 	}
737 	return NULL;
738 }
739 
/*
 * Look up name in hash without adding it: hashes the name and defers
 * to hash_search3(). Returns ptr to data, NULL if not found.
 */
static void *hash_search(xhash *hash, const char *name)
{
	unsigned idx = hashidx(name);

	return hash_search3(hash, name, idx);
}
744 
745 /* grow hash if it becomes too big */
hash_rebuild(xhash * hash)746 static void hash_rebuild(xhash *hash)
747 {
748 	unsigned newsize, i, idx;
749 	hash_item **newitems, *hi, *thi;
750 
751 	if (hash->nprime == ARRAY_SIZE(PRIMES))
752 		return;
753 
754 	newsize = PRIMES[hash->nprime++];
755 	newitems = xzalloc(newsize * sizeof(newitems[0]));
756 
757 	for (i = 0; i < hash->csize; i++) {
758 		hi = hash->items[i];
759 		while (hi) {
760 			thi = hi;
761 			hi = thi->next;
762 			idx = hashidx(thi->name) % newsize;
763 			thi->next = newitems[idx];
764 			newitems[idx] = thi;
765 		}
766 	}
767 
768 	free(hash->items);
769 	hash->csize = newsize;
770 	hash->items = newitems;
771 }
772 
773 /* find item in hash, add it if necessary. Return ptr to data */
hash_find(xhash * hash,const char * name)774 static void *hash_find(xhash *hash, const char *name)
775 {
776 	hash_item *hi;
777 	unsigned idx;
778 	int l;
779 
780 	idx = hashidx(name);
781 	hi = hash_search3(hash, name, idx);
782 	if (!hi) {
783 		if (++hash->nel > hash->csize * 8)
784 			hash_rebuild(hash);
785 
786 		l = strlen(name) + 1;
787 		hi = xzalloc(sizeof(*hi) + l);
788 		strcpy(hi->name, name);
789 
790 		idx = idx % hash->csize;
791 		hi->next = hash->items[idx];
792 		hash->items[idx] = hi;
793 		hash->glen += l;
794 	}
795 	return &hi->data;
796 }
797 
798 #define findvar(hash, name) ((var*)    hash_find((hash), (name)))
799 #define newvar(name)        ((var*)    hash_find(vhash, (name)))
800 #define newfile(name)       ((rstream*)hash_find(fdhash, (name)))
801 #define newfunc(name)       ((func*)   hash_find(fnhash, (name)))
802 
/*
 * Unlink and free the item called name, if present, and update the
 * element count and name-length total. Only the item itself is freed
 * here; nothing its data points to (e.g. a variable's string) is
 * released by this function.
 */
static void hash_remove(xhash *hash, const char *name)
{
	hash_item **link;

	/* link always addresses the pointer that leads to the current item */
	for (link = &hash->items[hashidx(name) % hash->csize];
	     *link != NULL;
	     link = &(*link)->next
	) {
		hash_item *victim = *link;

		if (strcmp(victim->name, name) != 0)
			continue;

		hash->glen -= (strlen(name) + 1);
		hash->nel--;
		*link = victim->next;
		free(victim);
		return;
	}
}
820 
821 /* ------ some useful functions ------ */
822 
skip_spaces(char * p)823 static char *skip_spaces(char *p)
824 {
825 	for (;;) {
826 		if (*p == '\\' && p[1] == '\n') {
827 			p++;
828 			t_lineno++;
829 		} else if (*p != ' ' && *p != '\t') {
830 			break;
831 		}
832 		p++;
833 	}
834 	return p;
835 }
836 
/*
 * Step over one NUL-terminated word: returns the old value of *s and
 * moves *s to the character just past the word's terminating NUL.
 */
static char *nextword(char **s)
{
	char *word = *s;

	*s = word + strlen(word) + 1;
	return word;
}
847 
/*
 * Fetch the next character from *s, decoding a backslash escape, and
 * advance *s past what was consumed.
 *
 * A backslash is handed to bb_process_escape_sequence(). If that call
 * returns a backslash without moving *s (escape not recognized), the
 * character after the backslash is returned as is, so that e.g. \"
 * yields ". *s is not moved past a NUL that follows the backslash.
 */
static char nextchar(char **s)
{
	char *after;
	char c;

	c = **s;
	after = ++(*s);	/* position right after the first character */
	if (c != '\\')
		return c;

	c = bb_process_escape_sequence((const char**)s);
	/* Example awk statement:
	 * s = "abc\"def"
	 * we must treat \" as "
	 */
	if (c == '\\' && *s == after) { /* unrecognized \z? */
		c = *after; /* yes, fetch z */
		if (c != '\0')
			*s = after + 1; /* advance unless z = NUL */
	}
	return c;
}
867 
/* TODO: merge with strcpy_and_process_escape_sequences()?
 */
/* Decode the escapes in s1, overwriting s1 with the result. Characters are
 * read with nextchar() and written back from the start of the same buffer;
 * the terminating NUL is copied as well. */
static void unescape_string_in_place(char *s1)
{
	char *src = s1;
	char *dst = s1;

	for (;;) {
		char ch = nextchar(&src);

		*dst = ch;
		if (ch == '\0')
			break;
		dst++;
	}
}
876 
isalnum_(int c)877 static ALWAYS_INLINE int isalnum_(int c)
878 {
879 	return (isalnum(c) || c == '_');
880 }
881 
my_strtod(char ** pp)882 static double my_strtod(char **pp)
883 {
884 	char *cp = *pp;
885 	if (ENABLE_DESKTOP && cp[0] == '0') {
886 		/* Might be hex or octal integer: 0x123abc or 07777 */
887 		char c = (cp[1] | 0x20);
888 		if (c == 'x' || isdigit(cp[1])) {
889 			unsigned long long ull = strtoull(cp, pp, 0);
890 			if (c == 'x')
891 				return ull;
892 			c = **pp;
893 			if (!isdigit(c) && c != '.')
894 				return ull;
895 			/* else: it may be a floating number. Examples:
896 			 * 009.123 (*pp points to '9')
897 			 * 000.123 (*pp points to '.')
898 			 * fall through to strtod.
899 			 */
900 		}
901 	}
902 	return strtod(cp, pp);
903 }
904 
905 /* -------- working with variables (set/get/copy/etc) -------- */
906 
fmt_num(const char * format,double n)907 static void fmt_num(const char *format, double n)
908 {
909 	if (n == (long long)n) {
910 		snprintf(g_buf, MAXVARFMT, "%lld", (long long)n);
911 	} else {
912 		const char *s = format;
913 		char c;
914 
915 		do { c = *s; } while (c && *++s);
916 		if (strchr("diouxX", c)) {
917 			snprintf(g_buf, MAXVARFMT, format, (int)n);
918 		} else if (strchr("eEfFgGaA", c)) {
919 			snprintf(g_buf, MAXVARFMT, format, n);
920 		} else {
921 			syntax_error(EMSG_INV_FMT);
922 		}
923 	}
924 }
925 
iamarray(var * a)926 static xhash *iamarray(var *a)
927 {
928 	while (a->type & VF_CHILD)
929 		a = a->x.parent;
930 
931 	if (!(a->type & VF_ARRAY)) {
932 		a->type |= VF_ARRAY;
933 		a->x.array = hash_init();
934 	}
935 	return a->x.array;
936 }
937 
/* Clearing an array is just hash_clear() on its hash table. */
#define clear_array(array) hash_clear(array)
939 
940 /* clear a variable */
clrvar(var * v)941 static var *clrvar(var *v)
942 {
943 	if (!(v->type & VF_FSTR))
944 		free(v->string);
945 
946 	v->type &= VF_DONTTOUCH;
947 	v->type |= VF_DIRTY;
948 	v->string = NULL;
949 	return v;
950 }
951 
952 static void handle_special(var *);
953 
954 /* assign string value to variable */
setvar_p(var * v,char * value)955 static var *setvar_p(var *v, char *value)
956 {
957 	clrvar(v);
958 	v->string = value;
959 	handle_special(v);
960 	return v;
961 }
962 
963 /* same as setvar_p but make a copy of string */
setvar_s(var * v,const char * value)964 static var *setvar_s(var *v, const char *value)
965 {
966 	return setvar_p(v, (value && *value) ? xstrdup(value) : NULL);
967 }
968 
969 /* same as setvar_s but sets USER flag */
setvar_u(var * v,const char * value)970 static var *setvar_u(var *v, const char *value)
971 {
972 	v = setvar_s(v, value);
973 	v->type |= VF_USER;
974 	return v;
975 }
976 
977 /* set array element to user string */
setari_u(var * a,int idx,const char * s)978 static void setari_u(var *a, int idx, const char *s)
979 {
980 	var *v;
981 
982 	v = findvar(iamarray(a), itoa(idx));
983 	setvar_u(v, s);
984 }
985 
986 /* assign numeric value to variable */
setvar_i(var * v,double value)987 static var *setvar_i(var *v, double value)
988 {
989 	clrvar(v);
990 	v->type |= VF_NUMBER;
991 	v->number = value;
992 	handle_special(v);
993 	return v;
994 }
995 
/* Return the string value of V; never NULL (an unset string reads as "").
 * When V has VF_NUMBER set but not VF_CACHED, the number is first formatted
 * with the CONVFMT format, the text is stored in V->string and VF_CACHED
 * is set. */
static const char *getvar_s(var *v)
{
	if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
		const char *convfmt = getvar_s(intvar[CONVFMT]);

		fmt_num(convfmt, v->number);
		v->string = xstrdup(g_buf);
		v->type |= VF_CACHED;
	}
	if (v->string != NULL)
		return v->string;
	return "";
}
1006 
/* Return the numeric value of V.
 * When neither VF_NUMBER nor VF_CACHED is set, the string is converted with
 * my_strtod() (NULL or empty string gives 0), the result is stored in
 * V->number and VF_CACHED is set. During that conversion VF_USER is cleared
 * when the string is NULL/empty, or when anything other than what
 * skip_spaces() skips follows the parsed number. */
static double getvar_i(var *v)
{
	char *s;

	if (v->type & (VF_NUMBER | VF_CACHED)) {
		/* nothing to convert: number is already valid */
		debug_printf_eval("getvar_i: %f\n", v->number);
		return v->number;
	}

	v->number = 0;
	s = v->string;
	if (s == NULL || *s == '\0') {
		debug_printf_eval("getvar_i: '%s'->zero\n", s);
		v->type &= ~VF_USER;
	} else {
		debug_printf_eval("getvar_i: '%s'->", s);
		v->number = my_strtod(&s);
		debug_printf_eval("%f (s:'%s')\n", v->number, s);
		if (v->type & VF_USER) {
//TODO: skip_spaces() also skips backslash+newline, is it intended here?
			s = skip_spaces(s);
			if (*s != '\0')
				v->type &= ~VF_USER;
		}
	}
	v->type |= VF_CACHED;

	debug_printf_eval("getvar_i: %f\n", v->number);
	return v->number;
}
1033 
1034 /* Used for operands of bitwise ops */
getvar_i_int(var * v)1035 static unsigned long getvar_i_int(var *v)
1036 {
1037 	double d = getvar_i(v);
1038 
1039 	/* Casting doubles to longs is undefined for values outside
1040 	 * of target type range. Try to widen it as much as possible */
1041 	if (d >= 0)
1042 		return (unsigned long)d;
1043 	/* Why? Think about d == -4294967295.0 (assuming 32bit longs) */
1044 	return - (long) (unsigned long) (-d);
1045 }
1046 
/* Copy the value of SRC into DEST and return DEST.
 * DEST is cleared first, then receives SRC's type flags (minus VF_DONTTOUCH
 * and VF_FSTR), its number, and an xstrdup() copy of its string if it has
 * one. When DEST and SRC are the same variable nothing is copied.
 * handle_special() is run on DEST in either case. */
static var *copyvar(var *dest, const var *src)
{
	if (dest == src) {
		handle_special(dest);
		return dest;
	}

	clrvar(dest);
	dest->type |= (src->type & ~(VF_DONTTOUCH | VF_FSTR));
	debug_printf_eval("copyvar: number:%f string:'%s'\n", src->number, src->string);
	dest->number = src->number;
	if (src->string != NULL)
		dest->string = xstrdup(src->string);

	handle_special(dest);
	return dest;
}
1060 
/* Add 1 to the numeric value of V, store it back with setvar_i(),
 * and return V. */
static var *incvar(var *v)
{
	double next = getvar_i(v) + 1.0;

	return setvar_i(v, next);
}
1065 
1066 /* return true if v is number or numeric string */
is_numeric(var * v)1067 static int is_numeric(var *v)
1068 {
1069 	getvar_i(v);
1070 	return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
1071 }
1072 
1073 /* return 1 when value of v corresponds to true, 0 otherwise */
istrue(var * v)1074 static int istrue(var *v)
1075 {
1076 	if (is_numeric(v))
1077 		return (v->number != 0);
1078 	return (v->string && v->string[0]);
1079 }
1080 
1081 /* ------- awk program text parsing ------- */
1082 
1083 /* Parse next token pointed by global pos, place results into global t_XYZ variables.
1084  * If token isn't expected, print error message and die.
1085  * Return token class (also store it in t_tclass).
1086  */
next_token(uint32_t expected)1087 static uint32_t next_token(uint32_t expected)
1088 {
1089 #define concat_inserted (G1.next_token__concat_inserted)
1090 #define save_tclass     (G1.next_token__save_tclass)
1091 #define save_info       (G1.next_token__save_info)
1092 
1093 	char *p;
1094 	const char *tl;
1095 	const uint32_t *ti;
1096 	uint32_t tc, last_token_class;
1097 
1098 	last_token_class = t_tclass; /* t_tclass is initialized to TC_NEWLINE */
1099 
1100 	debug_printf_parse("%s() expected(%x):", __func__, expected);
1101 	debug_parse_print_tc(expected);
1102 	debug_printf_parse("\n");
1103 
1104 	if (t_rollback) {
1105 		debug_printf_parse("%s: using rolled-back token\n", __func__);
1106 		t_rollback = FALSE;
1107 	} else if (concat_inserted) {
1108 		debug_printf_parse("%s: using concat-inserted token\n", __func__);
1109 		concat_inserted = FALSE;
1110 		t_tclass = save_tclass;
1111 		t_info = save_info;
1112 	} else {
1113 		p = g_pos;
1114 		if (g_saved_ch != '\0') {
1115 			*p = g_saved_ch;
1116 			g_saved_ch = '\0';
1117 		}
1118  readnext:
1119 		p = skip_spaces(p);
1120 		g_lineno = t_lineno;
1121 		if (*p == '#')
1122 			while (*p != '\n' && *p != '\0')
1123 				p++;
1124 
1125 		if (*p == '\0') {
1126 			tc = TC_EOF;
1127 			debug_printf_parse("%s: token found: TC_EOF\n", __func__);
1128 		} else if (*p == '\"') {
1129 			/* it's a string */
1130 			char *s = t_string = ++p;
1131 			while (*p != '\"') {
1132 				char *pp;
1133 				if (*p == '\0' || *p == '\n')
1134 					syntax_error(EMSG_UNEXP_EOS);
1135 				pp = p;
1136 				*s++ = nextchar(&pp);
1137 				p = pp;
1138 			}
1139 			p++;
1140 			*s = '\0';
1141 			tc = TC_STRING;
1142 			debug_printf_parse("%s: token found:'%s' TC_STRING\n", __func__, t_string);
1143 		} else if ((expected & TC_REGEXP) && *p == '/') {
1144 			/* it's regexp */
1145 			char *s	= t_string = ++p;
1146 			while (*p != '/') {
1147 				if (*p == '\0' || *p == '\n')
1148 					syntax_error(EMSG_UNEXP_EOS);
1149 				*s = *p++;
1150 				if (*s++ == '\\') {
1151 					char *pp = p;
1152 					s[-1] = bb_process_escape_sequence((const char **)&pp);
1153 					if (*p == '\\')
1154 						*s++ = '\\';
1155 					if (pp == p)
1156 						*s++ = *p++;
1157 					else
1158 						p = pp;
1159 				}
1160 			}
1161 			p++;
1162 			*s = '\0';
1163 			tc = TC_REGEXP;
1164 			debug_printf_parse("%s: token found:'%s' TC_REGEXP\n", __func__, t_string);
1165 
1166 		} else if (*p == '.' || isdigit(*p)) {
1167 			/* it's a number */
1168 			char *pp = p;
1169 			t_double = my_strtod(&pp);
1170 			p = pp;
1171 			if (*p == '.')
1172 				syntax_error(EMSG_UNEXP_TOKEN);
1173 			tc = TC_NUMBER;
1174 			debug_printf_parse("%s: token found:%f TC_NUMBER\n", __func__, t_double);
1175 		} else {
1176 			char *end_of_name;
1177 
1178 			if (*p == '\n')
1179 				t_lineno++;
1180 
1181 			/* search for something known */
1182 			tl = tokenlist;
1183 			tc = 0x00000001;
1184 			ti = tokeninfo;
1185 			while (*tl) {
1186 				int l = (unsigned char) *tl++;
1187 				if (l == (unsigned char) NTCC) {
1188 					tc <<= 1;
1189 					continue;
1190 				}
1191 				/* if token class is expected,
1192 				 * token matches,
1193 				 * and it's not a longer word,
1194 				 */
1195 				if ((tc & (expected | TS_WORD | TC_NEWLINE))
1196 				 && strncmp(p, tl, l) == 0
1197 				 && !((tc & TS_WORD) && isalnum_(p[l]))
1198 				) {
1199 					/* then this is what we are looking for */
1200 					t_info = *ti;
1201 					debug_printf_parse("%s: token found:'%.*s' t_info:%x\n", __func__, l, p, t_info);
1202 					p += l;
1203 					goto token_found;
1204 				}
1205 				ti++;
1206 				tl += l;
1207 			}
1208 			/* not a known token */
1209 
1210 			/* is it a name? (var/array/function) */
1211 			if (!isalnum_(*p))
1212 				syntax_error(EMSG_UNEXP_TOKEN); /* no */
1213 			/* yes */
1214 			t_string = p;
1215 			while (isalnum_(*p))
1216 				p++;
1217 			end_of_name = p;
1218 
1219 			if (last_token_class == TC_FUNCDECL)
1220 				/* eat space in "function FUNC (...) {...}" declaration */
1221 				p = skip_spaces(p);
1222 			else if (expected & TC_ARRAY) {
1223 				/* eat space between array name and [ */
1224 				char *s = skip_spaces(p);
1225 				if (*s == '[') /* array ref, not just a name? */
1226 					p = s;
1227 			}
1228 			/* else: do NOT consume whitespace after variable name!
1229 			 * gawk allows definition "function FUNC (p) {...}" - note space,
1230 			 * but disallows the call "FUNC (p)" because it isn't one -
1231 			 * expression "v (a)" should NOT be parsed as TC_FUNCTION:
1232 			 * it is a valid concatenation if "v" is a variable,
1233 			 * not a function name (and type of name is not known at parse time).
1234 			 */
1235 
1236 			if (*p == '(') {
1237 				p++;
1238 				tc = TC_FUNCTION;
1239 				debug_printf_parse("%s: token found:'%s' TC_FUNCTION\n", __func__, t_string);
1240 			} else if (*p == '[') {
1241 				p++;
1242 				tc = TC_ARRAY;
1243 				debug_printf_parse("%s: token found:'%s' TC_ARRAY\n", __func__, t_string);
1244 			} else {
1245 				tc = TC_VARIABLE;
1246 				debug_printf_parse("%s: token found:'%s' TC_VARIABLE\n", __func__, t_string);
1247 				if (end_of_name == p) {
1248 					/* there is no space for trailing NUL in t_string!
1249 					 * We need to save the char we are going to NUL.
1250 					 * (we'll use it in future call to next_token())
1251 					 */
1252 					g_saved_ch = *end_of_name;
1253 // especially pathological example is V="abc"; V.2 - it's V concatenated to .2
1254 // (it evaluates to "abc0.2"). Because of this case, we can't simply cache
1255 // '.' and analyze it later: we also have to *store it back* in next
1256 // next_token(), in order to give my_strtod() the undamaged ".2" string.
1257 				}
1258 			}
1259 			*end_of_name = '\0'; /* terminate t_string */
1260 		}
1261  token_found:
1262 		g_pos = p;
1263 
1264 		/* skipping newlines in some cases */
1265 		if ((last_token_class & TS_NOTERM) && (tc & TC_NEWLINE))
1266 			goto readnext;
1267 
1268 		/* insert concatenation operator when needed */
1269 		debug_printf_parse("%s: concat_inserted if all nonzero: %x %x %x %x\n", __func__,
1270 			(last_token_class & TS_CONCAT_L), (tc & TS_CONCAT_R), (expected & TS_BINOP),
1271 			!(last_token_class == TC_LENGTH && tc == TC_LPAREN));
1272 		if ((last_token_class & TS_CONCAT_L) && (tc & TS_CONCAT_R) && (expected & TS_BINOP)
1273 		 && !(last_token_class == TC_LENGTH && tc == TC_LPAREN) /* but not for "length(..." */
1274 		) {
1275 			concat_inserted = TRUE;
1276 			save_tclass = tc;
1277 			save_info = t_info;
1278 			tc = TC_BINOPX;
1279 			t_info = OC_CONCAT | SS | P(35);
1280 		}
1281 
1282 		t_tclass = tc;
1283 		debug_printf_parse("%s: t_tclass=tc=%x\n", __func__, tc);
1284 	}
1285 	/* Are we ready for this? */
1286 	if (!(t_tclass & expected)) {
1287 		syntax_error((last_token_class & (TC_NEWLINE | TC_EOF)) ?
1288 				EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
1289 	}
1290 
1291 	debug_printf_parse("%s: returning, t_double:%f t_tclass:", __func__, t_double);
1292 	debug_parse_print_tc(t_tclass);
1293 	debug_printf_parse("\n");
1294 
1295 	return t_tclass;
1296 #undef concat_inserted
1297 #undef save_tclass
1298 #undef save_info
1299 }
1300 
/* Push the current token back: sets the t_rollback flag, which next_token()
 * checks (and clears) in order to return the current token once more. */
static ALWAYS_INLINE void rollback_token(void)
{
	t_rollback = TRUE;
}
1305 
new_node(uint32_t info)1306 static node *new_node(uint32_t info)
1307 {
1308 	node *n;
1309 
1310 	n = xzalloc(sizeof(node));
1311 	n->info = info;
1312 	n->lineno = g_lineno;
1313 	return n;
1314 }
1315 
/* Turn node N into a TI_REGEXP node for pattern S.
 * RE must point to storage for two regex_t objects: re[0] is compiled as an
 * extended regex, re[1] as the same pattern with REG_ICASE added. N's l.re
 * and r.ire are pointed at them. */
static void mk_re_node(const char *s, node *n, regex_t *re)
{
	regex_t *icase_re = &re[1];

	n->info = TI_REGEXP;
	n->l.re = re;
	n->r.ire = icase_re;
	xregcomp(re, s, REG_EXTENDED);
	xregcomp(icase_re, s, REG_EXTENDED | REG_ICASE);
}
1324 
1325 static node *parse_expr(uint32_t);
1326 
parse_lrparen_list(void)1327 static node *parse_lrparen_list(void)
1328 {
1329 	next_token(TC_LPAREN);
1330 	return parse_expr(TC_RPAREN);
1331 }
1332 
/* parse expression terminated by given argument, return ptr
 * to built subtree. Terminator is eaten by parse_expr
 *
 * term_tc is a bitmask of token classes that end the expression.
 * Returns sn.r.n, which is NULL if the terminator came first (empty
 * expression).
 *
 * The tree is grown under a stack-allocated sentinel node 'sn'. 'cn' is the
 * node added most recently; each node's a.n points back at the node it was
 * attached under, which is what the priority loop below walks.
 */
static node *parse_expr(uint32_t term_tc)
{
	node sn;
	node *cn = &sn;
	node *vn, *glptr;
	uint32_t tc, expected_tc;
	var *v;

	debug_printf_parse("%s() term_tc(%x):", __func__, term_tc);
	debug_parse_print_tc(term_tc);
	debug_printf_parse("\n");

	/* sentinel: its info is PRIMASK and its a.n is NULL */
	sn.info = PRIMASK;
	sn.r.n = sn.a.n = glptr = NULL;
	expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP | term_tc;

	while (!((tc = next_token(expected_tc)) & term_tc)) {

		/* glptr is set by the TC_GETLINE case below */
		if (glptr && (t_info == TI_LESS)) {
			/* input redirection (<) attached to glptr node */
			debug_printf_parse("%s: input redir\n", __func__);
			cn = glptr->l.n = new_node(OC_CONCAT | SS | P(37));
			cn->a.n = glptr;
			expected_tc = TS_OPERAND | TS_UOPPRE;
			glptr = NULL;
			continue;
		}
		if (tc & (TS_BINOP | TC_UOPPOST)) {
			debug_printf_parse("%s: TS_BINOP | TC_UOPPOST tc:%x\n", __func__, tc);
			/* for binary and postfix-unary operators, jump back over
			 * previous operators with higher priority */
			vn = cn;
			while (((t_info & PRIMASK) > (vn->a.n->info & PRIMASK2))
			    || ((t_info == vn->info) && t_info == TI_COLON)
			) {
				vn = vn->a.n;
				if (!vn->a.n) syntax_error(EMSG_UNEXP_TOKEN);
			}
			if (t_info == TI_TERNARY)
//TODO: why?
				t_info += P(6);
			/* splice the new operator node in between vn and vn's parent */
			cn = vn->a.n->r.n = new_node(t_info);
			cn->a.n = vn->a.n;
			if (tc & TS_BINOP) {
				/* binary: vn becomes the left operand */
				cn->l.n = vn;
//FIXME: this is the place to detect and reject assignments to non-lvalues.
//Currently we allow "assignments" to consts and temporaries, nonsense like this:
// awk 'BEGIN { "qwe" = 1 }'
// awk 'BEGIN { 7 *= 7 }'
// awk 'BEGIN { length("qwe") = 1 }'
// awk 'BEGIN { (1+1) += 3 }'
				expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
				if (t_info == TI_PGETLINE) {
					/* it's a pipe */
					next_token(TC_GETLINE);
					/* give maximum priority to this pipe */
					cn->info &= ~PRIMASK;
					expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
				}
			} else {
				/* postfix unary: vn is stored in r.n */
				cn->r.n = vn;
				expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
			}
			vn->a.n = cn;
			continue;
		}

		debug_printf_parse("%s: other, t_info:%x\n", __func__, t_info);
		/* for operands and prefix-unary operators, attach them
		 * to last node */
		vn = cn;
		cn = vn->r.n = new_node(t_info);
		cn->a.n = vn;

		expected_tc = TS_OPERAND | TS_UOPPRE | TC_REGEXP;
		if (t_info == TI_PREINC || t_info == TI_PREDEC)
			expected_tc = TS_LVALUE | TC_UOPPRE1;

		/* a prefix operator needs nothing more; go fetch its operand */
		if (!(tc & (TS_OPERAND | TC_REGEXP)))
			continue;

		debug_printf_parse("%s: TS_OPERAND | TC_REGEXP\n", __func__);
		expected_tc = TS_UOPPRE | TC_UOPPOST | TS_BINOP | TS_OPERAND | term_tc;
		/* one should be very careful with switch on tclass -
		 * only simple tclasses should be used (TC_xyz, not TS_xyz) */
		switch (tc) {
		case TC_VARIABLE:
		case TC_ARRAY:
			debug_printf_parse("%s: TC_VARIABLE | TC_ARRAY\n", __func__);
			cn->info = OC_VAR;
			/* a name found in ahash becomes OC_FNARG with its index;
			 * any other name is looked up / created via newvar() */
			v = hash_search(ahash, t_string);
			if (v != NULL) {
				cn->info = OC_FNARG;
				cn->l.aidx = v->x.aidx;
			} else {
				cn->l.v = newvar(t_string);
			}
			if (tc & TC_ARRAY) {
				/* subscript expression goes into r.n */
				cn->info |= xS;
				cn->r.n = parse_expr(TC_ARRTERM);
			}
			break;

		case TC_NUMBER:
		case TC_STRING:
			debug_printf_parse("%s: TC_NUMBER | TC_STRING\n", __func__);
			/* a literal is stored as an OC_VAR node owning a fresh var */
			cn->info = OC_VAR;
			v = cn->l.v = xzalloc(sizeof(var));
			if (tc & TC_NUMBER)
				setvar_i(v, t_double);
			else {
				setvar_s(v, t_string);
				expected_tc &= ~TC_UOPPOST; /* "str"++ is not allowed */
			}
			break;

		case TC_REGEXP:
			debug_printf_parse("%s: TC_REGEXP\n", __func__);
			/* two regex_t's: mk_re_node() compiles a case-sensitive
			 * and a case-insensitive variant */
			mk_re_node(t_string, cn, xzalloc(sizeof(regex_t)*2));
			break;

		case TC_FUNCTION:
			debug_printf_parse("%s: TC_FUNCTION\n", __func__);
			cn->info = OC_FUNC;
			cn->r.f = newfunc(t_string);
			/* next_token() already ate the '(' after the name */
			cn->l.n = parse_expr(TC_RPAREN);
			break;

		case TC_LPAREN:
			debug_printf_parse("%s: TC_LPAREN\n", __func__);
			/* the parenthesized subtree replaces the node created above */
			cn = vn->r.n = parse_expr(TC_RPAREN);
			if (!cn)
				syntax_error("Empty sequence");
			cn->a.n = vn;
			break;

		case TC_GETLINE:
			debug_printf_parse("%s: TC_GETLINE\n", __func__);
			/* remember the node so a following '<' can attach to it */
			glptr = cn;
			expected_tc = TS_OPERAND | TS_UOPPRE | TS_BINOP | term_tc;
			break;

		case TC_BUILTIN:
			debug_printf_parse("%s: TC_BUILTIN\n", __func__);
			cn->l.n = parse_lrparen_list();
			break;

		case TC_LENGTH:
			debug_printf_parse("%s: TC_LENGTH\n", __func__);
			tc = next_token(TC_LPAREN /* length(...) */
				| TC_SEMICOL   /* length; */
				| TC_NEWLINE   /* length<newline> */
				| TC_RBRACE    /* length } */
				| TC_BINOPX    /* length <op> NUM */
				| TC_COMMA     /* print length, 1 */
			);
			if (tc != TC_LPAREN)
				rollback_token();
			else {
				/* It was a "(" token. Handle just like TC_BUILTIN */
				cn->l.n = parse_expr(TC_RPAREN);
			}
			break;
		}
	} /* while() */

	debug_printf_parse("%s() returns %p\n", __func__, sn.r.n);
	return sn.r.n;
}
1504 
1505 /* add node to chain. Return ptr to alloc'd node */
chain_node(uint32_t info)1506 static node *chain_node(uint32_t info)
1507 {
1508 	node *n;
1509 
1510 	if (!seq->first)
1511 		seq->first = seq->last = new_node(0);
1512 
1513 	if (seq->programname != g_progname) {
1514 		seq->programname = g_progname;
1515 		n = chain_node(OC_NEWSOURCE);
1516 		n->l.new_progname = g_progname;
1517 	}
1518 
1519 	n = seq->last;
1520 	n->info = info;
1521 	seq->last = n->a.n = new_node(OC_DONE);
1522 
1523 	return n;
1524 }
1525 
chain_expr(uint32_t info)1526 static void chain_expr(uint32_t info)
1527 {
1528 	node *n;
1529 
1530 	n = chain_node(info);
1531 
1532 	n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
1533 	if ((info & OF_REQUIRED) && !n->l.n)
1534 		syntax_error(EMSG_TOO_FEW_ARGS);
1535 
1536 	if (t_tclass & TC_RBRACE)
1537 		rollback_token();
1538 }
1539 
1540 static void chain_group(void);
1541 
/* Chain the nodes of a loop and return its leading OC_BR node.
 * Layout produced: an OC_BR|Vx node, the loop body (parsed by chain_group()),
 * then an OC_EXEC|Vx node whose l.n is NN and whose a.n points back at the
 * OC_BR node. The OC_BR node's r.n is set to the sequence tail that follows
 * the loop. While the body is parsed, break_ptr and continue_ptr refer to
 * fresh OC_EXEC nodes; afterwards these are linked to the tail (break) and
 * to the trailing OC_EXEC node (continue), and the previous values of both
 * globals are restored. */
static node *chain_loop(node *nn)
{
	node *outer_brk = break_ptr;
	node *outer_cont = continue_ptr;
	node *head, *tail;

	head = chain_node(OC_BR | Vx);
	continue_ptr = new_node(OC_EXEC);
	break_ptr = new_node(OC_EXEC);
	chain_group();

	tail = chain_node(OC_EXEC | Vx);
	tail->l.n = nn;
	tail->a.n = head;
	continue_ptr->a.n = tail;
	head->r.n = seq->last;
	break_ptr->a.n = head->r.n;

	break_ptr = outer_brk;
	continue_ptr = outer_cont;

	return head;
}
1564 
chain_until_rbrace(void)1565 static void chain_until_rbrace(void)
1566 {
1567 	uint32_t tc;
1568 	while ((tc = next_token(TS_GRPSEQ | TC_RBRACE)) != TC_RBRACE) {
1569 		debug_printf_parse("%s: !TC_RBRACE\n", __func__);
1570 		if (tc == TC_NEWLINE)
1571 			continue;
1572 		rollback_token();
1573 		chain_group();
1574 	}
1575 	debug_printf_parse("%s: TC_RBRACE\n", __func__);
1576 }
1577 
/* parse group and attach it to chain
 *
 * A group is one of: a '{ ... }' block, a plain expression statement (or a
 * lone semicolon), or a statement introduced by a keyword (if, while, do,
 * for, print, printf, break, continue, and the rest handled by 'default').
 * Leading newlines are skipped. Nodes are appended to the current sequence
 * 'seq' through chain_node() and friends.
 */
static void chain_group(void)
{
	uint32_t tc;
	node *n, *n2, *n3;

	do {
		tc = next_token(TS_GRPSEQ);
	} while (tc == TC_NEWLINE);

	if (tc == TC_LBRACE) {
		debug_printf_parse("%s: TC_LBRACE\n", __func__);
		chain_until_rbrace();
		return;
	}
	if (tc & (TS_OPSEQ | TC_SEMICOL)) {
		debug_printf_parse("%s: TS_OPSEQ | TC_SEMICOL\n", __func__);
		/* let chain_expr() re-read the token as the start of an expression */
		rollback_token();
		chain_expr(OC_EXEC | Vx);
		return;
	}

	/* TS_STATEMNT */
	debug_printf_parse("%s: TS_STATEMNT(?)\n", __func__);
	switch (t_info & OPCLSMASK) {
	case ST_IF:
		debug_printf_parse("%s: ST_IF\n", __func__);
		/* n: branch node holding the condition in l.n */
		n = chain_node(OC_BR | Vx);
		n->l.n = parse_lrparen_list();
		chain_group();
		/* n2: node chained right after the "then" part */
		n2 = chain_node(OC_EXEC);
		/* n->r.n: the sequence tail following n2 */
		n->r.n = seq->last;
		if (next_token(TS_GRPSEQ | TC_RBRACE | TC_ELSE) == TC_ELSE) {
			chain_group();
			/* n2 is re-linked to the tail following the "else" part */
			n2->a.n = seq->last;
		} else {
			rollback_token();
		}
		break;

	case ST_WHILE:
		debug_printf_parse("%s: ST_WHILE\n", __func__);
		/* the condition is parsed first, then attached to the loop's
		 * leading branch node */
		n2 = parse_lrparen_list();
		n = chain_loop(NULL);
		n->l.n = n2;
		break;

	case ST_DO:
		debug_printf_parse("%s: ST_DO\n", __func__);
		/* n2 is chained ahead of the loop and linked to the node that
		 * follows the loop's branch node; the condition comes after the
		 * body in the source and is attached to the branch node last */
		n2 = chain_node(OC_EXEC);
		n = chain_loop(NULL);
		n2->a.n = n->a.n;
		next_token(TC_WHILE);
		n->l.n = parse_lrparen_list();
		break;

	case ST_FOR:
		debug_printf_parse("%s: ST_FOR\n", __func__);
		next_token(TC_LPAREN);
		n2 = parse_expr(TC_SEMICOL | TC_RPAREN);
		if (t_tclass & TC_RPAREN) {	/* for (I in ARRAY) */
			if (!n2 || n2->info != TI_IN)
				syntax_error(EMSG_UNEXP_TOKEN);
			/* both operands of "in" go to the OC_WALKINIT node; the
			 * loop's leading node becomes OC_WALKNEXT on the left one */
			n = chain_node(OC_WALKINIT | VV);
			n->l.n = n2->l.n;
			n->r.n = n2->r.n;
			n = chain_loop(NULL);
			n->info = OC_WALKNEXT | Vx;
			n->l.n = n2->l.n;
		} else {			/* for (;;) */
			/* first clause: executed as an ordinary expression node */
			n = chain_node(OC_EXEC | Vx);
			n->l.n = n2;
			/* second clause (n2) is the condition, third (n3) is
			 * passed to chain_loop() */
			n2 = parse_expr(TC_SEMICOL);
			n3 = parse_expr(TC_RPAREN);
			n = chain_loop(n3);
			n->l.n = n2;
			/* no condition: the leading node is downgraded to OC_EXEC */
			if (!n2)
				n->info = OC_EXEC;
		}
		break;

	case OC_PRINT:
	case OC_PRINTF:
		debug_printf_parse("%s: OC_PRINT[F]\n", __func__);
		n = chain_node(t_info);
		n->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_OUTRDR | TC_RBRACE);
		if (t_tclass & TC_OUTRDR) {
			/* output redirection: merge the redirection token's info
			 * into the node and parse the target into r.n */
			n->info |= t_info;
			n->r.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_RBRACE);
		}
		if (t_tclass & TC_RBRACE)
			rollback_token();
		break;

	case OC_BREAK:
		debug_printf_parse("%s: OC_BREAK\n", __func__);
		n = chain_node(OC_EXEC);
		if (!break_ptr)
			syntax_error("'break' not in a loop");
		n->a.n = break_ptr;
		chain_expr(t_info);
		break;

	case OC_CONTINUE:
		debug_printf_parse("%s: OC_CONTINUE\n", __func__);
		n = chain_node(OC_EXEC);
		if (!continue_ptr)
			syntax_error("'continue' not in a loop");
		n->a.n = continue_ptr;
		chain_expr(t_info);
		break;

	/* delete, next, nextfile, return, exit */
	default:
		debug_printf_parse("%s: default\n", __func__);
		chain_expr(t_info);
	}
}
1696 
/* Parse the awk program text P (modified in place by next_token()).
 *
 * Top-level items are read in a loop until TC_EOF. BEGIN blocks are chained
 * into beginseq, END blocks into endseq, function bodies into the function's
 * own body sequence, and pattern/action rules into mainseq.
 *
 * Control flow: the token fetched at the top of the loop does not accept a
 * semicolon. After each complete item, 'next_tok' fetches a token with
 * TC_SEMICOL additionally allowed; a semicolon is consumed and the loop
 * restarts, anything else jumps to 'got_tok' to be processed directly.
 */
static void parse_program(char *p)
{
	debug_printf_parse("%s()\n", __func__);

	g_pos = p;
	t_lineno = 1;
	for (;;) {
		uint32_t tclass;

		tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
			| TC_EOF | TC_NEWLINE /* but not TC_SEMICOL */);
 got_tok:
		if (tclass == TC_EOF) {
			debug_printf_parse("%s: TC_EOF\n", __func__);
			break;
		}
		if (tclass == TC_NEWLINE) {
			debug_printf_parse("%s: TC_NEWLINE\n", __func__);
			continue;
		}
		if (tclass == TC_BEGIN) {
			debug_printf_parse("%s: TC_BEGIN\n", __func__);
			seq = &beginseq;
			/* ensure there is no newline between BEGIN and { */
			next_token(TC_LBRACE);
			chain_until_rbrace();
			goto next_tok;
		}
		if (tclass == TC_END) {
			debug_printf_parse("%s: TC_END\n", __func__);
			seq = &endseq;
			/* ensure there is no newline between END and { */
			next_token(TC_LBRACE);
			chain_until_rbrace();
			goto next_tok;
		}
		if (tclass == TC_FUNCDECL) {
			func *f;

			debug_printf_parse("%s: TC_FUNCDECL\n", __func__);
			/* function name, with the '(' already consumed by next_token() */
			next_token(TC_FUNCTION);
			f = newfunc(t_string);
			if (f->defined)
				syntax_error("Duplicate function");
			f->defined = 1;
			//f->body.first = NULL; - already is
			//f->nargs = 0; - already is
			/* func arg list: comma sep list of args, and a close paren */
			for (;;) {
				var *v;
				if (next_token(TC_VARIABLE | TC_RPAREN) == TC_RPAREN) {
					if (f->nargs == 0)
						break; /* func() is ok */
					/* func(a,) is not ok */
					syntax_error(EMSG_UNEXP_TOKEN);
				}
				/* register the argument name in ahash with its position */
				v = findvar(ahash, t_string);
				v->x.aidx = f->nargs++;
				/* Arg followed either by end of arg list or 1 comma */
				if (next_token(TC_COMMA | TC_RPAREN) == TC_RPAREN)
					break;
				/* it was a comma, we ate it */
			}
			seq = &f->body;
			/* ensure there is { after "func F(...)" - but newlines are allowed */
			while (next_token(TC_LBRACE | TC_NEWLINE) == TC_NEWLINE)
				continue;
			chain_until_rbrace();
			/* argument names are only valid inside this function's body */
			hash_clear(ahash);
			goto next_tok;
		}
		seq = &mainseq;
		if (tclass & TS_OPSEQ) {
			node *cn;

			debug_printf_parse("%s: TS_OPSEQ\n", __func__);
			/* pattern [action]: re-read the token as start of the pattern */
			rollback_token();
			cn = chain_node(OC_TEST);
			cn->l.n = parse_expr(TC_SEMICOL | TC_NEWLINE | TC_EOF | TC_LBRACE);
			if (t_tclass == TC_LBRACE) {
				debug_printf_parse("%s: TC_LBRACE\n", __func__);
				chain_until_rbrace();
			} else {
				/* no action, assume default "{ print }" */
				debug_printf_parse("%s: !TC_LBRACE\n", __func__);
				chain_node(OC_PRINT);
			}
			/* r.n: the sequence tail following this rule's action */
			cn->r.n = mainseq.last;
			goto next_tok;
		}
		/* tclass == TC_LBRACE */
		debug_printf_parse("%s: TC_LBRACE(?)\n", __func__);
		chain_until_rbrace();
 next_tok:
		/* Same as next_token() at the top of the loop, + TC_SEMICOL */
		tclass = next_token(TS_OPSEQ | TC_LBRACE | TC_BEGIN | TC_END | TC_FUNCDECL
			| TC_EOF | TC_NEWLINE | TC_SEMICOL);
		/* gawk allows many newlines, but does not allow more than one semicolon:
		 *  BEGIN {...}<newline>;<newline>;
		 * would complain "each rule must have a pattern or an action part".
		 * Same message for
		 *  ; BEGIN {...}
		 */
		if (tclass != TC_SEMICOL)
			goto got_tok; /* use this token */
		/* else: loop back - ate the semicolon, get and use _next_ token */
	} /* for (;;) */
}
1805 
1806 /* -------- program execution part -------- */
1807 
1808 /* temporary variables allocator */
nvalloc(int sz)1809 static var *nvalloc(int sz)
1810 {
1811 	return xzalloc(sz * sizeof(var));
1812 }
1813 
/* Release the SZ variables in array V, then free V itself.
 * For each variable: an array it owns (VF_ARRAY set, VF_CHILD clear) is
 * cleared and its table freed; a walker list (VF_WALK) is freed link by
 * link through the 'prev' pointers; finally the variable is cleared with
 * clrvar(). */
static void nvfree(var *v, int sz)
{
	int i;

	for (i = 0; i < sz; i++) {
		var *p = &v[i];

		if ((p->type & (VF_ARRAY | VF_CHILD)) == VF_ARRAY) {
			clear_array(iamarray(p));
			free(p->x.array->items);
			free(p->x.array);
		}
		if (p->type & VF_WALK) {
			walker_list *w = p->x.walker;

			debug_printf_walker("nvfree: freeing walker @%p\n", &p->x.walker);
			p->x.walker = NULL;
			while (w != NULL) {
				walker_list *older = w->prev;

				debug_printf_walker(" free(%p)\n", w);
				free(w);
				w = older;
			}
		}
		clrvar(p);
	}

	free(v);
}
1842 
mk_splitter(const char * s,tsplitter * spl)1843 static node *mk_splitter(const char *s, tsplitter *spl)
1844 {
1845 	regex_t *re, *ire;
1846 	node *n;
1847 
1848 	re = &spl->re[0];
1849 	ire = &spl->re[1];
1850 	n = &spl->n;
1851 	if (n->info == TI_REGEXP) {
1852 		regfree(re);
1853 		regfree(ire); // TODO: nuke ire, use re+1?
1854 	}
1855 	if (s[0] && s[1]) { /* strlen(s) > 1 */
1856 		mk_re_node(s, n, re);
1857 	} else {
1858 		n->info = (uint32_t) s[0];
1859 	}
1860 
1861 	return n;
1862 }
1863 
1864 static var *evaluate(node *, var *);
1865 
1866 /* Use node as a regular expression. Supplied with node ptr and regex_t
1867  * storage space. Return ptr to regex (if result points to preg, it should
1868  * be later regfree'd manually).
1869  */
as_regex(node * op,regex_t * preg)1870 static regex_t *as_regex(node *op, regex_t *preg)
1871 {
1872 	int cflags;
1873 	const char *s;
1874 
1875 	if (op->info == TI_REGEXP) {
1876 		return icase ? op->r.ire : op->l.re;
1877 	}
1878 
1879 	//tmpvar = nvalloc(1);
1880 #define TMPVAR (&G.as_regex__tmpvar)
1881 	// We use a single "static" tmpvar (instead of on-stack or malloced one)
1882 	// to decrease memory consumption in deeply-recursive awk programs.
1883 	// The rule to work safely is to never call evaluate() while our static
1884 	// TMPVAR's value is still needed.
1885 	s = getvar_s(evaluate(op, TMPVAR));
1886 
1887 	cflags = icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED;
1888 	/* Testcase where REG_EXTENDED fails (unpaired '{'):
1889 	 * echo Hi | awk 'gsub("@(samp|code|file)\{","");'
1890 	 * gawk 3.1.5 eats this. We revert to ~REG_EXTENDED
1891 	 * (maybe gsub is not supposed to use REG_EXTENDED?).
1892 	 */
1893 	if (regcomp(preg, s, cflags)) {
1894 		cflags &= ~REG_EXTENDED;
1895 		xregcomp(preg, s, cflags);
1896 	}
1897 	//nvfree(tmpvar, 1);
1898 #undef TMPVAR
1899 	return preg;
1900 }
1901 
/* Gradually growing buffer.
 * A new allocation happens when b is NULL or when n >= *size (so also
 * when n == old size), which means the returned buffer always has at
 * least one byte beyond offset n-1. The new capacity is written to *size;
 * *size is not read when b is NULL.
 */
static char* qrealloc(char *b, int n, int *size)
{
	if (b != NULL && n < *size)
		return b; /* still big enough */

	*size = n + (n>>1) + 80;
	return xrealloc(b, *size);
}
1914 
1915 /* resize field storage space */
fsrealloc(int size)1916 static void fsrealloc(int size)
1917 {
1918 	int i, newsize;
1919 
1920 	if (size >= maxfields) {
1921 		/* Sanity cap, easier than catering for overflows */
1922 		if (size > 0xffffff)
1923 			bb_die_memory_exhausted();
1924 
1925 		i = maxfields;
1926 		maxfields = size + 16;
1927 
1928 		newsize = maxfields * sizeof(Fields[0]);
1929 		debug_printf_eval("fsrealloc: xrealloc(%p, %u)\n", Fields, newsize);
1930 		Fields = xrealloc(Fields, newsize);
1931 		debug_printf_eval("fsrealloc: Fields=%p..%p\n", Fields, (char*)Fields + newsize - 1);
1932 		/* ^^^ did Fields[] move? debug aid for L.v getting "upstaged" by R.v in evaluate() */
1933 
1934 		for (; i < maxfields; i++) {
1935 			Fields[i].type = VF_SPECIAL;
1936 			Fields[i].string = NULL;
1937 		}
1938 	}
1939 	/* if size < nfields, clear extra field variables */
1940 	for (i = size; i < nfields; i++) {
1941 		clrvar(Fields + i);
1942 	}
1943 	nfields = size;
1944 }
1945 
/* Like regexec(preg, s, 1, pmatch, 0), but a match that is empty and
 * located at the very start of s is not accepted: the search is retried
 * from s+1, s+2, ... until a match with nonzero end offset is found.
 *
 * Returns 0 and fills pmatch[0] with offsets relative to s on success,
 * REG_NOMATCH if the end of s is reached first, or whatever the initial
 * regexec() returned if that one failed.
 *
 * NB: when the initial match is empty, s[1] is examined, so s must not
 * be an empty string in that case.
 */
static int regexec1_nonempty(const regex_t *preg, const char *s, regmatch_t pmatch[])
{
	int r = regexec(preg, s, 1, pmatch, 0);
	if (r == 0 && pmatch[0].rm_eo == 0) {
		/* For example, happens when FS can match
		 * an empty string (awk -F ' *'). Logically,
		 * this should split into one-char fields.
		 * However, gawk 5.0.1 searches for first
		 * _non-empty_ separator string match:
		 */
		size_t ofs = 0;
		do {
			ofs++;
			if (!s[ofs])
				return REG_NOMATCH;
			if (regexec(preg, s + ofs, 1, pmatch, 0) != 0) {
				/* pmatch[] has no guaranteed content after
				 * a failed match: do not rely on it still
				 * holding the previous (empty) result, mark
				 * "nothing usable here" explicitly and go on
				 * to the next offset.
				 */
				pmatch[0].rm_eo = 0;
			}
		} while (pmatch[0].rm_eo == 0);
		/* offsets were relative to s + ofs: rebase them onto s */
		pmatch[0].rm_so += ofs;
		pmatch[0].rm_eo += ofs;
	}
	return r;
}
1968 
/* Split string s into fields according to splitter node spl.
 * A new buffer is allocated and returned through *slist; the fields are
 * stored in it back to back, each terminated by NUL. Returns the number
 * of fields. The mode is selected by spl->info:
 *  - TI_REGEXP: separator is a regex (spl->l.re, or spl->r.ire if icase)
 *  - '\0':      every character becomes a field
 *  - ' ':       fields are runs of non-whitespace characters
 *  - otherwise: separator is that single character
 */
static int awk_split(const char *s, node *spl, char **slist)
{
	int n;
	char c[4];
	char *s1;

	/* in worst case, each char would be a separate field */
	*slist = s1 = xzalloc(strlen(s) * 2 + 3);
	strcpy(s1, s);

	/* c[] is a small delimiter set: c[0] and c[1] hold the separator
	 * char, c[2] becomes '\n' when RS is an empty string, c[3] is the
	 * terminator. "c" is used as a whole by strpbrk() below, "c+2" by
	 * strcspn().
	 */
	c[0] = c[1] = (char)spl->info;
	c[2] = c[3] = '\0';
	if (*getvar_s(intvar[RS]) == '\0')
		c[2] = '\n';

	n = 0;
	if (spl->info == TI_REGEXP) {  /* regex split */
		if (!*s)
			return n; /* "": zero fields */
		n++; /* at least one field will be there */
		do {
			int l;
			regmatch_t pmatch[1];

			/* len till next NUL (or till next \n if RS is empty) */
			l = strcspn(s, c+2);
			if (regexec1_nonempty(icase ? spl->r.ire : spl->l.re, s, pmatch) == 0
			 && pmatch[0].rm_so <= l
			) {
				/* if (pmatch[0].rm_eo == 0) ... - impossible */
				/* separator starts within the current stretch:
				 * field ends where the separator begins */
				l = pmatch[0].rm_so;
				n++; /* we saw yet another delimiter */
			} else {
				/* no separator before position l: take everything
				 * up to l and step over the char at l unless it is
				 * the terminating NUL.
				 * NOTE(review): n is not incremented on this path
				 * even when a '\n' ended the field - confirm this
				 * is intended.
				 */
				pmatch[0].rm_eo = l;
				if (s[l])
					pmatch[0].rm_eo++;
			}
			/* append the field text plus NUL, continue after separator */
			s1 = mempcpy(s1, s, l);
			*s1++ = '\0';
			s += pmatch[0].rm_eo;
		} while (*s);

		/* echo a-- | awk -F-- '{ print NF, length($NF), $NF }'
		 * should print "2 0 ":
		 */
		*s1 = '\0';

		return n;
	}
	if (c[0] == '\0') {  /* null split */
		/* one field per input character */
		while (*s) {
			*s1++ = *s++;
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	if (c[0] != ' ') {  /* single-character split */
		/* works in place on the copy of s made above */
		if (icase) {
			/* accept both cases of the separator */
			c[0] = toupper(c[0]);
			c[1] = tolower(c[1]);
		}
		if (*s1)
			n++;
		/* turn every delimiter char into a field terminator */
		while ((s1 = strpbrk(s1, c)) != NULL) {
			*s1++ = '\0';
			n++;
		}
		return n;
	}
	/* space split */
	while (*s) {
		s = skip_whitespace(s);
		if (!*s)
			break;
		n++;
		/* copy one run of non-whitespace as a field */
		while (*s && !isspace(*s))
			*s1++ = *s++;
		*s1++ = '\0';
	}
	return n;
}
2050 
split_f0(void)2051 static void split_f0(void)
2052 {
2053 /* static char *fstrings; */
2054 #define fstrings (G.split_f0__fstrings)
2055 
2056 	int i, n;
2057 	char *s;
2058 
2059 	if (is_f0_split)
2060 		return;
2061 
2062 	is_f0_split = TRUE;
2063 	free(fstrings);
2064 	fsrealloc(0);
2065 	n = awk_split(getvar_s(intvar[F0]), &fsplitter.n, &fstrings);
2066 	fsrealloc(n);
2067 	s = fstrings;
2068 	for (i = 0; i < n; i++) {
2069 		Fields[i].string = nextword(&s);
2070 		Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
2071 	}
2072 
2073 	/* set NF manually to avoid side effects */
2074 	clrvar(intvar[NF]);
2075 	intvar[NF]->type = VF_NUMBER | VF_SPECIAL;
2076 	intvar[NF]->number = nfields;
2077 #undef fstrings
2078 }
2079 
2080 /* perform additional actions when some internal variables changed */
handle_special(var * v)2081 static void handle_special(var *v)
2082 {
2083 	int n;
2084 	char *b;
2085 	const char *sep, *s;
2086 	int sl, l, len, i, bsize;
2087 
2088 	if (!(v->type & VF_SPECIAL))
2089 		return;
2090 
2091 	if (v == intvar[NF]) {
2092 		n = (int)getvar_i(v);
2093 		if (n < 0)
2094 			syntax_error("NF set to negative value");
2095 		fsrealloc(n);
2096 
2097 		/* recalculate $0 */
2098 		sep = getvar_s(intvar[OFS]);
2099 		sl = strlen(sep);
2100 		b = NULL;
2101 		len = 0;
2102 		for (i = 0; i < n; i++) {
2103 			s = getvar_s(&Fields[i]);
2104 			l = strlen(s);
2105 			if (b) {
2106 				memcpy(b+len, sep, sl);
2107 				len += sl;
2108 			}
2109 			b = qrealloc(b, len+l+sl, &bsize);
2110 			memcpy(b+len, s, l);
2111 			len += l;
2112 		}
2113 		if (b)
2114 			b[len] = '\0';
2115 		setvar_p(intvar[F0], b);
2116 		is_f0_split = TRUE;
2117 
2118 	} else if (v == intvar[F0]) {
2119 		is_f0_split = FALSE;
2120 
2121 	} else if (v == intvar[FS]) {
2122 		/*
2123 		 * The POSIX-2008 standard says that changing FS should have no effect on the
2124 		 * current input line, but only on the next one. The language is:
2125 		 *
2126 		 * > Before the first reference to a field in the record is evaluated, the record
2127 		 * > shall be split into fields, according to the rules in Regular Expressions,
2128 		 * > using the value of FS that was current at the time the record was read.
2129 		 *
2130 		 * So, split up current line before assignment to FS:
2131 		 */
2132 		split_f0();
2133 
2134 		mk_splitter(getvar_s(v), &fsplitter);
2135 	} else if (v == intvar[RS]) {
2136 		mk_splitter(getvar_s(v), &rsplitter);
2137 	} else if (v == intvar[IGNORECASE]) {
2138 		icase = istrue(v);
2139 	} else {				/* $n */
2140 		n = getvar_i(intvar[NF]);
2141 		setvar_i(intvar[NF], n > v-Fields ? n : v-Fields+1);
2142 		/* right here v is invalid. Just to note... */
2143 	}
2144 }
2145 
2146 /* step through func/builtin/etc arguments */
nextarg(node ** pn)2147 static node *nextarg(node **pn)
2148 {
2149 	node *n;
2150 
2151 	n = *pn;
2152 	if (n && n->info == TI_COMMA) {
2153 		*pn = n->r.n;
2154 		n = n->l.n;
2155 	} else {
2156 		*pn = NULL;
2157 	}
2158 	return n;
2159 }
2160 
/* Begin a walk over "array" using v as the walk variable.
 * A walker holding a snapshot of all item names is allocated and attached
 * to v; a walker that was already attached (VF_WALK set) is kept as the
 * new walker's "prev".
 */
static void hashwalk_init(var *v, xhash *array)
{
	walker_list *outer;
	walker_list *w;
	unsigned bucket;

	outer = (v->type & VF_WALK) ? v->x.walker : NULL;
	v->type |= VF_WALK;
	debug_printf_walker("hashwalk_init: prev_walker:%p\n", outer);

	w = xzalloc(sizeof(*w) + array->glen + 1); /* why + 1? */
	v->x.walker = w;
	debug_printf_walker(" walker@%p=%p\n", &v->x.walker, w);
	w->prev = outer;
	w->cur = w->end = w->wbuf;

	/* store every item name, NUL included, back to back in wbuf */
	for (bucket = 0; bucket < array->csize; bucket++) {
		hash_item *hi;

		for (hi = array->items[bucket]; hi; hi = hi->next)
			w->end = stpcpy(w->end, hi->name) + 1;
	}
}
2188 
/* Advance the walk attached to v.
 * Returns TRUE after storing the next name into v. Returns FALSE when
 * the names are exhausted; the walker is then freed and the previous
 * walker (if any) is re-attached to v.
 */
static int hashwalk_next(var *v)
{
	walker_list *w = v->x.walker;
	walker_list *outer;

	if (w->cur < w->end) {
		setvar_s(v, nextword(&w->cur));
		return TRUE;
	}

	outer = w->prev;
	debug_printf_walker("end of iteration, free(walker@%p:%p), prev_walker:%p\n", &v->x.walker, w, outer);
	free(w);
	v->x.walker = outer;
	return FALSE;
}
2205 
2206 /* evaluate node, return 1 when result is true, 0 otherwise */
ptest(node * pattern)2207 static int ptest(node *pattern)
2208 {
2209 	// We use a single "static" tmpvar (instead of on-stack or malloced one)
2210 	// to decrease memory consumption in deeply-recursive awk programs.
2211 	// The rule to work safely is to never call evaluate() while our static
2212 	// TMPVAR's value is still needed.
2213 	return istrue(evaluate(pattern, &G.ptest__tmpvar));
2214 }
2215 
/* Read next record from stream rsm into a variable v.
 * The record terminator is chosen by rsplitter.n.info: a regex
 * (TI_REGEXP), a single character, or - when that character is NUL -
 * a run of two or more newlines. The terminator text is stored in RT.
 * Returns 1 if a record was stored, 0 if no data is left, and -1 if
 * reading failed (ERRNO is set to errno in that case).
 */
static int awk_getline(rstream *rsm, var *v)
{
	char *b;
	regmatch_t pmatch[1];
	int size, a, p, pp = 0;
	int fd, so, eo, r, rp;
	char c, *m, *s;

	debug_printf_eval("entered %s()\n", __func__);

	/* we're using our own buffer since we need access to accumulating
	 * characters
	 */
	/* Local copies of the stream state:
	 * m    - buffer start
	 * a    - offset of the unconsumed data within m
	 * p    - number of unconsumed bytes (b[p] is kept NUL)
	 * size - allocated size of m
	 */
	fd = fileno(rsm->F);
	m = rsm->buffer;
	a = rsm->adv;
	p = rsm->pos;
	size = rsm->size;
	c = (char) rsplitter.n.info;
	rp = 0; /* count of leading newlines skipped (empty-RS mode only) */

	if (!m)
		m = qrealloc(m, 256, &size);

	do {
		b = m + a;
		/* so..eo is the terminator span; default: empty, at end of data */
		so = eo = p;
		r = 1;
		if (p > 0) {
			if (rsplitter.n.info == TI_REGEXP) {
				if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
							b, 1, pmatch, 0) == 0) {
					so = pmatch[0].rm_so;
					eo = pmatch[0].rm_eo;
					/* a match touching the end of buffered data is
					 * not final yet: read more before accepting it */
					if (b[eo] != '\0')
						break;
				}
			} else if (c != '\0') {
				/* single char RS: only the part read last (from pp on)
				 * needs searching; a NUL byte inside that part ends
				 * the record too */
				s = strchr(b+pp, c);
				if (!s)
					s = memchr(b+pp, '\0', p - pp);
				if (s) {
					so = eo = s-b;
					eo++;
					break;
				}
			} else {
				/* RS char is NUL: skip leading newlines, record ends
				 * at the next "\n\n" and all newlines that follow */
				while (b[rp] == '\n')
					rp++;
				s = strstr(b+rp, "\n\n");
				if (s) {
					so = eo = s-b;
					while (b[eo] == '\n')
						eo++;
					/* newline run reaching end of data may continue */
					if (b[eo] != '\0')
						break;
				}
			}
		}

		/* no complete record yet: move pending data (and its NUL)
		 * to the buffer start, make room and read more */
		if (a > 0) {
			memmove(m, m+a, p+1);
			b = m;
			a = 0;
		}

		m = qrealloc(m, a+p+128, &size);
		b = m + a;
		pp = p;
		p += safe_read(fd, b+p, size-p-1);
		if (p < pp) {
			/* safe_read() returned a negative value: read error */
			p = 0;
			r = 0;
			setvar_i(intvar[ERRNO], errno);
		}
		b[p] = '\0';

	} while (p > pp); /* loop while the read delivered new data */

	if (p == 0) {
		/* nothing to return: r becomes 0 (no data) or -1 (error) */
		r--;
	} else {
		/* temporarily NUL-terminate at so, then at eo, to copy out
		 * the record text and then the terminator text */
		c = b[so]; b[so] = '\0';
		setvar_s(v, b+rp);
		v->type |= VF_USER;
		b[so] = c;
		c = b[eo]; b[eo] = '\0';
		setvar_s(intvar[RT], b+so);
		b[eo] = c;
	}

	/* save state back; the consumed record and terminator are skipped.
	 * NOTE(review): after a failed read p is 0 while eo still holds the
	 * pre-read length, so rsm->pos can become negative here - confirm
	 * that the stream is never read again in that state.
	 */
	rsm->buffer = m;
	rsm->adv = a + eo;
	rsm->pos = p - eo;
	rsm->size = size;

	debug_printf_eval("returning from %s(): %d\n", __func__, r);

	return r;
}
2317 
/* Formatted output into an allocated buffer, return ptr to buffer.
 * n is the argument list: its first argument is evaluated as the format
 * string, the following ones are consumed one per format spec.
 * With FEATURE_AWK_GNU_EXTENSIONS enabled, the length of the result is
 * stored in *len if len is not NULL; without it, the macro below drops
 * the second parameter altogether.
 * The caller frees the returned buffer (see the PRINTF case in evaluate()).
 */
#if !ENABLE_FEATURE_AWK_GNU_EXTENSIONS
# define awk_printf(a, b) awk_printf(a)
#endif
static char *awk_printf(node *n, size_t *len)
{
	char *b;
	char *fmt, *f;
	size_t i;

	//tmpvar = nvalloc(1);
#define TMPVAR (&G.awk_printf__tmpvar)
	// We use a single "static" tmpvar (instead of on-stack or malloced one)
	// to decrease memory consumption in deeply-recursive awk programs.
	// The rule to work safely is to never call evaluate() while our static
	// TMPVAR's value is still needed.
	fmt = f = xstrdup(getvar_s(evaluate(nextarg(&n), TMPVAR)));
	// ^^^^^^^^^ here we immediately strdup() the value, so the later call
	// to evaluate() potentially recursing into another awk_printf() can't
	// mangle the value.

	b = NULL; /* result buffer */
	i = 0;    /* number of bytes in b so far */
	while (1) { /* "print one format spec" loop */
		char *s;     /* start of current piece, later its formatted text */
		char c;      /* current format char; 0 marks the final piece */
		char sv;     /* saved char that was overwritten by NUL */
		var *arg;
		size_t slen; /* length of the formatted piece */

		/* Find end of the next format spec, or end of line */
		s = f;
		while (1) {
			c = *f;
			if (!c) /* no percent chars found at all */
				goto nul;
			f++;
			if (c == '%')
				break;
		}
		/* we are past % in "....%..." */
		c = *f;
		if (!c) /* "....%" */
			goto nul;
		if (c == '%') { /* "....%%...." */
			/* copy text up to and including the first %, skip the second */
			slen = f - s;
			s = xstrndup(s, slen);
			f++;
			goto append; /* print "....%" part verbatim */
		}
		/* skip flags/width/precision up to the conversion letter */
		while (1) {
			if (isalpha(c))
				break;
			if (c == '*')
				syntax_error("%*x formats are not supported");
			c = *++f;
			if (!c) { /* "....%...." and no letter found after % */
				/* Example: awk 'BEGIN { printf "^^^%^^^\n"; }' */
 nul:
				/* here c == 0 and s still points into fmt */
				slen = f - s;
				goto tail; /* print remaining string, exit loop */
			}
		}
		/* we are at A in "....%...A..." */

		arg = evaluate(nextarg(&n), TMPVAR);

		/* Result can be arbitrarily long. Example:
		 *  printf "%99999s", "BOOM"
		 */
		/* cut fmt right after the conversion letter, so that s is
		 * a format string with exactly one conversion */
		sv = *++f;
		*f = '\0';
		if (c == 'c') {
			/* numeric arg: its value is the char; else first char of string */
			char cc = is_numeric(arg) ? getvar_i(arg) : *getvar_s(arg);
			char *r = xasprintf(s, cc ? cc : '^' /* else strlen will be wrong */);
			slen = strlen(r);
			if (cc == '\0') /* if cc is NUL, re-format the string with it */
				sprintf(r, s, cc);
			s = r;
		} else {
			if (c == 's') {
				s = xasprintf(s, getvar_s(arg));
			} else {
				double d = getvar_i(arg);
				if (strchr("diouxX", c)) {
//TODO: make it wider here (%x -> %llx etc)?
					s = xasprintf(s, (int)d);
				} else if (strchr("eEfFgGaA", c)) {
					s = xasprintf(s, d);
				} else {
//TODO: GNU Awk 5.0.1: printf "%W" prints "%W", does not error out
					syntax_error(EMSG_INV_FMT);
				}
			}
			slen = strlen(s);
		}
		*f = sv; /* restore the char overwritten above */
 append:
		if (i == 0) {
			/* first piece: the allocated piece becomes the buffer */
			b = s;
			i = slen;
			continue;
		}
 tail:
		b = xrealloc(b, i + slen + 1);
		strcpy(b + i, s);
		i += slen;
		if (!c) /* s is NOT allocated and this is the last part of string? */
			break;
		free(s);
	}

	free(fmt);
	//nvfree(tmpvar, 1);
#undef TMPVAR

#if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
	if (len)
		*len = i;
#endif
	return b;
}
2440 
/* Common substitution routine.
 * Replace (nm)'th substring of (src) that matches (rn) with (repl),
 * store result into (dest), return number of substitutions.
 * If nm = 0, replace all matches.
 * If src or dst is NULL, use $0.
 * If subexp != 0, enable subexpression matching (\1-\9).
 *
 * NB: the value returned is match_no, the count of matches seen before
 * the loop stopped.
 */
static int awk_sub(node *rn, const char *repl, int nm, var *src, var *dest, int subexp)
{
	char *resbuf;
	const char *sp;
	int match_no, residx, replen, resbufsize;
	int regexec_flags;
	regmatch_t pmatch[10];
	regex_t sreg, *regex;

	resbuf = NULL; /* result being built */
	residx = 0;    /* number of bytes stored in resbuf */
	match_no = 0;
	regexec_flags = 0;
	regex = as_regex(rn, &sreg);
	sp = getvar_s(src ? src : intvar[F0]); /* unprocessed rest of source */
	replen = strlen(repl);
	while (regexec(regex, sp, 10, pmatch, regexec_flags) == 0) {
		int so = pmatch[0].rm_so;
		int eo = pmatch[0].rm_eo;

		//bb_error_msg("match %u: [%u,%u] '%s'%p", match_no+1, so, eo, sp,sp);
		/* copy source through the end of the match; if this match gets
		 * replaced, the matched part is dropped again below */
		resbuf = qrealloc(resbuf, residx + eo + replen, &resbufsize);
		memcpy(resbuf + residx, sp, eo);
		residx += eo;
		if (++match_no >= nm) {
			const char *s;
			int nbs; /* number of consecutive backslashes just copied */

			/* replace */
			residx -= (eo - so);
			nbs = 0;
			for (s = repl; *s; s++) {
				char c = resbuf[residx++] = *s;
				if (c == '\\') {
					nbs++;
					continue;
				}
				if (c == '&' || (subexp && c >= '0' && c <= '9')) {
					int j;
					/* back out this char and half (rounded up)
					 * of the backslashes preceding it */
					residx -= ((nbs + 3) >> 1);
					j = 0; /* index into pmatch[]: 0 is the whole match */
					if (c != '&') {
						j = c - '0';
						/* flips the parity tested below for digits */
						nbs++;
					}
					if (nbs % 2) {
						/* odd count: store the char literally */
						resbuf[residx++] = c;
					} else {
						/* even count: insert text of match j */
						int n = pmatch[j].rm_eo - pmatch[j].rm_so;
						resbuf = qrealloc(resbuf, residx + replen + n, &resbufsize);
						memcpy(resbuf + residx, sp + pmatch[j].rm_so, n);
						residx += n;
					}
				}
				nbs = 0;
			}
		}

		/* further matches do not start at beginning of line */
		regexec_flags = REG_NOTBOL;
		sp += eo;
		if (match_no == nm)
			break;
		if (eo == so) {
			/* Empty match (e.g. "b*" will match anywhere).
			 * Advance by one char. */
//BUG (bug 1333):
//gsub(/\<b*/,"") on "abc" will reach this point, advance to "bc"
//... and will erroneously match "b" even though it is NOT at the word start.
//we need REG_NOTBOW but it does not exist...
//TODO: if EXTRA_COMPAT=y, use GNU matching and re_search,
//it should be able to do it correctly.
			/* Subtle: this is safe only because
			 * qrealloc allocated at least one extra byte */
			resbuf[residx] = *sp;
			if (*sp == '\0')
				goto ret;
			sp++;
			residx++;
		}
	}

	/* append whatever is left of the source, including its NUL */
	resbuf = qrealloc(resbuf, residx + strlen(sp), &resbufsize);
	strcpy(resbuf + residx, sp);
 ret:
	//bb_error_msg("end sp:'%s'%p", sp,sp);
	setvar_p(dest ? dest : intvar[F0], resbuf);
	/* regex was compiled into our local storage by as_regex(): release it */
	if (regex == &sreg)
		regfree(regex);
	return match_no;
}
2538 
do_mktime(const char * ds)2539 static NOINLINE int do_mktime(const char *ds)
2540 {
2541 	struct tm then;
2542 	int count;
2543 
2544 	/*memset(&then, 0, sizeof(then)); - not needed */
2545 	then.tm_isdst = -1; /* default is unknown */
2546 
2547 	/* manpage of mktime says these fields are ints,
2548 	 * so we can sscanf stuff directly into them */
2549 	count = sscanf(ds, "%u %u %u %u %u %u %d",
2550 		&then.tm_year, &then.tm_mon, &then.tm_mday,
2551 		&then.tm_hour, &then.tm_min, &then.tm_sec,
2552 		&then.tm_isdst);
2553 
2554 	if (count < 6
2555 	 || (unsigned)then.tm_mon < 1
2556 	 || (unsigned)then.tm_year < 1900
2557 	) {
2558 		return -1;
2559 	}
2560 
2561 	then.tm_mon -= 1;
2562 	then.tm_year -= 1900;
2563 
2564 	return mktime(&then);
2565 }
2566 
2567 /* Reduce stack usage in exec_builtin() by keeping match() code separate */
do_match(node * an1,const char * as0)2568 static NOINLINE var *do_match(node *an1, const char *as0)
2569 {
2570 	regmatch_t pmatch[1];
2571 	regex_t sreg, *re;
2572 	int n, start, len;
2573 
2574 	re = as_regex(an1, &sreg);
2575 	n = regexec(re, as0, 1, pmatch, 0);
2576 	if (re == &sreg)
2577 		regfree(re);
2578 	start = 0;
2579 	len = -1;
2580 	if (n == 0) {
2581 		start = pmatch[0].rm_so + 1;
2582 		len = pmatch[0].rm_eo - pmatch[0].rm_so;
2583 	}
2584 	setvar_i(newvar("RLENGTH"), len);
2585 	return setvar_i(newvar("RSTART"), start);
2586 }
2587 
/* Reduce stack usage in evaluate() by keeping builtins' code separate.
 *
 * Execute the builtin described by node op: op->info selects the builtin
 * and describes its arguments, op->l.n is the argument list (at most four
 * arguments are picked up). The result is stored into res, which is also
 * the return value except for the B_ma case, where the variable returned
 * by do_match() is returned instead.
 */
static NOINLINE var *exec_builtin(node *op, var *res)
{
#define tspl (G.exec_builtin__tspl)

	var *tmpvars;
	node *an[4];         /* argument nodes */
	var *av[4];          /* evaluated arguments (only where requested) */
	const char *as[4];   /* string values of arguments (only where requested) */
	node *spl;
	uint32_t isr, info;
	int nargs;
	time_t tt;
	int i, l, ll, n;

	tmpvars = nvalloc(4);
#define TMPVAR0 (tmpvars)
#define TMPVAR1 (tmpvars + 1)
#define TMPVAR2 (tmpvars + 2)
#define TMPVAR3 (tmpvars + 3)
#define TMPVAR(i) (tmpvars + (i))
	isr = info = op->info;
	op = op->l.n;

	/* Collect up to four arguments. isr is shifted right by one after
	 * each argument, so the two mask bits tested below apply to
	 * successive arguments: if either bit of 0x09000000 is set, the
	 * argument is evaluated; if 0x08000000 is set, its string value is
	 * fetched as well.
	 */
	av[2] = av[3] = NULL;
	for (i = 0; i < 4 && op; i++) {
		an[i] = nextarg(&op);
		if (isr & 0x09000000) {
			av[i] = evaluate(an[i], TMPVAR(i));
			if (isr & 0x08000000)
				as[i] = getvar_s(av[i]);
		}
		isr >>= 1;
	}

	/* the top two bits of info are compared against the argument count:
	 * they give the minimum number of arguments */
	nargs = i;
	if ((uint32_t)nargs < (info >> 30))
		syntax_error(EMSG_TOO_FEW_ARGS);

	info &= OPNMASK;
	switch (info) {

	/* atan2 of the first two arguments (needs libm) */
	case B_a2:
		if (ENABLE_FEATURE_AWK_LIBM)
			setvar_i(res, atan2(getvar_i(av[0]), getvar_i(av[1])));
		else
			syntax_error(EMSG_NO_MATH);
		break;

	/* split string as[0] into array av[1]; result is the piece count */
	case B_sp: {
		char *s, *s1;

		if (nargs > 2) {
			/* third argument: a regexp node is used as is, anything
			 * else is evaluated and turned into a splitter */
			spl = (an[2]->info == TI_REGEXP) ? an[2]
				: mk_splitter(getvar_s(evaluate(an[2], TMPVAR2)), &tspl);
		} else {
			spl = &fsplitter.n;
		}

		n = awk_split(as[0], spl, &s);
		s1 = s; /* nextword() advances s, keep the start for free() */
		clear_array(iamarray(av[1]));
		for (i = 1; i <= n; i++)
			setari_u(av[1], i, nextword(&s));
		free(s1);
		setvar_i(res, n);
		break;
	}

	/* substring of as[0]: 1-based start av[1], optional length av[2] */
	case B_ss: {
		char *s;

		l = strlen(as[0]);
		i = getvar_i(av[1]) - 1;
		/* clamp start offset into [0, l] */
		if (i > l)
			i = l;
		if (i < 0)
			i = 0;
		n = (nargs > 2) ? getvar_i(av[2]) : l-i;
		if (n < 0)
			n = 0;
		s = xstrndup(as[0]+i, n);
		setvar_p(res, s);
		break;
	}

	/* Bitwise ops must assume that operands are unsigned. GNU Awk 3.1.5:
	 * awk '{ print or(-1,1) }' gives "4.29497e+09", not "-2.xxxe+09" */
	case B_an:
		setvar_i(res, getvar_i_int(av[0]) & getvar_i_int(av[1]));
		break;

	case B_co:
		setvar_i(res, ~getvar_i_int(av[0]));
		break;

	case B_ls:
		setvar_i(res, getvar_i_int(av[0]) << getvar_i_int(av[1]));
		break;

	case B_or:
		setvar_i(res, getvar_i_int(av[0]) | getvar_i_int(av[1]));
		break;

	case B_rs:
		setvar_i(res, getvar_i_int(av[0]) >> getvar_i_int(av[1]));
		break;

	case B_xo:
		setvar_i(res, getvar_i_int(av[0]) ^ getvar_i_int(av[1]));
		break;

	/* copy of as[0] with ASCII letters converted to lower (B_lo)
	 * or upper (B_up) case */
	case B_lo:
	case B_up: {
		char *s, *s1;
		s1 = s = xstrdup(as[0]);
		while (*s1) {
			//*s1 = (info == B_up) ? toupper(*s1) : tolower(*s1);
			/* (*s1 | 0x20) maps 'A'..'Z' onto 'a'..'z', so this
			 * test is true for ASCII letters only; bit 0x20 is
			 * what differs between the two cases */
			if ((unsigned char)((*s1 | 0x20) - 'a') <= ('z' - 'a'))
				*s1 = (info == B_up) ? (*s1 & 0xdf) : (*s1 | 0x20);
			s1++;
		}
		setvar_p(res, s);
		break;
	}

	/* 1-based position of as[1] inside as[0], 0 if not found */
	case B_ix:
		n = 0;
		ll = strlen(as[1]);
		l = strlen(as[0]) - ll; /* last offset where as[1] could start */
		if (ll > 0 && l >= 0) {
			if (!icase) {
				char *s = strstr(as[0], as[1]);
				if (s)
					n = (s - as[0]) + 1;
			} else {
				/* this piece of code is terribly slow and
				 * really should be rewritten
				 */
				for (i = 0; i <= l; i++) {
					if (strncasecmp(as[0]+i, as[1], ll) == 0) {
						n = i+1;
						break;
					}
				}
			}
		}
		setvar_i(res, n);
		break;

	/* strftime(): format as[0] (or a default format) applied to
	 * time av[1] (or the current time) */
	case B_ti:
		if (nargs > 1)
			tt = getvar_i(av[1]);
		else
			time(&tt);
		//s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
		i = strftime(g_buf, MAXVARFMT,
			((nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y"),
			localtime(&tt));
		g_buf[i] = '\0';
		setvar_s(res, g_buf);
		break;

	/* mktime() of the date spec in as[0] */
	case B_mt:
		setvar_i(res, do_mktime(as[0]));
		break;

	/* regex match of as[0] against an[1]; note that res is replaced
	 * by the variable do_match() returns */
	case B_ma:
		res = do_match(an[1], as[0]);
		break;

	/* The three cases below share awk_sub(); they differ in which match
	 * is replaced (av[2]'th, all, first), in source/destination and in
	 * whether subexpression references are enabled. The first one stores
	 * the new string in res, the other two store the match count there.
	 */
	case B_ge:
		awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
		break;

	case B_gs:
		setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
		break;

	case B_su:
		setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
		break;
	}

	nvfree(tmpvars, 4);
#undef TMPVAR0
#undef TMPVAR1
#undef TMPVAR2
#undef TMPVAR3
#undef TMPVAR

	return res;
#undef tspl
}
2782 
2783 /* if expr looks like "var=value", perform assignment and return 1,
2784  * otherwise return 0 */
is_assignment(const char * expr)2785 static int is_assignment(const char *expr)
2786 {
2787 	char *exprc, *val;
2788 
2789 	val = (char*)endofname(expr);
2790 	if (val == (char*)expr || *val != '=') {
2791 		return FALSE;
2792 	}
2793 
2794 	exprc = xstrdup(expr);
2795 	val = exprc + (val - expr);
2796 	*val++ = '\0';
2797 
2798 	unescape_string_in_place(val);
2799 	setvar_u(newvar(exprc), val);
2800 	free(exprc);
2801 	return TRUE;
2802 }
2803 
2804 /* switch to next input file */
next_input_file(void)2805 static rstream *next_input_file(void)
2806 {
2807 #define rsm          (G.next_input_file__rsm)
2808 #define files_happen (G.next_input_file__files_happen)
2809 
2810 	const char *fname, *ind;
2811 
2812 	if (rsm.F)
2813 		fclose(rsm.F);
2814 	rsm.F = NULL;
2815 	rsm.pos = rsm.adv = 0;
2816 
2817 	for (;;) {
2818 		if (getvar_i(intvar[ARGIND])+1 >= getvar_i(intvar[ARGC])) {
2819 			if (files_happen)
2820 				return NULL;
2821 			fname = "-";
2822 			rsm.F = stdin;
2823 			break;
2824 		}
2825 		ind = getvar_s(incvar(intvar[ARGIND]));
2826 		fname = getvar_s(findvar(iamarray(intvar[ARGV]), ind));
2827 		if (fname && *fname && !is_assignment(fname)) {
2828 			rsm.F = xfopen_stdin(fname);
2829 			break;
2830 		}
2831 	}
2832 
2833 	files_happen = TRUE;
2834 	setvar_s(intvar[FILENAME], fname);
2835 	return &rsm;
2836 #undef rsm
2837 #undef files_happen
2838 }
2839 
2840 /*
2841  * Evaluate node - the heart of the program. Supplied with subtree
2842  * and "res" variable to assign the result to if we evaluate an expression.
2843  * If node refers to e.g. a variable or a field, no assignment happens.
2844  * Return ptr to the result (which may or may not be the "res" variable!)
2845  */
2846 #define XC(n) ((n) >> 8)
2847 
evaluate(node * op,var * res)2848 static var *evaluate(node *op, var *res)
2849 {
2850 /* This procedure is recursive so we should count every byte */
2851 #define fnargs (G.evaluate__fnargs)
2852 /* seed is initialized to 1 */
2853 #define seed   (G.evaluate__seed)
2854 #define sreg   (G.evaluate__sreg)
2855 
2856 	var *tmpvars;
2857 
2858 	if (!op)
2859 		return setvar_s(res, NULL);
2860 
2861 	debug_printf_eval("entered %s()\n", __func__);
2862 
2863 	tmpvars = nvalloc(2);
2864 #define TMPVAR0 (tmpvars)
2865 #define TMPVAR1 (tmpvars + 1)
2866 
2867 	while (op) {
2868 		struct {
2869 			var *v;
2870 			const char *s;
2871 		} L = L; /* for compiler */
2872 		struct {
2873 			var *v;
2874 			const char *s;
2875 		} R = R;
2876 		double L_d = L_d;
2877 		uint32_t opinfo;
2878 		int opn;
2879 		node *op1;
2880 
2881 		opinfo = op->info;
2882 		opn = (opinfo & OPNMASK);
2883 		g_lineno = op->lineno;
2884 		op1 = op->l.n;
2885 		debug_printf_eval("opinfo:%08x opn:%08x\n", opinfo, opn);
2886 
2887 		/* execute inevitable things */
2888 		if (opinfo & OF_RES1) {
2889 			if ((opinfo & OF_REQUIRED) && !op1)
2890 				syntax_error(EMSG_TOO_FEW_ARGS);
2891 			L.v = evaluate(op1, TMPVAR0);
2892 			if (opinfo & OF_STR1) {
2893 				L.s = getvar_s(L.v);
2894 				debug_printf_eval("L.s:'%s'\n", L.s);
2895 			}
2896 			if (opinfo & OF_NUM1) {
2897 				L_d = getvar_i(L.v);
2898 				debug_printf_eval("L_d:%f\n", L_d);
2899 			}
2900 		}
2901 		/* NB: Must get string/numeric values of L (done above)
2902 		 * _before_ evaluate()'ing R.v: if both L and R are $NNNs,
2903 		 * and right one is large, then L.v points to Fields[NNN1],
2904 		 * second evaluate() reallocates and moves (!) Fields[],
2905 		 * R.v points to Fields[NNN2] but L.v now points to freed mem!
2906 		 * (Seen trying to evaluate "$444 $44444")
2907 		 */
2908 		if (opinfo & OF_RES2) {
2909 			R.v = evaluate(op->r.n, TMPVAR1);
2910 			//TODO: L.v may be invalid now, set L.v to NULL to catch bugs?
2911 			//L.v = NULL;
2912 			if (opinfo & OF_STR2) {
2913 				R.s = getvar_s(R.v);
2914 				debug_printf_eval("R.s:'%s'\n", R.s);
2915 			}
2916 		}
2917 
2918 		debug_printf_eval("switch(0x%x)\n", XC(opinfo & OPCLSMASK));
2919 		switch (XC(opinfo & OPCLSMASK)) {
2920 
2921 		/* -- iterative node type -- */
2922 
2923 		/* test pattern */
2924 		case XC( OC_TEST ):
2925 			debug_printf_eval("TEST\n");
2926 			if (op1->info == TI_COMMA) {
2927 				/* it's range pattern */
2928 				if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2929 					op->info |= OF_CHECKED;
2930 					if (ptest(op1->r.n))
2931 						op->info &= ~OF_CHECKED;
2932 					op = op->a.n;
2933 				} else {
2934 					op = op->r.n;
2935 				}
2936 			} else {
2937 				op = ptest(op1) ? op->a.n : op->r.n;
2938 			}
2939 			break;
2940 
2941 		/* just evaluate an expression, also used as unconditional jump */
2942 		case XC( OC_EXEC ):
2943 			debug_printf_eval("EXEC\n");
2944 			break;
2945 
2946 		/* branch, used in if-else and various loops */
2947 		case XC( OC_BR ):
2948 			debug_printf_eval("BR\n");
2949 			op = istrue(L.v) ? op->a.n : op->r.n;
2950 			break;
2951 
2952 		/* initialize for-in loop */
2953 		case XC( OC_WALKINIT ):
2954 			debug_printf_eval("WALKINIT\n");
2955 			hashwalk_init(L.v, iamarray(R.v));
2956 			break;
2957 
2958 		/* get next array item */
2959 		case XC( OC_WALKNEXT ):
2960 			debug_printf_eval("WALKNEXT\n");
2961 			op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2962 			break;
2963 
2964 		case XC( OC_PRINT ):
2965 			debug_printf_eval("PRINT /\n");
2966 		case XC( OC_PRINTF ):
2967 			debug_printf_eval("PRINTF\n");
2968 		{
2969 			FILE *F = stdout;
2970 
2971 			if (op->r.n) {
2972 				rstream *rsm = newfile(R.s);
2973 				if (!rsm->F) {
2974 					if (opn == '|') {
2975 						rsm->F = popen(R.s, "w");
2976 						if (rsm->F == NULL)
2977 							bb_simple_perror_msg_and_die("popen");
2978 						rsm->is_pipe = 1;
2979 					} else {
2980 						rsm->F = xfopen(R.s, opn=='w' ? "w" : "a");
2981 					}
2982 				}
2983 				F = rsm->F;
2984 			}
2985 
2986 			/* Can't just check 'opinfo == OC_PRINT' here, parser ORs
2987 			 * additional bits to opinfos of print/printf with redirects
2988 			 */
2989 			if ((opinfo & OPCLSMASK) == OC_PRINT) {
2990 				if (!op1) {
2991 					fputs(getvar_s(intvar[F0]), F);
2992 				} else {
2993 					for (;;) {
2994 						var *v = evaluate(nextarg(&op1), TMPVAR0);
2995 						if (v->type & VF_NUMBER) {
2996 							fmt_num(getvar_s(intvar[OFMT]),
2997 									getvar_i(v));
2998 							fputs(g_buf, F);
2999 						} else {
3000 							fputs(getvar_s(v), F);
3001 						}
3002 						if (!op1)
3003 							break;
3004 						fputs(getvar_s(intvar[OFS]), F);
3005 					}
3006 				}
3007 				fputs(getvar_s(intvar[ORS]), F);
3008 			} else {	/* PRINTF */
3009 				IF_FEATURE_AWK_GNU_EXTENSIONS(size_t len;)
3010 				char *s = awk_printf(op1, &len);
3011 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3012 				fwrite(s, len, 1, F);
3013 #else
3014 				fputs(s, F);
3015 #endif
3016 				free(s);
3017 			}
3018 			fflush(F);
3019 			break;
3020 		}
3021 
3022 		case XC( OC_DELETE ):
3023 			debug_printf_eval("DELETE\n");
3024 		{
3025 			/* "delete" is special:
3026 			 * "delete array[var--]" must evaluate index expr only once.
3027 			 */
3028 			uint32_t info = op1->info & OPCLSMASK;
3029 			var *v;
3030 
3031 			if (info == OC_VAR) {
3032 				v = op1->l.v;
3033 			} else if (info == OC_FNARG) {
3034 				v = &fnargs[op1->l.aidx];
3035 			} else {
3036 				syntax_error(EMSG_NOT_ARRAY);
3037 			}
3038 			if (op1->r.n) { /* array ref? */
3039 				const char *s;
3040 				s = getvar_s(evaluate(op1->r.n, TMPVAR0));
3041 				hash_remove(iamarray(v), s);
3042 			} else {
3043 				clear_array(iamarray(v));
3044 			}
3045 			break;
3046 		}
3047 
3048 		case XC( OC_NEWSOURCE ):
3049 			debug_printf_eval("NEWSOURCE\n");
3050 			g_progname = op->l.new_progname;
3051 			break;
3052 
3053 		case XC( OC_RETURN ):
3054 			debug_printf_eval("RETURN\n");
3055 			copyvar(res, L.v);
3056 			break;
3057 
3058 		case XC( OC_NEXTFILE ):
3059 			debug_printf_eval("NEXTFILE\n");
3060 			nextfile = TRUE;
3061 		case XC( OC_NEXT ):
3062 			debug_printf_eval("NEXT\n");
3063 			nextrec = TRUE;
3064 		case XC( OC_DONE ):
3065 			debug_printf_eval("DONE\n");
3066 			clrvar(res);
3067 			break;
3068 
3069 		case XC( OC_EXIT ):
3070 			debug_printf_eval("EXIT\n");
3071 			if (op1)
3072 				G.exitcode = (int)L_d;
3073 			awk_exit();
3074 
3075 		/* -- recursive node type -- */
3076 
3077 		case XC( OC_VAR ):
3078 			debug_printf_eval("VAR\n");
3079 			L.v = op->l.v;
3080 			if (L.v == intvar[NF])
3081 				split_f0();
3082 			goto v_cont;
3083 
3084 		case XC( OC_FNARG ):
3085 			debug_printf_eval("FNARG[%d]\n", op->l.aidx);
3086 			L.v = &fnargs[op->l.aidx];
3087  v_cont:
3088 			res = op->r.n ? findvar(iamarray(L.v), R.s) : L.v;
3089 			break;
3090 
3091 		case XC( OC_IN ):
3092 			debug_printf_eval("IN\n");
3093 			setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
3094 			break;
3095 
3096 		case XC( OC_REGEXP ):
3097 			debug_printf_eval("REGEXP\n");
3098 			op1 = op;
3099 			L.s = getvar_s(intvar[F0]);
3100 			goto re_cont;
3101 
3102 		case XC( OC_MATCH ):
3103 			debug_printf_eval("MATCH\n");
3104 			op1 = op->r.n;
3105  re_cont:
3106 			{
3107 				regex_t *re = as_regex(op1, &sreg);
3108 				int i = regexec(re, L.s, 0, NULL, 0);
3109 				if (re == &sreg)
3110 					regfree(re);
3111 				setvar_i(res, (i == 0) ^ (opn == '!'));
3112 			}
3113 			break;
3114 
3115 		case XC( OC_MOVE ):
3116 			debug_printf_eval("MOVE\n");
			/* if source is a temporary string, just relink it to dest */
3118 			if (R.v == TMPVAR1
3119 			 && !(R.v->type & VF_NUMBER)
3120 				/* Why check !NUMBER? if R.v is a number but has cached R.v->string,
3121 				 * L.v ends up a string, which is wrong */
3122 			 /*&& R.v->string - always not NULL (right?) */
3123 			) {
3124 				res = setvar_p(L.v, R.v->string); /* avoids strdup */
3125 				R.v->string = NULL;
3126 			} else {
3127 				res = copyvar(L.v, R.v);
3128 			}
3129 			break;
3130 
3131 		case XC( OC_TERNARY ):
3132 			debug_printf_eval("TERNARY\n");
3133 			if (op->r.n->info != TI_COLON)
3134 				syntax_error(EMSG_POSSIBLE_ERROR);
3135 			res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
3136 			break;
3137 
3138 		case XC( OC_FUNC ): {
3139 			var *argvars, *sv_fnargs;
3140 			const char *sv_progname;
3141 			int nargs, i;
3142 
3143 			debug_printf_eval("FUNC\n");
3144 
3145 			if (!op->r.f->defined)
3146 				syntax_error(EMSG_UNDEF_FUNC);
3147 
3148 			/* The body might be empty, still has to eval the args */
3149 			nargs = op->r.f->nargs;
3150 			argvars = nvalloc(nargs);
3151 			i = 0;
3152 			while (op1) {
3153 				var *arg = evaluate(nextarg(&op1), TMPVAR0);
3154 				if (i == nargs) {
3155 					/* call with more arguments than function takes.
3156 					 * (gawk warns: "warning: function 'f' called with more arguments than declared").
3157 					 * They are still evaluated, but discarded: */
3158 					clrvar(arg);
3159 					continue;
3160 				}
3161 				copyvar(&argvars[i], arg);
3162 				argvars[i].type |= VF_CHILD;
3163 				argvars[i].x.parent = arg;
3164 				i++;
3165 			}
3166 
3167 			sv_fnargs = fnargs;
3168 			sv_progname = g_progname;
3169 
3170 			fnargs = argvars;
3171 			res = evaluate(op->r.f->body.first, res);
3172 			nvfree(argvars, nargs);
3173 
3174 			g_progname = sv_progname;
3175 			fnargs = sv_fnargs;
3176 
3177 			break;
3178 		}
3179 
3180 		case XC( OC_GETLINE ):
3181 			debug_printf_eval("GETLINE /\n");
3182 		case XC( OC_PGETLINE ):
3183 			debug_printf_eval("PGETLINE\n");
3184 		{
3185 			rstream *rsm;
3186 			int i;
3187 
3188 			if (op1) {
3189 				rsm = newfile(L.s);
3190 				if (!rsm->F) {
3191 					/* NB: can't use "opinfo == TI_PGETLINE", would break "cmd" | getline */
3192 					if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
3193 						rsm->F = popen(L.s, "r");
3194 						rsm->is_pipe = TRUE;
3195 					} else {
3196 						rsm->F = fopen_for_read(L.s);  /* not xfopen! */
3197 					}
3198 				}
3199 			} else {
3200 				if (!iF)
3201 					iF = next_input_file();
3202 				rsm = iF;
3203 			}
3204 
3205 			if (!rsm || !rsm->F) {
3206 				setvar_i(intvar[ERRNO], errno);
3207 				setvar_i(res, -1);
3208 				break;
3209 			}
3210 
3211 			if (!op->r.n)
3212 				R.v = intvar[F0];
3213 
3214 			i = awk_getline(rsm, R.v);
3215 			if (i > 0 && !op1) {
3216 				incvar(intvar[FNR]);
3217 				incvar(intvar[NR]);
3218 			}
3219 			setvar_i(res, i);
3220 			break;
3221 		}
3222 
3223 		/* simple builtins */
3224 		case XC( OC_FBLTIN ): {
3225 			double R_d = R_d; /* for compiler */
3226 			debug_printf_eval("FBLTIN\n");
3227 
3228 			if (op1 && op1->info == TI_COMMA)
3229 				/* Simple builtins take one arg maximum */
3230 				syntax_error("Too many arguments");
3231 
3232 			switch (opn) {
3233 			case F_in:
3234 				R_d = (long long)L_d;
3235 				break;
3236 
3237 			case F_rn: /*rand*/
3238 				if (op1)
3239 					syntax_error("Too many arguments");
3240 			{
3241 #if RAND_MAX >= 0x7fffffff
3242 				uint32_t u = ((uint32_t)rand() << 16) ^ rand();
3243 				uint64_t v = ((uint64_t)rand() << 32) | u;
3244 				/* the above shift+or is optimized out on 32-bit arches */
3245 # if RAND_MAX > 0x7fffffff
3246 				v &= 0x7fffffffffffffffULL;
3247 # endif
3248 				R_d = (double)v / 0x8000000000000000ULL;
3249 #else
3250 # error Not implemented for this value of RAND_MAX
3251 #endif
3252 				break;
3253 			}
3254 			case F_co:
3255 				if (ENABLE_FEATURE_AWK_LIBM) {
3256 					R_d = cos(L_d);
3257 					break;
3258 				}
3259 
3260 			case F_ex:
3261 				if (ENABLE_FEATURE_AWK_LIBM) {
3262 					R_d = exp(L_d);
3263 					break;
3264 				}
3265 
3266 			case F_lg:
3267 				if (ENABLE_FEATURE_AWK_LIBM) {
3268 					R_d = log(L_d);
3269 					break;
3270 				}
3271 
3272 			case F_si:
3273 				if (ENABLE_FEATURE_AWK_LIBM) {
3274 					R_d = sin(L_d);
3275 					break;
3276 				}
3277 
3278 			case F_sq:
3279 				if (ENABLE_FEATURE_AWK_LIBM) {
3280 					R_d = sqrt(L_d);
3281 					break;
3282 				}
3283 
3284 				syntax_error(EMSG_NO_MATH);
3285 				break;
3286 
3287 			case F_sr:
3288 				R_d = (double)seed;
3289 				seed = op1 ? (unsigned)L_d : (unsigned)time(NULL);
3290 				srand(seed);
3291 				break;
3292 
3293 			case F_ti: /*systime*/
3294 				if (op1)
3295 					syntax_error("Too many arguments");
3296 				R_d = time(NULL);
3297 				break;
3298 
3299 			case F_le:
3300 				debug_printf_eval("length: L.s:'%s'\n", L.s);
3301 				if (!op1) {
3302 					L.s = getvar_s(intvar[F0]);
3303 					debug_printf_eval("length: L.s='%s'\n", L.s);
3304 				}
3305 				else if (L.v->type & VF_ARRAY) {
3306 					R_d = L.v->x.array->nel;
3307 					debug_printf_eval("length: array_len:%d\n", L.v->x.array->nel);
3308 					break;
3309 				}
3310 				R_d = strlen(L.s);
3311 				break;
3312 
3313 			case F_sy:
3314 				fflush_all();
3315 				R_d = (ENABLE_FEATURE_ALLOW_EXEC && L.s && *L.s)
3316 						? (system(L.s) >> 8) : 0;
3317 				break;
3318 
3319 			case F_ff:
3320 				if (!op1) {
3321 					fflush(stdout);
3322 				} else if (L.s && *L.s) {
3323 					rstream *rsm = newfile(L.s);
3324 					fflush(rsm->F);
3325 				} else {
3326 					fflush_all();
3327 				}
3328 				break;
3329 
3330 			case F_cl: {
3331 				rstream *rsm;
3332 				int err = 0;
3333 				rsm = (rstream *)hash_search(fdhash, L.s);
3334 				debug_printf_eval("OC_FBLTIN close: op1:%p s:'%s' rsm:%p\n", op1, L.s, rsm);
3335 				if (rsm) {
3336 					debug_printf_eval("OC_FBLTIN F_cl "
3337 						"rsm->is_pipe:%d, ->F:%p\n",
3338 						rsm->is_pipe, rsm->F);
3339 					/* Can be NULL if open failed. Example:
3340 					 * getline line <"doesnt_exist";
3341 					 * close("doesnt_exist"); <--- here rsm->F is NULL
3342 					 */
3343 					if (rsm->F)
3344 						err = rsm->is_pipe ? pclose(rsm->F) : fclose(rsm->F);
3345 //TODO: fix this case:
3346 // $ awk 'BEGIN { print close(""); print ERRNO }'
3347 // -1
3348 // close of redirection that was never opened
3349 // (we print 0, 0)
3350 					free(rsm->buffer);
3351 					hash_remove(fdhash, L.s);
3352 				}
3353 				if (err)
3354 					setvar_i(intvar[ERRNO], errno);
3355 				R_d = (double)err;
3356 				break;
3357 			}
3358 			} /* switch */
3359 			setvar_i(res, R_d);
3360 			break;
3361 		}
3362 
3363 		case XC( OC_BUILTIN ):
3364 			debug_printf_eval("BUILTIN\n");
3365 			res = exec_builtin(op, res);
3366 			break;
3367 
3368 		case XC( OC_SPRINTF ):
3369 			debug_printf_eval("SPRINTF\n");
3370 			setvar_p(res, awk_printf(op1, NULL));
3371 			break;
3372 
3373 		case XC( OC_UNARY ):
3374 			debug_printf_eval("UNARY\n");
3375 		{
3376 			double Ld, R_d;
3377 
3378 			Ld = R_d = getvar_i(R.v);
3379 			switch (opn) {
3380 			case 'P':
3381 				Ld = ++R_d;
3382 				goto r_op_change;
3383 			case 'p':
3384 				R_d++;
3385 				goto r_op_change;
3386 			case 'M':
3387 				Ld = --R_d;
3388 				goto r_op_change;
3389 			case 'm':
3390 				R_d--;
3391  r_op_change:
3392 				setvar_i(R.v, R_d);
3393 				break;
3394 			case '!':
3395 				Ld = !istrue(R.v);
3396 				break;
3397 			case '-':
3398 				Ld = -R_d;
3399 				break;
3400 			}
3401 			setvar_i(res, Ld);
3402 			break;
3403 		}
3404 
3405 		case XC( OC_FIELD ):
3406 			debug_printf_eval("FIELD\n");
3407 		{
3408 			int i = (int)getvar_i(R.v);
3409 			if (i < 0)
3410 				syntax_error(EMSG_NEGATIVE_FIELD);
3411 			if (i == 0) {
3412 				res = intvar[F0];
3413 			} else {
3414 				split_f0();
3415 				if (i > nfields)
3416 					fsrealloc(i);
3417 				res = &Fields[i - 1];
3418 			}
3419 			break;
3420 		}
3421 
3422 		/* concatenation (" ") and index joining (",") */
3423 		case XC( OC_CONCAT ):
3424 			debug_printf_eval("CONCAT /\n");
3425 		case XC( OC_COMMA ): {
3426 			const char *sep = "";
3427 			debug_printf_eval("COMMA\n");
3428 			if (opinfo == TI_COMMA)
3429 				sep = getvar_s(intvar[SUBSEP]);
3430 			setvar_p(res, xasprintf("%s%s%s", L.s, sep, R.s));
3431 			break;
3432 		}
3433 
3434 		case XC( OC_LAND ):
3435 			debug_printf_eval("LAND\n");
3436 			setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
3437 			break;
3438 
3439 		case XC( OC_LOR ):
3440 			debug_printf_eval("LOR\n");
3441 			setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
3442 			break;
3443 
3444 		case XC( OC_BINARY ):
3445 			debug_printf_eval("BINARY /\n");
3446 		case XC( OC_REPLACE ):
3447 			debug_printf_eval("REPLACE\n");
3448 		{
3449 			double R_d = getvar_i(R.v);
3450 			debug_printf_eval("R_d:%f opn:%c\n", R_d, opn);
3451 			switch (opn) {
3452 			case '+':
3453 				L_d += R_d;
3454 				break;
3455 			case '-':
3456 				L_d -= R_d;
3457 				break;
3458 			case '*':
3459 				L_d *= R_d;
3460 				break;
3461 			case '/':
3462 				if (R_d == 0)
3463 					syntax_error(EMSG_DIV_BY_ZERO);
3464 				L_d /= R_d;
3465 				break;
3466 			case '&':
3467 				if (ENABLE_FEATURE_AWK_LIBM)
3468 					L_d = pow(L_d, R_d);
3469 				else
3470 					syntax_error(EMSG_NO_MATH);
3471 				break;
3472 			case '%':
3473 				if (R_d == 0)
3474 					syntax_error(EMSG_DIV_BY_ZERO);
3475 				L_d -= (long long)(L_d / R_d) * R_d;
3476 				break;
3477 			}
3478 			debug_printf_eval("BINARY/REPLACE result:%f\n", L_d);
3479 			res = setvar_i(((opinfo & OPCLSMASK) == OC_BINARY) ? res : L.v, L_d);
3480 			break;
3481 		}
3482 
3483 		case XC( OC_COMPARE ): {
3484 			int i = i; /* for compiler */
3485 			double Ld;
3486 			debug_printf_eval("COMPARE\n");
3487 
3488 			if (is_numeric(L.v) && is_numeric(R.v)) {
3489 				Ld = getvar_i(L.v) - getvar_i(R.v);
3490 			} else {
3491 				const char *l = getvar_s(L.v);
3492 				const char *r = getvar_s(R.v);
3493 				Ld = icase ? strcasecmp(l, r) : strcmp(l, r);
3494 			}
3495 			switch (opn & 0xfe) {
3496 			case 0:
3497 				i = (Ld > 0);
3498 				break;
3499 			case 2:
3500 				i = (Ld >= 0);
3501 				break;
3502 			case 4:
3503 				i = (Ld == 0);
3504 				break;
3505 			}
3506 			setvar_i(res, (i == 0) ^ (opn & 1));
3507 			break;
3508 		}
3509 
3510 		default:
3511 			syntax_error(EMSG_POSSIBLE_ERROR);
3512 		} /* switch */
3513 
3514 		if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
3515 			op = op->a.n;
3516 		if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
3517 			break;
3518 		if (nextrec)
3519 			break;
3520 	} /* while (op) */
3521 
3522 	nvfree(tmpvars, 2);
3523 #undef TMPVAR0
3524 #undef TMPVAR1
3525 
3526 	debug_printf_eval("returning from %s(): %p\n", __func__, res);
3527 	return res;
3528 #undef fnargs
3529 #undef seed
3530 #undef sreg
3531 }
3532 
3533 /* -------- main & co. -------- */
3534 
awk_exit(void)3535 static int awk_exit(void)
3536 {
3537 	unsigned i;
3538 
3539 	if (!exiting) {
3540 		exiting = TRUE;
3541 		nextrec = FALSE;
3542 		evaluate(endseq.first, &G.exit__tmpvar);
3543 	}
3544 
3545 	/* waiting for children */
3546 	for (i = 0; i < fdhash->csize; i++) {
3547 		hash_item *hi;
3548 		hi = fdhash->items[i];
3549 		while (hi) {
3550 			if (hi->data.rs.F && hi->data.rs.is_pipe)
3551 				pclose(hi->data.rs.F);
3552 			hi = hi->next;
3553 		}
3554 	}
3555 
3556 	exit(G.exitcode);
3557 }
3558 
3559 int awk_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
awk_main(int argc UNUSED_PARAM,char ** argv)3560 int awk_main(int argc UNUSED_PARAM, char **argv)
3561 {
3562 	unsigned opt;
3563 	char *opt_F;
3564 	llist_t *list_v = NULL;
3565 	llist_t *list_f = NULL;
3566 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3567 	llist_t *list_e = NULL;
3568 #endif
3569 	int i;
3570 
3571 	INIT_G();
3572 
3573 	/* Undo busybox.c, or else strtod may eat ','! This breaks parsing:
3574 	 * $1,$2 == '$1,' '$2', NOT '$1' ',' '$2' */
3575 	if (ENABLE_LOCALE_SUPPORT)
3576 		setlocale(LC_NUMERIC, "C");
3577 
3578 	/* initialize variables */
3579 	vhash = hash_init();
3580 	{
3581 		char *vnames = (char *)vNames; /* cheat */
3582 		char *vvalues = (char *)vValues;
3583 		for (i = 0; *vnames; i++) {
3584 			var *v;
3585 			intvar[i] = v = newvar(nextword(&vnames));
3586 			if (*vvalues != '\377')
3587 				setvar_s(v, nextword(&vvalues));
3588 			else
3589 				setvar_i(v, 0);
3590 
3591 			if (*vnames == '*') {
3592 				v->type |= VF_SPECIAL;
3593 				vnames++;
3594 			}
3595 		}
3596 	}
3597 
3598 	handle_special(intvar[FS]);
3599 	handle_special(intvar[RS]);
3600 
3601 	/* Huh, people report that sometimes environ is NULL. Oh well. */
3602 	if (environ) {
3603 		char **envp;
3604 		for (envp = environ; *envp; envp++) {
3605 			/* environ is writable, thus we don't strdup it needlessly */
3606 			char *s = *envp;
3607 			char *s1 = strchr(s, '=');
3608 			if (s1) {
3609 				*s1 = '\0';
3610 				/* Both findvar and setvar_u take const char*
3611 				 * as 2nd arg -> environment is not trashed */
3612 				setvar_u(findvar(iamarray(intvar[ENVIRON]), s), s1 + 1);
3613 				*s1 = '=';
3614 			}
3615 		}
3616 	}
3617 	opt = getopt32(argv, OPTSTR_AWK, &opt_F, &list_v, &list_f, IF_FEATURE_AWK_GNU_EXTENSIONS(&list_e,) NULL);
3618 	argv += optind;
3619 	//argc -= optind;
3620 	if (opt & OPT_W)
3621 		bb_simple_error_msg("warning: option -W is ignored");
3622 	if (opt & OPT_F) {
3623 		unescape_string_in_place(opt_F);
3624 		setvar_s(intvar[FS], opt_F);
3625 	}
3626 	while (list_v) {
3627 		if (!is_assignment(llist_pop(&list_v)))
3628 			bb_show_usage();
3629 	}
3630 
3631 	/* Parse all supplied programs */
3632 	fnhash = hash_init();
3633 	ahash = hash_init();
3634 	while (list_f) {
3635 		int fd;
3636 		char *s;
3637 
3638 		g_progname = llist_pop(&list_f);
3639 		fd = xopen_stdin(g_progname);
3640 		s = xmalloc_read(fd, NULL); /* it's NUL-terminated */
3641 		close(fd);
3642 		parse_program(s);
3643 		free(s);
3644 	}
3645 	g_progname = "cmd. line";
3646 #if ENABLE_FEATURE_AWK_GNU_EXTENSIONS
3647 	while (list_e) {
3648 		parse_program(llist_pop(&list_e));
3649 	}
3650 #endif
3651 //FIXME: preserve order of -e and -f
3652 //TODO: implement -i LIBRARY and -E FILE too, they are easy-ish
3653 	if (!(opt & (OPT_f | OPT_e))) {
3654 		if (!*argv)
3655 			bb_show_usage();
3656 		parse_program(*argv++);
3657 	}
3658 	/* Free unused parse structures */
3659 	//hash_free(fnhash); // ~250 bytes when empty, used only for function names
3660 	//^^^^^^^^^^^^^^^^^ does not work, hash_clear() inside SEGVs
3661 	// (IOW: hash_clear() assumes it's a hash of variables. fnhash is not).
3662 	free(fnhash->items);
3663 	free(fnhash);
3664 	fnhash = NULL; // debug
3665 	//hash_free(ahash); // empty after parsing, will reuse as fdhash instead of freeing
3666 
3667 	/* Parsing done, on to executing */
3668 
3669 	/* fill in ARGV array */
3670 	setari_u(intvar[ARGV], 0, "awk");
3671 	i = 0;
3672 	while (*argv)
3673 		setari_u(intvar[ARGV], ++i, *argv++);
3674 	setvar_i(intvar[ARGC], i + 1);
3675 
3676 	//fdhash = ahash; // done via define
3677 	newfile("/dev/stdin")->F = stdin;
3678 	newfile("/dev/stdout")->F = stdout;
3679 	newfile("/dev/stderr")->F = stderr;
3680 
3681 	evaluate(beginseq.first, &G.main__tmpvar);
3682 	if (!mainseq.first && !endseq.first)
3683 		awk_exit();
3684 
3685 	/* input file could already be opened in BEGIN block */
3686 	if (!iF)
3687 		iF = next_input_file();
3688 
3689 	/* passing through input files */
3690 	while (iF) {
3691 		nextfile = FALSE;
3692 		setvar_i(intvar[FNR], 0);
3693 
3694 		while ((i = awk_getline(iF, intvar[F0])) > 0) {
3695 			nextrec = FALSE;
3696 			incvar(intvar[NR]);
3697 			incvar(intvar[FNR]);
3698 			evaluate(mainseq.first, &G.main__tmpvar);
3699 
3700 			if (nextfile)
3701 				break;
3702 		}
3703 
3704 		if (i < 0)
3705 			syntax_error(strerror(errno));
3706 
3707 		iF = next_input_file();
3708 	}
3709 
3710 	awk_exit();
3711 	/*return 0;*/
3712 }
3713