1 /* vi: set sw=4 ts=4: */
2 /*
3  * reformime: parse MIME-encoded message
4  *
5  * Copyright (C) 2008 by Vladimir Dronnikov <dronnikov@gmail.com>
6  *
7  * Licensed under GPLv2, see file LICENSE in this source tree.
8  */
9 //config:config REFORMIME
10 //config:	bool "reformime (7.5 kb)"
11 //config:	default y
12 //config:	help
13 //config:	Parse MIME-formatted messages.
14 //config:
15 //config:config FEATURE_REFORMIME_COMPAT
16 //config:	bool "Accept and ignore options other than -x and -X"
17 //config:	default y
18 //config:	depends on REFORMIME
19 //config:	help
20 //config:	Accept (for compatibility only) and ignore options
21 //config:	other than -x and -X.
22 
23 //applet:IF_REFORMIME(APPLET(reformime, BB_DIR_BIN, BB_SUID_DROP))
24 
25 //kbuild:lib-$(CONFIG_REFORMIME) += reformime.o mail.o
26 
27 #include "libbb.h"
28 #include "mail.h"
29 
/*
 * Debug tracing switch: change "#if 0" to "#if 1" to route dbg_error_msg()
 * to bb_error_msg(). When disabled, the macro expands to a no-op expression
 * and its arguments are discarded by the preprocessor (they are not compiled).
 */
#if 0
# define dbg_error_msg(...) bb_error_msg(__VA_ARGS__)
#else
# define dbg_error_msg(...) ((void)0)
#endif
35 
/*
 * Look up the value that follows `key` in a NULL-terminated token list.
 *
 * The list is scanned from the start, one slot at a time, comparing each
 * entry to `key` case-insensitively. On the first match the entry right
 * after it is the result; if that entry is the terminating NULL (or no
 * entry matches at all), `defvalue` is returned instead.
 */
static const char *find_token(const char *const string_array[], const char *key, const char *defvalue)
{
	const char *const *tok;

	for (tok = string_array; *tok != NULL; tok++) {
		if (strcasecmp(*tok, key) != 0)
			continue;
		/* First match wins; its value slot may be the NULL terminator */
		return tok[1] ? tok[1] : defvalue;
	}
	return defvalue;
}
48 
/*
 * Same lookup as find_token(), but a missing key is fatal: the process
 * exits via bb_error_msg_and_die() with "not found: '<key>'".
 */
static const char *xfind_token(const char *const string_array[], const char *key)
{
	const char *value = find_token(string_array, key, NULL);

	if (!value)
		bb_error_msg_and_die("not found: '%s'", key);
	return value;
}
56 
/*
 * Option bits, in the same order as the letters of the option string
 * handed to getopt32() in reformime_main(). Only OPT_x and OPT_X are
 * tested by this file; the remaining bits exist when
 * FEATURE_REFORMIME_COMPAT is enabled so that those options are accepted.
 */
enum {
	OPT_x = (1 << 0),	/* -x: extract MIME section to a file */
	OPT_X = (1 << 1),	/* -X: pipe MIME section to a program */
#if ENABLE_FEATURE_REFORMIME_COMPAT
	OPT_d = (1 << 2),	/* -d: parse a delivery status notification */
	OPT_e = (1 << 3),	/* -e: extract contents of MIME section */
	OPT_i = (1 << 4),	/* -i: show MIME info */
	OPT_s = (1 << 5),	/* -s n.n.n.n: specify MIME section */
	OPT_r = (1 << 6),	/* -r: rewrite message */
	OPT_c = (1 << 7),	/* -c charset: default charset */
	OPT_m = (1 << 8),	/* -m: create a MIME message digest */
	OPT_h = (1 << 9),	/* -h: decode RFC 2047-encoded header */
	OPT_o = (1 << 10),	/* -o: encode unstructured header */
	OPT_O = (1 << 11),	/* -O: encode address list header */
#endif
};
73 
parse(const char * boundary,char ** argv)74 static int parse(const char *boundary, char **argv)
75 {
76 	int boundary_len = strlen(boundary);
77 	char uniq[sizeof("%%llu.%u") + sizeof(int)*3];
78 
79 	dbg_error_msg("BOUNDARY[%s]", boundary);
80 
81 	// prepare unique string pattern
82 	sprintf(uniq, "%%llu.%u", (unsigned)getpid());
83 	dbg_error_msg("UNIQ[%s]", uniq);
84 
85 	while (1) {
86 		char *header;
87 		const char *tokens[32]; /* 32 is enough */
88 		const char *type;
89 
90 		/* Read the header (everything up to two \n) */
91 		{
92 			unsigned header_idx = 0;
93 			int last_ch = 0;
94 			header = NULL;
95 			while (1) {
96 				int ch = fgetc(stdin);
97 				if (ch == '\r') /* Support both line endings */
98 					continue;
99 				if (ch == EOF)
100 					break;
101 				if (ch == '\n' && last_ch == ch)
102 					break;
103 				if (!(header_idx & 0xff))
104 					header = xrealloc(header, header_idx + 0x101);
105 				header[header_idx++] = last_ch = ch;
106 			}
107 			if (!header) {
108 				dbg_error_msg("EOF");
109 				break;
110 			}
111 			header[header_idx] = '\0';
112 			dbg_error_msg("H:'%s'", p);
113 		}
114 
115 		/* Split to tokens */
116 		{
117 			char *s, *p;
118 			char *tokstate;
119 			unsigned ntokens;
120 			const char *delims = ";=\" \t\n";
121 
122 			/* Skip to last Content-Type: */
123 			s = p = header;
124 			while ((p = strchr(p, '\n')) != NULL) {
125 				p++;
126 				if (strncasecmp(p, "Content-Type:", sizeof("Content-Type:")-1) == 0)
127 					s = p;
128 			}
129 			dbg_error_msg("L:'%s'", p);
130 			ntokens = 0;
131 			s = strtok_r(s, delims, &tokstate);
132 			while (s) {
133 				tokens[ntokens] = s;
134 				if (ntokens < ARRAY_SIZE(tokens) - 1)
135 					ntokens++;
136 				dbg_error_msg("L[%d]='%s'", ntokens, s);
137 				s = strtok_r(NULL, delims, &tokstate);
138 			}
139 			tokens[ntokens] = NULL;
140 			dbg_error_msg("EMPTYLINE, ntokens:%d", ntokens);
141 			if (ntokens == 0)
142 				break;
143 		}
144 
145 		/* Is it multipart? */
146 		type = find_token(tokens, "Content-Type:", "text/plain");
147 		dbg_error_msg("TYPE:'%s'", type);
148 		if (0 == strncasecmp(type, "multipart/", 10)) {
149 			/* Yes, recurse */
150 			if (strcasecmp(type + 10, "mixed") != 0)
151 				bb_error_msg_and_die("no support of content type '%s'", type);
152 			parse(xfind_token(tokens, "boundary"), argv);
153 		} else {
154 			/* No, process one non-multipart section */
155 			char *end;
156 			pid_t pid = pid;
157 			FILE *fp;
158 
159 			const char *charset = find_token(tokens, "charset", CONFIG_FEATURE_MIME_CHARSET);
160 			const char *encoding = find_token(tokens, "Content-Transfer-Encoding:", "7bit");
161 
162 			/* Compose target filename */
163 			char *filename = (char *)find_token(tokens, "filename", NULL);
164 			if (!filename)
165 				filename = xasprintf(uniq, monotonic_us());
166 			else
167 				filename = bb_get_last_path_component_strip(xstrdup(filename));
168 
169 			if (option_mask32 & OPT_X) {
170 				int fd[2];
171 
172 				/* start external helper */
173 				xpipe(fd);
174 				pid = vfork();
175 				if (0 == pid) {
176 					/* child reads from fd[0] */
177 					close(fd[1]);
178 					xmove_fd(fd[0], STDIN_FILENO);
179 					xsetenv("CONTENT_TYPE", type);
180 					xsetenv("CHARSET", charset);
181 					xsetenv("ENCODING", encoding);
182 					xsetenv("FILENAME", filename);
183 					BB_EXECVP_or_die(argv);
184 				}
185 				/* parent will write to fd[1] */
186 				close(fd[0]);
187 				fp = xfdopen_for_write(fd[1]);
188 				signal(SIGPIPE, SIG_IGN);
189 			} else {
190 				/* write to file */
191 				char *fname = xasprintf("%s%s", *argv, filename);
192 				fp = xfopen_for_write(fname);
193 				free(fname);
194 			}
195 			free(filename);
196 
197 			/* write to fp */
198 			end = NULL;
199 			if (0 == strcasecmp(encoding, "base64")) {
200 				read_base64(stdin, fp, '-');
201 			} else
202 			if (0 != strcasecmp(encoding, "7bit")
203 			 && 0 != strcasecmp(encoding, "8bit")
204 			) {
205 				/* quoted-printable, binary, user-defined are unsupported so far */
206 				bb_error_msg_and_die("encoding '%s' not supported", encoding);
207 			} else {
208 				/* plain 7bit or 8bit */
209 				while ((end = xmalloc_fgets(stdin)) != NULL) {
210 					if ('-' == end[0]
211 					 && '-' == end[1]
212 					 && strncmp(end + 2, boundary, boundary_len) == 0
213 					) {
214 						break;
215 					}
216 					fputs(end, fp);
217 				}
218 			}
219 			fclose(fp);
220 
221 			/* Wait for child */
222 			if (option_mask32 & OPT_X) {
223 				int rc;
224 				signal(SIGPIPE, SIG_DFL);
225 				rc = (wait4pid(pid) & 0xff);
226 				if (rc != 0)
227 					return rc + 20;
228 			}
229 
230 			/* Multipart ended? */
231 			if (end && '-' == end[2 + boundary_len] && '-' == end[2 + boundary_len + 1]) {
232 				dbg_error_msg("FINISHED MPART:'%s'", end);
233 				break;
234 			}
235 			dbg_error_msg("FINISHED:'%s'", end);
236 			free(end);
237 		} /* end of "handle one non-multipart block" */
238 
239 		free(header);
240 	} /* while (1) */
241 
242 	dbg_error_msg("ENDPARSE[%s]", boundary);
243 
244 	return EXIT_SUCCESS;
245 }
246 
247 //usage:#define reformime_trivial_usage
248 //usage:       "[OPTIONS]"
249 //usage:#define reformime_full_usage "\n\n"
250 //usage:       "Parse MIME-encoded message on stdin\n"
251 //usage:     "\n	-x PREFIX	Extract content of MIME sections to files"
252 //usage:     "\n	-X PROG ARGS	Filter content of MIME sections through PROG"
253 //usage:     "\n			Must be the last option"
254 //usage:     "\n"
255 //usage:     "\nOther options are silently ignored"
256 
257 /*
258 Usage: reformime [options]
259     -d - parse a delivery status notification.
260     -e - extract contents of MIME section.
261     -x - extract MIME section to a file.
262     -X - pipe MIME section to a program.
263     -i - show MIME info.
264     -s n.n.n.n - specify MIME section.
265     -r - rewrite message, filling in missing MIME headers.
266     -r7 - also convert 8bit/raw encoding to quoted-printable, if possible.
267     -r8 - also convert quoted-printable encoding to 8bit, if possible.
268     -c charset - default charset for rewriting, -o, and -O.
269     -m [file] [file]... - create a MIME message digest.
270     -h "header" - decode RFC 2047-encoded header.
271     -o "header" - encode unstructured header using RFC 2047.
272     -O "header" - encode address list header using RFC 2047.
273 */
274 
275 int reformime_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
reformime_main(int argc UNUSED_PARAM,char ** argv)276 int reformime_main(int argc UNUSED_PARAM, char **argv)
277 {
278 	unsigned opts;
279 	const char *opt_prefix = "";
280 
281 	INIT_G();
282 
283 	// parse options
284 	// N.B. only -x and -X are supported so far
285 	opts = getopt32(argv, "^"
286 		"x:X" IF_FEATURE_REFORMIME_COMPAT("deis:r:c:m:*h:o:O:")
287 		"\0" "x--X:X--x",
288 		&opt_prefix
289 		IF_FEATURE_REFORMIME_COMPAT(, NULL, NULL, &G.opt_charset, NULL, NULL, NULL, NULL)
290 	);
291 	argv += optind;
292 
293 	return parse("", (opts & OPT_X) ? argv : (char **)&opt_prefix);
294 }
295