1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <stddef.h>
4 #include <sys/epoll.h>
5 #include <sys/mman.h>
6 #include <sys/statvfs.h>
7 #include <unistd.h>
8 
9 #include "alloc-util.h"
10 #include "fd-util.h"
11 #include "fs-util.h"
12 #include "io-util.h"
13 #include "journal-importer.h"
14 #include "journal-util.h"
15 #include "journald-console.h"
16 #include "journald-kmsg.h"
17 #include "journald-native.h"
18 #include "journald-server.h"
19 #include "journald-syslog.h"
20 #include "journald-wall.h"
21 #include "memfd-util.h"
22 #include "memory-util.h"
23 #include "parse-util.h"
24 #include "path-util.h"
25 #include "process-util.h"
26 #include "selinux-util.h"
27 #include "socket-util.h"
28 #include "string-util.h"
29 #include "strv.h"
30 #include "unaligned.h"
31 
allow_object_pid(const struct ucred * ucred)32 static bool allow_object_pid(const struct ucred *ucred) {
33         return ucred && ucred->uid == 0;
34 }
35 
server_process_entry_meta(const char * p,size_t l,const struct ucred * ucred,int * priority,char ** identifier,char ** message,pid_t * object_pid)36 static void server_process_entry_meta(
37                 const char *p, size_t l,
38                 const struct ucred *ucred,
39                 int *priority,
40                 char **identifier,
41                 char **message,
42                 pid_t *object_pid) {
43 
44         /* We need to determine the priority of this entry for the rate limiting logic */
45 
46         if (l == 10 &&
47             startswith(p, "PRIORITY=") &&
48             p[9] >= '0' && p[9] <= '9')
49                 *priority = (*priority & LOG_FACMASK) | (p[9] - '0');
50 
51         else if (l == 17 &&
52                  startswith(p, "SYSLOG_FACILITY=") &&
53                  p[16] >= '0' && p[16] <= '9')
54                 *priority = (*priority & LOG_PRIMASK) | ((p[16] - '0') << 3);
55 
56         else if (l == 18 &&
57                  startswith(p, "SYSLOG_FACILITY=") &&
58                  p[16] >= '0' && p[16] <= '9' &&
59                  p[17] >= '0' && p[17] <= '9')
60                 *priority = (*priority & LOG_PRIMASK) | (((p[16] - '0')*10 + (p[17] - '0')) << 3);
61 
62         else if (l >= 19 &&
63                  startswith(p, "SYSLOG_IDENTIFIER=")) {
64                 char *t;
65 
66                 t = memdup_suffix0(p + 18, l - 18);
67                 if (t) {
68                         free(*identifier);
69                         *identifier = t;
70                 }
71 
72         } else if (l >= 8 &&
73                    startswith(p, "MESSAGE=")) {
74                 char *t;
75 
76                 t = memdup_suffix0(p + 8, l - 8);
77                 if (t) {
78                         free(*message);
79                         *message = t;
80                 }
81 
82         } else if (l > STRLEN("OBJECT_PID=") &&
83                    l < STRLEN("OBJECT_PID=")  + DECIMAL_STR_MAX(pid_t) &&
84                    startswith(p, "OBJECT_PID=") &&
85                    allow_object_pid(ucred)) {
86                 char buf[DECIMAL_STR_MAX(pid_t)];
87                 memcpy(buf, p + STRLEN("OBJECT_PID="),
88                        l - STRLEN("OBJECT_PID="));
89                 buf[l-STRLEN("OBJECT_PID=")] = '\0';
90 
91                 (void) parse_pid(buf, object_pid);
92         }
93 }
94 
static int server_process_entry(
                Server *s,
                const void *buffer, size_t *remaining,
                ClientContext *context,
                const struct ucred *ucred,
                const struct timeval *tv,
                const char *label, size_t label_len) {

        /* Process a single entry from a native message. Returns 0 if nothing special happened and the message
         * processing should continue, and a negative or positive value otherwise.
         *
         * buffer points to the start of the entry and *remaining is the number of bytes available from
         * there on. An entry is a sequence of fields, terminated by an empty line or the end of the data.
         * Each field is either
         *
         *   NAME=value\n                                  (text form), or
         *   NAME\n<64-bit little-endian size><data>\n     (binary form).
         *
         * Lines starting with '.' or '#' are skipped. Fields whose name is refused by
         * journal_field_valid() are skipped too, the rest of the entry is still processed.
         *
         * Return value: 0 once an entry was read completely (even if it is then dropped by the priority
         * test), whatever log_oom() returns if the iovec array cannot be grown, and 1 in all other cases
         * (no fields found, or the entry was rejected because of its size or field count).
         *
         * Note that *remaining is altered on both success and failure. */

        size_t n = 0, j, tn = SIZE_MAX, entry_size = 0;
        char *identifier = NULL, *message = NULL;
        struct iovec *iovec = NULL;
        int priority = LOG_INFO;
        pid_t object_pid = 0;
        const char *p;
        int r = 1;

        p = buffer;

        while (*remaining > 0) {
                const char *e, *q;

                e = memchr(p, '\n', *remaining);

                if (!e) {
                        /* Trailing noise, let's ignore it, and flush what we collected */
                        log_debug("Received message with trailing noise, ignoring.");
                        break; /* finish processing of the message */
                }

                if (e == p) {
                        /* Entry separator */
                        *remaining -= 1;
                        break;
                }

                if (IN_SET(*p, '.', '#')) {
                        /* Ignore control commands for now, and comments too. */
                        *remaining -= (e - p) + 1;
                        p = e + 1;
                        continue;
                }

                /* A property follows. Note that this check runs before the new field is appended, i.e. n
                 * is the number of fields accepted so far. */
                if (n > ENTRY_FIELD_COUNT_MAX) {
                        log_debug("Received an entry that has more than " STRINGIFY(ENTRY_FIELD_COUNT_MAX) " fields, ignoring entry.");
                        goto finish;
                }

                /* n existing properties, 1 new, +1 for _TRANSPORT */
                if (!GREEDY_REALLOC(iovec,
                                    n + 2 +
                                    N_IOVEC_META_FIELDS + N_IOVEC_OBJECT_FIELDS +
                                    client_context_extra_fields_n_iovec(context))) {
                        r = log_oom();
                        goto finish;
                }

                q = memchr(p, '=', e - p);
                if (q) {
                        /* Text form: the line contains a '=' */

                        if (journal_field_valid(p, q - p, false)) {
                                size_t l;

                                l = e - p;
                                if (l > DATA_SIZE_MAX) {
                                        log_debug("Received text block of %zu bytes is too large, ignoring entry.", l);
                                        goto finish;
                                }

                                if (entry_size + l + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
                                        log_debug("Entry is too big (%zu bytes after processing %zu entries), ignoring entry.",
                                                  entry_size + l, n + 1);
                                        goto finish;
                                }

                                /* The field name was accepted above, hence reference the field right in
                                 * the receive buffer, without copying it. */
                                iovec[n++] = IOVEC_MAKE((char*) p, l);
                                entry_size += l;

                                server_process_entry_meta(p, l, ucred,
                                                          &priority,
                                                          &identifier,
                                                          &message,
                                                          &object_pid);
                        }

                        /* Valid or not, step over the line */
                        *remaining -= (e - p) + 1;
                        p = e + 1;
                        continue;
                } else {
                        /* Binary form: the line is just the field name, the size and data follow */

                        uint64_t l, total;
                        char *k;

                        /* Name, newline, size field and at least the trailing newline must be there */
                        if (*remaining < e - p + 1 + sizeof(uint64_t) + 1) {
                                log_debug("Failed to parse message, ignoring.");
                                break;
                        }

                        l = unaligned_read_le64(e + 1);
                        if (l > DATA_SIZE_MAX) {
                                log_debug("Received binary data block of %"PRIu64" bytes is too large, ignoring entry.", l);
                                goto finish;
                        }

                        /* Size of the field once converted to "NAME=data" */
                        total = (e - p) + 1 + l;
                        if (entry_size + total + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
                                log_debug("Entry is too big (%"PRIu64" bytes after processing %zu fields), ignoring.",
                                          entry_size + total, n + 1);
                                goto finish;
                        }

                        /* The announced data plus its trailing newline must actually be in the buffer */
                        if ((uint64_t) *remaining < e - p + 1 + sizeof(uint64_t) + l + 1 ||
                            e[1+sizeof(uint64_t)+l] != '\n') {
                                log_debug("Failed to parse message, ignoring.");
                                break;
                        }

                        /* Check the field name first, so that we do not allocate and copy data that we
                         * would throw away right afterwards. */
                        if (journal_field_valid(p, e - p, false)) {
                                k = malloc(total);
                                if (!k) {
                                        log_oom();
                                        break;
                                }

                                /* Rewrite the field as "NAME=data" in a buffer of our own */
                                memcpy(k, p, e - p);
                                k[e - p] = '=';
                                memcpy(k + (e - p) + 1, e + 1 + sizeof(uint64_t), l);

                                iovec[n] = IOVEC_MAKE(k, total);
                                entry_size += iovec[n].iov_len;
                                n++;

                                server_process_entry_meta(k, total, ucred,
                                                          &priority,
                                                          &identifier,
                                                          &message,
                                                          &object_pid);
                        }

                        /* Valid or not, step over the whole binary field */
                        *remaining -= (e - p) + 1 + sizeof(uint64_t) + l + 1;
                        p = e + 1 + sizeof(uint64_t) + l + 1;
                }
        }

        if (n == 0)
                goto finish;

        /* Remember the index of the one entry that points to a string literal, so that the cleanup below
         * never tries to free it. */
        tn = n++;
        iovec[tn] = IOVEC_MAKE_STRING("_TRANSPORT=journal");
        entry_size += STRLEN("_TRANSPORT=journal");

        if (entry_size + n + 1 > ENTRY_SIZE_MAX) { /* data + separators + trailer */
                log_debug("Entry is too big with %zu properties and %zu bytes, ignoring.", n, entry_size);
                goto finish;
        }

        r = 0; /* Success, we read the message. */

        if (!client_context_test_priority(context, priority))
                goto finish;

        if (message) {
                if (s->forward_to_syslog)
                        server_forward_syslog(s, syslog_fixup_facility(priority), identifier, message, ucred, tv);

                if (s->forward_to_kmsg)
                        server_forward_kmsg(s, priority, identifier, message, ucred);

                if (s->forward_to_console)
                        server_forward_console(s, priority, identifier, message, ucred);

                if (s->forward_to_wall)
                        server_forward_wall(s, priority, identifier, message, ucred);
        }

        server_dispatch_message(s, iovec, n, MALLOC_ELEMENTSOF(iovec), context, tv, priority, object_pid);

finish:
        /* Text fields point into the caller's buffer, binary fields were copied to the heap. Free only
         * those entries that lie outside of the buffer. */
        for (j = 0; j < n; j++)  {
                if (j == tn)
                        continue;

                if (iovec[j].iov_base < buffer ||
                    (const char*) iovec[j].iov_base >= p + *remaining)
                        free(iovec[j].iov_base);
        }

        free(iovec);
        free(identifier);
        free(message);

        return r;
}
294 
/* Processes a buffer of buffer_size bytes that was received via the native protocol. The buffer may
 * carry any number of entries; they are handed to server_process_entry() one after the other, until
 * that returns something other than 0.
 *
 * ucred, tv, label and label_len describe the sender and are passed on as they are. If ucred carries
 * a valid PID a client context is looked up for it; if that fails a warning is logged and processing
 * goes on without a context. */
void server_process_native_message(
                Server *s,
                const char *buffer, size_t buffer_size,
                const struct ucred *ucred,
                const struct timeval *tv,
                const char *label, size_t label_len) {

        ClientContext *context = NULL;
        size_t remaining;
        int r;

        assert(s);
        assert(buffer || buffer_size == 0);

        if (ucred && pid_is_valid(ucred->pid)) {
                r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context);
                if (r < 0)
                        log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid);
        }

        remaining = buffer_size;

        for (;;) {
                /* server_process_entry() updates 'remaining' as it consumes data, hence the next
                 * unprocessed byte is always that many bytes before the end of the buffer. */
                const uint8_t *cursor = (const uint8_t*) buffer + (buffer_size - remaining);

                r = server_process_entry(s, cursor, &remaining, context, ucred, tv, label, label_len);
                if (r != 0)
                        break;
        }
}
321 
/* Processes native protocol data that was passed to us as a file descriptor instead of as datagram
 * payload. ucred, tv, label and label_len describe the sender and are passed on as they are.
 *
 * Sealed memfds are mapped and parsed in place. Everything else is read into a heap buffer first,
 * and, unless the sender is root, is only accepted if it is a file located directly in /dev/shm/,
 * /tmp/ or /var/tmp/. Only regular, non-empty files that are not too large are processed. All
 * problems are logged, nothing is reported back to the caller. */
void server_process_native_file(
                Server *s,
                int fd,
                const struct ucred *ucred,
                const struct timeval *tv,
                const char *label, size_t label_len) {

        _cleanup_free_ void *data = NULL;
        struct statvfs vfs;
        struct stat st;
        ssize_t n_read;
        bool sealed;
        int r;

        /* Data is in the passed fd, probably it didn't fit in a datagram. */

        assert(s);
        assert(fd >= 0);

        /* For a sealed memfd we can skip both the path check and the copy below. */
        sealed = memfd_get_sealed(fd) > 0;

        if (!sealed && !(ucred && ucred->uid == 0)) {
                _cleanup_free_ char *path = NULL;
                const char *name;

                /* Not a sealed memfd, and the sender is unknown or unprivileged: have a look at
                 * where the file is located. */

                r = fd_get_path(fd, &path);
                if (r < 0) {
                        log_error_errno(r, "readlink(/proc/self/fd/%i) failed: %m", fd);
                        return;
                }

                name = PATH_STARTSWITH_SET(path, "/dev/shm/", "/tmp/", "/var/tmp/");
                if (!name) {
                        log_error("Received file outside of allowed directories. Refusing.");
                        return;
                }

                /* What is left after the directory prefix must be a plain file name */
                if (!filename_is_valid(name)) {
                        log_error("Received file in subdirectory of allowed directories. Refusing.");
                        return;
                }
        }

        if (fstat(fd, &st) < 0) {
                log_error_errno(errno, "Failed to stat passed file, ignoring: %m");
                return;
        }

        if (!S_ISREG(st.st_mode)) {
                log_error("File passed is not regular. Ignoring.");
                return;
        }

        /* Nothing to do for empty files */
        if (st.st_size <= 0)
                return;

        /* Unsealed files have to be read into memory, which effectively doubles memory use. Hence
         * apply only half the limit to them. */
        if (st.st_size > ENTRY_SIZE_MAX / (sealed ? 1 : 2)) {
                log_error("File passed too large (%"PRIu64" bytes). Ignoring.", (uint64_t) st.st_size);
                return;
        }

        if (sealed) {
                size_t map_size;
                void *map;

                /* Sealed, hence mapping the file and using it in place is fine. */

                map_size = PAGE_ALIGN(st.st_size);
                map = mmap(NULL, map_size, PROT_READ, MAP_PRIVATE, fd, 0);
                if (map == MAP_FAILED) {
                        log_error_errno(errno, "Failed to map memfd, ignoring: %m");
                        return;
                }

                server_process_native_message(s, map, st.st_size, ucred, tv, label, label_len);
                assert_se(munmap(map, map_size) >= 0);
                return;
        }

        /* From here on: the file is not sealed */

        if (fstatvfs(fd, &vfs) < 0) {
                log_error_errno(errno, "Failed to stat file system of passed file, not processing it: %m");
                return;
        }

        /* Refuse operating on file systems that have mandatory locking enabled, see:
         *
         * https://github.com/systemd/systemd/issues/1822
         */
        if (vfs.f_flag & ST_MANDLOCK) {
                log_error("Received file descriptor from file system with mandatory locking enabled, not processing it.");
                return;
        }

        /* Switch the fd to non-blocking mode. On regular files this has the effect of bypassing
         * mandatory locking. Given the check above this should not be necessary, but better safe than
         * sorry: NFS is pretty confusing regarding file system flags, and so is SMB, hence let's not
         * trust them. */
        r = fd_nonblock(fd, true);
        if (r < 0) {
                log_error_errno(r, "Failed to make fd non-blocking, not processing it: %m");
                return;
        }

        /* We cannot map an unsealed file, since clients might truncate it afterwards and trigger a
         * SIGBUS for us. So read it into a buffer of our own instead. */
        data = malloc(st.st_size);
        if (!data) {
                log_oom();
                return;
        }

        n_read = pread(fd, data, st.st_size, 0);
        if (n_read < 0) {
                log_error_errno(errno, "Failed to read file, ignoring: %m");
                return;
        }

        /* Process whatever we got, even if it is less than the size reported by fstat() */
        if (n_read > 0)
                server_process_native_message(s, data, n_read, ucred, tv, label, label_len);
}
452 
/* Sets up the native protocol socket and hooks it into the event loop of the server.
 *
 * If s->native_fd does not refer to an fd yet, a new AF_UNIX datagram socket is created and bound to
 * the path native_socket, with access mode 0666. Otherwise the fd that is already there is used and
 * merely switched to non-blocking mode.
 *
 * Returns 0 on success, a negative errno-style error code on failure. */
int server_open_native_socket(Server *s, const char *native_socket) {
        int r;

        assert(s);
        assert(native_socket);

        if (s->native_fd >= 0)
                /* We already have an fd, only make sure it is in non-blocking mode (best effort) */
                (void) fd_nonblock(s->native_fd, true);
        else {
                union sockaddr_union addr;
                size_t addr_len;

                r = sockaddr_un_set_path(&addr.un, native_socket);
                if (r < 0)
                        return log_error_errno(r, "Unable to use namespace path %s for AF_UNIX socket: %m", native_socket);
                addr_len = r; /* on success the return value is the length to pass to bind() */

                s->native_fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0);
                if (s->native_fd < 0)
                        return log_error_errno(errno, "socket() failed: %m");

                /* Get rid of whatever is in the way at the socket path before binding (best effort) */
                (void) sockaddr_un_unlink(&addr.un);

                if (bind(s->native_fd, &addr.sa, addr_len) < 0)
                        return log_error_errno(errno, "bind(%s) failed: %m", addr.un.sun_path);

                /* Let everybody write to the socket (best effort) */
                (void) chmod(addr.un.sun_path, 0666);
        }

        /* Ask for the sender's credentials with each datagram */
        r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSCRED, true);
        if (r < 0)
                return log_error_errno(r, "SO_PASSCRED failed: %m");

        /* The security label is only requested if SELinux is in use, and failing to enable it is
         * not fatal. */
        if (mac_selinux_use()) {
                r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_PASSSEC, true);
                if (r < 0)
                        log_warning_errno(r, "SO_PASSSEC failed: %m");
        }

        /* Ask for a receive timestamp with each datagram */
        r = setsockopt_int(s->native_fd, SOL_SOCKET, SO_TIMESTAMP, true);
        if (r < 0)
                return log_error_errno(r, "SO_TIMESTAMP failed: %m");

        r = sd_event_add_io(s->event, &s->native_event_source, s->native_fd, EPOLLIN, server_process_datagram, s);
        if (r < 0)
                return log_error_errno(r, "Failed to add native server fd to event loop: %m");

        r = sd_event_source_set_priority(s->native_event_source, SD_EVENT_PRIORITY_NORMAL+5);
        if (r < 0)
                return log_error_errno(r, "Failed to adjust native event source priority: %m");

        return 0;
}
506