1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <fnmatch.h>
4 #include <linux/bpf_insn.h>
5 
6 #include "bpf-devices.h"
7 #include "bpf-program.h"
8 #include "devnum-util.h"
9 #include "fd-util.h"
10 #include "fileio.h"
11 #include "nulstr-util.h"
12 #include "parse-util.h"
13 #include "path-util.h"
14 #include "stdio-util.h"
15 #include "string-util.h"
16 
/* Placeholder jump offset emitted by the allow-list rules below for their final "jump to PASS"
 * instruction. bpf_devices_apply_policy() later looks for unconditional jumps carrying this offset and
 * replaces it with the actual distance to the end of the rule list. */
#define PASS_JUMP_OFF 4096
18 
/* Translates an access string consisting of the letters 'r', 'w' and 'm' into the corresponding mask of
 * BPF_DEVCG_ACC_* flags. Repeated letters are harmless, an empty string yields 0, and any other
 * character makes the whole call fail with -EINVAL. */
static int bpf_access_type(const char *acc) {
        int mask = 0;

        assert(acc);

        for (const char *c = acc; *c != '\0'; c++) {
                if (*c == 'r')
                        mask |= BPF_DEVCG_ACC_READ;
                else if (*c == 'w')
                        mask |= BPF_DEVCG_ACC_WRITE;
                else if (*c == 'm')
                        mask |= BPF_DEVCG_ACC_MKNOD;
                else
                        return -EINVAL;
        }

        return mask;
}
41 
/* Appends a rule to the device control program that passes accesses to exactly one device, identified
 * by its type ('b' or 'c') and its major:minor pair.
 *
 * acc is a non-empty combination of the letters 'r', 'w' and 'm'; -EINVAL is returned if it is empty
 * or contains anything else. Otherwise the result of bpf_program_add_instructions() is returned. */
static int bpf_prog_allow_list_device(
                BPFProgram *prog,
                char type,
                int major,
                int minor,
                const char *acc) {

        assert(prog);
        assert(acc);

        log_trace("%s: %c %d:%d %s", __func__, type, major, minor, acc);

        const int mask = bpf_access_type(acc);
        if (mask <= 0)
                return -EINVAL;

        assert(IN_SET(type, 'b', 'c'));
        const int dev_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;

        /* r2..r5 carry device type, access type, major and minor (see the prologue installed by
         * bpf_devices_cgroup_init()). Each failed comparison jumps just past the end of this rule. */
        const struct bpf_insn rule[] = {
                BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
                BPF_ALU32_IMM(BPF_AND, BPF_REG_1, mask),
                BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 4), /* requested access not covered by mask? */

                BPF_JMP_IMM(BPF_JNE, BPF_REG_2, dev_type, 3),  /* wrong device type? */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 2),     /* wrong major? */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_5, minor, 1),     /* wrong minor? */
                BPF_JMP_A(PASS_JUMP_OFF),                      /* everything matched: go to PASS */
        };

        /* When the rule grants read, write and mknod access at once, the three instructions that
         * compare the access type are left out. */
        const size_t skip =
                FLAGS_SET(mask, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD) ? 3 : 0;

        const int r = bpf_program_add_instructions(prog, rule + skip, ELEMENTSOF(rule) - skip);
        if (r < 0)
                log_error_errno(r, "Extending device control BPF program failed: %m");

        return r;
}
83 
/* Appends a rule to the device control program that passes accesses to all devices of the given type
 * ('b' or 'c') that share the given major number, regardless of their minor number.
 *
 * acc is a non-empty combination of the letters 'r', 'w' and 'm'; -EINVAL is returned if it is empty
 * or contains anything else. Otherwise the result of bpf_program_add_instructions() is returned. */
static int bpf_prog_allow_list_major(
                BPFProgram *prog,
                char type,
                int major,
                const char *acc) {

        assert(prog);
        assert(acc);

        log_trace("%s: %c %d:* %s", __func__, type, major, acc);

        const int mask = bpf_access_type(acc);
        if (mask <= 0)
                return -EINVAL;

        assert(IN_SET(type, 'b', 'c'));
        const int dev_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;

        /* r2, r3 and r4 carry device type, access type and major (see the prologue installed by
         * bpf_devices_cgroup_init()). Each failed comparison jumps just past the end of this rule. */
        const struct bpf_insn rule[] = {
                BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
                BPF_ALU32_IMM(BPF_AND, BPF_REG_1, mask),
                BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 3), /* requested access not covered by mask? */

                BPF_JMP_IMM(BPF_JNE, BPF_REG_2, dev_type, 2),  /* wrong device type? */
                BPF_JMP_IMM(BPF_JNE, BPF_REG_4, major, 1),     /* wrong major? */
                BPF_JMP_A(PASS_JUMP_OFF),                      /* everything matched: go to PASS */
        };

        /* When the rule grants read, write and mknod access at once, the three instructions that
         * compare the access type are left out. */
        const size_t skip =
                FLAGS_SET(mask, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD) ? 3 : 0;

        const int r = bpf_program_add_instructions(prog, rule + skip, ELEMENTSOF(rule) - skip);
        if (r < 0)
                log_error_errno(r, "Extending device control BPF program failed: %m");

        return r;
}
123 
/* Appends a rule to the device control program that passes accesses to every device of the given type
 * ('b' or 'c'), whatever its major and minor numbers are.
 *
 * acc is a non-empty combination of the letters 'r', 'w' and 'm'; -EINVAL is returned if it is empty
 * or contains anything else. Otherwise the result of bpf_program_add_instructions() is returned. */
static int bpf_prog_allow_list_class(
                BPFProgram *prog,
                char type,
                const char *acc) {

        assert(prog);
        assert(acc);

        log_trace("%s: %c *:* %s", __func__, type, acc);

        const int mask = bpf_access_type(acc);
        if (mask <= 0)
                return -EINVAL;

        assert(IN_SET(type, 'b', 'c'));
        const int dev_type = type == 'c' ? BPF_DEVCG_DEV_CHAR : BPF_DEVCG_DEV_BLOCK;

        /* r2 and r3 carry device type and access type (see the prologue installed by
         * bpf_devices_cgroup_init()). Each failed comparison jumps just past the end of this rule. */
        const struct bpf_insn rule[] = {
                BPF_MOV32_REG(BPF_REG_1, BPF_REG_3),
                BPF_ALU32_IMM(BPF_AND, BPF_REG_1, mask),
                BPF_JMP_REG(BPF_JNE, BPF_REG_1, BPF_REG_3, 2), /* requested access not covered by mask? */

                BPF_JMP_IMM(BPF_JNE, BPF_REG_2, dev_type, 1), /* wrong device type? */
                BPF_JMP_A(PASS_JUMP_OFF),                     /* everything matched: go to PASS */
        };

        /* When the rule grants read, write and mknod access at once, the three instructions that
         * compare the access type are left out. */
        const size_t skip =
                FLAGS_SET(mask, BPF_DEVCG_ACC_READ | BPF_DEVCG_ACC_WRITE | BPF_DEVCG_ACC_MKNOD) ? 3 : 0;

        const int r = bpf_program_add_instructions(prog, rule + skip, ELEMENTSOF(rule) - skip);
        if (r < 0)
                log_error_errno(r, "Extending device control BPF program failed: %m");

        return r;
}
161 
/* Allocates a new device control BPF program and stores it in *ret.
 *
 * If the policy is CGROUP_DEVICE_POLICY_AUTO and there is no allow list, nothing is allocated, *ret is
 * left untouched and 0 is returned. If there is an allow list, or the policy is
 * CGROUP_DEVICE_POLICY_CLOSED, the program starts with a prologue that loads the fields of the
 * context structure into registers. Returns 0 on success, a negative errno-style value on failure. */
int bpf_devices_cgroup_init(
                BPFProgram **ret,
                CGroupDevicePolicy policy,
                bool allow_list) {

        /* On entry r1 points to struct bpf_cgroup_dev_ctx; unpack it into r2..r5. */
        const struct bpf_insn prologue[] = {
                /* r2 = device type (lower 16 bits of access_type) */
                BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
                            offsetof(struct bpf_cgroup_dev_ctx, access_type)),
                BPF_ALU32_IMM(BPF_AND, BPF_REG_2, 0xFFFF),

                /* r3 = access type (access_type shifted right by 16 bits) */
                BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
                            offsetof(struct bpf_cgroup_dev_ctx, access_type)),
                BPF_ALU32_IMM(BPF_RSH, BPF_REG_3, 16),

                /* r4 = major number */
                BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
                            offsetof(struct bpf_cgroup_dev_ctx, major)),

                /* r5 = minor number */
                BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
                            offsetof(struct bpf_cgroup_dev_ctx, minor)),
        };

        _cleanup_(bpf_program_freep) BPFProgram *p = NULL;
        int r;

        assert(ret);

        /* No program is needed in this configuration. */
        if (!allow_list && policy == CGROUP_DEVICE_POLICY_AUTO)
                return 0;

        r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &p);
        if (r < 0)
                return log_error_errno(r, "Loading device control BPF program failed: %m");

        /* The prologue is only added where per-device rules may follow. */
        if (allow_list || policy == CGROUP_DEVICE_POLICY_CLOSED) {
                r = bpf_program_add_instructions(p, prologue, ELEMENTSOF(prologue));
                if (r < 0)
                        return log_error_errno(r, "Extending device control BPF program failed: %m");
        }

        *ret = TAKE_PTR(p);
        return 0;
}
209 
/* Completes the device control program in *prog and attaches it to the cgroup at cgroup_path.
 *
 * If *prog is NULL nothing is built or attached. In either case, when prog_installed is non-NULL and
 * no error occurred, the program previously stored in *prog_installed is freed and *prog is moved
 * there. Returns 0 on success, a negative errno-style value on failure (in which case
 * *prog_installed is not touched). */
int bpf_devices_apply_policy(
                BPFProgram **prog,
                CGroupDevicePolicy policy,
                bool allow_list,
                const char *cgroup_path,
                BPFProgram **prog_installed) {

        _cleanup_free_ char *controller_path = NULL;
        int r;

        assert(prog);

        if (*prog) {
                BPFProgram *p = *prog;
                const bool deny_everything = !allow_list && policy == CGROUP_DEVICE_POLICY_STRICT;

                /* Placed right behind the rules: set the DENY verdict, then hop over the instruction
                 * that would set the final verdict and land on the exit instruction. */
                const struct bpf_insn deny_insn[] = {
                        BPF_MOV64_IMM(BPF_REG_0, 0),
                        BPF_JMP_A(1),
                };

                /* Final verdict: DENY if everything is to be denied, ALLOW otherwise. */
                const struct bpf_insn exit_insn[] = {
                        BPF_MOV64_IMM(BPF_REG_0, deny_everything ? 0 : 1),
                        BPF_EXIT_INSN()
                };

                if (!deny_everything) {
                        r = bpf_program_add_instructions(p, deny_insn, ELEMENTSOF(deny_insn));
                        if (r < 0)
                                return log_error_errno(r, "Extending device control BPF program failed: %m");

                        /* Resolve the PASS_JUMP_OFF placeholders: make each of them target the
                         * instruction that is appended next, i.e. the first one of exit_insn. */
                        for (size_t i = 0; i < p->n_instructions; i++) {
                                struct bpf_insn *insn = p->instructions + i;

                                if (insn->code != (BPF_JMP | BPF_JA) || insn->off != PASS_JUMP_OFF)
                                        continue;

                                insn->off = p->n_instructions - i - 1;
                        }
                }

                r = bpf_program_add_instructions(p, exit_insn, ELEMENTSOF(exit_insn));
                if (r < 0)
                        return log_error_errno(r, "Extending device control BPF program failed: %m");

                r = cg_get_path(SYSTEMD_CGROUP_CONTROLLER, cgroup_path, NULL, &controller_path);
                if (r < 0)
                        return log_error_errno(r, "Failed to determine cgroup path: %m");

                r = bpf_program_cgroup_attach(p, BPF_CGROUP_DEVICE, controller_path, BPF_F_ALLOW_MULTI);
                if (r < 0)
                        return log_error_errno(r, "Attaching device control BPF program to cgroup %s failed: %m",
                                               empty_to_root(cgroup_path));
        }

        /* The new program (if any) is attached by now. Drop the old one (which will implicitly detach
         * it) and hand ownership of the new one over to the caller's slot. */
        if (prog_installed) {
                bpf_program_free(*prog_installed);
                *prog_installed = TAKE_PTR(*prog);
        }

        return 0;
}
275 
bpf_devices_supported(void)276 int bpf_devices_supported(void) {
277         const struct bpf_insn trivial[] = {
278                 BPF_MOV64_IMM(BPF_REG_0, 1),
279                 BPF_EXIT_INSN()
280         };
281 
282         _cleanup_(bpf_program_freep) BPFProgram *program = NULL;
283         static int supported = -1;
284         int r;
285 
286         /* Checks whether BPF device controller is supported. For this, we check five things:
287          *
288          * a) whether we are privileged
289          * b) whether the unified hierarchy is being used
290          * c) the BPF implementation in the kernel supports BPF_PROG_TYPE_CGROUP_DEVICE programs, which we require
291          */
292 
293         if (supported >= 0)
294                 return supported;
295 
296         if (geteuid() != 0) {
297                 log_debug("Not enough privileges, BPF device control is not supported.");
298                 return supported = 0;
299         }
300 
301         r = cg_unified_controller(SYSTEMD_CGROUP_CONTROLLER);
302         if (r < 0)
303                 return log_error_errno(r, "Can't determine whether the unified hierarchy is used: %m");
304         if (r == 0) {
305                 log_debug("Not running with unified cgroups, BPF device control is not supported.");
306                 return supported = 0;
307         }
308 
309         r = bpf_program_new(BPF_PROG_TYPE_CGROUP_DEVICE, "sd_devices", &program);
310         if (r < 0) {
311                 log_debug_errno(r, "Can't allocate CGROUP DEVICE BPF program, BPF device control is not supported: %m");
312                 return supported = 0;
313         }
314 
315         r = bpf_program_add_instructions(program, trivial, ELEMENTSOF(trivial));
316         if (r < 0) {
317                 log_debug_errno(r, "Can't add trivial instructions to CGROUP DEVICE BPF program, BPF device control is not supported: %m");
318                 return supported = 0;
319         }
320 
321         r = bpf_program_load_kernel(program, NULL, 0);
322         if (r < 0) {
323                 log_debug_errno(r, "Can't load kernel CGROUP DEVICE BPF program, BPF device control is not supported: %m");
324                 return supported = 0;
325         }
326 
327         return supported = 1;
328 }
329 
/* Allows access to the devices described by type ('b' or 'c') and the optional major/minor numbers:
 * maj == NULL stands for all devices of the type, min == NULL for all minors of the given major.
 *
 * If cg_all_unified() reports a positive value the rule is appended to the BPF program (or silently
 * skipped if prog is NULL); otherwise it is written to the "devices.allow" attribute of the "devices"
 * controller for the cgroup at path. Returns the result of whichever backend was used. */
static int allow_list_device_pattern(
                BPFProgram *prog,
                const char *path,
                char type,
                const unsigned *maj,
                const unsigned *min,
                const char *acc) {

        assert(IN_SET(type, 'b', 'c'));

        if (cg_all_unified() > 0) {
                if (!prog)
                        return 0;

                if (!maj)
                        return bpf_prog_allow_list_class(prog, type, acc);
                if (!min)
                        return bpf_prog_allow_list_major(prog, type, *maj, acc);

                return bpf_prog_allow_list_device(prog, type, *maj, *min, acc);
        }

        /* NOTE(review): the size appears to account for the type letter, two unsigned numbers, the
         * separators and an access string of up to three letters — confirm against callers. */
        char buf[2+DECIMAL_STR_MAX(unsigned)*2+2+4];
        int r;

        if (!maj)
                xsprintf(buf, "%c *:* %s", type, acc);
        else if (!min)
                xsprintf(buf, "%c %u:* %s", type, *maj, acc);
        else
                xsprintf(buf, "%c %u:%u %s", type, *maj, *min, acc);

        r = cg_set_attribute("devices", path, "devices.allow", buf);
        if (r < 0)
                /* Changing the devices list of a populated cgroup might result in EINVAL, hence it is
                 * among the errors that are only logged at debug level. */
                log_full_errno(IN_SET(r, -ENOENT, -EROFS, -EINVAL, -EACCES, -EPERM) ? LOG_DEBUG : LOG_WARNING,
                               r, "Failed to set devices.allow on %s: %m", path);

        return r;
}
373 
/* Allows the given access (up to three of the letters 'r', 'w', 'm') to the single device that the
 * path in node refers to.
 *
 * The device type and major/minor numbers are taken from the path itself where possible and from
 * stat() otherwise. Returns a negative errno-style value if the node cannot be resolved or is not a
 * device, otherwise the result of allow_list_device_pattern(). */
int bpf_devices_allow_list_device(
                BPFProgram *prog,
                const char *path,
                const char *node,
                const char *acc) {

        mode_t mode;
        dev_t rdev;
        int r;

        assert(path);
        assert(acc);
        assert(strlen(acc) <= 3);

        log_trace("%s: %s %s", __func__, node, acc);

        /* Paths such as /dev/block/%u:%u, /dev/char/%u:%u, /run/systemd/inaccessible/chr and
         * /run/systemd/inaccessible/blk get special treatment: major/minor are parsed out of the path
         * instead of stat()ing it, so that clients may use them without the device node actually being
         * around. */
        r = device_path_parse_major_minor(node, &mode, &rdev);
        if (r == -ENODEV) {
                /* Parsing did not yield a device number, so ask the file system instead. */
                struct stat st;

                if (stat(node, &st) < 0)
                        return log_warning_errno(errno, "Couldn't stat device %s: %m", node);

                if (!(S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)))
                        return log_warning_errno(SYNTHETIC_ERRNO(ENODEV), "%s is not a device.", node);

                mode = st.st_mode;
                rdev = (dev_t) st.st_rdev;
        } else if (r < 0)
                return log_warning_errno(r, "Couldn't parse major/minor from device path '%s': %m", node);

        const char type = S_ISCHR(mode) ? 'c' : 'b';
        unsigned maj = major(rdev), min = minor(rdev);

        return allow_list_device_pattern(prog, path, type, &maj, &min, acc);
}
412 
/* Allows the given access to whole groups of devices of the given type ('b' or 'c'). name may be
 *
 *   - "*", meaning all devices of that type,
 *   - a number that is a valid major, meaning all devices with that major, or
 *   - an fnmatch() pattern that is matched against the names listed in /proc/devices.
 *
 * In the pattern case failures to add an individual match are ignored, and -ENOENT is returned if
 * nothing matched at all. Failing to open or read /proc/devices yields a negative errno-style value. */
int bpf_devices_allow_list_major(
                BPFProgram *prog,
                const char *path,
                const char *name,
                char type,
                const char *acc) {

        unsigned maj;
        int r;

        assert(path);
        assert(acc);
        assert(IN_SET(type, 'b', 'c'));

        /* Wildcard: the rule applies to every device of this type. */
        if (streq(name, "*"))
                return allow_list_device_pattern(prog, path, type, NULL, NULL, acc);

        /* Numeric and suitable as a major: create the entry directly, no lookup required. */
        if (safe_atou(name, &maj) >= 0 && DEVICE_MAJOR_VALID(maj))
                return allow_list_device_pattern(prog, path, type, &maj, NULL, acc);

        _cleanup_fclose_ FILE *f = fopen("/proc/devices", "re");
        if (!f)
                return log_warning_errno(errno, "Cannot open /proc/devices to resolve %s: %m", name);

        /* in_section: we are inside the list that belongs to the requested type.
         * matched: at least one entry matched the pattern. */
        bool in_section = false, matched = false;

        for (;;) {
                _cleanup_free_ char *line = NULL;

                r = read_line(f, LONG_LINE_MAX, &line);
                if (r < 0)
                        return log_warning_errno(r, "Failed to read /proc/devices: %m");
                if (r == 0)
                        break;

                /* The heading of the list we are interested in switches parsing on ... */
                const bool is_heading =
                        (type == 'c' && streq(line, "Character devices:")) ||
                        (type == 'b' && streq(line, "Block devices:"));
                if (is_heading) {
                        in_section = true;
                        continue;
                }

                /* ... and an empty line switches it off again. */
                if (isempty(line)) {
                        in_section = false;
                        continue;
                }

                if (!in_section)
                        continue;

                /* Entries are parsed as "<major><whitespace><name>": cut the line at the first
                 * whitespace character to isolate the number. */
                char *number = strstrip(line);
                char *sep = strpbrk(number, WHITESPACE);
                if (!sep)
                        continue;
                *sep = 0;

                /* Skip entries whose number does not parse or is zero. */
                if (safe_atou(number, &maj) < 0 || maj == 0)
                        continue;

                const char *entry_name = sep + 1;
                entry_name += strspn(entry_name, WHITESPACE);

                if (fnmatch(name, entry_name, 0) != 0)
                        continue;

                matched = true;
                (void) allow_list_device_pattern(prog, path, type, &maj, NULL, acc);
        }

        if (!matched)
                return log_debug_errno(SYNTHETIC_ERRNO(ENOENT),
                                       "Device allow list pattern \"%s\" did not match anything.", name);

        return 0;
}
500 
/* Allows access to a fixed set of device nodes plus the "pts" character devices.
 *
 * Every entry is attempted even if an earlier one failed. Returns 0 if all of them succeeded,
 * otherwise the error of the first entry that failed. */
int bpf_devices_allow_list_static(
                BPFProgram *prog,
                const char *path) {

        /* Pairs of device node path and access string. */
        static const char auto_devices[] =
                "/dev/null\0" "rwm\0"
                "/dev/zero\0" "rwm\0"
                "/dev/full\0" "rwm\0"
                "/dev/random\0" "rwm\0"
                "/dev/urandom\0" "rwm\0"
                "/dev/tty\0" "rwm\0"
                "/dev/ptmx\0" "rwm\0"
                /* Allow /run/systemd/inaccessible/{chr,blk} devices for mapping InaccessiblePaths */
                "/run/systemd/inaccessible/chr\0" "rwm\0"
                "/run/systemd/inaccessible/blk\0" "rwm\0";

        const char *node, *acc;
        int first_error = 0, q;

        NULSTR_FOREACH_PAIR(node, acc, auto_devices) {
                q = bpf_devices_allow_list_device(prog, path, node, acc);
                if (q < 0 && first_error >= 0)
                        first_error = q;
        }

        /* PTS (/dev/pts) devices may be accessed, but not duplicated: hence "rw" without "m". */
        q = bpf_devices_allow_list_major(prog, path, "pts", 'c', "rw");
        if (q < 0 && first_error >= 0)
                first_error = q;

        return first_error;
}
532