1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <errno.h>
4 #include <stdio.h>
5 #include <stdlib.h>
6 #include <sys/prctl.h>
7 #include <unistd.h>
8
9 #include "alloc-util.h"
10 #include "capability-util.h"
11 #include "cap-list.h"
12 #include "fileio.h"
13 #include "log.h"
14 #include "macro.h"
15 #include "missing_prctl.h"
16 #include "parse-util.h"
17 #include "user-util.h"
18 #include "util.h"
19
have_effective_cap(int value)20 int have_effective_cap(int value) {
21 _cleanup_cap_free_ cap_t cap = NULL;
22 cap_flag_value_t fv;
23
24 cap = cap_get_proc();
25 if (!cap)
26 return -errno;
27
28 if (cap_get_flag(cap, value, CAP_EFFECTIVE, &fv) < 0)
29 return -errno;
30
31 return fv == CAP_SET;
32 }
33
cap_last_cap(void)34 unsigned cap_last_cap(void) {
35 static thread_local unsigned saved;
36 static thread_local bool valid = false;
37 _cleanup_free_ char *content = NULL;
38 unsigned long p = 0;
39 int r;
40
41 if (valid)
42 return saved;
43
44 /* available since linux-3.2 */
45 r = read_one_line_file("/proc/sys/kernel/cap_last_cap", &content);
46 if (r >= 0) {
47 r = safe_atolu(content, &p);
48 if (r >= 0) {
49
50 if (p > 63) /* Safety for the future: if one day the kernel learns more than 64 caps,
51 * then we are in trouble (since we, as much userspace and kernel space
52 * store capability masks in uint64_t types). Let's hence protect
53 * ourselves against that and always cap at 63 for now. */
54 p = 63;
55
56 saved = p;
57 valid = true;
58 return p;
59 }
60 }
61
62 /* fall back to syscall-probing for pre linux-3.2 */
63 p = MIN((unsigned long) CAP_LAST_CAP, 63U);
64
65 if (prctl(PR_CAPBSET_READ, p) < 0) {
66
67 /* Hmm, look downwards, until we find one that works */
68 for (p--; p > 0; p--)
69 if (prctl(PR_CAPBSET_READ, p) >= 0)
70 break;
71
72 } else {
73
74 /* Hmm, look upwards, until we find one that doesn't work */
75 for (; p < 63; p++)
76 if (prctl(PR_CAPBSET_READ, p+1) < 0)
77 break;
78 }
79
80 saved = p;
81 valid = true;
82
83 return p;
84 }
85
/* Rewrites the inheritable flags of 'caps' so that they mirror the bit mask 'set': every capability
 * from 0 to cap_last_cap() is raised if its bit is set in 'set' and cleared otherwise.
 *
 * Only the in-memory 'caps' object is modified; nothing is applied to the process here.
 * Returns 0 on success or a negative errno-style error if cap_set_flag() fails. */
int capability_update_inherited_set(cap_t caps, uint64_t set) {
        for (unsigned cap = 0; cap <= cap_last_cap(); cap++) {
                cap_value_t cv = (cap_value_t) cap;
                cap_flag_value_t wanted;

                if (set & (UINT64_C(1) << cap))
                        wanted = CAP_SET;
                else
                        wanted = CAP_CLEAR;

                if (cap_set_flag(caps, CAP_INHERITABLE, 1, &cv, wanted) < 0)
                        return -errno;
        }

        return 0;
}
102
capability_ambient_set_apply(uint64_t set,bool also_inherit)103 int capability_ambient_set_apply(uint64_t set, bool also_inherit) {
104 _cleanup_cap_free_ cap_t caps = NULL;
105 int r;
106
107 /* Remove capabilities requested in ambient set, but not in the bounding set */
108 for (unsigned i = 0; i <= cap_last_cap(); i++) {
109 if (set == 0)
110 break;
111
112 if (FLAGS_SET(set, (UINT64_C(1) << i)) && prctl(PR_CAPBSET_READ, i) != 1) {
113 log_debug("Ambient capability %s requested but missing from bounding set,"
114 " suppressing automatically.", capability_to_name(i));
115 set &= ~(UINT64_C(1) << i);
116 }
117 }
118
119 /* Add the capabilities to the ambient set (an possibly also the inheritable set) */
120
121 /* Check that we can use PR_CAP_AMBIENT or quit early. */
122 if (!ambient_capabilities_supported())
123 return (set & all_capabilities()) == 0 ?
124 0 : -EOPNOTSUPP; /* if actually no ambient caps are to be set, be silent,
125 * otherwise fail recognizably */
126
127 if (also_inherit) {
128 caps = cap_get_proc();
129 if (!caps)
130 return -errno;
131
132 r = capability_update_inherited_set(caps, set);
133 if (r < 0)
134 return -errno;
135
136 if (cap_set_proc(caps) < 0)
137 return -errno;
138 }
139
140 for (unsigned i = 0; i <= cap_last_cap(); i++) {
141
142 if (set & (UINT64_C(1) << i)) {
143
144 /* Add the capability to the ambient set. */
145 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_RAISE, i, 0, 0) < 0)
146 return -errno;
147 } else {
148
149 /* Drop the capability so we don't inherit capabilities we didn't ask for. */
150 r = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, i, 0, 0);
151 if (r < 0)
152 return -errno;
153
154 if (r)
155 if (prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_LOWER, i, 0, 0) < 0)
156 return -errno;
157
158 }
159 }
160
161 return 0;
162 }
163
/* Tries to raise CAP_SETPCAP in the effective set of the calling process, if it is not raised yet.
 *
 * Failure to actually acquire the bit via cap_set_proc() is only logged at debug level and is not
 * treated as an error. If 'ret_before_caps' is non-NULL it receives the capability state as it was
 * read before any change was attempted; the function hands that object over to the caller via
 * TAKE_PTR().
 *
 * Returns 0 on success or a negative errno-style error. */
int capability_gain_cap_setpcap(cap_t *ret_before_caps) {
        _cleanup_cap_free_ cap_t original = NULL;
        cap_flag_value_t setpcap_state;

        original = cap_get_proc();
        if (!original)
                return -errno;

        if (cap_get_flag(original, CAP_SETPCAP, CAP_EFFECTIVE, &setpcap_state) < 0)
                return -errno;

        if (setpcap_state != CAP_SET) {
                static const cap_value_t setpcap = CAP_SETPCAP;
                _cleanup_cap_free_ cap_t raised = NULL;

                /* Work on a copy, so that 'original' keeps describing the state before the change. */
                raised = cap_dup(original);
                if (!raised)
                        return -errno;

                if (cap_set_flag(raised, CAP_EFFECTIVE, 1, &setpcap, CAP_SET) < 0)
                        return -errno;

                /* If we didn't manage to acquire the CAP_SETPCAP bit, we continue anyway, after all
                 * this just means we'll fail later, when we actually intend to drop some
                 * capabilities or try to set securebits. */
                if (cap_set_proc(raised) < 0)
                        log_debug_errno(errno, "Can't acquire effective CAP_SETPCAP bit, ignoring: %m");
        }

        /* Return the capabilities as they have been before setting CAP_SETPCAP */
        if (ret_before_caps)
                *ret_before_caps = TAKE_PTR(original);

        return 0;
}
197
capability_bounding_set_drop(uint64_t keep,bool right_now)198 int capability_bounding_set_drop(uint64_t keep, bool right_now) {
199 _cleanup_cap_free_ cap_t before_cap = NULL, after_cap = NULL;
200 int r;
201
202 /* If we are run as PID 1 we will lack CAP_SETPCAP by default
203 * in the effective set (yes, the kernel drops that when
204 * executing init!), so get it back temporarily so that we can
205 * call PR_CAPBSET_DROP. */
206
207 r = capability_gain_cap_setpcap(&before_cap);
208 if (r < 0)
209 return r;
210
211 after_cap = cap_dup(before_cap);
212 if (!after_cap)
213 return -errno;
214
215 for (unsigned i = 0; i <= cap_last_cap(); i++) {
216 cap_value_t v;
217
218 if ((keep & (UINT64_C(1) << i)))
219 continue;
220
221 /* Drop it from the bounding set */
222 if (prctl(PR_CAPBSET_DROP, i) < 0) {
223 r = -errno;
224
225 /* If dropping the capability failed, let's see if we didn't have it in the first place. If so,
226 * continue anyway, as dropping a capability we didn't have in the first place doesn't really
227 * matter anyway. */
228 if (prctl(PR_CAPBSET_READ, i) != 0)
229 goto finish;
230 }
231 v = (cap_value_t) i;
232
233 /* Also drop it from the inheritable set, so
234 * that anything we exec() loses the
235 * capability for good. */
236 if (cap_set_flag(after_cap, CAP_INHERITABLE, 1, &v, CAP_CLEAR) < 0) {
237 r = -errno;
238 goto finish;
239 }
240
241 /* If we shall apply this right now drop it
242 * also from our own capability sets. */
243 if (right_now) {
244 if (cap_set_flag(after_cap, CAP_PERMITTED, 1, &v, CAP_CLEAR) < 0 ||
245 cap_set_flag(after_cap, CAP_EFFECTIVE, 1, &v, CAP_CLEAR) < 0) {
246 r = -errno;
247 goto finish;
248 }
249 }
250 }
251
252 r = 0;
253
254 finish:
255 if (cap_set_proc(after_cap) < 0) {
256 /* If there are no actual changes anyway then let's ignore this error. */
257 if (cap_compare(before_cap, after_cap) != 0)
258 r = -errno;
259 }
260
261 return r;
262 }
263
drop_from_file(const char * fn,uint64_t keep)264 static int drop_from_file(const char *fn, uint64_t keep) {
265 _cleanup_free_ char *p = NULL;
266 uint64_t current, after;
267 uint32_t hi, lo;
268 int r, k;
269
270 r = read_one_line_file(fn, &p);
271 if (r < 0)
272 return r;
273
274 k = sscanf(p, "%" PRIu32 " %" PRIu32, &lo, &hi);
275 if (k != 2)
276 return -EIO;
277
278 current = (uint64_t) lo | ((uint64_t) hi << 32);
279 after = current & keep;
280
281 if (current == after)
282 return 0;
283
284 lo = after & UINT32_C(0xFFFFFFFF);
285 hi = (after >> 32) & UINT32_C(0xFFFFFFFF);
286
287 return write_string_filef(fn, 0, "%" PRIu32 " %" PRIu32, lo, hi);
288 }
289
/* Masks the capability sets stored in /proc/sys/kernel/usermodehelper/inheritable and
 * /proc/sys/kernel/usermodehelper/bset with 'keep', in that order. The second file is only touched
 * if updating the first one succeeded.
 *
 * Returns the result of the last drop_from_file() call performed (negative on failure). */
int capability_bounding_set_drop_usermode(uint64_t keep) {
        int r;

        r = drop_from_file("/proc/sys/kernel/usermodehelper/inheritable", keep);
        if (r < 0)
                return r;

        return drop_from_file("/proc/sys/kernel/usermodehelper/bset", keep);
}
303
drop_privileges(uid_t uid,gid_t gid,uint64_t keep_capabilities)304 int drop_privileges(uid_t uid, gid_t gid, uint64_t keep_capabilities) {
305 int r;
306
307 /* Unfortunately we cannot leave privilege dropping to PID 1 here, since we want to run as user but
308 * want to keep some capabilities. Since file capabilities have been introduced this cannot be done
309 * across exec() anymore, unless our binary has the capability configured in the file system, which
310 * we want to avoid. */
311
312 if (setresgid(gid, gid, gid) < 0)
313 return log_error_errno(errno, "Failed to change group ID: %m");
314
315 r = maybe_setgroups(0, NULL);
316 if (r < 0)
317 return log_error_errno(r, "Failed to drop auxiliary groups list: %m");
318
319 /* Ensure we keep the permitted caps across the setresuid(). Note that we do this even if we actually
320 * don't want to keep any capabilities, since we want to be able to drop them from the bounding set
321 * too, and we can only do that if we have capabilities. */
322 if (prctl(PR_SET_KEEPCAPS, 1) < 0)
323 return log_error_errno(errno, "Failed to enable keep capabilities flag: %m");
324
325 if (setresuid(uid, uid, uid) < 0)
326 return log_error_errno(errno, "Failed to change user ID: %m");
327
328 if (prctl(PR_SET_KEEPCAPS, 0) < 0)
329 return log_error_errno(errno, "Failed to disable keep capabilities flag: %m");
330
331 /* Drop all caps from the bounding set (as well as the inheritable/permitted/effective sets), except
332 * the ones we want to keep */
333 r = capability_bounding_set_drop(keep_capabilities, true);
334 if (r < 0)
335 return log_error_errno(r, "Failed to drop capabilities: %m");
336
337 /* Now upgrade the permitted caps we still kept to effective caps */
338 if (keep_capabilities != 0) {
339 cap_value_t bits[log2u64(keep_capabilities) + 1];
340 _cleanup_cap_free_ cap_t d = NULL;
341 unsigned i, j = 0;
342
343 d = cap_init();
344 if (!d)
345 return log_oom();
346
347 for (i = 0; i < ELEMENTSOF(bits); i++)
348 if (keep_capabilities & (1ULL << i))
349 bits[j++] = i;
350
351 /* use enough bits */
352 assert(i == 64 || (keep_capabilities >> i) == 0);
353 /* don't use too many bits */
354 assert(keep_capabilities & (UINT64_C(1) << (i - 1)));
355
356 if (cap_set_flag(d, CAP_EFFECTIVE, j, bits, CAP_SET) < 0 ||
357 cap_set_flag(d, CAP_PERMITTED, j, bits, CAP_SET) < 0)
358 return log_error_errno(errno, "Failed to enable capabilities bits: %m");
359
360 if (cap_set_proc(d) < 0)
361 return log_error_errno(errno, "Failed to increase capabilities: %m");
362 }
363
364 return 0;
365 }
366
/* Clears capability 'cv' from the inheritable, permitted and effective sets of the calling process
 * and applies the result with cap_set_proc().
 *
 * Returns 0 on success or a negative errno-style error. */
int drop_capability(cap_value_t cv) {
        _cleanup_cap_free_ cap_t proc_caps = NULL;

        proc_caps = cap_get_proc();
        if (!proc_caps)
                return -errno;

        if (cap_set_flag(proc_caps, CAP_INHERITABLE, 1, &cv, CAP_CLEAR) < 0)
                return -errno;

        if (cap_set_flag(proc_caps, CAP_PERMITTED, 1, &cv, CAP_CLEAR) < 0)
                return -errno;

        if (cap_set_flag(proc_caps, CAP_EFFECTIVE, 1, &cv, CAP_CLEAR) < 0)
                return -errno;

        if (cap_set_proc(proc_caps) < 0)
                return -errno;

        return 0;
}
384
ambient_capabilities_supported(void)385 bool ambient_capabilities_supported(void) {
386 static int cache = -1;
387
388 if (cache >= 0)
389 return cache;
390
391 /* If PR_CAP_AMBIENT returns something valid, or an unexpected error code we assume that ambient caps are
392 * available. */
393
394 cache = prctl(PR_CAP_AMBIENT, PR_CAP_AMBIENT_IS_SET, CAP_KILL, 0, 0) >= 0 ||
395 !IN_SET(errno, EINVAL, EOPNOTSUPP, ENOSYS);
396
397 return cache;
398 }
399
/* Removes from the sets of quintet 'q' every capability that prctl(PR_CAPBSET_READ) does not report
 * as present in the current bounding set. Only capabilities from 0 to cap_last_cap() that are set
 * in at least one of the considered sets are checked; each removed capability is logged at debug
 * level.
 *
 * An ambient set equal to UINT64_MAX is neither consulted nor modified.
 *
 * Returns true if at least one capability was removed, false otherwise. */
bool capability_quintet_mangle(CapabilityQuintet *q) {
        uint64_t combined, drop = 0;
        bool ambient_supported;

        assert(q);

        combined = q->effective | q->bounding | q->inheritable | q->permitted;

        ambient_supported = q->ambient != UINT64_MAX;
        if (ambient_supported)
                combined |= q->ambient;

        for (unsigned i = 0; i <= cap_last_cap(); i++) {
                /* Must be a 64-bit type: "unsigned long" is only 32 bits wide on some
                 * architectures, which would truncate the mask to 0 for capabilities >= 32 and
                 * hence never drop them. */
                uint64_t bit = UINT64_C(1) << i;

                if (!FLAGS_SET(combined, bit))
                        continue;

                if (prctl(PR_CAPBSET_READ, i) > 0)
                        continue;

                drop |= bit;

                log_debug("Not in the current bounding set: %s", capability_to_name(i));
        }

        q->effective &= ~drop;
        q->bounding &= ~drop;
        q->inheritable &= ~drop;
        q->permitted &= ~drop;

        if (ambient_supported)
                q->ambient &= ~drop;

        return drop != 0; /* Let the caller know we changed something */
}
435
capability_quintet_enforce(const CapabilityQuintet * q)436 int capability_quintet_enforce(const CapabilityQuintet *q) {
437 _cleanup_cap_free_ cap_t c = NULL, modified = NULL;
438 int r;
439
440 if (q->ambient != UINT64_MAX) {
441 bool changed = false;
442
443 c = cap_get_proc();
444 if (!c)
445 return -errno;
446
447 /* In order to raise the ambient caps set we first need to raise the matching
448 * inheritable + permitted cap */
449 for (unsigned i = 0; i <= cap_last_cap(); i++) {
450 uint64_t m = UINT64_C(1) << i;
451 cap_value_t cv = (cap_value_t) i;
452 cap_flag_value_t old_value_inheritable, old_value_permitted;
453
454 if ((q->ambient & m) == 0)
455 continue;
456
457 if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value_inheritable) < 0)
458 return -errno;
459 if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value_permitted) < 0)
460 return -errno;
461
462 if (old_value_inheritable == CAP_SET && old_value_permitted == CAP_SET)
463 continue;
464
465 if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, CAP_SET) < 0)
466 return -errno;
467 if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
468 return -errno;
469
470 changed = true;
471 }
472
473 if (changed)
474 if (cap_set_proc(c) < 0)
475 return -errno;
476
477 r = capability_ambient_set_apply(q->ambient, false);
478 if (r < 0)
479 return r;
480 }
481
482 if (q->inheritable != UINT64_MAX || q->permitted != UINT64_MAX || q->effective != UINT64_MAX) {
483 bool changed = false;
484
485 if (!c) {
486 c = cap_get_proc();
487 if (!c)
488 return -errno;
489 }
490
491 for (unsigned i = 0; i <= cap_last_cap(); i++) {
492 uint64_t m = UINT64_C(1) << i;
493 cap_value_t cv = (cap_value_t) i;
494
495 if (q->inheritable != UINT64_MAX) {
496 cap_flag_value_t old_value, new_value;
497
498 if (cap_get_flag(c, cv, CAP_INHERITABLE, &old_value) < 0) {
499 if (errno == EINVAL) /* If the kernel knows more caps than this
500 * version of libcap, then this will return
501 * EINVAL. In that case, simply ignore it,
502 * pretend it doesn't exist. */
503 continue;
504
505 return -errno;
506 }
507
508 new_value = (q->inheritable & m) ? CAP_SET : CAP_CLEAR;
509
510 if (old_value != new_value) {
511 changed = true;
512
513 if (cap_set_flag(c, CAP_INHERITABLE, 1, &cv, new_value) < 0)
514 return -errno;
515 }
516 }
517
518 if (q->permitted != UINT64_MAX) {
519 cap_flag_value_t old_value, new_value;
520
521 if (cap_get_flag(c, cv, CAP_PERMITTED, &old_value) < 0) {
522 if (errno == EINVAL)
523 continue;
524
525 return -errno;
526 }
527
528 new_value = (q->permitted & m) ? CAP_SET : CAP_CLEAR;
529
530 if (old_value != new_value) {
531 changed = true;
532
533 if (cap_set_flag(c, CAP_PERMITTED, 1, &cv, new_value) < 0)
534 return -errno;
535 }
536 }
537
538 if (q->effective != UINT64_MAX) {
539 cap_flag_value_t old_value, new_value;
540
541 if (cap_get_flag(c, cv, CAP_EFFECTIVE, &old_value) < 0) {
542 if (errno == EINVAL)
543 continue;
544
545 return -errno;
546 }
547
548 new_value = (q->effective & m) ? CAP_SET : CAP_CLEAR;
549
550 if (old_value != new_value) {
551 changed = true;
552
553 if (cap_set_flag(c, CAP_EFFECTIVE, 1, &cv, new_value) < 0)
554 return -errno;
555 }
556 }
557 }
558
559 if (changed) {
560 /* In order to change the bounding caps, we need to keep CAP_SETPCAP for a bit
561 * longer. Let's add it to our list hence for now. */
562 if (q->bounding != UINT64_MAX) {
563 cap_value_t cv = CAP_SETPCAP;
564
565 modified = cap_dup(c);
566 if (!modified)
567 return -ENOMEM;
568
569 if (cap_set_flag(modified, CAP_PERMITTED, 1, &cv, CAP_SET) < 0)
570 return -errno;
571 if (cap_set_flag(modified, CAP_EFFECTIVE, 1, &cv, CAP_SET) < 0)
572 return -errno;
573
574 if (cap_compare(modified, c) == 0) {
575 /* No change? then drop this nonsense again */
576 cap_free(modified);
577 modified = NULL;
578 }
579 }
580
581 /* Now, let's enforce the caps for the first time. Note that this is where we acquire
582 * caps in any of the sets we currently don't have. We have to do this before
583 * dropping the bounding caps below, since at that point we can never acquire new
584 * caps in inherited/permitted/effective anymore, but only lose them. */
585 if (cap_set_proc(modified ?: c) < 0)
586 return -errno;
587 }
588 }
589
590 if (q->bounding != UINT64_MAX) {
591 r = capability_bounding_set_drop(q->bounding, false);
592 if (r < 0)
593 return r;
594 }
595
596 /* If needed, let's now set the caps again, this time in the final version, which differs from what
597 * we have already set only in the CAP_SETPCAP bit, which we needed for dropping the bounding
598 * bits. This call only undoes bits and doesn't acquire any which means the bounding caps don't
599 * matter. */
600 if (modified)
601 if (cap_set_proc(c) < 0)
602 return -errno;
603
604 return 0;
605 }
606