1 // SPDX-License-Identifier: GPL-2.0
2
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <limits.h>
7 #include <linux/types.h>
8 #include <sched.h>
9 #include <signal.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <syscall.h>
14 #include <sys/prctl.h>
15 #include <sys/wait.h>
16 #include <unistd.h>
17 #include <sys/socket.h>
18 #include <sys/stat.h>
19
20 #include "pidfd.h"
21 #include "../clone3/clone3_selftests.h"
22 #include "../kselftest_harness.h"
23
/*
 * Indices into the per-namespace tables used throughout this test.
 * PIDFD_NS_MAX is the number of namespace kinds and sizes the fd arrays.
 */
enum {
	PIDFD_NS_USER = 0,	/* "user" */
	PIDFD_NS_MNT,		/* "mnt" */
	PIDFD_NS_PID,		/* "pid" */
	PIDFD_NS_UTS,		/* "uts" */
	PIDFD_NS_IPC,		/* "ipc" */
	PIDFD_NS_NET,		/* "net" */
	PIDFD_NS_CGROUP,	/* "cgroup" */
	PIDFD_NS_PIDCLD,	/* "pid_for_children" */
	PIDFD_NS_TIME,		/* "time" */
	PIDFD_NS_MAX,
};
36
37 const struct ns_info {
38 const char *name;
39 int flag;
40 } ns_info[] = {
41 [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
42 [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
43 [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
44 [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
45 [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
46 [PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
47 [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
48 [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
49 [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, },
50 };
51
FIXTURE(current_nsset)52 FIXTURE(current_nsset)
53 {
54 pid_t pid;
55 int pidfd;
56 int nsfds[PIDFD_NS_MAX];
57
58 pid_t child_pid_exited;
59 int child_pidfd_exited;
60
61 pid_t child_pid1;
62 int child_pidfd1;
63 int child_nsfds1[PIDFD_NS_MAX];
64
65 pid_t child_pid2;
66 int child_pidfd2;
67 int child_nsfds2[PIDFD_NS_MAX];
68 };
69
/*
 * Raw waitid() syscall wrapper that discards both siginfo and rusage.
 *
 * Returns the syscall() result: 0 on success, -1 with errno set on failure.
 */
static int sys_waitid(int which, pid_t pid, int options)
{
	long rc;

	rc = syscall(__NR_waitid, which, pid, NULL, options, NULL);

	return (int)rc;
}
74
create_child(int * pidfd,unsigned flags)75 pid_t create_child(int *pidfd, unsigned flags)
76 {
77 struct __clone_args args = {
78 .flags = CLONE_PIDFD | flags,
79 .exit_signal = SIGCHLD,
80 .pidfd = ptr_to_u64(pidfd),
81 };
82
83 return sys_clone3(&args, sizeof(struct clone_args));
84 }
85
switch_timens(void)86 static bool switch_timens(void)
87 {
88 int fd, ret;
89
90 if (unshare(CLONE_NEWTIME))
91 return false;
92
93 fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC);
94 if (fd < 0)
95 return false;
96
97 ret = setns(fd, CLONE_NEWTIME);
98 close(fd);
99 return ret == 0;
100 }
101
/*
 * read() that is transparently restarted when interrupted by a signal.
 *
 * Returns the result of the first read() call that did not fail with
 * EINTR: the number of bytes read, 0 at end of file, or -1 with errno set.
 */
static ssize_t read_nointr(int fd, void *buf, size_t count)
{
	for (;;) {
		ssize_t nread = read(fd, buf, count);

		if (nread >= 0 || errno != EINTR)
			return nread;
	}
}
112
/*
 * write() that is transparently restarted when interrupted by a signal.
 *
 * Returns the result of the first write() call that did not fail with
 * EINTR: the number of bytes written, or -1 with errno set.
 */
static ssize_t write_nointr(int fd, const void *buf, size_t count)
{
	for (;;) {
		ssize_t nwritten = write(fd, buf, count);

		if (nwritten >= 0 || errno != EINTR)
			return nwritten;
	}
}
123
FIXTURE_SETUP(current_nsset)124 FIXTURE_SETUP(current_nsset)
125 {
126 int i, proc_fd, ret;
127 int ipc_sockets[2];
128 char c;
129
130 for (i = 0; i < PIDFD_NS_MAX; i++) {
131 self->nsfds[i] = -EBADF;
132 self->child_nsfds1[i] = -EBADF;
133 self->child_nsfds2[i] = -EBADF;
134 }
135
136 proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
137 ASSERT_GE(proc_fd, 0) {
138 TH_LOG("%m - Failed to open /proc/self/ns");
139 }
140
141 self->pid = getpid();
142 for (i = 0; i < PIDFD_NS_MAX; i++) {
143 const struct ns_info *info = &ns_info[i];
144 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
145 if (self->nsfds[i] < 0) {
146 EXPECT_EQ(errno, ENOENT) {
147 TH_LOG("%m - Failed to open %s namespace for process %d",
148 info->name, self->pid);
149 }
150 }
151 }
152
153 self->pidfd = sys_pidfd_open(self->pid, 0);
154 EXPECT_GT(self->pidfd, 0) {
155 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
156 }
157
158 /* Create task that exits right away. */
159 self->child_pid_exited = create_child(&self->child_pidfd_exited,
160 CLONE_NEWUSER | CLONE_NEWNET);
161 EXPECT_GT(self->child_pid_exited, 0);
162
163 if (self->child_pid_exited == 0)
164 _exit(EXIT_SUCCESS);
165
166 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
167
168 self->pidfd = sys_pidfd_open(self->pid, 0);
169 EXPECT_GE(self->pidfd, 0) {
170 TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
171 }
172
173 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
174 EXPECT_EQ(ret, 0);
175
176 /* Create tasks that will be stopped. */
177 self->child_pid1 = create_child(&self->child_pidfd1,
178 CLONE_NEWUSER | CLONE_NEWNS |
179 CLONE_NEWCGROUP | CLONE_NEWIPC |
180 CLONE_NEWUTS | CLONE_NEWPID |
181 CLONE_NEWNET);
182 EXPECT_GE(self->child_pid1, 0);
183
184 if (self->child_pid1 == 0) {
185 close(ipc_sockets[0]);
186
187 if (!switch_timens())
188 _exit(EXIT_FAILURE);
189
190 if (write_nointr(ipc_sockets[1], "1", 1) < 0)
191 _exit(EXIT_FAILURE);
192
193 close(ipc_sockets[1]);
194
195 pause();
196 _exit(EXIT_SUCCESS);
197 }
198
199 close(ipc_sockets[1]);
200 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
201 close(ipc_sockets[0]);
202
203 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
204 EXPECT_EQ(ret, 0);
205
206 self->child_pid2 = create_child(&self->child_pidfd2,
207 CLONE_NEWUSER | CLONE_NEWNS |
208 CLONE_NEWCGROUP | CLONE_NEWIPC |
209 CLONE_NEWUTS | CLONE_NEWPID |
210 CLONE_NEWNET);
211 EXPECT_GE(self->child_pid2, 0);
212
213 if (self->child_pid2 == 0) {
214 close(ipc_sockets[0]);
215
216 if (!switch_timens())
217 _exit(EXIT_FAILURE);
218
219 if (write_nointr(ipc_sockets[1], "1", 1) < 0)
220 _exit(EXIT_FAILURE);
221
222 close(ipc_sockets[1]);
223
224 pause();
225 _exit(EXIT_SUCCESS);
226 }
227
228 close(ipc_sockets[1]);
229 ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
230 close(ipc_sockets[0]);
231
232 for (i = 0; i < PIDFD_NS_MAX; i++) {
233 char p[100];
234
235 const struct ns_info *info = &ns_info[i];
236
237 self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
238 if (self->nsfds[i] < 0) {
239 EXPECT_EQ(errno, ENOENT) {
240 TH_LOG("%m - Failed to open %s namespace for process %d",
241 info->name, self->pid);
242 }
243 }
244
245 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
246 self->child_pid1, info->name);
247 EXPECT_GT(ret, 0);
248 EXPECT_LT(ret, sizeof(p));
249
250 self->child_nsfds1[i] = open(p, O_RDONLY | O_CLOEXEC);
251 if (self->child_nsfds1[i] < 0) {
252 EXPECT_EQ(errno, ENOENT) {
253 TH_LOG("%m - Failed to open %s namespace for process %d",
254 info->name, self->child_pid1);
255 }
256 }
257
258 ret = snprintf(p, sizeof(p), "/proc/%d/ns/%s",
259 self->child_pid2, info->name);
260 EXPECT_GT(ret, 0);
261 EXPECT_LT(ret, sizeof(p));
262
263 self->child_nsfds2[i] = open(p, O_RDONLY | O_CLOEXEC);
264 if (self->child_nsfds2[i] < 0) {
265 EXPECT_EQ(errno, ENOENT) {
266 TH_LOG("%m - Failed to open %s namespace for process %d",
267 info->name, self->child_pid1);
268 }
269 }
270 }
271
272 close(proc_fd);
273 }
274
FIXTURE_TEARDOWN(current_nsset)275 FIXTURE_TEARDOWN(current_nsset)
276 {
277 int i;
278
279 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd1,
280 SIGKILL, NULL, 0), 0);
281 ASSERT_EQ(sys_pidfd_send_signal(self->child_pidfd2,
282 SIGKILL, NULL, 0), 0);
283
284 for (i = 0; i < PIDFD_NS_MAX; i++) {
285 if (self->nsfds[i] >= 0)
286 close(self->nsfds[i]);
287 if (self->child_nsfds1[i] >= 0)
288 close(self->child_nsfds1[i]);
289 if (self->child_nsfds2[i] >= 0)
290 close(self->child_nsfds2[i]);
291 }
292
293 if (self->child_pidfd1 >= 0)
294 EXPECT_EQ(0, close(self->child_pidfd1));
295 if (self->child_pidfd2 >= 0)
296 EXPECT_EQ(0, close(self->child_pidfd2));
297 ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
298 ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
299 ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
300 }
301
/*
 * Open /proc/<pid>/ns/<ns> read-only with O_CLOEXEC.
 *
 * Returns the open file descriptor, -EIO when the path cannot be formatted
 * into the local buffer, or the failing open() result (-1 with errno set).
 */
static int preserve_ns(const int pid, const char *ns)
{
	char ns_path[50];
	int len;

	len = snprintf(ns_path, sizeof(ns_path), "/proc/%d/ns/%s", pid, ns);
	if (len < 0)
		return -EIO;
	if ((size_t)len >= sizeof(ns_path))
		return -EIO;

	return open(ns_path, O_RDONLY | O_CLOEXEC);
}
313
/*
 * Check whether namespace fd @ns_fd1 and the @ns namespace of process @pid2
 * refer to the same namespace, by comparing device and inode numbers.
 *
 * Returns 1 if they match, 0 if they differ, and -1 if either namespace
 * could not be opened or stat'ed.
 */
static int in_same_namespace(int ns_fd1, pid_t pid2, const char *ns)
{
	struct stat st_ours, st_theirs;
	int their_fd, err;

	if (fstat(ns_fd1, &st_ours) < 0)
		return -1;

	their_fd = preserve_ns(pid2, ns);
	if (their_fd < 0)
		return -1;

	/* The fd is only needed for the stat; drop it before checking. */
	err = fstat(their_fd, &st_theirs);
	close(their_fd);
	if (err < 0)
		return -1;

	if (st_ours.st_dev != st_theirs.st_dev)
		return 0;	/* different namespaces */
	if (st_ours.st_ino != st_theirs.st_ino)
		return 0;	/* different namespaces */

	return 1;		/* same namespace */
}
341
342 /* Test that we can't pass garbage to the kernel. */
TEST_F(current_nsset,invalid_flags)343 TEST_F(current_nsset, invalid_flags)
344 {
345 ASSERT_NE(setns(self->pidfd, 0), 0);
346 EXPECT_EQ(errno, EINVAL);
347
348 ASSERT_NE(setns(self->pidfd, -1), 0);
349 EXPECT_EQ(errno, EINVAL);
350
351 ASSERT_NE(setns(self->pidfd, CLONE_VM), 0);
352 EXPECT_EQ(errno, EINVAL);
353
354 ASSERT_NE(setns(self->pidfd, CLONE_NEWUSER | CLONE_VM), 0);
355 EXPECT_EQ(errno, EINVAL);
356 }
357
358 /* Test that we can't attach to a task that has already exited. */
TEST_F(current_nsset,pidfd_exited_child)359 TEST_F(current_nsset, pidfd_exited_child)
360 {
361 int i;
362 pid_t pid;
363
364 ASSERT_NE(setns(self->child_pidfd_exited, CLONE_NEWUSER | CLONE_NEWNET),
365 0);
366 EXPECT_EQ(errno, ESRCH);
367
368 pid = getpid();
369 for (i = 0; i < PIDFD_NS_MAX; i++) {
370 const struct ns_info *info = &ns_info[i];
371 /* Verify that we haven't changed any namespaces. */
372 if (self->nsfds[i] >= 0)
373 ASSERT_EQ(in_same_namespace(self->nsfds[i], pid, info->name), 1);
374 }
375 }
376
TEST_F(current_nsset,pidfd_incremental_setns)377 TEST_F(current_nsset, pidfd_incremental_setns)
378 {
379 int i;
380 pid_t pid;
381
382 pid = getpid();
383 for (i = 0; i < PIDFD_NS_MAX; i++) {
384 const struct ns_info *info = &ns_info[i];
385 int nsfd;
386
387 if (self->child_nsfds1[i] < 0)
388 continue;
389
390 if (info->flag) {
391 ASSERT_EQ(setns(self->child_pidfd1, info->flag), 0) {
392 TH_LOG("%m - Failed to setns to %s namespace of %d via pidfd %d",
393 info->name, self->child_pid1,
394 self->child_pidfd1);
395 }
396 }
397
398 /* Verify that we have changed to the correct namespaces. */
399 if (info->flag == CLONE_NEWPID)
400 nsfd = self->nsfds[i];
401 else
402 nsfd = self->child_nsfds1[i];
403 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
404 TH_LOG("setns failed to place us correctly into %s namespace of %d via pidfd %d",
405 info->name, self->child_pid1,
406 self->child_pidfd1);
407 }
408 TH_LOG("Managed to correctly setns to %s namespace of %d via pidfd %d",
409 info->name, self->child_pid1, self->child_pidfd1);
410 }
411 }
412
TEST_F(current_nsset,nsfd_incremental_setns)413 TEST_F(current_nsset, nsfd_incremental_setns)
414 {
415 int i;
416 pid_t pid;
417
418 pid = getpid();
419 for (i = 0; i < PIDFD_NS_MAX; i++) {
420 const struct ns_info *info = &ns_info[i];
421 int nsfd;
422
423 if (self->child_nsfds1[i] < 0)
424 continue;
425
426 if (info->flag) {
427 ASSERT_EQ(setns(self->child_nsfds1[i], info->flag), 0) {
428 TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
429 info->name, self->child_pid1,
430 self->child_nsfds1[i]);
431 }
432 }
433
434 /* Verify that we have changed to the correct namespaces. */
435 if (info->flag == CLONE_NEWPID)
436 nsfd = self->nsfds[i];
437 else
438 nsfd = self->child_nsfds1[i];
439 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
440 TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
441 info->name, self->child_pid1,
442 self->child_nsfds1[i]);
443 }
444 TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
445 info->name, self->child_pid1, self->child_nsfds1[i]);
446 }
447 }
448
TEST_F(current_nsset,pidfd_one_shot_setns)449 TEST_F(current_nsset, pidfd_one_shot_setns)
450 {
451 unsigned flags = 0;
452 int i;
453 pid_t pid;
454
455 for (i = 0; i < PIDFD_NS_MAX; i++) {
456 const struct ns_info *info = &ns_info[i];
457
458 if (self->child_nsfds1[i] < 0)
459 continue;
460
461 flags |= info->flag;
462 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
463 info->name, self->child_pid1);
464 }
465
466 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
467 TH_LOG("%m - Failed to setns to namespaces of %d",
468 self->child_pid1);
469 }
470
471 pid = getpid();
472 for (i = 0; i < PIDFD_NS_MAX; i++) {
473 const struct ns_info *info = &ns_info[i];
474 int nsfd;
475
476 if (self->child_nsfds1[i] < 0)
477 continue;
478
479 /* Verify that we have changed to the correct namespaces. */
480 if (info->flag == CLONE_NEWPID)
481 nsfd = self->nsfds[i];
482 else
483 nsfd = self->child_nsfds1[i];
484 ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
485 TH_LOG("setns failed to place us correctly into %s namespace of %d",
486 info->name, self->child_pid1);
487 }
488 TH_LOG("Managed to correctly setns to %s namespace of %d",
489 info->name, self->child_pid1);
490 }
491 }
492
TEST_F(current_nsset,no_foul_play)493 TEST_F(current_nsset, no_foul_play)
494 {
495 unsigned flags = 0;
496 int i;
497
498 for (i = 0; i < PIDFD_NS_MAX; i++) {
499 const struct ns_info *info = &ns_info[i];
500
501 if (self->child_nsfds1[i] < 0)
502 continue;
503
504 flags |= info->flag;
505 if (info->flag) /* No use logging pid_for_children. */
506 TH_LOG("Adding %s namespace of %d to list of namespaces to attach to",
507 info->name, self->child_pid1);
508 }
509
510 ASSERT_EQ(setns(self->child_pidfd1, flags), 0) {
511 TH_LOG("%m - Failed to setns to namespaces of %d vid pidfd %d",
512 self->child_pid1, self->child_pidfd1);
513 }
514
515 /*
516 * Can't setns to a user namespace outside of our hierarchy since we
517 * don't have caps in there and didn't create it. That means that under
518 * no circumstances should we be able to setns to any of the other
519 * ones since they aren't owned by our user namespace.
520 */
521 for (i = 0; i < PIDFD_NS_MAX; i++) {
522 const struct ns_info *info = &ns_info[i];
523
524 if (self->child_nsfds2[i] < 0 || !info->flag)
525 continue;
526
527 ASSERT_NE(setns(self->child_pidfd2, info->flag), 0) {
528 TH_LOG("Managed to setns to %s namespace of %d via pidfd %d",
529 info->name, self->child_pid2,
530 self->child_pidfd2);
531 }
532 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via pidfd %d",
533 info->name, self->child_pid2,
534 self->child_pidfd2);
535
536 ASSERT_NE(setns(self->child_nsfds2[i], info->flag), 0) {
537 TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
538 info->name, self->child_pid2,
539 self->child_nsfds2[i]);
540 }
541 TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
542 info->name, self->child_pid2,
543 self->child_nsfds2[i]);
544 }
545 }
546
/* setns() on a descriptor that is neither a pidfd nor a namespace fd. */
TEST(setns_einval)
{
	int fd;

	fd = sys_memfd_create("rostock", 0);
	/*
	 * 0 is a valid descriptor, and without a memfd the setns() check
	 * below would test nothing useful, so stop here on failure.
	 */
	ASSERT_GE(fd, 0);

	ASSERT_NE(setns(fd, 0), 0);
	EXPECT_EQ(errno, EINVAL);
	close(fd);
}
558
559 TEST_HARNESS_MAIN
560