1 // SPDX-License-Identifier: GPL-2.0
2 
3 #define _GNU_SOURCE
4 #include <errno.h>
5 #include <fcntl.h>
6 #include <linux/kernel.h>
7 #include <limits.h>
8 #include <stdbool.h>
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <syscall.h>
13 #include <unistd.h>
14 #include <sys/resource.h>
15 
16 #include "../kselftest_harness.h"
17 #include "../clone3/clone3_selftests.h"
18 
/*
 * Fallback syscall number for close_range(), used only when none of the
 * headers included above already defines __NR_close_range. Alpha, MIPS
 * (per ABI) and ia64 get their own values; everything else uses 436.
 * On MIPS the macro stays undefined if _MIPS_SIM matches none of the
 * three ABIs checked below.
 */
#if !defined(__NR_close_range)
# if defined(__alpha__)
#  define __NR_close_range 546
# elif defined(_MIPS_SIM)
#  if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
#   define __NR_close_range (436 + 4000)
#  endif
#  if _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
#   define __NR_close_range (436 + 6000)
#  endif
#  if _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
#   define __NR_close_range (436 + 5000)
#  endif
# elif defined(__ia64__)
#  define __NR_close_range (436 + 1024)
# else
#  define __NR_close_range 436
# endif
#endif /* !defined(__NR_close_range) */
38 
/*
 * Fallback definitions of the close_range() flag bits for builds whose
 * headers do not provide them.
 */
#if !defined(CLOSE_RANGE_UNSHARE)
# define CLOSE_RANGE_UNSHARE	(1U << 1)
#endif

#if !defined(CLOSE_RANGE_CLOEXEC)
# define CLOSE_RANGE_CLOEXEC	(1U << 2)
#endif
46 
/*
 * Invoke the close_range() system call directly through syscall(), using
 * __NR_close_range, so the test does not depend on a libc wrapper.
 *
 * @fd:     first descriptor of the range
 * @max_fd: last descriptor of the range
 * @flags:  CLOSE_RANGE_* flag bits
 *
 * Returns whatever syscall() returns, narrowed to int.
 */
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
				  unsigned int flags)
{
	long rc;

	rc = syscall(__NR_close_range, fd, max_fd, flags);
	return (int)rc;
}
52 
TEST(core_close_range)53 TEST(core_close_range)
54 {
55 	int i, ret;
56 	int open_fds[101];
57 
58 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
59 		int fd;
60 
61 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
62 		ASSERT_GE(fd, 0) {
63 			if (errno == ENOENT)
64 				SKIP(return, "Skipping test since /dev/null does not exist");
65 		}
66 
67 		open_fds[i] = fd;
68 	}
69 
70 	EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
71 		if (errno == ENOSYS)
72 			SKIP(return, "close_range() syscall not supported");
73 	}
74 
75 	EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
76 
77 	for (i = 0; i <= 50; i++)
78 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
79 
80 	for (i = 51; i <= 100; i++)
81 		EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
82 
83 	/* create a couple of gaps */
84 	close(57);
85 	close(78);
86 	close(81);
87 	close(82);
88 	close(84);
89 	close(90);
90 
91 	EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
92 
93 	for (i = 51; i <= 92; i++)
94 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
95 
96 	for (i = 93; i <= 100; i++)
97 		EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
98 
99 	/* test that the kernel caps and still closes all fds */
100 	EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
101 
102 	for (i = 93; i <= 99; i++)
103 		EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
104 
105 	EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
106 
107 	EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
108 
109 	EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
110 }
111 
TEST(close_range_unshare)112 TEST(close_range_unshare)
113 {
114 	int i, ret, status;
115 	pid_t pid;
116 	int open_fds[101];
117 	struct __clone_args args = {
118 		.flags = CLONE_FILES,
119 		.exit_signal = SIGCHLD,
120 	};
121 
122 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
123 		int fd;
124 
125 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
126 		ASSERT_GE(fd, 0) {
127 			if (errno == ENOENT)
128 				SKIP(return, "Skipping test since /dev/null does not exist");
129 		}
130 
131 		open_fds[i] = fd;
132 	}
133 
134 	pid = sys_clone3(&args, sizeof(args));
135 	ASSERT_GE(pid, 0);
136 
137 	if (pid == 0) {
138 		ret = sys_close_range(open_fds[0], open_fds[50],
139 				      CLOSE_RANGE_UNSHARE);
140 		if (ret)
141 			exit(EXIT_FAILURE);
142 
143 		for (i = 0; i <= 50; i++)
144 			if (fcntl(open_fds[i], F_GETFL) != -1)
145 				exit(EXIT_FAILURE);
146 
147 		for (i = 51; i <= 100; i++)
148 			if (fcntl(open_fds[i], F_GETFL) == -1)
149 				exit(EXIT_FAILURE);
150 
151 		/* create a couple of gaps */
152 		close(57);
153 		close(78);
154 		close(81);
155 		close(82);
156 		close(84);
157 		close(90);
158 
159 		ret = sys_close_range(open_fds[51], open_fds[92],
160 				      CLOSE_RANGE_UNSHARE);
161 		if (ret)
162 			exit(EXIT_FAILURE);
163 
164 		for (i = 51; i <= 92; i++)
165 			if (fcntl(open_fds[i], F_GETFL) != -1)
166 				exit(EXIT_FAILURE);
167 
168 		for (i = 93; i <= 100; i++)
169 			if (fcntl(open_fds[i], F_GETFL) == -1)
170 				exit(EXIT_FAILURE);
171 
172 		/* test that the kernel caps and still closes all fds */
173 		ret = sys_close_range(open_fds[93], open_fds[99],
174 				      CLOSE_RANGE_UNSHARE);
175 		if (ret)
176 			exit(EXIT_FAILURE);
177 
178 		for (i = 93; i <= 99; i++)
179 			if (fcntl(open_fds[i], F_GETFL) != -1)
180 				exit(EXIT_FAILURE);
181 
182 		if (fcntl(open_fds[100], F_GETFL) == -1)
183 			exit(EXIT_FAILURE);
184 
185 		ret = sys_close_range(open_fds[100], open_fds[100],
186 				      CLOSE_RANGE_UNSHARE);
187 		if (ret)
188 			exit(EXIT_FAILURE);
189 
190 		if (fcntl(open_fds[100], F_GETFL) != -1)
191 			exit(EXIT_FAILURE);
192 
193 		exit(EXIT_SUCCESS);
194 	}
195 
196 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
197 	EXPECT_EQ(true, WIFEXITED(status));
198 	EXPECT_EQ(0, WEXITSTATUS(status));
199 }
200 
TEST(close_range_unshare_capped)201 TEST(close_range_unshare_capped)
202 {
203 	int i, ret, status;
204 	pid_t pid;
205 	int open_fds[101];
206 	struct __clone_args args = {
207 		.flags = CLONE_FILES,
208 		.exit_signal = SIGCHLD,
209 	};
210 
211 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
212 		int fd;
213 
214 		fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
215 		ASSERT_GE(fd, 0) {
216 			if (errno == ENOENT)
217 				SKIP(return, "Skipping test since /dev/null does not exist");
218 		}
219 
220 		open_fds[i] = fd;
221 	}
222 
223 	pid = sys_clone3(&args, sizeof(args));
224 	ASSERT_GE(pid, 0);
225 
226 	if (pid == 0) {
227 		ret = sys_close_range(open_fds[0], UINT_MAX,
228 				      CLOSE_RANGE_UNSHARE);
229 		if (ret)
230 			exit(EXIT_FAILURE);
231 
232 		for (i = 0; i <= 100; i++)
233 			if (fcntl(open_fds[i], F_GETFL) != -1)
234 				exit(EXIT_FAILURE);
235 
236 		exit(EXIT_SUCCESS);
237 	}
238 
239 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
240 	EXPECT_EQ(true, WIFEXITED(status));
241 	EXPECT_EQ(0, WEXITSTATUS(status));
242 }
243 
TEST(close_range_cloexec)244 TEST(close_range_cloexec)
245 {
246 	int i, ret;
247 	int open_fds[101];
248 	struct rlimit rlimit;
249 
250 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
251 		int fd;
252 
253 		fd = open("/dev/null", O_RDONLY);
254 		ASSERT_GE(fd, 0) {
255 			if (errno == ENOENT)
256 				SKIP(return, "Skipping test since /dev/null does not exist");
257 		}
258 
259 		open_fds[i] = fd;
260 	}
261 
262 	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
263 	if (ret < 0) {
264 		if (errno == ENOSYS)
265 			SKIP(return, "close_range() syscall not supported");
266 		if (errno == EINVAL)
267 			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
268 	}
269 
270 	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
271 	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
272 	rlimit.rlim_cur = 25;
273 	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
274 
275 	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
276 	ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
277 	ASSERT_EQ(0, ret);
278 	ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
279 	ASSERT_EQ(0, ret);
280 
281 	for (i = 0; i <= 50; i++) {
282 		int flags = fcntl(open_fds[i], F_GETFD);
283 
284 		EXPECT_GT(flags, -1);
285 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
286 	}
287 
288 	for (i = 51; i <= 74; i++) {
289 		int flags = fcntl(open_fds[i], F_GETFD);
290 
291 		EXPECT_GT(flags, -1);
292 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
293 	}
294 
295 	for (i = 75; i <= 100; i++) {
296 		int flags = fcntl(open_fds[i], F_GETFD);
297 
298 		EXPECT_GT(flags, -1);
299 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
300 	}
301 
302 	/* Test a common pattern.  */
303 	ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
304 	for (i = 0; i <= 100; i++) {
305 		int flags = fcntl(open_fds[i], F_GETFD);
306 
307 		EXPECT_GT(flags, -1);
308 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
309 	}
310 }
311 
TEST(close_range_cloexec_unshare)312 TEST(close_range_cloexec_unshare)
313 {
314 	int i, ret;
315 	int open_fds[101];
316 	struct rlimit rlimit;
317 
318 	for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
319 		int fd;
320 
321 		fd = open("/dev/null", O_RDONLY);
322 		ASSERT_GE(fd, 0) {
323 			if (errno == ENOENT)
324 				SKIP(return, "Skipping test since /dev/null does not exist");
325 		}
326 
327 		open_fds[i] = fd;
328 	}
329 
330 	ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
331 	if (ret < 0) {
332 		if (errno == ENOSYS)
333 			SKIP(return, "close_range() syscall not supported");
334 		if (errno == EINVAL)
335 			SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
336 	}
337 
338 	/* Ensure the FD_CLOEXEC bit is set also with a resource limit in place.  */
339 	ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
340 	rlimit.rlim_cur = 25;
341 	ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
342 
343 	/* Set close-on-exec for two ranges: [0-50] and [75-100].  */
344 	ret = sys_close_range(open_fds[0], open_fds[50],
345 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
346 	ASSERT_EQ(0, ret);
347 	ret = sys_close_range(open_fds[75], open_fds[100],
348 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
349 	ASSERT_EQ(0, ret);
350 
351 	for (i = 0; i <= 50; i++) {
352 		int flags = fcntl(open_fds[i], F_GETFD);
353 
354 		EXPECT_GT(flags, -1);
355 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
356 	}
357 
358 	for (i = 51; i <= 74; i++) {
359 		int flags = fcntl(open_fds[i], F_GETFD);
360 
361 		EXPECT_GT(flags, -1);
362 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
363 	}
364 
365 	for (i = 75; i <= 100; i++) {
366 		int flags = fcntl(open_fds[i], F_GETFD);
367 
368 		EXPECT_GT(flags, -1);
369 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
370 	}
371 
372 	/* Test a common pattern.  */
373 	ret = sys_close_range(3, UINT_MAX,
374 			      CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
375 	for (i = 0; i <= 100; i++) {
376 		int flags = fcntl(open_fds[i], F_GETFD);
377 
378 		EXPECT_GT(flags, -1);
379 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
380 	}
381 }
382 
383 /*
384  * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
385  */
TEST(close_range_cloexec_syzbot)386 TEST(close_range_cloexec_syzbot)
387 {
388 	int fd1, fd2, fd3, flags, ret, status;
389 	pid_t pid;
390 	struct __clone_args args = {
391 		.flags = CLONE_FILES,
392 		.exit_signal = SIGCHLD,
393 	};
394 
395 	/* Create a huge gap in the fd table. */
396 	fd1 = open("/dev/null", O_RDWR);
397 	EXPECT_GT(fd1, 0);
398 
399 	fd2 = dup2(fd1, 1000);
400 	EXPECT_GT(fd2, 0);
401 
402 	pid = sys_clone3(&args, sizeof(args));
403 	ASSERT_GE(pid, 0);
404 
405 	if (pid == 0) {
406 		ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
407 		if (ret)
408 			exit(EXIT_FAILURE);
409 
410 		/*
411 			 * We now have a private file descriptor table and all
412 			 * our open fds should still be open but made
413 			 * close-on-exec.
414 			 */
415 		flags = fcntl(fd1, F_GETFD);
416 		EXPECT_GT(flags, -1);
417 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
418 
419 		flags = fcntl(fd2, F_GETFD);
420 		EXPECT_GT(flags, -1);
421 		EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
422 
423 		fd3 = dup2(fd1, 42);
424 		EXPECT_GT(fd3, 0);
425 
426 		/*
427 			 * Duplicating the file descriptor must remove the
428 			 * FD_CLOEXEC flag.
429 			 */
430 		flags = fcntl(fd3, F_GETFD);
431 		EXPECT_GT(flags, -1);
432 		EXPECT_EQ(flags & FD_CLOEXEC, 0);
433 
434 		exit(EXIT_SUCCESS);
435 	}
436 
437 	EXPECT_EQ(waitpid(pid, &status, 0), pid);
438 	EXPECT_EQ(true, WIFEXITED(status));
439 	EXPECT_EQ(0, WEXITSTATUS(status));
440 
441 	/*
442 	 * We had a shared file descriptor table before along with requesting
443 	 * close-on-exec so the original fds must not be close-on-exec.
444 	 */
445 	flags = fcntl(fd1, F_GETFD);
446 	EXPECT_GT(flags, -1);
447 	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
448 
449 	flags = fcntl(fd2, F_GETFD);
450 	EXPECT_GT(flags, -1);
451 	EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
452 
453 	fd3 = dup2(fd1, 42);
454 	EXPECT_GT(fd3, 0);
455 
456 	flags = fcntl(fd3, F_GETFD);
457 	EXPECT_GT(flags, -1);
458 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
459 
460 	EXPECT_EQ(close(fd1), 0);
461 	EXPECT_EQ(close(fd2), 0);
462 	EXPECT_EQ(close(fd3), 0);
463 }
464 
465 /*
466  * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
467  */
TEST(close_range_cloexec_unshare_syzbot)468 TEST(close_range_cloexec_unshare_syzbot)
469 {
470 	int i, fd1, fd2, fd3, flags, ret, status;
471 	pid_t pid;
472 	struct __clone_args args = {
473 		.flags = CLONE_FILES,
474 		.exit_signal = SIGCHLD,
475 	};
476 
477 	/*
478 	 * Create a huge gap in the fd table. When we now call
479 	 * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
480 	 * bound the kernel will only copy up to fd1 file descriptors into the
481 	 * new fd table. If the kernel is buggy and doesn't handle
482 	 * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
483 	 * descriptors and we will oops!
484 	 *
485 	 * On a buggy kernel this should immediately oops. But let's loop just
486 	 * to be sure.
487 	 */
488 	fd1 = open("/dev/null", O_RDWR);
489 	EXPECT_GT(fd1, 0);
490 
491 	fd2 = dup2(fd1, 1000);
492 	EXPECT_GT(fd2, 0);
493 
494 	for (i = 0; i < 100; i++) {
495 
496 		pid = sys_clone3(&args, sizeof(args));
497 		ASSERT_GE(pid, 0);
498 
499 		if (pid == 0) {
500 			ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
501 						      CLOSE_RANGE_CLOEXEC);
502 			if (ret)
503 				exit(EXIT_FAILURE);
504 
505 			/*
506 			 * We now have a private file descriptor table and all
507 			 * our open fds should still be open but made
508 			 * close-on-exec.
509 			 */
510 			flags = fcntl(fd1, F_GETFD);
511 			EXPECT_GT(flags, -1);
512 			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
513 
514 			flags = fcntl(fd2, F_GETFD);
515 			EXPECT_GT(flags, -1);
516 			EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
517 
518 			fd3 = dup2(fd1, 42);
519 			EXPECT_GT(fd3, 0);
520 
521 			/*
522 			 * Duplicating the file descriptor must remove the
523 			 * FD_CLOEXEC flag.
524 			 */
525 			flags = fcntl(fd3, F_GETFD);
526 			EXPECT_GT(flags, -1);
527 			EXPECT_EQ(flags & FD_CLOEXEC, 0);
528 
529 			EXPECT_EQ(close(fd1), 0);
530 			EXPECT_EQ(close(fd2), 0);
531 			EXPECT_EQ(close(fd3), 0);
532 
533 			exit(EXIT_SUCCESS);
534 		}
535 
536 		EXPECT_EQ(waitpid(pid, &status, 0), pid);
537 		EXPECT_EQ(true, WIFEXITED(status));
538 		EXPECT_EQ(0, WEXITSTATUS(status));
539 	}
540 
541 	/*
542 	 * We created a private file descriptor table before along with
543 	 * requesting close-on-exec so the original fds must not be
544 	 * close-on-exec.
545 	 */
546 	flags = fcntl(fd1, F_GETFD);
547 	EXPECT_GT(flags, -1);
548 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
549 
550 	flags = fcntl(fd2, F_GETFD);
551 	EXPECT_GT(flags, -1);
552 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
553 
554 	fd3 = dup2(fd1, 42);
555 	EXPECT_GT(fd3, 0);
556 
557 	flags = fcntl(fd3, F_GETFD);
558 	EXPECT_GT(flags, -1);
559 	EXPECT_EQ(flags & FD_CLOEXEC, 0);
560 
561 	EXPECT_EQ(close(fd1), 0);
562 	EXPECT_EQ(close(fd2), 0);
563 	EXPECT_EQ(close(fd3), 0);
564 }
565 
566 TEST_HARNESS_MAIN
567