1 /*
2  * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com>
3  *
4  * Permission to use, copy, modify, and distribute this software for any
5  * purpose with or without fee is hereby granted, provided that the above
6  * copyright notice and this permission notice appear in all copies.
7  *
8  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15  */
16 /*
17  * Create a process without mappings by unmapping everything at once and
18  * holding it with ptrace(2). See what happens to
19  *
20  *	/proc/${pid}/maps
21  *	/proc/${pid}/numa_maps
22  *	/proc/${pid}/smaps
23  *	/proc/${pid}/smaps_rollup
24  */
25 #undef NDEBUG
26 #include <assert.h>
27 #include <errno.h>
28 #include <stdint.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <fcntl.h>
33 #include <sys/mman.h>
34 #include <sys/ptrace.h>
35 #include <sys/resource.h>
36 #include <sys/types.h>
37 #include <sys/wait.h>
38 #include <unistd.h>
39 
/*
 * Expected vsyscall line(s) of /proc/${pid}/maps and /proc/${pid}/smaps.
 * Both are pointed at one of the proc_pid_*_vsyscall_N tables by main()
 * once the vsyscall mode has been probed.
 */
static const char *g_proc_pid_smaps_vsyscall;
static const char *g_proc_pid_maps_vsyscall;

/*
 * Probed vsyscall mode:
 *
 *	0: vsyscall VMA doesn't exist	vsyscall=none
 *	1: vsyscall VMA is --xp		vsyscall=xonly
 *	2: vsyscall VMA is r-xp		vsyscall=emulate
 *
 * The probing child stores the value right before each access that may
 * fault and its SIGSEGV handler reads it back, hence "volatile".
 */
static volatile int g_vsyscall;
48 
/*
 * Expected content of /proc/${pid}/maps for a process without mappings,
 * one table per g_vsyscall value. Only the permission field differs
 * between the execute-only (1) and readable (2) variants.
 */
static const char proc_pid_maps_vsyscall_0[] = "";

static const char proc_pid_maps_vsyscall_1[] =
	"ffffffffff600000-ffffffffff601000 --xp "
	"00000000 00:00 0                  [vsyscall]\n";

static const char proc_pid_maps_vsyscall_2[] =
	"ffffffffff600000-ffffffffff601000 r-xp "
	"00000000 00:00 0                  [vsyscall]\n";
54 
/* g_vsyscall == 0 (vsyscall=none): no vsyscall VMA, smaps is expected empty. */
static const char proc_pid_smaps_vsyscall_0[] = "";

/*
 * g_vsyscall == 1 (vsyscall=xonly): the vsyscall VMA is execute-only, so the
 * header line must carry "--xp", the same permissions as
 * proc_pid_maps_vsyscall_1.
 */
static const char proc_pid_smaps_vsyscall_1[] =
"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0                  [vsyscall]\n"
"Size:                  4 kB\n"
"KernelPageSize:        4 kB\n"
"MMUPageSize:           4 kB\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
"THPeligible:    0\n"
/*
 * The "ProtectionKey:" field is conditional, so it is left out of the
 * expected text. It could be checked as well, but the author had no
 * machine with protection keys to test on.
 */
;
86 
/*
 * g_vsyscall == 2 (vsyscall=emulate): the vsyscall VMA is readable and
 * executable, so the header line must carry "r-xp", the same permissions as
 * proc_pid_maps_vsyscall_2.
 */
static const char proc_pid_smaps_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0                  [vsyscall]\n"
"Size:                  4 kB\n"
"KernelPageSize:        4 kB\n"
"MMUPageSize:           4 kB\n"
"Rss:                   0 kB\n"
"Pss:                   0 kB\n"
"Pss_Dirty:             0 kB\n"
"Shared_Clean:          0 kB\n"
"Shared_Dirty:          0 kB\n"
"Private_Clean:         0 kB\n"
"Private_Dirty:         0 kB\n"
"Referenced:            0 kB\n"
"Anonymous:             0 kB\n"
"LazyFree:              0 kB\n"
"AnonHugePages:         0 kB\n"
"ShmemPmdMapped:        0 kB\n"
"FilePmdMapped:         0 kB\n"
"Shared_Hugetlb:        0 kB\n"
"Private_Hugetlb:       0 kB\n"
"Swap:                  0 kB\n"
"SwapPss:               0 kB\n"
"Locked:                0 kB\n"
"THPeligible:    0\n"
/*
 * The "ProtectionKey:" field is conditional, so it is left out of the
 * expected text. It could be checked as well; this is still to be done.
 */
;
116 
/*
 * SIGSEGV handler installed by main() in the child that unmaps itself.
 *
 * All three arguments are ignored; the process is terminated at once
 * with EXIT_FAILURE.
 */
static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
{
	(void)_;
	(void)__;
	(void)___;

	_exit(EXIT_FAILURE);
}
121 
/*
 * SIGSEGV handler used while probing the vsyscall page.
 *
 * Terminates the probing child, using the current value of g_vsyscall as
 * the exit status so that the parent learns which access faulted.
 * The arguments are ignored.
 */
static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___)
{
	(void)_;
	(void)__;
	(void)___;

	_exit(g_vsyscall);
}
126 
127 /*
128  * vsyscall page can't be unmapped, probe it directly.
129  */
vsyscall(void)130 static void vsyscall(void)
131 {
132 	pid_t pid;
133 	int wstatus;
134 
135 	pid = fork();
136 	if (pid < 0) {
137 		fprintf(stderr, "fork, errno %d\n", errno);
138 		exit(1);
139 	}
140 	if (pid == 0) {
141 		setrlimit(RLIMIT_CORE, &(struct rlimit){});
142 
143 		/* Hide "segfault at ffffffffff600000" messages. */
144 		struct sigaction act = {};
145 		act.sa_flags = SA_SIGINFO;
146 		act.sa_sigaction = sigaction_SIGSEGV_vsyscall;
147 		sigaction(SIGSEGV, &act, NULL);
148 
149 		g_vsyscall = 0;
150 		/* gettimeofday(NULL, NULL); */
151 		uint64_t rax = 0xffffffffff600000;
152 		asm volatile (
153 			"call *%[rax]"
154 			: [rax] "+a" (rax)
155 			: "D" (NULL), "S" (NULL)
156 			: "rcx", "r11"
157 		);
158 
159 		g_vsyscall = 1;
160 		*(volatile int *)0xffffffffff600000UL;
161 
162 		g_vsyscall = 2;
163 		exit(g_vsyscall);
164 	}
165 	waitpid(pid, &wstatus, 0);
166 	if (WIFEXITED(wstatus)) {
167 		g_vsyscall = WEXITSTATUS(wstatus);
168 	} else {
169 		fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus);
170 		exit(1);
171 	}
172 }
173 
test_proc_pid_maps(pid_t pid)174 static int test_proc_pid_maps(pid_t pid)
175 {
176 	char buf[4096];
177 	snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
178 	int fd = open(buf, O_RDONLY);
179 	if (fd == -1) {
180 		perror("open /proc/${pid}/maps");
181 		return EXIT_FAILURE;
182 	} else {
183 		ssize_t rv = read(fd, buf, sizeof(buf));
184 		close(fd);
185 		if (g_vsyscall == 0) {
186 			assert(rv == 0);
187 		} else {
188 			size_t len = strlen(g_proc_pid_maps_vsyscall);
189 			assert(rv == len);
190 			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
191 		}
192 		return EXIT_SUCCESS;
193 	}
194 }
195 
/*
 * Check that /proc/${pid}/numa_maps of the process without mappings
 * reads as empty; anything else trips an assertion.
 *
 * Returns EXIT_SUCCESS if the file is empty or doesn't exist,
 * EXIT_FAILURE if it can't be opened for any other reason.
 */
static int test_proc_pid_numa_maps(pid_t pid)
{
	char buf[4096];
	ssize_t rv;
	int fd;

	snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid);
	fd = open(buf, O_RDONLY);
	if (fd != -1) {
		rv = read(fd, buf, sizeof(buf));
		close(fd);
		assert(rv == 0);
		return EXIT_SUCCESS;
	}

	if (errno != ENOENT) {
		perror("open /proc/${pid}/numa_maps");
		return EXIT_FAILURE;
	}

	/*
	 * /proc/${pid}/numa_maps is under CONFIG_NUMA,
	 * it doesn't necessarily exist.
	 */
	return EXIT_SUCCESS;
}
218 
test_proc_pid_smaps(pid_t pid)219 static int test_proc_pid_smaps(pid_t pid)
220 {
221 	char buf[4096];
222 	snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
223 	int fd = open(buf, O_RDONLY);
224 	if (fd == -1) {
225 		if (errno == ENOENT) {
226 			/*
227 			 * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR,
228 			 * it doesn't necessarily exist.
229 			 */
230 			return EXIT_SUCCESS;
231 		}
232 		perror("open /proc/${pid}/smaps");
233 		return EXIT_FAILURE;
234 	} else {
235 		ssize_t rv = read(fd, buf, sizeof(buf));
236 		close(fd);
237 		if (g_vsyscall == 0) {
238 			assert(rv == 0);
239 		} else {
240 			size_t len = strlen(g_proc_pid_maps_vsyscall);
241 			/* TODO "ProtectionKey:" */
242 			assert(rv > len);
243 			assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
244 		}
245 		return EXIT_SUCCESS;
246 	}
247 }
248 
/*
 * Exact text test_proc_pid_smaps_rollup() expects from
 * /proc/${pid}/smaps_rollup: an all-zero "[rollup]" header line
 * followed by counters that are all 0 kB.
 */
static const char g_smaps_rollup[] =
	"00000000-00000000 ---p 00000000 00:00 0                                  [rollup]\n"
	"Rss:                   0 kB\n"
	"Pss:                   0 kB\n"
	"Pss_Dirty:             0 kB\n"
	"Pss_Anon:              0 kB\n"
	"Pss_File:              0 kB\n"
	"Pss_Shmem:             0 kB\n"
	"Shared_Clean:          0 kB\n"
	"Shared_Dirty:          0 kB\n"
	"Private_Clean:         0 kB\n"
	"Private_Dirty:         0 kB\n"
	"Referenced:            0 kB\n"
	"Anonymous:             0 kB\n"
	"LazyFree:              0 kB\n"
	"AnonHugePages:         0 kB\n"
	"ShmemPmdMapped:        0 kB\n"
	"FilePmdMapped:         0 kB\n"
	"Shared_Hugetlb:        0 kB\n"
	"Private_Hugetlb:       0 kB\n"
	"Swap:                  0 kB\n"
	"SwapPss:               0 kB\n"
	"Locked:                0 kB\n";
273 
test_proc_pid_smaps_rollup(pid_t pid)274 static int test_proc_pid_smaps_rollup(pid_t pid)
275 {
276 	char buf[4096];
277 	snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
278 	int fd = open(buf, O_RDONLY);
279 	if (fd == -1) {
280 		if (errno == ENOENT) {
281 			/*
282 			 * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR,
283 			 * it doesn't necessarily exist.
284 			 */
285 			return EXIT_SUCCESS;
286 		}
287 		perror("open /proc/${pid}/smaps_rollup");
288 		return EXIT_FAILURE;
289 	} else {
290 		ssize_t rv = read(fd, buf, sizeof(buf));
291 		close(fd);
292 		assert(rv == sizeof(g_smaps_rollup) - 1);
293 		assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0);
294 		return EXIT_SUCCESS;
295 	}
296 }
297 
main(void)298 int main(void)
299 {
300 	int rv = EXIT_SUCCESS;
301 
302 	vsyscall();
303 
304 	switch (g_vsyscall) {
305 	case 0:
306 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_0;
307 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0;
308 		break;
309 	case 1:
310 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_1;
311 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1;
312 		break;
313 	case 2:
314 		g_proc_pid_maps_vsyscall  = proc_pid_maps_vsyscall_2;
315 		g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2;
316 		break;
317 	default:
318 		abort();
319 	}
320 
321 	pid_t pid = fork();
322 	if (pid == -1) {
323 		perror("fork");
324 		return EXIT_FAILURE;
325 	} else if (pid == 0) {
326 		rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
327 		if (rv != 0) {
328 			if (errno == EPERM) {
329 				fprintf(stderr,
330 "Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n"
331 				);
332 				kill(getppid(), SIGTERM);
333 				return EXIT_FAILURE;
334 			}
335 			perror("ptrace PTRACE_TRACEME");
336 			return EXIT_FAILURE;
337 		}
338 
339 		/*
340 		 * Hide "segfault at ..." messages. Signal handler won't run.
341 		 */
342 		struct sigaction act = {};
343 		act.sa_flags = SA_SIGINFO;
344 		act.sa_sigaction = sigaction_SIGSEGV;
345 		sigaction(SIGSEGV, &act, NULL);
346 
347 #ifdef __amd64__
348 		munmap(NULL, ((size_t)1 << 47) - 4096);
349 #else
350 #error "implement 'unmap everything'"
351 #endif
352 		return EXIT_FAILURE;
353 	} else {
354 		/*
355 		 * TODO find reliable way to signal parent that munmap(2) completed.
356 		 * Child can't do it directly because it effectively doesn't exist
357 		 * anymore. Looking at child's VM files isn't 100% reliable either:
358 		 * due to a bug they may not become empty or empty-like.
359 		 */
360 		sleep(1);
361 
362 		if (rv == EXIT_SUCCESS) {
363 			rv = test_proc_pid_maps(pid);
364 		}
365 		if (rv == EXIT_SUCCESS) {
366 			rv = test_proc_pid_numa_maps(pid);
367 		}
368 		if (rv == EXIT_SUCCESS) {
369 			rv = test_proc_pid_smaps(pid);
370 		}
371 		if (rv == EXIT_SUCCESS) {
372 			rv = test_proc_pid_smaps_rollup(pid);
373 		}
374 		/*
375 		 * TODO test /proc/${pid}/statm, task_statm()
376 		 * ->start_code, ->end_code aren't updated by munmap().
377 		 * Output can be "0 0 0 2 0 0 0\n" where "2" can be anything.
378 		 */
379 
380 		/* Cut the rope. */
381 		int wstatus;
382 		waitpid(pid, &wstatus, 0);
383 		assert(WIFSTOPPED(wstatus));
384 		assert(WSTOPSIG(wstatus) == SIGSEGV);
385 	}
386 
387 	return rv;
388 }
389