1 // SPDX-License-Identifier: GPL-2.0
2 #define _GNU_SOURCE
3 #include <sched.h>
4 #include <sys/mount.h>
5 #include <sys/stat.h>
6 #include <sys/types.h>
7 #include <linux/limits.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <linux/sched.h>
11 #include <fcntl.h>
12 #include <unistd.h>
13 #include <ftw.h>
14
15 #include "cgroup_helpers.h"
16
/*
 * To avoid relying on the system setup, when setup_cgroup_environment() is
 * called we create a new mount namespace and mount the cgroupv2 root at
 * CGROUP_MOUNT_PATH. Unfortunately, most people don't have cgroupv2 enabled
 * at this point in time, so it's easier to create our own mount namespace
 * and manage it ourselves. We assume /mnt exists.
 *
 * Related cgroupv1 helpers are named *classid*(), since we only use the
 * net_cls controller for tagging net_cls.classid. We assume the default
 * mount under /sys/fs/cgroup/net_cls, which should be the case for the
 * vast majority of users.
 */
29
/* Maximum number of directory descriptors nftw() may hold open at once */
#define WALK_FD_LIMIT			16

/* Where the cgroup v2 hierarchy is mounted for the tests */
#define CGROUP_MOUNT_PATH		"/mnt"
/* Default mount point of the cgroup hierarchies, and of v1 net_cls below it */
#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
/* Name prefix of the per-test work directory inside a cgroup mount */
#define CGROUP_WORK_DIR			"/cgroup-test-work-dir"

/*
 * Build "<v2 mount><work dir><pid><path>" in @buf. @buf must be a real array
 * (not a pointer), because its capacity is taken with sizeof().
 */
#define format_cgroup_path_pid(buf, path, pid)				\
	snprintf((buf), sizeof(buf), "%s%s%d%s",			\
		 CGROUP_MOUNT_PATH, CGROUP_WORK_DIR, (pid), (path))

/* Same as above, using the work directory of the calling process */
#define format_cgroup_path(buf, path)					\
	format_cgroup_path_pid(buf, path, getpid())

/* Same as above, using the work directory of the parent process */
#define format_parent_cgroup_path(buf, path)				\
	format_cgroup_path_pid(buf, path, getppid())

/* Build the cgroup v1 net_cls work directory path in the array @buf */
#define format_classid_path(buf)					\
	snprintf((buf), sizeof(buf), "%s%s",				\
		 NETCLS_MOUNT_PATH, CGROUP_WORK_DIR)
50
/*
 * Enable cgroup v2 controllers for the children of @cgroup_path by writing
 * one "+<controller>" entry per controller to its cgroup.subtree_control
 * file.
 *
 * @cgroup_path: Full path of the cgroup directory
 * @controllers: Space-separated controller names, or NULL to enable every
 *               controller listed in @cgroup_path/cgroup.controllers
 *
 * Returns 0 on success (including when cgroup.controllers is empty) and 1 on
 * failure, after logging the error.
 */
static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
	char path[PATH_MAX + 1];
	char enable[PATH_MAX + 1];
	char *c, *c2;
	int fd, cfd;
	ssize_t len;

	/* If no controllers are passed, enable all available controllers */
	if (!controllers) {
		snprintf(path, sizeof(path), "%s/cgroup.controllers",
			 cgroup_path);
		fd = open(path, O_RDONLY);
		if (fd < 0) {
			log_err("Opening cgroup.controllers: %s", path);
			return 1;
		}
		/* Leave room for the terminating NUL added below */
		len = read(fd, enable, sizeof(enable) - 1);
		if (len < 0) {
			close(fd);
			log_err("Reading cgroup.controllers: %s", path);
			return 1;
		} else if (len == 0) { /* No controllers to enable */
			close(fd);
			return 0;
		}
		enable[len] = 0;
		close(fd);
	} else {
		/*
		 * snprintf() always NUL-terminates, whereas strncpy() leaves
		 * the buffer unterminated when the source fills it, which
		 * would make strtok_r() below read past the end of enable[].
		 */
		int n = snprintf(enable, sizeof(enable), "%s", controllers);

		if (n < 0 || (size_t)n >= sizeof(enable)) {
			log_err("Controller list too long");
			return 1;
		}
	}

	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
	cfd = open(path, O_RDWR);
	if (cfd < 0) {
		log_err("Opening cgroup.subtree_control: %s", path);
		return 1;
	}

	/* strtok_r() splits enable[] in place on spaces */
	for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
		if (dprintf(cfd, "+%s\n", c) <= 0) {
			log_err("Enabling controller %s: %s", c, path);
			close(cfd);
			return 1;
		}
	}
	close(cfd);
	return 0;
}
100
/**
 * enable_controllers() - Enable cgroup v2 controllers
 * @relative_path: The cgroup path, relative to the workdir
 * @controllers: List of controllers to enable in cgroup.controllers format
 *
 * Resolve @relative_path against the calling process' work directory and
 * enable the given cgroup v2 controllers there. If @controllers is NULL,
 * all available controllers are enabled.
 *
 * If successful, 0 is returned.
 */
int enable_controllers(const char *relative_path, const char *controllers)
{
	char abs_path[PATH_MAX + 1];
	int rc;

	format_cgroup_path(abs_path, relative_path);
	rc = __enable_controllers(abs_path, controllers);

	return rc;
}
119
/*
 * Write the string @buf to the file @file inside the cgroup directory
 * @cgroup_path (a full path).
 *
 * Returns 0 on success. Returns 1, after logging, if the file cannot be
 * opened or if dprintf() reports that nothing was written.
 */
static int __write_cgroup_file(const char *cgroup_path, const char *file,
			       const char *buf)
{
	char target[PATH_MAX + 1];
	int rc = 0;
	int fd;

	snprintf(target, sizeof(target), "%s/%s", cgroup_path, file);

	fd = open(target, O_RDWR);
	if (fd < 0) {
		log_err("Opening %s", target);
		return 1;
	}

	/* Log before close() so errno still reflects the failed write */
	if (dprintf(fd, "%s", buf) <= 0) {
		log_err("Writing to %s", target);
		rc = 1;
	}

	close(fd);
	return rc;
}
141
/**
 * write_cgroup_file() - Write to a cgroup file
 * @relative_path: The cgroup path, relative to the workdir
 * @file: The name of the file in cgroupfs to write to
 * @buf: Buffer to write to the file
 *
 * Write @buf to @file in the directory of the given cgroup, which is looked
 * up under the calling process' work directory.
 *
 * If successful, 0 is returned.
 */
int write_cgroup_file(const char *relative_path, const char *file,
		      const char *buf)
{
	char abs_path[PATH_MAX - 24];
	int rc;

	format_cgroup_path(abs_path, relative_path);
	rc = __write_cgroup_file(abs_path, file, buf);

	return rc;
}
160
/**
 * write_cgroup_file_parent() - Write to a cgroup file in the parent process
 *                              workdir
 * @relative_path: The cgroup path, relative to the parent process workdir
 * @file: The name of the file in cgroupfs to write to
 * @buf: Buffer to write to the file
 *
 * Write @buf to @file in the directory of the given cgroup, which is looked
 * up under the work directory named after the parent process' pid.
 *
 * If successful, 0 is returned.
 */
int write_cgroup_file_parent(const char *relative_path, const char *file,
			     const char *buf)
{
	char abs_path[PATH_MAX - 24];
	int rc;

	format_parent_cgroup_path(abs_path, relative_path);
	rc = __write_cgroup_file(abs_path, file, buf);

	return rc;
}
181
182 /**
183 * setup_cgroup_environment() - Setup the cgroup environment
184 *
185 * After calling this function, cleanup_cgroup_environment should be called
186 * once testing is complete.
187 *
188 * This function will print an error to stderr and return 1 if it is unable
189 * to setup the cgroup environment. If setup is successful, 0 is returned.
190 */
setup_cgroup_environment(void)191 int setup_cgroup_environment(void)
192 {
193 char cgroup_workdir[PATH_MAX - 24];
194
195 format_cgroup_path(cgroup_workdir, "");
196
197 if (unshare(CLONE_NEWNS)) {
198 log_err("unshare");
199 return 1;
200 }
201
202 if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) {
203 log_err("mount fakeroot");
204 return 1;
205 }
206
207 if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL) && errno != EBUSY) {
208 log_err("mount cgroup2");
209 return 1;
210 }
211
212 /* Cleanup existing failed runs, now that the environment is setup */
213 cleanup_cgroup_environment();
214
215 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
216 log_err("mkdir cgroup work dir");
217 return 1;
218 }
219
220 /* Enable all available controllers to increase test coverage */
221 if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
222 __enable_controllers(cgroup_workdir, NULL))
223 return 1;
224
225 return 0;
226 }
227
/*
 * nftw() callback used by the cleanup helpers: remove every directory the
 * walk reports, logging (but not aborting on) rmdir() failures.
 *
 * The nftw() type flag is an enumerated value, not a bitmask, so it is
 * compared for equality instead of being tested with '&'. Directories are
 * reported as FTW_DP when the walk runs with FTW_DEPTH (as the callers in
 * this file do) and as FTW_D otherwise.
 *
 * Always returns 0 so that the walk continues.
 */
static int nftwfunc(const char *filename, const struct stat *statptr,
		    int fileflags, struct FTW *pfwt)
{
	if (fileflags != FTW_D && fileflags != FTW_DP)
		return 0;

	if (rmdir(filename))
		log_err("Removing cgroup: %s", filename);
	return 0;
}
235
/*
 * Move the calling process into the cgroup at @cgroup_path (a full path) by
 * writing its pid to that cgroup's cgroup.procs file.
 *
 * Returns 0 on success and 1 on failure, after logging the error.
 */
static int join_cgroup_from_top(const char *cgroup_path)
{
	char procs_path[PATH_MAX + 1];
	int fd;

	snprintf(procs_path, sizeof(procs_path), "%s/cgroup.procs",
		 cgroup_path);

	fd = open(procs_path, O_WRONLY);
	if (fd < 0) {
		log_err("Opening Cgroup Procs: %s", procs_path);
		return 1;
	}

	if (dprintf(fd, "%d\n", getpid()) < 0) {
		log_err("Joining Cgroup");
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}
259
/**
 * join_cgroup() - Join a cgroup
 * @relative_path: The cgroup path, relative to the workdir, to join
 *
 * The cgroup must already exist below the calling process' cgroup work dir.
 * For example, passing "/my-cgroup" puts the calling process into the
 * "/my-cgroup" cgroup of that work dir.
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];
	int rc;

	format_cgroup_path(abs_path, relative_path);
	rc = join_cgroup_from_top(abs_path);

	return rc;
}
278
/**
 * join_parent_cgroup() - Join a cgroup in the parent process workdir
 * @relative_path: The cgroup path, relative to parent process workdir, to join
 *
 * Works like join_cgroup(), except that @relative_path is resolved against
 * the work dir named after the parent process' pid.
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_parent_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];
	int rc;

	format_parent_cgroup_path(abs_path, relative_path);
	rc = join_cgroup_from_top(abs_path);

	return rc;
}
294
295 /**
296 * cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
297 *
298 * This is an idempotent function to delete all temporary cgroups that
299 * have been created during the test, including the cgroup testing work
300 * directory.
301 *
302 * At call time, it moves the calling process to the root cgroup, and then
303 * runs the deletion process. It is idempotent, and should not fail, unless
304 * a process is lingering.
305 *
306 * On failure, it will print an error to stderr, and try to continue.
307 */
cleanup_cgroup_environment(void)308 void cleanup_cgroup_environment(void)
309 {
310 char cgroup_workdir[PATH_MAX + 1];
311
312 format_cgroup_path(cgroup_workdir, "");
313 join_cgroup_from_top(CGROUP_MOUNT_PATH);
314 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
315 }
316
317 /**
318 * get_root_cgroup() - Get the FD of the root cgroup
319 *
320 * On success, it returns the file descriptor. On failure, it returns -1.
321 * If there is a failure, it prints the error to stderr.
322 */
get_root_cgroup(void)323 int get_root_cgroup(void)
324 {
325 int fd;
326
327 fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
328 if (fd < 0) {
329 log_err("Opening root cgroup");
330 return -1;
331 }
332 return fd;
333 }
334
/**
 * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
 * @relative_path: The cgroup path, relative to the workdir, to create
 *
 * This function creates a cgroup under the top level workdir and returns a
 * read-only file descriptor for its directory. An already existing cgroup
 * is not an error, which makes the call idempotent.
 *
 * On success, it returns the file descriptor. On failure it returns -1.
 * If there is a failure, it prints the error to stderr.
 */
int create_and_get_cgroup(const char *relative_path)
{
	char abs_path[PATH_MAX + 1];
	int cg_fd;

	format_cgroup_path(abs_path, relative_path);

	if (mkdir(abs_path, 0777) != 0 && errno != EEXIST) {
		log_err("mkdiring cgroup %s .. %s", relative_path, abs_path);
		return -1;
	}

	cg_fd = open(abs_path, O_RDONLY);
	if (cg_fd >= 0)
		return cg_fd;

	log_err("Opening Cgroup");
	return -1;
}
364
/**
 * get_cgroup_id() - Get cgroup id for a particular cgroup path
 * @relative_path: The cgroup path, relative to the workdir
 *
 * The id is taken from the 8-byte file handle that name_to_handle_at()
 * reports for the cgroup directory.
 *
 * On success, it returns the cgroup id. On failure it returns 0,
 * which is an invalid cgroup id.
 * If there is a failure, it prints the error to stderr.
 */
unsigned long long get_cgroup_id(const char *relative_path)
{
	union {
		unsigned long long cgid;
		unsigned char raw_bytes[8];
	} id;
	char cgroup_dir[PATH_MAX + 1];
	struct file_handle *handle, *grown;
	unsigned long long cgid = 0;
	int mount_id;

	format_cgroup_path(cgroup_dir, relative_path);

	handle = calloc(1, sizeof(*handle));
	if (!handle) {
		log_err("calloc");
		return 0;
	}

	/*
	 * Sizing probe with a zero-length handle: this call is expected to
	 * fail while reporting the required size in handle_bytes. Success,
	 * or any size other than 8 bytes, is treated as an error.
	 */
	if (name_to_handle_at(AT_FDCWD, cgroup_dir, handle, &mount_id, 0) >= 0 ||
	    handle->handle_bytes != 8) {
		log_err("name_to_handle_at");
		goto free_mem;
	}

	/* Keep the old pointer until realloc() is known to have succeeded */
	grown = realloc(handle,
			sizeof(struct file_handle) + handle->handle_bytes);
	if (!grown) {
		log_err("realloc");
		goto free_mem;
	}
	handle = grown;

	if (name_to_handle_at(AT_FDCWD, cgroup_dir, handle, &mount_id, 0) < 0) {
		log_err("name_to_handle_at");
		goto free_mem;
	}

	memcpy(id.raw_bytes, handle->f_handle, 8);
	cgid = id.cgid;

free_mem:
	free(handle);
	return cgid;
}
420
/**
 * cgroup_setup_and_join() - Setup the cgroup environment, then create and
 *                           join a test cgroup
 * @path: The cgroup path, relative to the workdir, to create and join
 *
 * Combines setup_cgroup_environment(), create_and_get_cgroup() and
 * join_cgroup(). If a step after the environment setup fails, the
 * environment is cleaned up again before returning.
 *
 * On success, it returns the file descriptor of the created cgroup. On
 * failure it returns a negative value and prints the error to stderr.
 */
int cgroup_setup_and_join(const char *path)
{
	int cg_fd;

	if (setup_cgroup_environment()) {
		fprintf(stderr, "Failed to setup cgroup environment\n");
		return -EINVAL;
	}

	cg_fd = create_and_get_cgroup(path);
	if (cg_fd < 0) {
		fprintf(stderr, "Failed to create test cgroup\n");
		cleanup_cgroup_environment();
		return cg_fd;
	}

	if (join_cgroup(path)) {
		fprintf(stderr, "Failed to join cgroup\n");
		/*
		 * The caller only gets an error code back and so cannot
		 * close the descriptor itself; release it here.
		 */
		close(cg_fd);
		cleanup_cgroup_environment();
		return -EINVAL;
	}
	return cg_fd;
}
443
444 /**
445 * setup_classid_environment() - Setup the cgroupv1 net_cls environment
446 *
447 * After calling this function, cleanup_classid_environment should be called
448 * once testing is complete.
449 *
450 * This function will print an error to stderr and return 1 if it is unable
451 * to setup the cgroup environment. If setup is successful, 0 is returned.
452 */
setup_classid_environment(void)453 int setup_classid_environment(void)
454 {
455 char cgroup_workdir[PATH_MAX + 1];
456
457 format_classid_path(cgroup_workdir);
458
459 if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
460 errno != EBUSY) {
461 log_err("mount cgroup base");
462 return 1;
463 }
464
465 if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
466 log_err("mkdir cgroup net_cls");
467 return 1;
468 }
469
470 if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
471 errno != EBUSY) {
472 log_err("mount cgroup net_cls");
473 return 1;
474 }
475
476 cleanup_classid_environment();
477
478 if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
479 log_err("mkdir cgroup work dir");
480 return 1;
481 }
482
483 return 0;
484 }
485
/**
 * set_classid() - Set a cgroupv1 net_cls classid
 * @id: the numeric classid
 *
 * Writes the passed classid into the cgroup work dir's net_cls.classid
 * file in order to later on trigger socket tagging.
 *
 * On success, it returns 0, otherwise on failure it returns 1. If there
 * is a failure, it prints the error to stderr.
 */
int set_classid(unsigned int id)
{
	char workdir[PATH_MAX - 42];
	char classid_path[PATH_MAX + 1];
	int fd;

	format_classid_path(workdir);
	snprintf(classid_path, sizeof(classid_path), "%s/net_cls.classid",
		 workdir);

	fd = open(classid_path, O_WRONLY);
	if (fd < 0) {
		log_err("Opening cgroup classid: %s", classid_path);
		return 1;
	}

	if (dprintf(fd, "%u\n", id) < 0) {
		log_err("Setting cgroup classid");
		close(fd);
		return 1;
	}

	close(fd);
	return 0;
}
520
/**
 * join_classid() - Join a cgroupv1 net_cls classid
 *
 * Moves the calling process into the net_cls cgroup work dir, which must
 * already have been created. This causes the process sockets to be tagged
 * with the given net_cls classid.
 *
 * On success, it returns 0, otherwise on failure it returns 1.
 */
int join_classid(void)
{
	char workdir[PATH_MAX + 1];
	int rc;

	format_classid_path(workdir);
	rc = join_cgroup_from_top(workdir);

	return rc;
}
537
538 /**
539 * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
540 *
541 * At call time, it moves the calling process to the root cgroup, and then
542 * runs the deletion process.
543 *
544 * On failure, it will print an error to stderr, and try to continue.
545 */
cleanup_classid_environment(void)546 void cleanup_classid_environment(void)
547 {
548 char cgroup_workdir[PATH_MAX + 1];
549
550 format_classid_path(cgroup_workdir);
551 join_cgroup_from_top(NETCLS_MOUNT_PATH);
552 nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
553 }
554