1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #include <fcntl.h>
4 #include <sys/ioctl.h>
5 #include <sys/mount.h>
6 
7 #include "errno-util.h"
8 #include "fd-util.h"
9 #include "fileio.h"
10 #include "missing_fs.h"
11 #include "missing_magic.h"
12 #include "namespace-util.h"
13 #include "process-util.h"
14 #include "stat-util.h"
15 #include "stdio-util.h"
16 #include "user-util.h"
17 
namespace_open(pid_t pid,int * pidns_fd,int * mntns_fd,int * netns_fd,int * userns_fd,int * root_fd)18 int namespace_open(pid_t pid, int *pidns_fd, int *mntns_fd, int *netns_fd, int *userns_fd, int *root_fd) {
19         _cleanup_close_ int pidnsfd = -1, mntnsfd = -1, netnsfd = -1, usernsfd = -1;
20         int rfd = -1;
21 
22         assert(pid >= 0);
23 
24         if (mntns_fd) {
25                 const char *mntns;
26 
27                 mntns = procfs_file_alloca(pid, "ns/mnt");
28                 mntnsfd = open(mntns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
29                 if (mntnsfd < 0)
30                         return -errno;
31         }
32 
33         if (pidns_fd) {
34                 const char *pidns;
35 
36                 pidns = procfs_file_alloca(pid, "ns/pid");
37                 pidnsfd = open(pidns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
38                 if (pidnsfd < 0)
39                         return -errno;
40         }
41 
42         if (netns_fd) {
43                 const char *netns;
44 
45                 netns = procfs_file_alloca(pid, "ns/net");
46                 netnsfd = open(netns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
47                 if (netnsfd < 0)
48                         return -errno;
49         }
50 
51         if (userns_fd) {
52                 const char *userns;
53 
54                 userns = procfs_file_alloca(pid, "ns/user");
55                 usernsfd = open(userns, O_RDONLY|O_NOCTTY|O_CLOEXEC);
56                 if (usernsfd < 0 && errno != ENOENT)
57                         return -errno;
58         }
59 
60         if (root_fd) {
61                 const char *root;
62 
63                 root = procfs_file_alloca(pid, "root");
64                 rfd = open(root, O_RDONLY|O_NOCTTY|O_CLOEXEC|O_DIRECTORY);
65                 if (rfd < 0)
66                         return -errno;
67         }
68 
69         if (pidns_fd)
70                 *pidns_fd = TAKE_FD(pidnsfd);
71 
72         if (mntns_fd)
73                 *mntns_fd = TAKE_FD(mntnsfd);
74 
75         if (netns_fd)
76                 *netns_fd = TAKE_FD(netnsfd);
77 
78         if (userns_fd)
79                 *userns_fd = TAKE_FD(usernsfd);
80 
81         if (root_fd)
82                 *root_fd = TAKE_FD(rfd);
83 
84         return 0;
85 }
86 
namespace_enter(int pidns_fd,int mntns_fd,int netns_fd,int userns_fd,int root_fd)87 int namespace_enter(int pidns_fd, int mntns_fd, int netns_fd, int userns_fd, int root_fd) {
88         int r;
89 
90         if (userns_fd >= 0) {
91                 /* Can't setns to your own userns, since then you could escalate from non-root to root in
92                  * your own namespace, so check if namespaces are equal before attempting to enter. */
93 
94                 r = files_same(FORMAT_PROC_FD_PATH(userns_fd), "/proc/self/ns/user", 0);
95                 if (r < 0)
96                         return r;
97                 if (r)
98                         userns_fd = -1;
99         }
100 
101         if (pidns_fd >= 0)
102                 if (setns(pidns_fd, CLONE_NEWPID) < 0)
103                         return -errno;
104 
105         if (mntns_fd >= 0)
106                 if (setns(mntns_fd, CLONE_NEWNS) < 0)
107                         return -errno;
108 
109         if (netns_fd >= 0)
110                 if (setns(netns_fd, CLONE_NEWNET) < 0)
111                         return -errno;
112 
113         if (userns_fd >= 0)
114                 if (setns(userns_fd, CLONE_NEWUSER) < 0)
115                         return -errno;
116 
117         if (root_fd >= 0) {
118                 if (fchdir(root_fd) < 0)
119                         return -errno;
120 
121                 if (chroot(".") < 0)
122                         return -errno;
123         }
124 
125         return reset_uid_gid();
126 }
127 
fd_is_ns(int fd,unsigned long nsflag)128 int fd_is_ns(int fd, unsigned long nsflag) {
129         struct statfs s;
130         int r;
131 
132         /* Checks whether the specified file descriptor refers to a namespace created by specifying nsflag in clone().
133          * On old kernels there's no nice way to detect that, hence on those we'll return a recognizable error (EUCLEAN),
134          * so that callers can handle this somewhat nicely.
135          *
136          * This function returns > 0 if the fd definitely refers to a network namespace, 0 if it definitely does not
137          * refer to a network namespace, -EUCLEAN if we can't determine, and other negative error codes on error. */
138 
139         if (fstatfs(fd, &s) < 0)
140                 return -errno;
141 
142         if (!is_fs_type(&s, NSFS_MAGIC)) {
143                 /* On really old kernels, there was no "nsfs", and network namespace sockets belonged to procfs
144                  * instead. Handle that in a somewhat smart way. */
145 
146                 if (is_fs_type(&s, PROC_SUPER_MAGIC)) {
147                         struct statfs t;
148 
149                         /* OK, so it is procfs. Let's see if our own network namespace is procfs, too. If so, then the
150                          * passed fd might refer to a network namespace, but we can't know for sure. In that case,
151                          * return a recognizable error. */
152 
153                         if (statfs("/proc/self/ns/net", &t) < 0)
154                                 return -errno;
155 
156                         if (s.f_type == t.f_type)
157                                 return -EUCLEAN; /* It's possible, we simply don't know */
158                 }
159 
160                 return 0; /* No! */
161         }
162 
163         r = ioctl(fd, NS_GET_NSTYPE);
164         if (r < 0) {
165                 if (errno == ENOTTY) /* Old kernels didn't know this ioctl, let's also return a recognizable error in that case */
166                         return -EUCLEAN;
167 
168                 return -errno;
169         }
170 
171         return (unsigned long) r == nsflag;
172 }
173 
detach_mount_namespace(void)174 int detach_mount_namespace(void) {
175 
176         /* Detaches the mount namespace, disabling propagation from our namespace to the host */
177 
178         if (unshare(CLONE_NEWNS) < 0)
179                 return -errno;
180 
181         return RET_NERRNO(mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL));
182 }
183 
userns_acquire(const char * uid_map,const char * gid_map)184 int userns_acquire(const char *uid_map, const char *gid_map) {
185         char path[STRLEN("/proc//uid_map") + DECIMAL_STR_MAX(pid_t) + 1];
186         _cleanup_(sigkill_waitp) pid_t pid = 0;
187         _cleanup_close_ int userns_fd = -1;
188         int r;
189 
190         assert(uid_map);
191         assert(gid_map);
192 
193         /* Forks off a process in a new userns, configures the specified uidmap/gidmap, acquires an fd to it,
194          * and then kills the process again. This way we have a userns fd that is not bound to any
195          * process. We can use that for file system mounts and similar. */
196 
197         r = safe_fork("(sd-mkuserns)", FORK_CLOSE_ALL_FDS|FORK_DEATHSIG|FORK_NEW_USERNS, &pid);
198         if (r < 0)
199                 return r;
200         if (r == 0)
201                 /* Child. We do nothing here, just freeze until somebody kills us. */
202                 freeze();
203 
204         xsprintf(path, "/proc/" PID_FMT "/uid_map", pid);
205         r = write_string_file(path, uid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
206         if (r < 0)
207                 return log_error_errno(r, "Failed to write UID map: %m");
208 
209         xsprintf(path, "/proc/" PID_FMT "/gid_map", pid);
210         r = write_string_file(path, gid_map, WRITE_STRING_FILE_DISABLE_BUFFER);
211         if (r < 0)
212                 return log_error_errno(r, "Failed to write GID map: %m");
213 
214         r = namespace_open(pid, NULL, NULL, NULL, &userns_fd, NULL);
215         if (r < 0)
216                 return log_error_errno(r, "Failed to open userns fd: %m");
217 
218         return TAKE_FD(userns_fd);
219 
220 }
221