1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2 
3 #if defined(__i386__) || defined(__x86_64__)
4 #include <cpuid.h>
5 #endif
6 #include <errno.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <unistd.h>
10 
11 #include "alloc-util.h"
12 #include "cgroup-util.h"
13 #include "dirent-util.h"
14 #include "env-util.h"
15 #include "errno-util.h"
16 #include "fd-util.h"
17 #include "fileio.h"
18 #include "macro.h"
19 #include "process-util.h"
20 #include "stat-util.h"
21 #include "string-table.h"
22 #include "string-util.h"
23 #include "virt.h"
24 
/* Tri-state result of probing the SMBIOS "virtual machine" bit (see detect_vm_smbios()). */
enum {
      SMBIOS_VM_BIT_SET     = 0, /* the firmware flags the system as a VM */
      SMBIOS_VM_BIT_UNSET   = 1, /* the bit was read and is clear */
      SMBIOS_VM_BIT_UNKNOWN = 2, /* the table could not be read or is too short */
};
30 
detect_vm_cpuid(void)31 static Virtualization detect_vm_cpuid(void) {
32 
33         /* CPUID is an x86 specific interface. */
34 #if defined(__i386__) || defined(__x86_64__)
35 
36         static const struct {
37                 const char sig[13];
38                 Virtualization id;
39         } vm_table[] = {
40                 { "XenVMMXenVMM", VIRTUALIZATION_XEN       },
41                 { "KVMKVMKVM",    VIRTUALIZATION_KVM       }, /* qemu with KVM */
42                 { "Linux KVM Hv", VIRTUALIZATION_KVM       }, /* qemu with KVM + HyperV Enlightenments */
43                 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU      }, /* qemu without KVM */
44                 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
45                 { "VMwareVMware", VIRTUALIZATION_VMWARE    },
46                 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
47                 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
48                 /* https://wiki.freebsd.org/bhyve */
49                 { "bhyve bhyve ", VIRTUALIZATION_BHYVE     },
50                 { "QNXQVMBSQG",   VIRTUALIZATION_QNX       },
51                 /* https://projectacrn.org */
52                 { "ACRNACRNACRN", VIRTUALIZATION_ACRN      },
53         };
54 
55         uint32_t eax, ebx, ecx, edx;
56         bool hypervisor;
57 
58         /* http://lwn.net/Articles/301888/ */
59 
60         /* First detect whether there is a hypervisor */
61         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
62                 return VIRTUALIZATION_NONE;
63 
64         hypervisor = ecx & 0x80000000U;
65 
66         if (hypervisor) {
67                 union {
68                         uint32_t sig32[3];
69                         char text[13];
70                 } sig = {};
71 
72                 /* There is a hypervisor, see what it is */
73                 __cpuid(0x40000000U, eax, ebx, ecx, edx);
74 
75                 sig.sig32[0] = ebx;
76                 sig.sig32[1] = ecx;
77                 sig.sig32[2] = edx;
78 
79                 log_debug("Virtualization found, CPUID=%s", sig.text);
80 
81                 for (size_t i = 0; i < ELEMENTSOF(vm_table); i++)
82                         if (memcmp_nn(sig.text, sizeof(sig.text),
83                                       vm_table[i].sig, sizeof(vm_table[i].sig)) == 0)
84                                 return vm_table[i].id;
85 
86                 log_debug("Unknown virtualization with CPUID=%s. Add to vm_table[]?", sig.text);
87                 return VIRTUALIZATION_VM_OTHER;
88         }
89 #endif
90         log_debug("No virtualization found in CPUID");
91 
92         return VIRTUALIZATION_NONE;
93 }
94 
detect_vm_device_tree(void)95 static Virtualization detect_vm_device_tree(void) {
96 #if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
97         _cleanup_free_ char *hvtype = NULL;
98         int r;
99 
100         r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
101         if (r == -ENOENT) {
102                 _cleanup_closedir_ DIR *dir = NULL;
103 
104                 if (access("/proc/device-tree/ibm,partition-name", F_OK) == 0 &&
105                     access("/proc/device-tree/hmc-managed?", F_OK) == 0 &&
106                     access("/proc/device-tree/chosen/qemu,graphic-width", F_OK) != 0)
107                         return VIRTUALIZATION_POWERVM;
108 
109                 dir = opendir("/proc/device-tree");
110                 if (!dir) {
111                         if (errno == ENOENT) {
112                                 log_debug_errno(errno, "/proc/device-tree: %m");
113                                 return VIRTUALIZATION_NONE;
114                         }
115                         return -errno;
116                 }
117 
118                 FOREACH_DIRENT(de, dir, return -errno)
119                         if (strstr(de->d_name, "fw-cfg")) {
120                                 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", de->d_name);
121                                 return VIRTUALIZATION_QEMU;
122                         }
123 
124                 log_debug("No virtualization found in /proc/device-tree/*");
125                 return VIRTUALIZATION_NONE;
126         } else if (r < 0)
127                 return r;
128 
129         log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
130         if (streq(hvtype, "linux,kvm"))
131                 return VIRTUALIZATION_KVM;
132         else if (strstr(hvtype, "xen"))
133                 return VIRTUALIZATION_XEN;
134         else if (strstr(hvtype, "vmware"))
135                 return VIRTUALIZATION_VMWARE;
136         else
137                 return VIRTUALIZATION_VM_OTHER;
138 #else
139         log_debug("This platform does not support /proc/device-tree");
140         return VIRTUALIZATION_NONE;
141 #endif
142 }
143 
144 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)
detect_vm_dmi_vendor(void)145 static Virtualization detect_vm_dmi_vendor(void) {
146         static const char* const dmi_vendors[] = {
147                 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
148                 "/sys/class/dmi/id/sys_vendor",
149                 "/sys/class/dmi/id/board_vendor",
150                 "/sys/class/dmi/id/bios_vendor",
151                 "/sys/class/dmi/id/product_version", /* For Hyper-V VMs test */
152                 NULL
153         };
154 
155         static const struct {
156                 const char *vendor;
157                 Virtualization id;
158         } dmi_vendor_table[] = {
159                 { "KVM",                 VIRTUALIZATION_KVM       },
160                 { "OpenStack",           VIRTUALIZATION_KVM       }, /* Detect OpenStack instance as KVM in non x86 architecture */
161                 { "Amazon EC2",          VIRTUALIZATION_AMAZON    },
162                 { "QEMU",                VIRTUALIZATION_QEMU      },
163                 { "VMware",              VIRTUALIZATION_VMWARE    }, /* https://kb.vmware.com/s/article/1009458 */
164                 { "VMW",                 VIRTUALIZATION_VMWARE    },
165                 { "innotek GmbH",        VIRTUALIZATION_ORACLE    },
166                 { "VirtualBox",          VIRTUALIZATION_ORACLE    },
167                 { "Xen",                 VIRTUALIZATION_XEN       },
168                 { "Bochs",               VIRTUALIZATION_BOCHS     },
169                 { "Parallels",           VIRTUALIZATION_PARALLELS },
170                 /* https://wiki.freebsd.org/bhyve */
171                 { "BHYVE",               VIRTUALIZATION_BHYVE     },
172                 { "Hyper-V",             VIRTUALIZATION_MICROSOFT },
173         };
174         int r;
175 
176         STRV_FOREACH(vendor, dmi_vendors) {
177                 _cleanup_free_ char *s = NULL;
178 
179                 r = read_one_line_file(*vendor, &s);
180                 if (r < 0) {
181                         if (r == -ENOENT)
182                                 continue;
183 
184                         return r;
185                 }
186 
187                 for (size_t i = 0; i < ELEMENTSOF(dmi_vendor_table); i++)
188                         if (startswith(s, dmi_vendor_table[i].vendor)) {
189                                 log_debug("Virtualization %s found in DMI (%s)", s, *vendor);
190                                 return dmi_vendor_table[i].id;
191                         }
192         }
193         log_debug("No virtualization found in DMI vendor table.");
194         return VIRTUALIZATION_NONE;
195 }
196 
detect_vm_smbios(void)197 static int detect_vm_smbios(void) {
198         /* The SMBIOS BIOS Charateristics Extension Byte 2 (Section 2.1.2.2 of
199          * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.4.0.pdf), specifies that
200          * the 4th bit being set indicates a VM. The BIOS Characteristics table is exposed via the kernel in
201          * /sys/firmware/dmi/entries/0-0. Note that in the general case, this bit being unset should not
202          * imply that the system is running on bare-metal.  For example, QEMU 3.1.0 (with or without KVM)
203          * with SeaBIOS does not set this bit. */
204         _cleanup_free_ char *s = NULL;
205         size_t readsize;
206         int r;
207 
208         r = read_full_virtual_file("/sys/firmware/dmi/entries/0-0/raw", &s, &readsize);
209         if (r < 0) {
210                 log_debug_errno(r, "Unable to read /sys/firmware/dmi/entries/0-0/raw, "
211                                 "using the virtualization information found in DMI vendor table, ignoring: %m");
212                 return SMBIOS_VM_BIT_UNKNOWN;
213         }
214         if (readsize < 20 || s[1] < 20) {
215                 /* The spec indicates that byte 1 contains the size of the table, 0x12 + the number of
216                  * extension bytes. The data we're interested in is in extension byte 2, which would be at
217                  * 0x13. If we didn't read that much data, or if the BIOS indicates that we don't have that
218                  * much data, we don't infer anything from the SMBIOS. */
219                 log_debug("Only read %zu bytes from /sys/firmware/dmi/entries/0-0/raw (expected 20). "
220                           "Using the virtualization information found in DMI vendor table.", readsize);
221                 return SMBIOS_VM_BIT_UNKNOWN;
222         }
223 
224         uint8_t byte = (uint8_t) s[19];
225         if (byte & (1U<<4)) {
226                 log_debug("DMI BIOS Extension table indicates virtualization.");
227                 return SMBIOS_VM_BIT_SET;
228         }
229         log_debug("DMI BIOS Extension table does not indicate virtualization.");
230         return SMBIOS_VM_BIT_UNSET;
231 }
232 #endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) */
233 
detect_vm_dmi(void)234 static Virtualization detect_vm_dmi(void) {
235 #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)
236 
237         int r;
238         r = detect_vm_dmi_vendor();
239 
240         /* The DMI vendor tables in /sys/class/dmi/id don't help us distinguish between Amazon EC2
241          * virtual machines and bare-metal instances, so we need to look at SMBIOS. */
242         if (r == VIRTUALIZATION_AMAZON) {
243                 switch (detect_vm_smbios()) {
244                 case SMBIOS_VM_BIT_SET:
245                         return VIRTUALIZATION_AMAZON;
246                 case SMBIOS_VM_BIT_UNSET:
247                         return VIRTUALIZATION_NONE;
248                 case SMBIOS_VM_BIT_UNKNOWN: {
249                         /* The DMI information we are after is only accessible to the root user,
250                          * so we fallback to using the product name which is less restricted
251                          * to distinguish metal systems from virtualized instances */
252                         _cleanup_free_ char *s = NULL;
253 
254                         r = read_full_virtual_file("/sys/class/dmi/id/product_name", &s, NULL);
255                         /* In EC2, virtualized is much more common than metal, so if for some reason
256                          * we fail to read the DMI data, assume we are virtualized. */
257                         if (r < 0) {
258                                 log_debug_errno(r, "Can't read /sys/class/dmi/id/product_name,"
259                                                 " assuming virtualized: %m");
260                                 return VIRTUALIZATION_AMAZON;
261                         }
262                         if (endswith(truncate_nl(s), ".metal")) {
263                                 log_debug("DMI product name ends with '.metal', assuming no virtualization");
264                                 return VIRTUALIZATION_NONE;
265                         } else
266                                 return VIRTUALIZATION_AMAZON;
267                 }
268                 default:
269                         assert_not_reached();
270               }
271         }
272 
273         /* If we haven't identified a VM, but the firmware indicates that there is one, indicate as much. We
274          * have no further information about what it is. */
275         if (r == VIRTUALIZATION_NONE && detect_vm_smbios() == SMBIOS_VM_BIT_SET)
276                 return VIRTUALIZATION_VM_OTHER;
277         return r;
278 #else
279         return VIRTUALIZATION_NONE;
280 #endif
281 }
282 
283 #define XENFEAT_dom0 11 /* xen/include/public/features.h */
284 #define PATH_FEATURES "/sys/hypervisor/properties/features"
285 /* Returns -errno, or 0 for domU, or 1 for dom0 */
detect_vm_xen_dom0(void)286 static int detect_vm_xen_dom0(void) {
287         _cleanup_free_ char *domcap = NULL;
288         int r;
289 
290         r = read_one_line_file(PATH_FEATURES, &domcap);
291         if (r < 0 && r != -ENOENT)
292                 return r;
293         if (r >= 0) {
294                 unsigned long features;
295 
296                 /* Here, we need to use sscanf() instead of safe_atoul()
297                  * as the string lacks the leading "0x". */
298                 r = sscanf(domcap, "%lx", &features);
299                 if (r == 1) {
300                         r = !!(features & (1U << XENFEAT_dom0));
301                         log_debug("Virtualization XEN, found %s with value %08lx, "
302                                   "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
303                                   PATH_FEATURES, features, r ? "" : " not");
304                         return r;
305                 }
306                 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
307                           PATH_FEATURES, domcap);
308         }
309 
310         r = read_one_line_file("/proc/xen/capabilities", &domcap);
311         if (r == -ENOENT) {
312                 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
313                 return 0;
314         }
315         if (r < 0)
316                 return r;
317 
318         for (const char *i = domcap;;) {
319                 _cleanup_free_ char *cap = NULL;
320 
321                 r = extract_first_word(&i, &cap, ",", 0);
322                 if (r < 0)
323                         return r;
324                 if (r == 0) {
325                         log_debug("Virtualization XEN DomU found (/proc/xen/capabilities)");
326                         return 0;
327                 }
328 
329                 if (streq(cap, "control_d")) {
330                         log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
331                         return 1;
332                 }
333         }
334 }
335 
detect_vm_xen(void)336 static Virtualization detect_vm_xen(void) {
337         /* The presence of /proc/xen indicates some form of a Xen domain
338            The check for Dom0 is handled outside this function */
339         if (access("/proc/xen", F_OK) < 0) {
340                 log_debug("Virtualization XEN not found, /proc/xen does not exist");
341                 return VIRTUALIZATION_NONE;
342         }
343         log_debug("Virtualization XEN found (/proc/xen exists)");
344         return VIRTUALIZATION_XEN;
345 }
346 
detect_vm_hypervisor(void)347 static Virtualization detect_vm_hypervisor(void) {
348         _cleanup_free_ char *hvtype = NULL;
349         int r;
350 
351         r = read_one_line_file("/sys/hypervisor/type", &hvtype);
352         if (r == -ENOENT)
353                 return VIRTUALIZATION_NONE;
354         if (r < 0)
355                 return r;
356 
357         log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
358 
359         if (streq(hvtype, "xen"))
360                 return VIRTUALIZATION_XEN;
361         else
362                 return VIRTUALIZATION_VM_OTHER;
363 }
364 
detect_vm_uml(void)365 static Virtualization detect_vm_uml(void) {
366         _cleanup_fclose_ FILE *f = NULL;
367         int r;
368 
369         /* Detect User-Mode Linux by reading /proc/cpuinfo */
370         f = fopen("/proc/cpuinfo", "re");
371         if (!f) {
372                 if (errno == ENOENT) {
373                         log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
374                         return VIRTUALIZATION_NONE;
375                 }
376                 return -errno;
377         }
378 
379         for (;;) {
380                 _cleanup_free_ char *line = NULL;
381                 const char *t;
382 
383                 r = read_line(f, LONG_LINE_MAX, &line);
384                 if (r < 0)
385                         return r;
386                 if (r == 0)
387                         break;
388 
389                 t = startswith(line, "vendor_id\t: ");
390                 if (t) {
391                         if (startswith(t, "User Mode Linux")) {
392                                 log_debug("UML virtualization found in /proc/cpuinfo");
393                                 return VIRTUALIZATION_UML;
394                         }
395 
396                         break;
397                 }
398         }
399 
400         log_debug("UML virtualization not found in /proc/cpuinfo.");
401         return VIRTUALIZATION_NONE;
402 }
403 
detect_vm_zvm(void)404 static Virtualization detect_vm_zvm(void) {
405 
406 #if defined(__s390__)
407         _cleanup_free_ char *t = NULL;
408         int r;
409 
410         r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
411         if (r == -ENOENT)
412                 return VIRTUALIZATION_NONE;
413         if (r < 0)
414                 return r;
415 
416         log_debug("Virtualization %s found in /proc/sysinfo", t);
417         if (streq(t, "z/VM"))
418                 return VIRTUALIZATION_ZVM;
419         else
420                 return VIRTUALIZATION_KVM;
421 #else
422         log_debug("This platform does not support /proc/sysinfo");
423         return VIRTUALIZATION_NONE;
424 #endif
425 }
426 
/* Returns a short identifier for the various VM implementations.
 *
 * Runs the individual detect_vm_*() probes in a fixed order and returns the first definite answer.
 * A successful result (including VIRTUALIZATION_NONE) is stored in a thread-local cache and
 * returned directly on later calls. Negative (error) results are returned without being cached. */
Virtualization detect_vm(void) {
        static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
        bool other = false; /* set when some probe saw a VM it could not identify */
        int xen_dom0 = 0;   /* > 0 once we know we are the Xen Dom0 */
        Virtualization v, dmi;

        if (cached_found >= 0)
                return cached_found;

        /* We have to use the correct order here:
         *
         * → First, try to detect Oracle Virtualbox and Amazon EC2 Nitro, even if they use KVM, as well as Xen even if
         *   it cloaks as Microsoft Hyper-V. Attempt to detect uml at this stage also since it runs as a user-process
         *   nested inside other VMs. Also check for Xen now, because Xen PV mode does not override CPUID when nested
         *   inside another hypervisor.
         *
         * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
         *   overwritten.
         *
         * → Third, try to detect from DMI. */

        /* Note that a negative (error) result in 'dmi' is not acted upon here. It is only returned
         * further down, if UML, Xen and CPUID did not already settle the matter. */
        dmi = detect_vm_dmi();
        if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN, VIRTUALIZATION_AMAZON)) {
                v = dmi;
                goto finish;
        }

        /* Detect UML */
        v = detect_vm_uml();
        if (v < 0)
                return v;
        if (v != VIRTUALIZATION_NONE)
                goto finish;

        /* Detect Xen */
        v = detect_vm_xen();
        if (v < 0)
                return v;
        if (v == VIRTUALIZATION_XEN) {
                 /* If we are Dom0, then we expect to not report as a VM. However, as we might be nested
                  * inside another hypervisor which can be detected via the CPUID check, wait to report this
                  * until after the CPUID check. */
                xen_dom0 = detect_vm_xen_dom0();
                if (xen_dom0 < 0)
                        return xen_dom0;
                if (xen_dom0 == 0)
                        goto finish; /* DomU: report Xen */

                v = VIRTUALIZATION_NONE;
        } else if (v != VIRTUALIZATION_NONE)
                assert_not_reached();

        /* Detect from CPUID */
        v = detect_vm_cpuid();
        if (v < 0)
                return v;
        if (v == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (v != VIRTUALIZATION_NONE)
                goto finish;

        /* If we are in Dom0 and have not yet finished, finish with the result of detect_vm_cpuid */
        if (xen_dom0 > 0)
                goto finish;

        /* Now, let's get back to DMI */
        if (dmi < 0)
                return dmi;
        if (dmi == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (dmi != VIRTUALIZATION_NONE) {
                v = dmi;
                goto finish;
        }

        /* Check high-level hypervisor sysfs file */
        v = detect_vm_hypervisor();
        if (v < 0)
                return v;
        if (v == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (v != VIRTUALIZATION_NONE)
                goto finish;

        v = detect_vm_device_tree();
        if (v < 0)
                return v;
        if (v == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (v != VIRTUALIZATION_NONE)
                goto finish;

        v = detect_vm_zvm();
        if (v < 0)
                return v;

finish:
        /* No probe could name the hypervisor, but at least one of them saw an unidentified VM. */
        if (v == VIRTUALIZATION_NONE && other)
                v = VIRTUALIZATION_VM_OTHER;

        cached_found = v;
        log_debug("Found VM virtualization %s", virtualization_to_string(v));
        return v;
}
532 
/* Names of the container managers, indexed by their Virtualization id. Ids that are not listed
 * here are left as NULL by the designated initializers. */
static const char *const container_table[_VIRTUALIZATION_MAX] = {
        [VIRTUALIZATION_LXC]            = "lxc",
        [VIRTUALIZATION_LXC_LIBVIRT]    = "lxc-libvirt",
        [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
        [VIRTUALIZATION_DOCKER]         = "docker",
        [VIRTUALIZATION_PODMAN]         = "podman",
        [VIRTUALIZATION_RKT]            = "rkt",
        [VIRTUALIZATION_WSL]            = "wsl",
        [VIRTUALIZATION_PROOT]          = "proot",
        [VIRTUALIZATION_POUCH]          = "pouch",
};

/* NOTE(review): presumably expands to a file-local name → id lookup over container_table
 * (container_from_string()) — confirm against string-table.h. */
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
546 
/* Guesses from the files visible below /sys/fs/cgroup whether we run inside a cgroup namespace,
 * i.e. whether what we see as the top of the hierarchy is not the real root cgroup.
 *
 * Returns > 0 if so, 0 if not (or if cgroup namespaces are unsupported), and a negative errno-style
 * value if one of the checks failed for a reason other than the file being absent. */
static int running_in_cgroupns(void) {
        int unified;

        if (!cg_ns_supported())
                return false;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        if (!unified) {
                /* cgroup v1 */

                /* Without a mounted systemd controller there is nothing to look at. */
                if (access("/sys/fs/cgroup/systemd", F_OK) < 0)
                        return errno == ENOENT ? false : -errno;

                /* release_agent only exists in the root cgroup, so finding it means we are not
                 * namespaced. */
                if (access("/sys/fs/cgroup/systemd/release_agent", F_OK) >= 0)
                        return false;

                return errno == ENOENT ? true : -errno;
        }

        /* cgroup v2 */

        /* All kernel versions have cgroup.events in nested cgroups, so its absence means we are
         * looking at the root. */
        if (access("/sys/fs/cgroup/cgroup.events", F_OK) < 0)
                return errno == ENOENT ? false : -errno;

        /* There's no cgroup.type in the root cgroup, and future kernel versions are unlikely to
         * add it since cgroup.type is something that makes no sense whatsoever in the root
         * cgroup. */
        if (access("/sys/fs/cgroup/cgroup.type", F_OK) == 0)
                return true;
        if (errno != ENOENT)
                return -errno;

        /* On older kernel versions, there's no cgroup.type. Without the features file this is an
         * old kernel that we know for sure has cgroup.events only in nested cgroups. */
        if (access("/sys/kernel/cgroup/features", F_OK) < 0)
                return errno == ENOENT ? true : -errno;

        /* This is a recent kernel, and cgroup.type doesn't exist, so we must be in the root
         * cgroup. */
        return false;
}
612 
detect_container_files(void)613 static Virtualization detect_container_files(void) {
614         static const struct {
615                 const char *file_path;
616                 Virtualization id;
617         } container_file_table[] = {
618                 /* https://github.com/containers/podman/issues/6192 */
619                 /* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
620                 { "/run/.containerenv", VIRTUALIZATION_PODMAN },
621                 /* https://github.com/moby/moby/issues/18355 */
622                 /* Docker must be the last in this table, see below. */
623                 { "/.dockerenv",        VIRTUALIZATION_DOCKER },
624         };
625 
626         for (size_t i = 0; i < ELEMENTSOF(container_file_table); i++) {
627                 if (access(container_file_table[i].file_path, F_OK) >= 0)
628                         return container_file_table[i].id;
629 
630                 if (errno != ENOENT)
631                         log_debug_errno(errno,
632                                         "Checking if %s exists failed, ignoring: %m",
633                                         container_file_table[i].file_path);
634         }
635 
636         return VIRTUALIZATION_NONE;
637 }
638 
detect_container(void)639 Virtualization detect_container(void) {
640         static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
641         _cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
642         const char *e = NULL;
643         Virtualization v;
644         int r;
645 
646         if (cached_found >= 0)
647                 return cached_found;
648 
649         /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
650         if (access("/proc/vz", F_OK) < 0) {
651                 if (errno != ENOENT)
652                         log_debug_errno(errno, "Failed to check if /proc/vz exists, ignoring: %m");
653         } else if (access("/proc/bc", F_OK) < 0) {
654                 if (errno == ENOENT) {
655                         v = VIRTUALIZATION_OPENVZ;
656                         goto finish;
657                 }
658 
659                 log_debug_errno(errno, "Failed to check if /proc/bc exists, ignoring: %m");
660         }
661 
662         /* "Official" way of detecting WSL https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
663         r = read_one_line_file("/proc/sys/kernel/osrelease", &o);
664         if (r < 0)
665                 log_debug_errno(r, "Failed to read /proc/sys/kernel/osrelease, ignoring: %m");
666         else if (strstr(o, "Microsoft") || strstr(o, "WSL")) {
667                 v = VIRTUALIZATION_WSL;
668                 goto finish;
669         }
670 
671         /* proot doesn't use PID namespacing, so we can just check if we have a matching tracer for this
672          * invocation without worrying about it being elsewhere.
673          */
674         r = get_proc_field("/proc/self/status", "TracerPid", WHITESPACE, &p);
675         if (r < 0)
676                 log_debug_errno(r, "Failed to read our own trace PID, ignoring: %m");
677         else if (!streq(p, "0")) {
678                 pid_t ptrace_pid;
679 
680                 r = parse_pid(p, &ptrace_pid);
681                 if (r < 0)
682                         log_debug_errno(r, "Failed to parse our own tracer PID, ignoring: %m");
683                 else {
684                         _cleanup_free_ char *ptrace_comm = NULL;
685                         const char *pf;
686 
687                         pf = procfs_file_alloca(ptrace_pid, "comm");
688                         r = read_one_line_file(pf, &ptrace_comm);
689                         if (r < 0)
690                                 log_debug_errno(r, "Failed to read %s, ignoring: %m", pf);
691                         else if (startswith(ptrace_comm, "proot")) {
692                                 v = VIRTUALIZATION_PROOT;
693                                 goto finish;
694                         }
695                 }
696         }
697 
698         /* The container manager might have placed this in the /run/host/ hierarchy for us, which is best
699          * because we can be consumed just like that, without special privileges. */
700         r = read_one_line_file("/run/host/container-manager", &m);
701         if (r > 0) {
702                 e = m;
703                 goto translate_name;
704         }
705         if (!IN_SET(r, -ENOENT, 0))
706                 return log_debug_errno(r, "Failed to read /run/host/container-manager: %m");
707 
708         if (getpid_cached() == 1) {
709                 /* If we are PID 1 we can just check our own environment variable, and that's authoritative.
710                  * We distinguish three cases:
711                  * - the variable is not defined → we jump to other checks
712                  * - the variable is defined to an empty value → we are not in a container
713                  * - anything else → some container, either one of the known ones or "container-other"
714                  */
715                 e = getenv("container");
716                 if (!e)
717                         goto check_files;
718                 if (isempty(e)) {
719                         v = VIRTUALIZATION_NONE;
720                         goto finish;
721                 }
722 
723                 goto translate_name;
724         }
725 
726         /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
727          * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
728         r = read_one_line_file("/run/systemd/container", &m);
729         if (r > 0) {
730                 e = m;
731                 goto translate_name;
732         }
733         if (!IN_SET(r, -ENOENT, 0))
734                 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
735 
        /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used). */
737         r = getenv_for_pid(1, "container", &m);
738         if (r > 0) {
739                 e = m;
740                 goto translate_name;
741         }
742         if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
743                 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
744 
745 check_files:
746         /* Check for existence of some well-known files. We only do this after checking
747          * for other specific container managers, otherwise we risk mistaking another
748          * container manager for Docker: the /.dockerenv file could inadvertently end up
749          * in a file system image. */
750         v = detect_container_files();
751         if (v < 0)
752                 return v;
753         if (v != VIRTUALIZATION_NONE)
754                 goto finish;
755 
756         r = running_in_cgroupns();
757         if (r > 0) {
758                 v = VIRTUALIZATION_CONTAINER_OTHER;
759                 goto finish;
760         }
761         if (r < 0)
762                 log_debug_errno(r, "Failed to detect cgroup namespace: %m");
763 
764         /* If none of that worked, give up, assume no container manager. */
765         v = VIRTUALIZATION_NONE;
766         goto finish;
767 
768 translate_name:
769         if (streq(e, "oci")) {
770                 /* Some images hardcode container=oci, but OCI is not a specific container manager.
771                  * Try to detect one based on well-known files. */
772                 v = detect_container_files();
773                 if (v != VIRTUALIZATION_NONE)
774                         v = VIRTUALIZATION_CONTAINER_OTHER;
775                 goto finish;
776         }
777         v = container_from_string(e);
778         if (v < 0)
779                 v = VIRTUALIZATION_CONTAINER_OTHER;
780 
781 finish:
782         log_debug("Found container virtualization %s.", virtualization_to_string(v));
783         cached_found = v;
784         return v;
785 }
786 
detect_virtualization(void)787 Virtualization detect_virtualization(void) {
788         int v;
789 
790         v = detect_container();
791         if (v != VIRTUALIZATION_NONE)
792                 return v;
793 
794         return detect_vm();
795 }
796 
userns_has_mapping(const char * name)797 static int userns_has_mapping(const char *name) {
798         _cleanup_fclose_ FILE *f = NULL;
799         uid_t a, b, c;
800         int r;
801 
802         f = fopen(name, "re");
803         if (!f) {
804                 log_debug_errno(errno, "Failed to open %s: %m", name);
805                 return errno == ENOENT ? false : -errno;
806         }
807 
808         errno = 0;
809         r = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &a, &b, &c);
810         if (r == EOF) {
811                 if (ferror(f))
812                         return log_debug_errno(errno_or_else(EIO), "Failed to read %s: %m", name);
813 
814                 log_debug("%s is empty, we're in an uninitialized user namespace", name);
815                 return true;
816         }
817         if (r != 3)
818                 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to parse %s: %m", name);
819 
820         if (a == 0 && b == 0 && c == UINT32_MAX) {
821                 /* The kernel calls mappings_overlap() and does not allow overlaps */
822                 log_debug("%s has a full 1:1 mapping", name);
823                 return false;
824         }
825 
826         /* Anything else implies that we are in a user namespace */
827         log_debug("Mapping found in %s, we're in a user namespace", name);
828         return true;
829 }
830 
running_in_userns(void)831 int running_in_userns(void) {
832         _cleanup_free_ char *line = NULL;
833         int r;
834 
835         r = userns_has_mapping("/proc/self/uid_map");
836         if (r != 0)
837                 return r;
838 
839         r = userns_has_mapping("/proc/self/gid_map");
840         if (r != 0)
841                 return r;
842 
843         /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also possible to compile a
844          * kernel without CONFIG_USER_NS, in which case "setgroups" also does not exist. We cannot
845          * distinguish those two cases, so assume that we're running on a stripped-down recent kernel, rather
846          * than on an old one, and if the file is not found, return false. */
847         r = read_virtual_file("/proc/self/setgroups", SIZE_MAX, &line, NULL);
848         if (r < 0) {
849                 log_debug_errno(r, "/proc/self/setgroups: %m");
850                 return r == -ENOENT ? false : r;
851         }
852 
853         strstrip(line); /* remove trailing newline */
854 
855         r = streq(line, "deny");
856         /* See user_namespaces(7) for a description of this "setgroups" contents. */
857         log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
858         return r;
859 }
860 
/* Checks whether we run in a chroot() environment by comparing the root directory of PID 1 with our own.
 *
 * Returns > 0 if "/proc/1/root" and "/" are different files, 0 if they are the same or if the check is
 * disabled by setting $SYSTEMD_IGNORE_CHROOT to a true value, and a negative errno-style value if the
 * comparison itself fails. */
int running_in_chroot(void) {
        int same;

        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;

        same = files_same("/proc/1/root", "/", 0);

        /* Propagate errors as they are, otherwise invert: "not the same" means "in a chroot". */
        return same < 0 ? same : !same;
}
873 
874 #if defined(__i386__) || defined(__x86_64__)
/* Associates a single bit of a CPUID output register with the flag name it is looked up by. */
struct cpuid_table_entry {
        /* Index of the bit within the 32-bit register value; used as a shift count by given_flag_in_set(). */
        uint32_t flag_bit;
        /* Flag name that given_flag_in_set() compares the requested flag against. */
        const char *name;
};
879 
/* Flags matched against the EDX value returned by CPUID leaf 1 (see real_has_cpu_with_flag()). */
static const struct cpuid_table_entry leaf1_edx[] = {
        {  0, "fpu" },
        {  1, "vme" },
        {  2, "de" },
        {  3, "pse" },
        {  4, "tsc" },
        {  5, "msr" },
        {  6, "pae" },
        {  7, "mce" },
        {  8, "cx8" },
        {  9, "apic" },
        { 11, "sep" },
        { 12, "mtrr" },
        { 13, "pge" },
        { 14, "mca" },
        { 15, "cmov" },
        { 16, "pat" },
        { 17, "pse36" },
        { 19, "clflush" },
        { 23, "mmx" },
        { 24, "fxsr" },
        { 25, "sse" },
        { 26, "sse2" },
        { 28, "ht" },
};
905 
/* Flags matched against the ECX value returned by CPUID leaf 1 (see real_has_cpu_with_flag()). */
static const struct cpuid_table_entry leaf1_ecx[] = {
        {  0, "pni" },
        {  1, "pclmul" },
        {  3, "monitor" },
        {  9, "ssse3" },
        { 12, "fma3" },
        { 13, "cx16" },
        { 19, "sse4_1" },
        { 20, "sse4_2" },
        { 22, "movbe" },
        { 23, "popcnt" },
        { 25, "aes" },
        { 26, "xsave" },
        { 27, "osxsave" },
        { 28, "avx" },
        { 29, "f16c" },
        { 30, "rdrand" },
};
924 
/* Flags matched against the EBX value returned by CPUID leaf 7 (see real_has_cpu_with_flag()). */
static const struct cpuid_table_entry leaf7_ebx[] = {
        {  3, "bmi1" },
        {  5, "avx2" },
        {  8, "bmi2" },
        { 18, "rdseed" },
        { 19, "adx" },
        { 29, "sha_ni" },
};
933 
/* Flags matched against the EDX value returned by extended CPUID leaf 0x80000001 (see
 * real_has_cpu_with_flag()). */
static const struct cpuid_table_entry leaf81_edx[] = {
        { 11, "syscall" },
        { 27, "rdtscp" },
        { 29, "lm" },
};
939 
/* Flags matched against the ECX value returned by extended CPUID leaf 0x80000001 (see
 * real_has_cpu_with_flag()). */
static const struct cpuid_table_entry leaf81_ecx[] = {
        {  0, "lahf_lm" },
        {  5, "abm" },
};
944 
/* Flags matched against the EDX value returned by extended CPUID leaf 0x80000007 (see
 * real_has_cpu_with_flag()). */
static const struct cpuid_table_entry leaf87_edx[] = {
        {  8, "constant_tsc" },
};
948 
given_flag_in_set(const char * flag,const struct cpuid_table_entry * set,size_t set_size,uint32_t val)949 static bool given_flag_in_set(const char *flag, const struct cpuid_table_entry *set, size_t set_size, uint32_t val) {
950         for (size_t i = 0; i < set_size; i++) {
951                 if ((UINT32_C(1) << set[i].flag_bit) & val &&
952                                 streq(flag, set[i].name))
953                         return true;
954         }
955         return false;
956 }
957 
real_has_cpu_with_flag(const char * flag)958 static bool real_has_cpu_with_flag(const char *flag) {
959         uint32_t eax, ebx, ecx, edx;
960 
961         if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
962                 if (given_flag_in_set(flag, leaf1_ecx, ELEMENTSOF(leaf1_ecx), ecx))
963                         return true;
964 
965                 if (given_flag_in_set(flag, leaf1_edx, ELEMENTSOF(leaf1_edx), edx))
966                         return true;
967         }
968 
969         if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) {
970                 if (given_flag_in_set(flag, leaf7_ebx, ELEMENTSOF(leaf7_ebx), ebx))
971                         return true;
972         }
973 
974         if (__get_cpuid(0x80000001U, &eax, &ebx, &ecx, &edx)) {
975                 if (given_flag_in_set(flag, leaf81_ecx, ELEMENTSOF(leaf81_ecx), ecx))
976                         return true;
977 
978                 if (given_flag_in_set(flag, leaf81_edx, ELEMENTSOF(leaf81_edx), edx))
979                         return true;
980         }
981 
982         if (__get_cpuid(0x80000007U, &eax, &ebx, &ecx, &edx))
983                 if (given_flag_in_set(flag, leaf87_edx, ELEMENTSOF(leaf87_edx), edx))
984                         return true;
985 
986         return false;
987 }
988 #endif
989 
/* Tells whether the CPU advertises the feature named 'flag'. The check is implemented on top of CPUID and
 * hence only available on x86; on every other architecture this always returns false. */
bool has_cpu_with_flag(const char *flag) {
#if !defined(__i386__) && !defined(__x86_64__)
        /* CPUID is an x86 specific interface. Assume on all others that no CPUs have those flags. */
        return false;
#else
        return real_has_cpu_with_flag(flag);
#endif
}
998 
/* String names of the Virtualization values, indexed by the enum value. The first group (ending in
 * "vm-other") covers VM hypervisors, the second group (ending in "container-other") container managers. */
static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
        [VIRTUALIZATION_NONE] = "none",
        [VIRTUALIZATION_KVM] = "kvm",
        [VIRTUALIZATION_AMAZON] = "amazon",
        [VIRTUALIZATION_QEMU] = "qemu",
        [VIRTUALIZATION_BOCHS] = "bochs",
        [VIRTUALIZATION_XEN] = "xen",
        [VIRTUALIZATION_UML] = "uml",
        [VIRTUALIZATION_VMWARE] = "vmware",
        [VIRTUALIZATION_ORACLE] = "oracle",
        [VIRTUALIZATION_MICROSOFT] = "microsoft",
        [VIRTUALIZATION_ZVM] = "zvm",
        [VIRTUALIZATION_PARALLELS] = "parallels",
        [VIRTUALIZATION_BHYVE] = "bhyve",
        [VIRTUALIZATION_QNX] = "qnx",
        [VIRTUALIZATION_ACRN] = "acrn",
        [VIRTUALIZATION_POWERVM] = "powervm",
        [VIRTUALIZATION_VM_OTHER] = "vm-other",

        [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
        [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
        [VIRTUALIZATION_LXC] = "lxc",
        [VIRTUALIZATION_OPENVZ] = "openvz",
        [VIRTUALIZATION_DOCKER] = "docker",
        [VIRTUALIZATION_PODMAN] = "podman",
        [VIRTUALIZATION_RKT] = "rkt",
        [VIRTUALIZATION_WSL] = "wsl",
        [VIRTUALIZATION_PROOT] = "proot",
        [VIRTUALIZATION_POUCH] = "pouch",
        [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
};

/* NOTE(review): presumably expands to the virtualization_to_string()/virtualization_from_string() lookup
 * helpers operating on the table above (virtualization_to_string() is used by detect_container()) —
 * confirm against string-table.h. */
DEFINE_STRING_TABLE_LOOKUP(virtualization, Virtualization);
1032