]> git.proxmox.com Git - systemd.git/blame - src/basic/virt.c
New upstream version 252.5
[systemd.git] / src / basic / virt.c
CommitLineData
a032b68d 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
663996b3 2
1d42b86d
MB
3#if defined(__i386__) || defined(__x86_64__)
4#include <cpuid.h>
5#endif
663996b3 6#include <errno.h>
4c89c718
MP
7#include <stdint.h>
8#include <stdlib.h>
663996b3
MS
9#include <unistd.h>
10
db2df898 11#include "alloc-util.h"
3a6ce677 12#include "cgroup-util.h"
db2df898 13#include "dirent-util.h"
2897b343 14#include "env-util.h"
8f232108 15#include "errno-util.h"
db2df898
MP
16#include "fd-util.h"
17#include "fileio.h"
4c89c718 18#include "macro.h"
e3bff60a 19#include "process-util.h"
db2df898
MP
20#include "stat-util.h"
21#include "string-table.h"
22#include "string-util.h"
663996b3 23#include "virt.h"
663996b3 24
8b3d4ff0
MB
/* Possible outcomes of looking for the "virtual machine" bit in the SMBIOS BIOS characteristics, as
 * returned by detect_vm_smbios(). */
enum {
        SMBIOS_VM_BIT_SET     = 0, /* the bit was read and is set */
        SMBIOS_VM_BIT_UNSET   = 1, /* the bit was read and is clear */
        SMBIOS_VM_BIT_UNKNOWN = 2, /* the bit could not be determined */
};
30
8f232108 31static Virtualization detect_vm_cpuid(void) {
663996b3 32
db2df898 33 /* CPUID is an x86 specific interface. */
60f067b4 34#if defined(__i386__) || defined(__x86_64__)
663996b3 35
8f232108
MB
36 static const struct {
37 const char sig[13];
38 Virtualization id;
39 } vm_table[] = {
40 { "XenVMMXenVMM", VIRTUALIZATION_XEN },
41 { "KVMKVMKVM", VIRTUALIZATION_KVM }, /* qemu with KVM */
42 { "Linux KVM Hv", VIRTUALIZATION_KVM }, /* qemu with KVM + HyperV Enlightenments */
43 { "TCGTCGTCGTCG", VIRTUALIZATION_QEMU }, /* qemu without KVM */
44 /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
45 { "VMwareVMware", VIRTUALIZATION_VMWARE },
46 /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
47 { "Microsoft Hv", VIRTUALIZATION_MICROSOFT },
48 /* https://wiki.freebsd.org/bhyve */
49 { "bhyve bhyve ", VIRTUALIZATION_BHYVE },
50 { "QNXQVMBSQG", VIRTUALIZATION_QNX },
51 /* https://projectacrn.org */
52 { "ACRNACRNACRN", VIRTUALIZATION_ACRN },
9fe6880f
MB
53 /* https://www.lockheedmartin.com/en-us/products/Hardened-Security-for-Intel-Processors.html */
54 { "SRESRESRESRE", VIRTUALIZATION_SRE },
8f232108
MB
55 };
56
1d42b86d 57 uint32_t eax, ebx, ecx, edx;
663996b3 58 bool hypervisor;
663996b3
MS
59
60 /* http://lwn.net/Articles/301888/ */
61
663996b3 62 /* First detect whether there is a hypervisor */
1d42b86d
MB
63 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
64 return VIRTUALIZATION_NONE;
663996b3 65
b012e921 66 hypervisor = ecx & 0x80000000U;
663996b3
MS
67
68 if (hypervisor) {
6300502b
MP
69 union {
70 uint32_t sig32[3];
71 char text[13];
72 } sig = {};
663996b3
MS
73
74 /* There is a hypervisor, see what it is */
1d42b86d
MB
75 __cpuid(0x40000000U, eax, ebx, ecx, edx);
76
77 sig.sig32[0] = ebx;
78 sig.sig32[1] = ecx;
79 sig.sig32[2] = edx;
663996b3 80
aa27b158
MP
81 log_debug("Virtualization found, CPUID=%s", sig.text);
82
8f232108
MB
83 for (size_t i = 0; i < ELEMENTSOF(vm_table); i++)
84 if (memcmp_nn(sig.text, sizeof(sig.text),
85 vm_table[i].sig, sizeof(vm_table[i].sig)) == 0)
86 return vm_table[i].id;
60f067b4 87
8f232108
MB
88 log_debug("Unknown virtualization with CPUID=%s. Add to vm_table[]?", sig.text);
89 return VIRTUALIZATION_VM_OTHER;
663996b3 90 }
60f067b4 91#endif
aa27b158 92 log_debug("No virtualization found in CPUID");
60f067b4 93
6300502b 94 return VIRTUALIZATION_NONE;
60f067b4
JS
95}
96
8f232108 97static Virtualization detect_vm_device_tree(void) {
e3bff60a 98#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
e735f4d4
MP
99 _cleanup_free_ char *hvtype = NULL;
100 int r;
101
e3bff60a 102 r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
6300502b 103 if (r == -ENOENT) {
e3bff60a 104 _cleanup_closedir_ DIR *dir = NULL;
e3bff60a 105
a032b68d
MB
106 if (access("/proc/device-tree/ibm,partition-name", F_OK) == 0 &&
107 access("/proc/device-tree/hmc-managed?", F_OK) == 0 &&
108 access("/proc/device-tree/chosen/qemu,graphic-width", F_OK) != 0)
109 return VIRTUALIZATION_POWERVM;
110
e3bff60a
MP
111 dir = opendir("/proc/device-tree");
112 if (!dir) {
aa27b158
MP
113 if (errno == ENOENT) {
114 log_debug_errno(errno, "/proc/device-tree: %m");
6300502b 115 return VIRTUALIZATION_NONE;
aa27b158 116 }
e3bff60a
MP
117 return -errno;
118 }
119
ea0999c9
MB
120 FOREACH_DIRENT(de, dir, return -errno)
121 if (strstr(de->d_name, "fw-cfg")) {
122 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", de->d_name);
6300502b 123 return VIRTUALIZATION_QEMU;
aa27b158 124 }
6300502b 125
aa27b158 126 log_debug("No virtualization found in /proc/device-tree/*");
6300502b
MP
127 return VIRTUALIZATION_NONE;
128 } else if (r < 0)
129 return r;
130
aa27b158 131 log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
6300502b
MP
132 if (streq(hvtype, "linux,kvm"))
133 return VIRTUALIZATION_KVM;
134 else if (strstr(hvtype, "xen"))
135 return VIRTUALIZATION_XEN;
e1f67bc7
MB
136 else if (strstr(hvtype, "vmware"))
137 return VIRTUALIZATION_VMWARE;
6300502b
MP
138 else
139 return VIRTUALIZATION_VM_OTHER;
140#else
aa27b158 141 log_debug("This platform does not support /proc/device-tree");
6300502b 142 return VIRTUALIZATION_NONE;
e735f4d4 143#endif
e735f4d4
MP
144}
145
ed86a03f 146#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)
8f232108 147static Virtualization detect_vm_dmi_vendor(void) {
b3e21333 148 static const char* const dmi_vendors[] = {
db2df898 149 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
60f067b4
JS
150 "/sys/class/dmi/id/sys_vendor",
151 "/sys/class/dmi/id/board_vendor",
ea0999c9 152 "/sys/class/dmi/id/bios_vendor",
b3e21333
LB
153 "/sys/class/dmi/id/product_version", /* For Hyper-V VMs test */
154 NULL
60f067b4
JS
155 };
156
6300502b
MP
157 static const struct {
158 const char *vendor;
8f232108 159 Virtualization id;
6300502b 160 } dmi_vendor_table[] = {
d9f5095a
MB
161 { "KVM", VIRTUALIZATION_KVM },
162 { "OpenStack", VIRTUALIZATION_KVM }, /* Detect OpenStack instance as KVM in non x86 architecture */
163 { "KubeVirt", VIRTUALIZATION_KVM }, /* Detect KubeVirt instance as KVM in non x86 architecture */
164 { "Amazon EC2", VIRTUALIZATION_AMAZON },
165 { "QEMU", VIRTUALIZATION_QEMU },
166 { "VMware", VIRTUALIZATION_VMWARE }, /* https://kb.vmware.com/s/article/1009458 */
167 { "VMW", VIRTUALIZATION_VMWARE },
168 { "innotek GmbH", VIRTUALIZATION_ORACLE },
169 { "VirtualBox", VIRTUALIZATION_ORACLE },
170 { "Xen", VIRTUALIZATION_XEN },
171 { "Bochs", VIRTUALIZATION_BOCHS },
172 { "Parallels", VIRTUALIZATION_PARALLELS },
8a584da2 173 /* https://wiki.freebsd.org/bhyve */
d9f5095a
MB
174 { "BHYVE", VIRTUALIZATION_BHYVE },
175 { "Hyper-V", VIRTUALIZATION_MICROSOFT },
086111aa 176 { "Apple Virtualization", VIRTUALIZATION_APPLE },
6300502b 177 };
6300502b 178 int r;
663996b3 179
b3e21333 180 STRV_FOREACH(vendor, dmi_vendors) {
663996b3 181 _cleanup_free_ char *s = NULL;
663996b3 182
b3e21333 183 r = read_one_line_file(*vendor, &s);
663996b3 184 if (r < 0) {
6300502b
MP
185 if (r == -ENOENT)
186 continue;
663996b3 187
6300502b 188 return r;
663996b3
MS
189 }
190
b3e21333
LB
191 for (size_t i = 0; i < ELEMENTSOF(dmi_vendor_table); i++)
192 if (startswith(s, dmi_vendor_table[i].vendor)) {
193 log_debug("Virtualization %s found in DMI (%s)", s, *vendor);
194 return dmi_vendor_table[i].id;
aa27b158 195 }
60f067b4 196 }
626cb2db 197 log_debug("No virtualization found in DMI vendor table.");
8b3d4ff0
MB
198 return VIRTUALIZATION_NONE;
199}
200
201static int detect_vm_smbios(void) {
202 /* The SMBIOS BIOS Charateristics Extension Byte 2 (Section 2.1.2.2 of
203 * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.4.0.pdf), specifies that
204 * the 4th bit being set indicates a VM. The BIOS Characteristics table is exposed via the kernel in
205 * /sys/firmware/dmi/entries/0-0. Note that in the general case, this bit being unset should not
206 * imply that the system is running on bare-metal. For example, QEMU 3.1.0 (with or without KVM)
207 * with SeaBIOS does not set this bit. */
208 _cleanup_free_ char *s = NULL;
209 size_t readsize;
210 int r;
211
212 r = read_full_virtual_file("/sys/firmware/dmi/entries/0-0/raw", &s, &readsize);
213 if (r < 0) {
626cb2db
MB
214 log_debug_errno(r, "Unable to read /sys/firmware/dmi/entries/0-0/raw, "
215 "using the virtualization information found in DMI vendor table, ignoring: %m");
8b3d4ff0
MB
216 return SMBIOS_VM_BIT_UNKNOWN;
217 }
218 if (readsize < 20 || s[1] < 20) {
219 /* The spec indicates that byte 1 contains the size of the table, 0x12 + the number of
220 * extension bytes. The data we're interested in is in extension byte 2, which would be at
221 * 0x13. If we didn't read that much data, or if the BIOS indicates that we don't have that
222 * much data, we don't infer anything from the SMBIOS. */
626cb2db
MB
223 log_debug("Only read %zu bytes from /sys/firmware/dmi/entries/0-0/raw (expected 20). "
224 "Using the virtualization information found in DMI vendor table.", readsize);
8b3d4ff0
MB
225 return SMBIOS_VM_BIT_UNKNOWN;
226 }
663996b3 227
8b3d4ff0
MB
228 uint8_t byte = (uint8_t) s[19];
229 if (byte & (1U<<4)) {
626cb2db 230 log_debug("DMI BIOS Extension table indicates virtualization.");
8b3d4ff0
MB
231 return SMBIOS_VM_BIT_SET;
232 }
626cb2db 233 log_debug("DMI BIOS Extension table does not indicate virtualization.");
8b3d4ff0
MB
234 return SMBIOS_VM_BIT_UNSET;
235}
ed86a03f 236#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64) */
8b3d4ff0 237
8f232108 238static Virtualization detect_vm_dmi(void) {
ed86a03f 239#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) || defined(__loongarch64)
8b3d4ff0
MB
240
241 int r;
242 r = detect_vm_dmi_vendor();
aa27b158 243
8b3d4ff0
MB
244 /* The DMI vendor tables in /sys/class/dmi/id don't help us distinguish between Amazon EC2
245 * virtual machines and bare-metal instances, so we need to look at SMBIOS. */
ce5f39bd
MB
246 if (r == VIRTUALIZATION_AMAZON) {
247 switch (detect_vm_smbios()) {
248 case SMBIOS_VM_BIT_SET:
249 return VIRTUALIZATION_AMAZON;
250 case SMBIOS_VM_BIT_UNSET:
251 return VIRTUALIZATION_NONE;
252 case SMBIOS_VM_BIT_UNKNOWN: {
253 /* The DMI information we are after is only accessible to the root user,
254 * so we fallback to using the product name which is less restricted
255 * to distinguish metal systems from virtualized instances */
256 _cleanup_free_ char *s = NULL;
257
258 r = read_full_virtual_file("/sys/class/dmi/id/product_name", &s, NULL);
259 /* In EC2, virtualized is much more common than metal, so if for some reason
260 * we fail to read the DMI data, assume we are virtualized. */
261 if (r < 0) {
262 log_debug_errno(r, "Can't read /sys/class/dmi/id/product_name,"
263 " assuming virtualized: %m");
264 return VIRTUALIZATION_AMAZON;
265 }
266 if (endswith(truncate_nl(s), ".metal")) {
267 log_debug("DMI product name ends with '.metal', assuming no virtualization");
268 return VIRTUALIZATION_NONE;
269 } else
270 return VIRTUALIZATION_AMAZON;
271 }
272 default:
ea0999c9 273 assert_not_reached();
ce5f39bd
MB
274 }
275 }
8b3d4ff0
MB
276
277 /* If we haven't identified a VM, but the firmware indicates that there is one, indicate as much. We
278 * have no further information about what it is. */
279 if (r == VIRTUALIZATION_NONE && detect_vm_smbios() == SMBIOS_VM_BIT_SET)
280 return VIRTUALIZATION_VM_OTHER;
281 return r;
282#else
6300502b 283 return VIRTUALIZATION_NONE;
8b3d4ff0 284#endif
60f067b4 285}
663996b3 286
52ad194e
MB
287#define XENFEAT_dom0 11 /* xen/include/public/features.h */
288#define PATH_FEATURES "/sys/hypervisor/properties/features"
289/* Returns -errno, or 0 for domU, or 1 for dom0 */
290static int detect_vm_xen_dom0(void) {
6300502b 291 _cleanup_free_ char *domcap = NULL;
60f067b4
JS
292 int r;
293
52ad194e
MB
294 r = read_one_line_file(PATH_FEATURES, &domcap);
295 if (r < 0 && r != -ENOENT)
296 return r;
bb4f798a 297 if (r >= 0) {
52ad194e
MB
298 unsigned long features;
299
1d42b86d
MB
300 /* Here, we need to use sscanf() instead of safe_atoul()
301 * as the string lacks the leading "0x". */
302 r = sscanf(domcap, "%lx", &features);
303 if (r == 1) {
52ad194e
MB
304 r = !!(features & (1U << XENFEAT_dom0));
305 log_debug("Virtualization XEN, found %s with value %08lx, "
306 "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
307 PATH_FEATURES, features, r ? "" : " not");
308 return r;
309 }
310 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
311 PATH_FEATURES, domcap);
312 }
313
6300502b 314 r = read_one_line_file("/proc/xen/capabilities", &domcap);
aa27b158 315 if (r == -ENOENT) {
52ad194e
MB
316 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
317 return 0;
aa27b158
MP
318 }
319 if (r < 0)
320 return r;
663996b3 321
f2dec872
BR
322 for (const char *i = domcap;;) {
323 _cleanup_free_ char *cap = NULL;
663996b3 324
f2dec872
BR
325 r = extract_first_word(&i, &cap, ",", 0);
326 if (r < 0)
327 return r;
328 if (r == 0) {
329 log_debug("Virtualization XEN DomU found (/proc/xen/capabilities)");
330 return 0;
331 }
332
333 if (streq(cap, "control_d")) {
334 log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
335 return 1;
336 }
337 }
6300502b 338}
60f067b4 339
8f232108 340static Virtualization detect_vm_xen(void) {
9cde670f
LB
341 /* The presence of /proc/xen indicates some form of a Xen domain
342 The check for Dom0 is handled outside this function */
ea0999c9
MB
343 if (access("/proc/xen", F_OK) < 0) {
344 log_debug("Virtualization XEN not found, /proc/xen does not exist");
345 return VIRTUALIZATION_NONE;
346 }
347 log_debug("Virtualization XEN found (/proc/xen exists)");
ea0999c9
MB
348 return VIRTUALIZATION_XEN;
349}
350
8f232108 351static Virtualization detect_vm_hypervisor(void) {
6300502b
MP
352 _cleanup_free_ char *hvtype = NULL;
353 int r;
60f067b4 354
6300502b
MP
355 r = read_one_line_file("/sys/hypervisor/type", &hvtype);
356 if (r == -ENOENT)
357 return VIRTUALIZATION_NONE;
358 if (r < 0)
359 return r;
60f067b4 360
aa27b158
MP
361 log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
362
6300502b
MP
363 if (streq(hvtype, "xen"))
364 return VIRTUALIZATION_XEN;
365 else
366 return VIRTUALIZATION_VM_OTHER;
367}
60f067b4 368
8f232108 369static Virtualization detect_vm_uml(void) {
6e866b33 370 _cleanup_fclose_ FILE *f = NULL;
6300502b 371 int r;
60f067b4 372
6300502b 373 /* Detect User-Mode Linux by reading /proc/cpuinfo */
6e866b33
MB
374 f = fopen("/proc/cpuinfo", "re");
375 if (!f) {
376 if (errno == ENOENT) {
377 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
378 return VIRTUALIZATION_NONE;
379 }
380 return -errno;
98393f85 381 }
aa27b158 382
6e866b33
MB
383 for (;;) {
384 _cleanup_free_ char *line = NULL;
385 const char *t;
386
387 r = read_line(f, LONG_LINE_MAX, &line);
388 if (r < 0)
389 return r;
390 if (r == 0)
391 break;
392
393 t = startswith(line, "vendor_id\t: ");
394 if (t) {
395 if (startswith(t, "User Mode Linux")) {
396 log_debug("UML virtualization found in /proc/cpuinfo");
397 return VIRTUALIZATION_UML;
398 }
399
400 break;
401 }
aa27b158 402 }
60f067b4 403
98393f85 404 log_debug("UML virtualization not found in /proc/cpuinfo.");
6300502b
MP
405 return VIRTUALIZATION_NONE;
406}
7035cd9e 407
8f232108 408static Virtualization detect_vm_zvm(void) {
60f067b4 409
6300502b
MP
410#if defined(__s390__)
411 _cleanup_free_ char *t = NULL;
412 int r;
7035cd9e 413
6300502b
MP
414 r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
415 if (r == -ENOENT)
416 return VIRTUALIZATION_NONE;
417 if (r < 0)
418 return r;
7035cd9e 419
aa27b158 420 log_debug("Virtualization %s found in /proc/sysinfo", t);
6300502b
MP
421 if (streq(t, "z/VM"))
422 return VIRTUALIZATION_ZVM;
423 else
424 return VIRTUALIZATION_KVM;
425#else
aa27b158 426 log_debug("This platform does not support /proc/sysinfo");
6300502b
MP
427 return VIRTUALIZATION_NONE;
428#endif
429}
7035cd9e 430
6300502b 431/* Returns a short identifier for the various VM implementations */
8f232108
MB
432Virtualization detect_vm(void) {
433 static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
52ad194e 434 bool other = false;
8f232108
MB
435 int xen_dom0 = 0;
436 Virtualization v, dmi;
7035cd9e 437
6300502b
MP
438 if (cached_found >= 0)
439 return cached_found;
60f067b4 440
4c89c718 441 /* We have to use the correct order here:
4c89c718 442 *
3e11e177
MB
443 * → First, try to detect Oracle Virtualbox, Amazon EC2 Nitro, and Parallels, even if they use KVM,
444 * as well as Xen even if it cloaks as Microsoft Hyper-V. Attempt to detect uml at this stage also
445 * since it runs as a user-process nested inside other VMs. Also check for Xen now, because Xen PV
446 * mode does not override CPUID when nested inside another hypervisor.
b012e921 447 *
3e11e177
MB
448 * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if
449 * info in DMI is overwritten.
b012e921
MB
450 *
451 * → Third, try to detect from DMI. */
2897b343
MP
452
453 dmi = detect_vm_dmi();
3e11e177
MB
454 if (IN_SET(dmi,
455 VIRTUALIZATION_ORACLE,
456 VIRTUALIZATION_XEN,
457 VIRTUALIZATION_AMAZON,
458 VIRTUALIZATION_PARALLELS)) {
8f232108 459 v = dmi;
2897b343
MP
460 goto finish;
461 }
462
9bb629ec 463 /* Detect UML */
8f232108
MB
464 v = detect_vm_uml();
465 if (v < 0)
466 return v;
467 if (v != VIRTUALIZATION_NONE)
9bb629ec
MB
468 goto finish;
469
ea0999c9 470 /* Detect Xen */
8f232108
MB
471 v = detect_vm_xen();
472 if (v < 0)
473 return v;
474 if (v == VIRTUALIZATION_XEN) {
9cde670f
LB
475 /* If we are Dom0, then we expect to not report as a VM. However, as we might be nested
476 * inside another hypervisor which can be detected via the CPUID check, wait to report this
477 * until after the CPUID check. */
478 xen_dom0 = detect_vm_xen_dom0();
479 if (xen_dom0 < 0)
480 return xen_dom0;
481 if (xen_dom0 == 0)
482 goto finish;
483
8f232108
MB
484 v = VIRTUALIZATION_NONE;
485 } else if (v != VIRTUALIZATION_NONE)
9cde670f 486 assert_not_reached();
ea0999c9 487
9bb629ec 488 /* Detect from CPUID */
8f232108
MB
489 v = detect_vm_cpuid();
490 if (v < 0)
491 return v;
492 if (v == VIRTUALIZATION_VM_OTHER)
b012e921 493 other = true;
8f232108 494 else if (v != VIRTUALIZATION_NONE)
b012e921 495 goto finish;
e735f4d4 496
9cde670f
LB
497 /* If we are in Dom0 and have not yet finished, finish with the result of detect_vm_cpuid */
498 if (xen_dom0 > 0)
499 goto finish;
500
b012e921
MB
501 /* Now, let's get back to DMI */
502 if (dmi < 0)
503 return dmi;
504 if (dmi == VIRTUALIZATION_VM_OTHER)
505 other = true;
506 else if (dmi != VIRTUALIZATION_NONE) {
8f232108 507 v = dmi;
b012e921 508 goto finish;
52ad194e 509 }
14228c0d 510
ea0999c9 511 /* Check high-level hypervisor sysfs file */
8f232108
MB
512 v = detect_vm_hypervisor();
513 if (v < 0)
514 return v;
515 if (v == VIRTUALIZATION_VM_OTHER)
b012e921 516 other = true;
8f232108 517 else if (v != VIRTUALIZATION_NONE)
b012e921 518 goto finish;
5eef597e 519
8f232108
MB
520 v = detect_vm_device_tree();
521 if (v < 0)
522 return v;
523 if (v == VIRTUALIZATION_VM_OTHER)
b012e921 524 other = true;
8f232108 525 else if (v != VIRTUALIZATION_NONE)
b012e921 526 goto finish;
5eef597e 527
8f232108
MB
528 v = detect_vm_zvm();
529 if (v < 0)
530 return v;
60f067b4
JS
531
532finish:
8f232108
MB
533 if (v == VIRTUALIZATION_NONE && other)
534 v = VIRTUALIZATION_VM_OTHER;
aa27b158 535
8f232108
MB
536 cached_found = v;
537 log_debug("Found VM virtualization %s", virtualization_to_string(v));
538 return v;
663996b3
MS
539}
540
46cdbd49
BR
541static const char *const container_table[_VIRTUALIZATION_MAX] = {
542 [VIRTUALIZATION_LXC] = "lxc",
543 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
544 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
545 [VIRTUALIZATION_DOCKER] = "docker",
546 [VIRTUALIZATION_PODMAN] = "podman",
547 [VIRTUALIZATION_RKT] = "rkt",
548 [VIRTUALIZATION_WSL] = "wsl",
a10f5d05 549 [VIRTUALIZATION_PROOT] = "proot",
a032b68d 550 [VIRTUALIZATION_POUCH] = "pouch",
46cdbd49
BR
551};
552
553DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
663996b3 554
3a6ce677
BR
/* Guesses whether we are inside a cgroup namespace by checking which files exist at the top of the
 * cgroup hierarchy mounted below /sys/fs/cgroup.
 *
 * Returns > 0 if we appear to be in a cgroup namespace, 0 if not (or if cgroup namespaces are not
 * supported), and a negative errno-style value if a check failed unexpectedly. */
static int running_in_cgroupns(void) {
        int unified;

        if (!cg_ns_supported())
                return false;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        if (unified) {
                /* cgroup v2 */

                /* All kernel versions have cgroup.events in nested cgroups. */
                if (access("/sys/fs/cgroup/cgroup.events", F_OK) < 0)
                        return errno == ENOENT ? false : -errno;

                /* There's no cgroup.type in the root cgroup, and future kernel versions
                 * are unlikely to add it since cgroup.type is something that makes no sense
                 * whatsoever in the root cgroup. */
                if (access("/sys/fs/cgroup/cgroup.type", F_OK) == 0)
                        return true;
                if (errno != ENOENT)
                        return -errno;

                /* On older kernel versions, there's no cgroup.type. If the features file is missing
                 * too, this is an old kernel that we know for sure has cgroup.events only in nested
                 * cgroups. */
                if (access("/sys/kernel/cgroup/features", F_OK) < 0)
                        return errno == ENOENT ? true : -errno;

                /* This is a recent kernel, and cgroup.type doesn't exist, so we must be
                 * in the root cgroup. */
                return false;
        }

        /* cgroup v1 */

        /* If systemd controller is not mounted, do not even bother. */
        if (access("/sys/fs/cgroup/systemd", F_OK) < 0)
                return errno == ENOENT ? false : -errno;

        /* release_agent only exists in the root cgroup. */
        if (access("/sys/fs/cgroup/systemd/release_agent", F_OK) < 0)
                return errno == ENOENT ? true : -errno;

        return false;
}
620
8f232108 621static Virtualization detect_container_files(void) {
3a6ce677
BR
622 static const struct {
623 const char *file_path;
8f232108 624 Virtualization id;
3a6ce677
BR
625 } container_file_table[] = {
626 /* https://github.com/containers/podman/issues/6192 */
627 /* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
628 { "/run/.containerenv", VIRTUALIZATION_PODMAN },
629 /* https://github.com/moby/moby/issues/18355 */
630 /* Docker must be the last in this table, see below. */
631 { "/.dockerenv", VIRTUALIZATION_DOCKER },
632 };
633
8f232108 634 for (size_t i = 0; i < ELEMENTSOF(container_file_table); i++) {
3a6ce677
BR
635 if (access(container_file_table[i].file_path, F_OK) >= 0)
636 return container_file_table[i].id;
637
638 if (errno != ENOENT)
639 log_debug_errno(errno,
640 "Checking if %s exists failed, ignoring: %m",
641 container_file_table[i].file_path);
642 }
643
644 return VIRTUALIZATION_NONE;
645}
646
8f232108
MB
647Virtualization detect_container(void) {
648 static thread_local Virtualization cached_found = _VIRTUALIZATION_INVALID;
3a6ce677 649 _cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
6300502b 650 const char *e = NULL;
8f232108 651 Virtualization v;
60f067b4
JS
652 int r;
653
6300502b 654 if (cached_found >= 0)
60f067b4 655 return cached_found;
663996b3 656
2897b343 657 /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
3a6ce677
BR
658 if (access("/proc/vz", F_OK) < 0) {
659 if (errno != ENOENT)
660 log_debug_errno(errno, "Failed to check if /proc/vz exists, ignoring: %m");
661 } else if (access("/proc/bc", F_OK) < 0) {
662 if (errno == ENOENT) {
8f232108 663 v = VIRTUALIZATION_OPENVZ;
3a6ce677
BR
664 goto finish;
665 }
666
667 log_debug_errno(errno, "Failed to check if /proc/bc exists, ignoring: %m");
663996b3
MS
668 }
669
a10f5d05 670 /* "Official" way of detecting WSL https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
bb4f798a 671 r = read_one_line_file("/proc/sys/kernel/osrelease", &o);
3a6ce677
BR
672 if (r < 0)
673 log_debug_errno(r, "Failed to read /proc/sys/kernel/osrelease, ignoring: %m");
674 else if (strstr(o, "Microsoft") || strstr(o, "WSL")) {
8f232108 675 v = VIRTUALIZATION_WSL;
d0648cfe 676 goto finish;
bb4f798a
MB
677 }
678
a10f5d05
MB
679 /* proot doesn't use PID namespacing, so we can just check if we have a matching tracer for this
680 * invocation without worrying about it being elsewhere.
681 */
682 r = get_proc_field("/proc/self/status", "TracerPid", WHITESPACE, &p);
3a6ce677
BR
683 if (r < 0)
684 log_debug_errno(r, "Failed to read our own trace PID, ignoring: %m");
685 else if (!streq(p, "0")) {
a10f5d05 686 pid_t ptrace_pid;
3a6ce677 687
a10f5d05 688 r = parse_pid(p, &ptrace_pid);
3a6ce677
BR
689 if (r < 0)
690 log_debug_errno(r, "Failed to parse our own tracer PID, ignoring: %m");
691 else {
a10f5d05 692 _cleanup_free_ char *ptrace_comm = NULL;
3a6ce677
BR
693 const char *pf;
694
695 pf = procfs_file_alloca(ptrace_pid, "comm");
a10f5d05 696 r = read_one_line_file(pf, &ptrace_comm);
3a6ce677
BR
697 if (r < 0)
698 log_debug_errno(r, "Failed to read %s, ignoring: %m", pf);
699 else if (startswith(ptrace_comm, "proot")) {
8f232108 700 v = VIRTUALIZATION_PROOT;
a10f5d05
MB
701 goto finish;
702 }
703 }
704 }
705
3a6ce677 706 /* The container manager might have placed this in the /run/host/ hierarchy for us, which is best
a032b68d
MB
707 * because we can be consumed just like that, without special privileges. */
708 r = read_one_line_file("/run/host/container-manager", &m);
709 if (r > 0) {
710 e = m;
711 goto translate_name;
712 }
713 if (!IN_SET(r, -ENOENT, 0))
3a6ce677 714 return log_debug_errno(r, "Failed to read /run/host/container-manager: %m");
a032b68d 715
f5e65279 716 if (getpid_cached() == 1) {
f2dec872
BR
717 /* If we are PID 1 we can just check our own environment variable, and that's authoritative.
718 * We distinguish three cases:
719 * - the variable is not defined → we jump to other checks
720 * - the variable is defined to an empty value → we are not in a container
721 * - anything else → some container, either one of the known ones or "container-other"
722 */
60f067b4 723 e = getenv("container");
f2dec872 724 if (!e)
3a6ce677 725 goto check_files;
60f067b4 726 if (isempty(e)) {
8f232108 727 v = VIRTUALIZATION_NONE;
60f067b4
JS
728 goto finish;
729 }
663996b3 730
2897b343
MP
731 goto translate_name;
732 }
733
734 /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
735 * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
736 r = read_one_line_file("/run/systemd/container", &m);
bb4f798a 737 if (r > 0) {
2897b343
MP
738 e = m;
739 goto translate_name;
740 }
bb4f798a 741 if (!IN_SET(r, -ENOENT, 0))
2897b343
MP
742 return log_debug_errno(r, "Failed to read /run/systemd/container: %m");
743
744 /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used. */
745 r = getenv_for_pid(1, "container", &m);
746 if (r > 0) {
60f067b4 747 e = m;
2897b343 748 goto translate_name;
60f067b4 749 }
2897b343
MP
750 if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
751 log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");
752
3a6ce677
BR
753check_files:
754 /* Check for existence of some well-known files. We only do this after checking
755 * for other specific container managers, otherwise we risk mistaking another
756 * container manager for Docker: the /.dockerenv file could inadvertently end up
757 * in a file system image. */
8f232108
MB
758 v = detect_container_files();
759 if (v < 0)
760 return v;
761 if (v != VIRTUALIZATION_NONE)
3a6ce677 762 goto finish;
2897b343 763
3a6ce677
BR
764 r = running_in_cgroupns();
765 if (r > 0) {
8f232108 766 v = VIRTUALIZATION_CONTAINER_OTHER;
3a6ce677
BR
767 goto finish;
768 }
769 if (r < 0)
770 log_debug_errno(r, "Failed to detect cgroup namespace: %m");
2897b343 771
3a6ce677 772 /* If none of that worked, give up, assume no container manager. */
8f232108 773 v = VIRTUALIZATION_NONE;
2897b343 774 goto finish;
663996b3 775
2897b343 776translate_name:
3a6ce677
BR
777 if (streq(e, "oci")) {
778 /* Some images hardcode container=oci, but OCI is not a specific container manager.
779 * Try to detect one based on well-known files. */
8f232108 780 v = detect_container_files();
28085778 781 if (v == VIRTUALIZATION_NONE)
8f232108 782 v = VIRTUALIZATION_CONTAINER_OTHER;
3a6ce677
BR
783 goto finish;
784 }
8f232108
MB
785 v = container_from_string(e);
786 if (v < 0)
787 v = VIRTUALIZATION_CONTAINER_OTHER;
663996b3 788
60f067b4 789finish:
8f232108
MB
790 log_debug("Found container virtualization %s.", virtualization_to_string(v));
791 cached_found = v;
792 return v;
60f067b4 793}
663996b3 794
8f232108
MB
795Virtualization detect_virtualization(void) {
796 int v;
663996b3 797
8f232108
MB
798 v = detect_container();
799 if (v != VIRTUALIZATION_NONE)
800 return v;
663996b3 801
8f232108 802 return detect_vm();
663996b3 803}
6300502b 804
8a584da2
MP
805static int userns_has_mapping(const char *name) {
806 _cleanup_fclose_ FILE *f = NULL;
8f232108 807 uid_t a, b, c;
8a584da2
MP
808 int r;
809
810 f = fopen(name, "re");
811 if (!f) {
812 log_debug_errno(errno, "Failed to open %s: %m", name);
2897b343 813 return errno == ENOENT ? false : -errno;
8a584da2
MP
814 }
815
8f232108
MB
816 errno = 0;
817 r = fscanf(f, UID_FMT " " UID_FMT " " UID_FMT "\n", &a, &b, &c);
818 if (r == EOF) {
819 if (ferror(f))
820 return log_debug_errno(errno_or_else(EIO), "Failed to read %s: %m", name);
8a584da2 821
8f232108
MB
822 log_debug("%s is empty, we're in an uninitialized user namespace", name);
823 return true;
8a584da2 824 }
8f232108
MB
825 if (r != 3)
826 return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG), "Failed to parse %s: %m", name);
8a584da2
MP
827
828 if (a == 0 && b == 0 && c == UINT32_MAX) {
829 /* The kernel calls mappings_overlap() and does not allow overlaps */
830 log_debug("%s has a full 1:1 mapping", name);
831 return false;
832 }
833
834 /* Anything else implies that we are in a user namespace */
835 log_debug("Mapping found in %s, we're in a user namespace", name);
836 return true;
837}
838
839int running_in_userns(void) {
840 _cleanup_free_ char *line = NULL;
841 int r;
842
843 r = userns_has_mapping("/proc/self/uid_map");
844 if (r != 0)
845 return r;
846
847 r = userns_has_mapping("/proc/self/gid_map");
848 if (r != 0)
849 return r;
850
8f232108
MB
851 /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also possible to compile a
852 * kernel without CONFIG_USER_NS, in which case "setgroups" also does not exist. We cannot
853 * distinguish those two cases, so assume that we're running on a stripped-down recent kernel, rather
854 * than on an old one, and if the file is not found, return false. */
855 r = read_virtual_file("/proc/self/setgroups", SIZE_MAX, &line, NULL);
8a584da2
MP
856 if (r < 0) {
857 log_debug_errno(r, "/proc/self/setgroups: %m");
858 return r == -ENOENT ? false : r;
859 }
860
8f232108
MB
861 strstrip(line); /* remove trailing newline */
862
8a584da2
MP
863 r = streq(line, "deny");
864 /* See user_namespaces(7) for a description of this "setgroups" contents. */
865 log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
866 return r;
867}
868
/* Checks whether our root directory differs from the one of PID 1.
 *
 * Returns 1 if "/" and "/proc/1/root" are different files, 0 if they are the same or if the
 * SYSTEMD_IGNORE_CHROOT environment variable parses as true, and a negative errno-style value if the
 * comparison fails. */
int running_in_chroot(void) {
        int same;

        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;

        same = files_same("/proc/1/root", "/", 0);
        return same < 0 ? same : same == 0;
}
881
3a6ce677
BR
882#if defined(__i386__) || defined(__x86_64__)
/* Associates one bit of a CPUID output register with a CPU flag name. */
struct cpuid_table_entry {
        uint32_t flag_bit; /* bit index within the 32 bit register value */
        const char *name;  /* flag name the bit stands for */
};

/* The tables below are named after the CPUID leaf and output register they are meant to describe
 * (NOTE(review): "81" and "87" presumably refer to leaves 0x80000001 and 0x80000007 — confirm against
 * the users of these tables). */

static const struct cpuid_table_entry leaf1_edx[] = {
        { .flag_bit =  0, .name = "fpu"     },
        { .flag_bit =  1, .name = "vme"     },
        { .flag_bit =  2, .name = "de"      },
        { .flag_bit =  3, .name = "pse"     },
        { .flag_bit =  4, .name = "tsc"     },
        { .flag_bit =  5, .name = "msr"     },
        { .flag_bit =  6, .name = "pae"     },
        { .flag_bit =  7, .name = "mce"     },
        { .flag_bit =  8, .name = "cx8"     },
        { .flag_bit =  9, .name = "apic"    },
        { .flag_bit = 11, .name = "sep"     },
        { .flag_bit = 12, .name = "mtrr"    },
        { .flag_bit = 13, .name = "pge"     },
        { .flag_bit = 14, .name = "mca"     },
        { .flag_bit = 15, .name = "cmov"    },
        { .flag_bit = 16, .name = "pat"     },
        { .flag_bit = 17, .name = "pse36"   },
        { .flag_bit = 19, .name = "clflush" },
        { .flag_bit = 23, .name = "mmx"     },
        { .flag_bit = 24, .name = "fxsr"    },
        { .flag_bit = 25, .name = "sse"     },
        { .flag_bit = 26, .name = "sse2"    },
        { .flag_bit = 28, .name = "ht"      },
};

static const struct cpuid_table_entry leaf1_ecx[] = {
        { .flag_bit =  0, .name = "pni"     },
        { .flag_bit =  1, .name = "pclmul"  },
        { .flag_bit =  3, .name = "monitor" },
        { .flag_bit =  9, .name = "ssse3"   },
        { .flag_bit = 12, .name = "fma3"    },
        { .flag_bit = 13, .name = "cx16"    },
        { .flag_bit = 19, .name = "sse4_1"  },
        { .flag_bit = 20, .name = "sse4_2"  },
        { .flag_bit = 22, .name = "movbe"   },
        { .flag_bit = 23, .name = "popcnt"  },
        { .flag_bit = 25, .name = "aes"     },
        { .flag_bit = 26, .name = "xsave"   },
        { .flag_bit = 27, .name = "osxsave" },
        { .flag_bit = 28, .name = "avx"     },
        { .flag_bit = 29, .name = "f16c"    },
        { .flag_bit = 30, .name = "rdrand"  },
};

static const struct cpuid_table_entry leaf7_ebx[] = {
        { .flag_bit =  3, .name = "bmi1"   },
        { .flag_bit =  5, .name = "avx2"   },
        { .flag_bit =  8, .name = "bmi2"   },
        { .flag_bit = 18, .name = "rdseed" },
        { .flag_bit = 19, .name = "adx"    },
        { .flag_bit = 29, .name = "sha_ni" },
};

static const struct cpuid_table_entry leaf81_edx[] = {
        { .flag_bit = 11, .name = "syscall" },
        { .flag_bit = 27, .name = "rdtscp"  },
        { .flag_bit = 29, .name = "lm"      },
};

static const struct cpuid_table_entry leaf81_ecx[] = {
        { .flag_bit = 0, .name = "lahf_lm" },
        { .flag_bit = 5, .name = "abm"     },
};

static const struct cpuid_table_entry leaf87_edx[] = {
        { .flag_bit = 8, .name = "constant_tsc" },
};
956
957static bool given_flag_in_set(const char *flag, const struct cpuid_table_entry *set, size_t set_size, uint32_t val) {
958 for (size_t i = 0; i < set_size; i++) {
959 if ((UINT32_C(1) << set[i].flag_bit) & val &&
960 streq(flag, set[i].name))
961 return true;
962 }
963 return false;
964}
965
966static bool real_has_cpu_with_flag(const char *flag) {
967 uint32_t eax, ebx, ecx, edx;
968
969 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
970 if (given_flag_in_set(flag, leaf1_ecx, ELEMENTSOF(leaf1_ecx), ecx))
971 return true;
972
973 if (given_flag_in_set(flag, leaf1_edx, ELEMENTSOF(leaf1_edx), edx))
974 return true;
975 }
976
977 if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) {
978 if (given_flag_in_set(flag, leaf7_ebx, ELEMENTSOF(leaf7_ebx), ebx))
979 return true;
980 }
981
982 if (__get_cpuid(0x80000001U, &eax, &ebx, &ecx, &edx)) {
983 if (given_flag_in_set(flag, leaf81_ecx, ELEMENTSOF(leaf81_ecx), ecx))
984 return true;
985
986 if (given_flag_in_set(flag, leaf81_edx, ELEMENTSOF(leaf81_edx), edx))
987 return true;
988 }
989
990 if (__get_cpuid(0x80000007U, &eax, &ebx, &ecx, &edx))
991 if (given_flag_in_set(flag, leaf87_edx, ELEMENTSOF(leaf87_edx), edx))
992 return true;
993
994 return false;
995}
996#endif
997
/* Public entry point for CPU feature flag checks: returns whether the CPU advertises 'flag'. */
bool has_cpu_with_flag(const char *flag) {
#if !defined(__i386__) && !defined(__x86_64__)
        /* CPUID is an x86 specific interface. Assume on all others that no CPUs have those flags. */
        return false;
#else
        return real_has_cpu_with_flag(flag);
#endif
}
1006
6300502b 1007static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
d9f5095a
MB
1008 [VIRTUALIZATION_NONE] = "none",
1009 [VIRTUALIZATION_KVM] = "kvm",
1010 [VIRTUALIZATION_AMAZON] = "amazon",
1011 [VIRTUALIZATION_QEMU] = "qemu",
1012 [VIRTUALIZATION_BOCHS] = "bochs",
1013 [VIRTUALIZATION_XEN] = "xen",
1014 [VIRTUALIZATION_UML] = "uml",
1015 [VIRTUALIZATION_VMWARE] = "vmware",
1016 [VIRTUALIZATION_ORACLE] = "oracle",
1017 [VIRTUALIZATION_MICROSOFT] = "microsoft",
1018 [VIRTUALIZATION_ZVM] = "zvm",
1019 [VIRTUALIZATION_PARALLELS] = "parallels",
1020 [VIRTUALIZATION_BHYVE] = "bhyve",
1021 [VIRTUALIZATION_QNX] = "qnx",
1022 [VIRTUALIZATION_ACRN] = "acrn",
1023 [VIRTUALIZATION_POWERVM] = "powervm",
1024 [VIRTUALIZATION_APPLE] = "apple",
9fe6880f 1025 [VIRTUALIZATION_SRE] = "sre",
d9f5095a
MB
1026 [VIRTUALIZATION_VM_OTHER] = "vm-other",
1027
1028 [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
1029 [VIRTUALIZATION_LXC_LIBVIRT] = "lxc-libvirt",
1030 [VIRTUALIZATION_LXC] = "lxc",
1031 [VIRTUALIZATION_OPENVZ] = "openvz",
1032 [VIRTUALIZATION_DOCKER] = "docker",
1033 [VIRTUALIZATION_PODMAN] = "podman",
1034 [VIRTUALIZATION_RKT] = "rkt",
1035 [VIRTUALIZATION_WSL] = "wsl",
1036 [VIRTUALIZATION_PROOT] = "proot",
1037 [VIRTUALIZATION_POUCH] = "pouch",
6300502b
MP
1038 [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
1039};
1040
8f232108 1041DEFINE_STRING_TABLE_LOOKUP(virtualization, Virtualization);