]> git.proxmox.com Git - systemd.git/blame - src/basic/virt.c
New upstream version 250~rc3
[systemd.git] / src / basic / virt.c
CommitLineData
a032b68d 1/* SPDX-License-Identifier: LGPL-2.1-or-later */
663996b3 2
1d42b86d
MB
3#if defined(__i386__) || defined(__x86_64__)
4#include <cpuid.h>
5#endif
663996b3 6#include <errno.h>
4c89c718
MP
7#include <stdint.h>
8#include <stdlib.h>
663996b3
MS
9#include <unistd.h>
10
db2df898 11#include "alloc-util.h"
3a6ce677 12#include "cgroup-util.h"
db2df898 13#include "dirent-util.h"
2897b343 14#include "env-util.h"
db2df898
MP
15#include "fd-util.h"
16#include "fileio.h"
4c89c718 17#include "macro.h"
e3bff60a 18#include "process-util.h"
db2df898
MP
19#include "stat-util.h"
20#include "string-table.h"
21#include "string-util.h"
663996b3 22#include "virt.h"
663996b3 23
8b3d4ff0
MB
/* Possible outcomes of detect_vm_smbios(). */
enum {
        SMBIOS_VM_BIT_SET     = 0, /* The firmware flags the system as a virtual machine. */
        SMBIOS_VM_BIT_UNSET   = 1, /* The firmware data was read and the flag is clear. */
        SMBIOS_VM_BIT_UNKNOWN = 2, /* The firmware data could not be read or was too short. */
};
29
46cdbd49
BR
#if defined(__i386__) || defined(__x86_64__)
/* Maps VIRTUALIZATION_* identifiers to the hypervisor signature string that detect_vm_cpuid()
 * assembles from the EBX, ECX and EDX registers of CPUID leaf 0x40000000. Only built on x86, since
 * CPUID is the only consumer. */
static const char *const vm_table[_VIRTUALIZATION_MAX] = {
        [VIRTUALIZATION_XEN]       = "XenVMMXenVMM",
        [VIRTUALIZATION_KVM]       = "KVMKVMKVM",
        [VIRTUALIZATION_QEMU]      = "TCGTCGTCGTCG",
        /* http://kb.vmware.com/selfservice/microsites/search.do?language=en_US&cmd=displayKC&externalId=1009458 */
        [VIRTUALIZATION_VMWARE]    = "VMwareVMware",
        /* https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs */
        [VIRTUALIZATION_MICROSOFT] = "Microsoft Hv",
        /* https://wiki.freebsd.org/bhyve */
        [VIRTUALIZATION_BHYVE]     = "bhyve bhyve ",
        [VIRTUALIZATION_QNX]       = "QNXQVMBSQG",
        /* https://projectacrn.org */
        [VIRTUALIZATION_ACRN]      = "ACRNACRNACRN",
};

/* Provides the file-local vm_from_string() lookup that detect_vm_cpuid() calls on the signature. */
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(vm, int);
#endif
48
6300502b 49static int detect_vm_cpuid(void) {
663996b3 50
db2df898 51 /* CPUID is an x86 specific interface. */
60f067b4 52#if defined(__i386__) || defined(__x86_64__)
663996b3 53
1d42b86d 54 uint32_t eax, ebx, ecx, edx;
663996b3 55 bool hypervisor;
663996b3
MS
56
57 /* http://lwn.net/Articles/301888/ */
58
663996b3 59 /* First detect whether there is a hypervisor */
1d42b86d
MB
60 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx) == 0)
61 return VIRTUALIZATION_NONE;
663996b3 62
b012e921 63 hypervisor = ecx & 0x80000000U;
663996b3
MS
64
65 if (hypervisor) {
6300502b
MP
66 union {
67 uint32_t sig32[3];
68 char text[13];
69 } sig = {};
46cdbd49 70 int v;
663996b3
MS
71
72 /* There is a hypervisor, see what it is */
1d42b86d
MB
73 __cpuid(0x40000000U, eax, ebx, ecx, edx);
74
75 sig.sig32[0] = ebx;
76 sig.sig32[1] = ecx;
77 sig.sig32[2] = edx;
663996b3 78
aa27b158
MP
79 log_debug("Virtualization found, CPUID=%s", sig.text);
80
46cdbd49
BR
81 v = vm_from_string(sig.text);
82 if (v < 0)
83 return VIRTUALIZATION_VM_OTHER;
60f067b4 84
46cdbd49 85 return v;
663996b3 86 }
60f067b4 87#endif
aa27b158 88 log_debug("No virtualization found in CPUID");
60f067b4 89
6300502b 90 return VIRTUALIZATION_NONE;
60f067b4
JS
91}
92
6300502b 93static int detect_vm_device_tree(void) {
e3bff60a 94#if defined(__arm__) || defined(__aarch64__) || defined(__powerpc__) || defined(__powerpc64__)
e735f4d4
MP
95 _cleanup_free_ char *hvtype = NULL;
96 int r;
97
e3bff60a 98 r = read_one_line_file("/proc/device-tree/hypervisor/compatible", &hvtype);
6300502b 99 if (r == -ENOENT) {
e3bff60a 100 _cleanup_closedir_ DIR *dir = NULL;
e3bff60a 101
a032b68d
MB
102 if (access("/proc/device-tree/ibm,partition-name", F_OK) == 0 &&
103 access("/proc/device-tree/hmc-managed?", F_OK) == 0 &&
104 access("/proc/device-tree/chosen/qemu,graphic-width", F_OK) != 0)
105 return VIRTUALIZATION_POWERVM;
106
e3bff60a
MP
107 dir = opendir("/proc/device-tree");
108 if (!dir) {
aa27b158
MP
109 if (errno == ENOENT) {
110 log_debug_errno(errno, "/proc/device-tree: %m");
6300502b 111 return VIRTUALIZATION_NONE;
aa27b158 112 }
e3bff60a
MP
113 return -errno;
114 }
115
ea0999c9
MB
116 FOREACH_DIRENT(de, dir, return -errno)
117 if (strstr(de->d_name, "fw-cfg")) {
118 log_debug("Virtualization QEMU: \"fw-cfg\" present in /proc/device-tree/%s", de->d_name);
6300502b 119 return VIRTUALIZATION_QEMU;
aa27b158 120 }
6300502b 121
aa27b158 122 log_debug("No virtualization found in /proc/device-tree/*");
6300502b
MP
123 return VIRTUALIZATION_NONE;
124 } else if (r < 0)
125 return r;
126
aa27b158 127 log_debug("Virtualization %s found in /proc/device-tree/hypervisor/compatible", hvtype);
6300502b
MP
128 if (streq(hvtype, "linux,kvm"))
129 return VIRTUALIZATION_KVM;
130 else if (strstr(hvtype, "xen"))
131 return VIRTUALIZATION_XEN;
e1f67bc7
MB
132 else if (strstr(hvtype, "vmware"))
133 return VIRTUALIZATION_VMWARE;
6300502b
MP
134 else
135 return VIRTUALIZATION_VM_OTHER;
136#else
aa27b158 137 log_debug("This platform does not support /proc/device-tree");
6300502b 138 return VIRTUALIZATION_NONE;
e735f4d4 139#endif
e735f4d4
MP
140}
141
db2df898 142#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
8b3d4ff0 143static int detect_vm_dmi_vendor(void) {
60f067b4 144 static const char *const dmi_vendors[] = {
db2df898 145 "/sys/class/dmi/id/product_name", /* Test this before sys_vendor to detect KVM over QEMU */
60f067b4
JS
146 "/sys/class/dmi/id/sys_vendor",
147 "/sys/class/dmi/id/board_vendor",
ea0999c9
MB
148 "/sys/class/dmi/id/bios_vendor",
149 "/sys/class/dmi/id/product_version" /* For Hyper-V VMs test */
60f067b4
JS
150 };
151
6300502b
MP
152 static const struct {
153 const char *vendor;
154 int id;
155 } dmi_vendor_table[] = {
812752cc 156 { "KVM", VIRTUALIZATION_KVM },
8b3d4ff0 157 { "Amazon EC2", VIRTUALIZATION_AMAZON },
46cdbd49 158 { "QEMU", VIRTUALIZATION_QEMU },
812752cc
MB
159 { "VMware", VIRTUALIZATION_VMWARE }, /* https://kb.vmware.com/s/article/1009458 */
160 { "VMW", VIRTUALIZATION_VMWARE },
161 { "innotek GmbH", VIRTUALIZATION_ORACLE },
2223c773 162 { "VirtualBox", VIRTUALIZATION_ORACLE },
812752cc
MB
163 { "Xen", VIRTUALIZATION_XEN },
164 { "Bochs", VIRTUALIZATION_BOCHS },
165 { "Parallels", VIRTUALIZATION_PARALLELS },
8a584da2 166 /* https://wiki.freebsd.org/bhyve */
812752cc 167 { "BHYVE", VIRTUALIZATION_BHYVE },
ea0999c9 168 { "Hyper-V", VIRTUALIZATION_MICROSOFT },
6300502b 169 };
6300502b 170 int r;
663996b3 171
3a6ce677 172 for (size_t i = 0; i < ELEMENTSOF(dmi_vendors); i++) {
663996b3 173 _cleanup_free_ char *s = NULL;
6300502b 174 unsigned j;
663996b3
MS
175
176 r = read_one_line_file(dmi_vendors[i], &s);
177 if (r < 0) {
6300502b
MP
178 if (r == -ENOENT)
179 continue;
663996b3 180
6300502b 181 return r;
663996b3
MS
182 }
183
6300502b 184 for (j = 0; j < ELEMENTSOF(dmi_vendor_table); j++)
aa27b158
MP
185 if (startswith(s, dmi_vendor_table[j].vendor)) {
186 log_debug("Virtualization %s found in DMI (%s)", s, dmi_vendors[i]);
6300502b 187 return dmi_vendor_table[j].id;
aa27b158 188 }
60f067b4 189 }
626cb2db 190 log_debug("No virtualization found in DMI vendor table.");
8b3d4ff0
MB
191 return VIRTUALIZATION_NONE;
192}
193
194static int detect_vm_smbios(void) {
195 /* The SMBIOS BIOS Charateristics Extension Byte 2 (Section 2.1.2.2 of
196 * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.4.0.pdf), specifies that
197 * the 4th bit being set indicates a VM. The BIOS Characteristics table is exposed via the kernel in
198 * /sys/firmware/dmi/entries/0-0. Note that in the general case, this bit being unset should not
199 * imply that the system is running on bare-metal. For example, QEMU 3.1.0 (with or without KVM)
200 * with SeaBIOS does not set this bit. */
201 _cleanup_free_ char *s = NULL;
202 size_t readsize;
203 int r;
204
205 r = read_full_virtual_file("/sys/firmware/dmi/entries/0-0/raw", &s, &readsize);
206 if (r < 0) {
626cb2db
MB
207 log_debug_errno(r, "Unable to read /sys/firmware/dmi/entries/0-0/raw, "
208 "using the virtualization information found in DMI vendor table, ignoring: %m");
8b3d4ff0
MB
209 return SMBIOS_VM_BIT_UNKNOWN;
210 }
211 if (readsize < 20 || s[1] < 20) {
212 /* The spec indicates that byte 1 contains the size of the table, 0x12 + the number of
213 * extension bytes. The data we're interested in is in extension byte 2, which would be at
214 * 0x13. If we didn't read that much data, or if the BIOS indicates that we don't have that
215 * much data, we don't infer anything from the SMBIOS. */
626cb2db
MB
216 log_debug("Only read %zu bytes from /sys/firmware/dmi/entries/0-0/raw (expected 20). "
217 "Using the virtualization information found in DMI vendor table.", readsize);
8b3d4ff0
MB
218 return SMBIOS_VM_BIT_UNKNOWN;
219 }
663996b3 220
8b3d4ff0
MB
221 uint8_t byte = (uint8_t) s[19];
222 if (byte & (1U<<4)) {
626cb2db 223 log_debug("DMI BIOS Extension table indicates virtualization.");
8b3d4ff0
MB
224 return SMBIOS_VM_BIT_SET;
225 }
626cb2db 226 log_debug("DMI BIOS Extension table does not indicate virtualization.");
8b3d4ff0
MB
227 return SMBIOS_VM_BIT_UNSET;
228}
229#endif /* defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) */
230
231static int detect_vm_dmi(void) {
232#if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__)
233
234 int r;
235 r = detect_vm_dmi_vendor();
aa27b158 236
8b3d4ff0
MB
237 /* The DMI vendor tables in /sys/class/dmi/id don't help us distinguish between Amazon EC2
238 * virtual machines and bare-metal instances, so we need to look at SMBIOS. */
ce5f39bd
MB
239 if (r == VIRTUALIZATION_AMAZON) {
240 switch (detect_vm_smbios()) {
241 case SMBIOS_VM_BIT_SET:
242 return VIRTUALIZATION_AMAZON;
243 case SMBIOS_VM_BIT_UNSET:
244 return VIRTUALIZATION_NONE;
245 case SMBIOS_VM_BIT_UNKNOWN: {
246 /* The DMI information we are after is only accessible to the root user,
247 * so we fallback to using the product name which is less restricted
248 * to distinguish metal systems from virtualized instances */
249 _cleanup_free_ char *s = NULL;
250
251 r = read_full_virtual_file("/sys/class/dmi/id/product_name", &s, NULL);
252 /* In EC2, virtualized is much more common than metal, so if for some reason
253 * we fail to read the DMI data, assume we are virtualized. */
254 if (r < 0) {
255 log_debug_errno(r, "Can't read /sys/class/dmi/id/product_name,"
256 " assuming virtualized: %m");
257 return VIRTUALIZATION_AMAZON;
258 }
259 if (endswith(truncate_nl(s), ".metal")) {
260 log_debug("DMI product name ends with '.metal', assuming no virtualization");
261 return VIRTUALIZATION_NONE;
262 } else
263 return VIRTUALIZATION_AMAZON;
264 }
265 default:
ea0999c9 266 assert_not_reached();
ce5f39bd
MB
267 }
268 }
8b3d4ff0
MB
269
270 /* If we haven't identified a VM, but the firmware indicates that there is one, indicate as much. We
271 * have no further information about what it is. */
272 if (r == VIRTUALIZATION_NONE && detect_vm_smbios() == SMBIOS_VM_BIT_SET)
273 return VIRTUALIZATION_VM_OTHER;
274 return r;
275#else
6300502b 276 return VIRTUALIZATION_NONE;
8b3d4ff0 277#endif
60f067b4 278}
663996b3 279
52ad194e
MB
280#define XENFEAT_dom0 11 /* xen/include/public/features.h */
281#define PATH_FEATURES "/sys/hypervisor/properties/features"
282/* Returns -errno, or 0 for domU, or 1 for dom0 */
283static int detect_vm_xen_dom0(void) {
6300502b 284 _cleanup_free_ char *domcap = NULL;
60f067b4
JS
285 int r;
286
52ad194e
MB
287 r = read_one_line_file(PATH_FEATURES, &domcap);
288 if (r < 0 && r != -ENOENT)
289 return r;
bb4f798a 290 if (r >= 0) {
52ad194e
MB
291 unsigned long features;
292
1d42b86d
MB
293 /* Here, we need to use sscanf() instead of safe_atoul()
294 * as the string lacks the leading "0x". */
295 r = sscanf(domcap, "%lx", &features);
296 if (r == 1) {
52ad194e
MB
297 r = !!(features & (1U << XENFEAT_dom0));
298 log_debug("Virtualization XEN, found %s with value %08lx, "
299 "XENFEAT_dom0 (indicating the 'hardware domain') is%s set.",
300 PATH_FEATURES, features, r ? "" : " not");
301 return r;
302 }
303 log_debug("Virtualization XEN, found %s, unhandled content '%s'",
304 PATH_FEATURES, domcap);
305 }
306
6300502b 307 r = read_one_line_file("/proc/xen/capabilities", &domcap);
aa27b158 308 if (r == -ENOENT) {
52ad194e
MB
309 log_debug("Virtualization XEN because /proc/xen/capabilities does not exist");
310 return 0;
aa27b158
MP
311 }
312 if (r < 0)
313 return r;
663996b3 314
f2dec872
BR
315 for (const char *i = domcap;;) {
316 _cleanup_free_ char *cap = NULL;
663996b3 317
f2dec872
BR
318 r = extract_first_word(&i, &cap, ",", 0);
319 if (r < 0)
320 return r;
321 if (r == 0) {
322 log_debug("Virtualization XEN DomU found (/proc/xen/capabilities)");
323 return 0;
324 }
325
326 if (streq(cap, "control_d")) {
327 log_debug("Virtualization XEN Dom0 ignored (/proc/xen/capabilities)");
328 return 1;
329 }
330 }
6300502b 331}
60f067b4 332
ea0999c9
MB
333static int detect_vm_xen(void) {
334 int r;
335
336 /* The presence of /proc/xen indicates some form of a Xen domain */
337 if (access("/proc/xen", F_OK) < 0) {
338 log_debug("Virtualization XEN not found, /proc/xen does not exist");
339 return VIRTUALIZATION_NONE;
340 }
341 log_debug("Virtualization XEN found (/proc/xen exists)");
342
343 /* Ignore the Xen hypervisor if we are in Dom0 */
344 r = detect_vm_xen_dom0();
345 if (r < 0)
346 return r;
347 if (r > 0)
348 return VIRTUALIZATION_NONE;
349
350 return VIRTUALIZATION_XEN;
351}
352
6300502b
MP
353static int detect_vm_hypervisor(void) {
354 _cleanup_free_ char *hvtype = NULL;
355 int r;
60f067b4 356
6300502b
MP
357 r = read_one_line_file("/sys/hypervisor/type", &hvtype);
358 if (r == -ENOENT)
359 return VIRTUALIZATION_NONE;
360 if (r < 0)
361 return r;
60f067b4 362
aa27b158
MP
363 log_debug("Virtualization %s found in /sys/hypervisor/type", hvtype);
364
6300502b
MP
365 if (streq(hvtype, "xen"))
366 return VIRTUALIZATION_XEN;
367 else
368 return VIRTUALIZATION_VM_OTHER;
369}
60f067b4 370
6300502b 371static int detect_vm_uml(void) {
6e866b33 372 _cleanup_fclose_ FILE *f = NULL;
6300502b 373 int r;
60f067b4 374
6300502b 375 /* Detect User-Mode Linux by reading /proc/cpuinfo */
6e866b33
MB
376 f = fopen("/proc/cpuinfo", "re");
377 if (!f) {
378 if (errno == ENOENT) {
379 log_debug("/proc/cpuinfo not found, assuming no UML virtualization.");
380 return VIRTUALIZATION_NONE;
381 }
382 return -errno;
98393f85 383 }
aa27b158 384
6e866b33
MB
385 for (;;) {
386 _cleanup_free_ char *line = NULL;
387 const char *t;
388
389 r = read_line(f, LONG_LINE_MAX, &line);
390 if (r < 0)
391 return r;
392 if (r == 0)
393 break;
394
395 t = startswith(line, "vendor_id\t: ");
396 if (t) {
397 if (startswith(t, "User Mode Linux")) {
398 log_debug("UML virtualization found in /proc/cpuinfo");
399 return VIRTUALIZATION_UML;
400 }
401
402 break;
403 }
aa27b158 404 }
60f067b4 405
98393f85 406 log_debug("UML virtualization not found in /proc/cpuinfo.");
6300502b
MP
407 return VIRTUALIZATION_NONE;
408}
7035cd9e 409
6300502b 410static int detect_vm_zvm(void) {
60f067b4 411
6300502b
MP
412#if defined(__s390__)
413 _cleanup_free_ char *t = NULL;
414 int r;
7035cd9e 415
6300502b
MP
416 r = get_proc_field("/proc/sysinfo", "VM00 Control Program", WHITESPACE, &t);
417 if (r == -ENOENT)
418 return VIRTUALIZATION_NONE;
419 if (r < 0)
420 return r;
7035cd9e 421
aa27b158 422 log_debug("Virtualization %s found in /proc/sysinfo", t);
6300502b
MP
423 if (streq(t, "z/VM"))
424 return VIRTUALIZATION_ZVM;
425 else
426 return VIRTUALIZATION_KVM;
427#else
aa27b158 428 log_debug("This platform does not support /proc/sysinfo");
6300502b
MP
429 return VIRTUALIZATION_NONE;
430#endif
431}
7035cd9e 432
6300502b
MP
/* Returns a short identifier for the various VM implementations.
 *
 * Runs the individual probes in a fixed order and returns the first specific VIRTUALIZATION_* value
 * found. If no probe identifies a specific hypervisor but at least one reported
 * VIRTUALIZATION_VM_OTHER, that value is returned instead of VIRTUALIZATION_NONE.
 *
 * A result that reaches the "finish" label is cached per thread and returned directly by later
 * calls. Negative errno-style errors from the probes are returned immediately and are not cached. */
int detect_vm(void) {
        static thread_local int cached_found = _VIRTUALIZATION_INVALID;
        bool other = false; /* set once any probe reports an unidentified VM */
        int r, dmi;

        if (cached_found >= 0)
                return cached_found;

        /* We have to use the correct order here:
         *
         * → First, try to detect Oracle Virtualbox and Amazon EC2 Nitro, even if they use KVM, as well as Xen even if
         *   it cloaks as Microsoft Hyper-V. Attempt to detect uml at this stage also since it runs as a user-process
         *   nested inside other VMs. Also check for Xen now, because Xen PV mode does not override CPUID when nested
         *   inside another hypervisor.
         *
         * → Second, try to detect from CPUID, this will report KVM for whatever software is used even if info in DMI is
         *   overwritten.
         *
         * → Third, try to detect from DMI. */

        /* The DMI result is kept in its own variable: only three values are acted upon right away, the
         * rest (including errors) is evaluated after the CPUID probe below. */
        dmi = detect_vm_dmi();
        if (IN_SET(dmi, VIRTUALIZATION_ORACLE, VIRTUALIZATION_XEN, VIRTUALIZATION_AMAZON)) {
                r = dmi;
                goto finish;
        }

        /* Detect UML */
        r = detect_vm_uml();
        if (r < 0)
                return r;
        if (r == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (r != VIRTUALIZATION_NONE)
                goto finish;

        /* Detect Xen */
        r = detect_vm_xen();
        if (r < 0)
                return r;
        if (r == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (r != VIRTUALIZATION_NONE)
                goto finish;

        /* Detect from CPUID */
        r = detect_vm_cpuid();
        if (r < 0)
                return r;
        if (r == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (r != VIRTUALIZATION_NONE)
                goto finish;

        /* Now, let's get back to DMI */
        if (dmi < 0)
                return dmi;
        if (dmi == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (dmi != VIRTUALIZATION_NONE) {
                r = dmi;
                goto finish;
        }

        /* Check high-level hypervisor sysfs file */
        r = detect_vm_hypervisor();
        if (r < 0)
                return r;
        if (r == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (r != VIRTUALIZATION_NONE)
                goto finish;

        /* Check the firmware device tree */
        r = detect_vm_device_tree();
        if (r < 0)
                return r;
        if (r == VIRTUALIZATION_VM_OTHER)
                other = true;
        else if (r != VIRTUALIZATION_NONE)
                goto finish;

        /* Last probe: whatever it returns (other than an error) is the result. */
        r = detect_vm_zvm();
        if (r < 0)
                return r;

finish:
        /* No specific hypervisor was identified, but some probe saw signs of one. */
        if (r == VIRTUALIZATION_NONE && other)
                r = VIRTUALIZATION_VM_OTHER;

        cached_found = r;
        log_debug("Found VM virtualization %s", virtualization_to_string(r));
        return r;
}
526
46cdbd49
BR
/* Maps container VIRTUALIZATION_* identifiers to the manager names that detect_container() looks up
 * through container_from_string(). */
static const char *const container_table[_VIRTUALIZATION_MAX] = {
        [VIRTUALIZATION_LXC]            = "lxc",
        [VIRTUALIZATION_LXC_LIBVIRT]    = "lxc-libvirt",
        [VIRTUALIZATION_SYSTEMD_NSPAWN] = "systemd-nspawn",
        [VIRTUALIZATION_DOCKER]         = "docker",
        [VIRTUALIZATION_PODMAN]         = "podman",
        [VIRTUALIZATION_RKT]            = "rkt",
        [VIRTUALIZATION_WSL]            = "wsl",
        [VIRTUALIZATION_PROOT]          = "proot",
        [VIRTUALIZATION_POUCH]          = "pouch",
};

/* Provides the file-local container_from_string() lookup over the table above. */
DEFINE_PRIVATE_STRING_TABLE_LOOKUP_FROM_STRING(container, int);
663996b3 540
3a6ce677
BR
/* Heuristically determine whether we are inside a cgroup namespace, by checking which control files
 * exist at the top of the mounted cgroup hierarchy.
 *
 * Returns true (> 0) or false (0), or a negative errno-style error if a check fails for a reason
 * other than the file not existing. */
static int running_in_cgroupns(void) {
        int unified;

        if (!cg_ns_supported())
                return false;

        unified = cg_all_unified();
        if (unified < 0)
                return unified;

        if (!unified) {
                /* cgroup v1 */

                /* If systemd controller is not mounted, do not even bother. */
                if (access("/sys/fs/cgroup/systemd", F_OK) < 0)
                        return errno == ENOENT ? false : -errno;

                /* release_agent only exists in the root cgroup. */
                if (access("/sys/fs/cgroup/systemd/release_agent", F_OK) < 0)
                        return errno == ENOENT ? true : -errno;

                return false;
        }

        /* cgroup v2 */

        /* All kernel versions have cgroup.events in nested cgroups. */
        if (access("/sys/fs/cgroup/cgroup.events", F_OK) < 0)
                return errno == ENOENT ? false : -errno;

        /* There's no cgroup.type in the root cgroup, and future kernel versions
         * are unlikely to add it since cgroup.type is something that makes no sense
         * whatsoever in the root cgroup. */
        if (access("/sys/fs/cgroup/cgroup.type", F_OK) == 0)
                return true;
        if (errno != ENOENT)
                return -errno;

        /* On older kernel versions, there's no cgroup.type. Such a kernel is one that we know for
         * sure has cgroup.events only in nested cgroups. */
        if (access("/sys/kernel/cgroup/features", F_OK) < 0)
                return errno == ENOENT ? true : -errno;

        /* This is a recent kernel, and cgroup.type doesn't exist, so we must be
         * in the root cgroup. */
        return false;
}
606
607static int detect_container_files(void) {
608 unsigned i;
609
610 static const struct {
611 const char *file_path;
612 int id;
613 } container_file_table[] = {
614 /* https://github.com/containers/podman/issues/6192 */
615 /* https://github.com/containers/podman/issues/3586#issuecomment-661918679 */
616 { "/run/.containerenv", VIRTUALIZATION_PODMAN },
617 /* https://github.com/moby/moby/issues/18355 */
618 /* Docker must be the last in this table, see below. */
619 { "/.dockerenv", VIRTUALIZATION_DOCKER },
620 };
621
622 for (i = 0; i < ELEMENTSOF(container_file_table); i++) {
623 if (access(container_file_table[i].file_path, F_OK) >= 0)
624 return container_file_table[i].id;
625
626 if (errno != ENOENT)
627 log_debug_errno(errno,
628 "Checking if %s exists failed, ignoring: %m",
629 container_file_table[i].file_path);
630 }
631
632 return VIRTUALIZATION_NONE;
633}
634
/* Detect whether we are running inside a container and, if so, which container manager is in use.
 *
 * Returns a VIRTUALIZATION_* value; VIRTUALIZATION_NONE means no container was detected, and
 * VIRTUALIZATION_CONTAINER_OTHER means a container whose manager is not one of the known ones. A
 * result that reaches the "finish" label is cached per thread. If /run/host/container-manager or
 * /run/systemd/container exists but cannot be read, a negative errno-style error is returned and
 * nothing is cached. */
int detect_container(void) {
        static thread_local int cached_found = _VIRTUALIZATION_INVALID;
        _cleanup_free_ char *m = NULL, *o = NULL, *p = NULL;
        const char *e = NULL; /* container manager name to translate, once one has been found */
        int r;

        if (cached_found >= 0)
                return cached_found;

        /* /proc/vz exists in container and outside of the container, /proc/bc only outside of the container. */
        if (access("/proc/vz", F_OK) < 0) {
                if (errno != ENOENT)
                        log_debug_errno(errno, "Failed to check if /proc/vz exists, ignoring: %m");
        } else if (access("/proc/bc", F_OK) < 0) {
                if (errno == ENOENT) {
                        r = VIRTUALIZATION_OPENVZ;
                        goto finish;
                }

                log_debug_errno(errno, "Failed to check if /proc/bc exists, ignoring: %m");
        }

        /* "Official" way of detecting WSL https://github.com/Microsoft/WSL/issues/423#issuecomment-221627364 */
        r = read_one_line_file("/proc/sys/kernel/osrelease", &o);
        if (r < 0)
                log_debug_errno(r, "Failed to read /proc/sys/kernel/osrelease, ignoring: %m");
        else if (strstr(o, "Microsoft") || strstr(o, "WSL")) {
                r = VIRTUALIZATION_WSL;
                goto finish;
        }

        /* proot doesn't use PID namespacing, so we can just check if we have a matching tracer for this
         * invocation without worrying about it being elsewhere.
         */
        r = get_proc_field("/proc/self/status", "TracerPid", WHITESPACE, &p);
        if (r < 0)
                log_debug_errno(r, "Failed to read our own trace PID, ignoring: %m");
        else if (!streq(p, "0")) {
                pid_t ptrace_pid;

                r = parse_pid(p, &ptrace_pid);
                if (r < 0)
                        log_debug_errno(r, "Failed to parse our own tracer PID, ignoring: %m");
                else {
                        _cleanup_free_ char *ptrace_comm = NULL;
                        const char *pf;

                        /* Look up the command name of the process tracing us. */
                        pf = procfs_file_alloca(ptrace_pid, "comm");
                        r = read_one_line_file(pf, &ptrace_comm);
                        if (r < 0)
                                log_debug_errno(r, "Failed to read %s, ignoring: %m", pf);
                        else if (startswith(ptrace_comm, "proot")) {
                                r = VIRTUALIZATION_PROOT;
                                goto finish;
                        }
                }
        }

        /* The container manager might have placed this in the /run/host/ hierarchy for us, which is best
         * because we can be consumed just like that, without special privileges. */
        r = read_one_line_file("/run/host/container-manager", &m);
        if (r > 0) {
                e = m;
                goto translate_name;
        }
        if (!IN_SET(r, -ENOENT, 0))
                return log_debug_errno(r, "Failed to read /run/host/container-manager: %m");

        if (getpid_cached() == 1) {
                /* If we are PID 1 we can just check our own environment variable, and that's authoritative.
                 * We distinguish three cases:
                 * - the variable is not defined → we jump to other checks
                 * - the variable is defined to an empty value → we are not in a container
                 * - anything else → some container, either one of the known ones or "container-other"
                 */
                e = getenv("container");
                if (!e)
                        goto check_files;
                if (isempty(e)) {
                        r = VIRTUALIZATION_NONE;
                        goto finish;
                }

                goto translate_name;
        }

        /* Otherwise, PID 1 might have dropped this information into a file in /run. This is better than accessing
         * /proc/1/environ, since we don't need CAP_SYS_PTRACE for that. */
        r = read_one_line_file("/run/systemd/container", &m);
        if (r > 0) {
                e = m;
                goto translate_name;
        }
        if (!IN_SET(r, -ENOENT, 0))
                return log_debug_errno(r, "Failed to read /run/systemd/container: %m");

        /* Fallback for cases where PID 1 was not systemd (for example, cases where init=/bin/sh is used). */
        r = getenv_for_pid(1, "container", &m);
        if (r > 0) {
                e = m;
                goto translate_name;
        }
        if (r < 0) /* This only works if we have CAP_SYS_PTRACE, hence let's better ignore failures here */
                log_debug_errno(r, "Failed to read $container of PID 1, ignoring: %m");

check_files:
        /* Check for existence of some well-known files. We only do this after checking
         * for other specific container managers, otherwise we risk mistaking another
         * container manager for Docker: the /.dockerenv file could inadvertently end up
         * in a file system image. */
        r = detect_container_files();
        if (r)
                goto finish;

        /* No marker file either. Being inside a cgroup namespace still indicates a container. */
        r = running_in_cgroupns();
        if (r > 0) {
                r = VIRTUALIZATION_CONTAINER_OTHER;
                goto finish;
        }
        if (r < 0)
                log_debug_errno(r, "Failed to detect cgroup namespace: %m");

        /* If none of that worked, give up, assume no container manager. */
        r = VIRTUALIZATION_NONE;
        goto finish;

translate_name:
        if (streq(e, "oci")) {
                /* Some images hardcode container=oci, but OCI is not a specific container manager.
                 * Try to detect one based on well-known files. */
                r = detect_container_files();
                if (!r)
                        r = VIRTUALIZATION_CONTAINER_OTHER;
                goto finish;
        }
        /* Names that are not in container_table still indicate a container, just an unknown one. */
        r = container_from_string(e);
        if (r < 0)
                r = VIRTUALIZATION_CONTAINER_OTHER;

finish:
        log_debug("Found container virtualization %s.", virtualization_to_string(r));
        cached_found = r;
        return r;
}
663996b3 779
/* Detect any kind of virtualization, with containers taking precedence over VMs.
 *
 * Returns whatever detect_container() reports unless that is 0, in which case the result of
 * detect_vm() is returned instead. */
int detect_virtualization(void) {
        int container;

        container = detect_container();
        if (container != 0)
                return container;

        return detect_vm();
}
6300502b 789
8a584da2
MP
790static int userns_has_mapping(const char *name) {
791 _cleanup_fclose_ FILE *f = NULL;
792 _cleanup_free_ char *buf = NULL;
793 size_t n_allocated = 0;
794 ssize_t n;
795 uint32_t a, b, c;
796 int r;
797
798 f = fopen(name, "re");
799 if (!f) {
800 log_debug_errno(errno, "Failed to open %s: %m", name);
2897b343 801 return errno == ENOENT ? false : -errno;
8a584da2
MP
802 }
803
804 n = getline(&buf, &n_allocated, f);
805 if (n < 0) {
806 if (feof(f)) {
807 log_debug("%s is empty, we're in an uninitialized user namespace", name);
808 return true;
809 }
810
811 return log_debug_errno(errno, "Failed to read %s: %m", name);
812 }
813
814 r = sscanf(buf, "%"PRIu32" %"PRIu32" %"PRIu32, &a, &b, &c);
815 if (r < 3)
816 return log_debug_errno(errno, "Failed to parse %s: %m", name);
817
818 if (a == 0 && b == 0 && c == UINT32_MAX) {
819 /* The kernel calls mappings_overlap() and does not allow overlaps */
820 log_debug("%s has a full 1:1 mapping", name);
821 return false;
822 }
823
824 /* Anything else implies that we are in a user namespace */
825 log_debug("Mapping found in %s, we're in a user namespace", name);
826 return true;
827}
828
829int running_in_userns(void) {
830 _cleanup_free_ char *line = NULL;
831 int r;
832
833 r = userns_has_mapping("/proc/self/uid_map");
834 if (r != 0)
835 return r;
836
837 r = userns_has_mapping("/proc/self/gid_map");
838 if (r != 0)
839 return r;
840
841 /* "setgroups" file was added in kernel v3.18-rc6-15-g9cc46516dd. It is also
842 * possible to compile a kernel without CONFIG_USER_NS, in which case "setgroups"
843 * also does not exist. We cannot distinguish those two cases, so assume that
844 * we're running on a stripped-down recent kernel, rather than on an old one,
845 * and if the file is not found, return false.
846 */
847 r = read_one_line_file("/proc/self/setgroups", &line);
848 if (r < 0) {
849 log_debug_errno(r, "/proc/self/setgroups: %m");
850 return r == -ENOENT ? false : r;
851 }
852
853 truncate_nl(line);
854 r = streq(line, "deny");
855 /* See user_namespaces(7) for a description of this "setgroups" contents. */
856 log_debug("/proc/self/setgroups contains \"%s\", %s user namespace", line, r ? "in" : "not in");
857 return r;
858}
859
/* Determine whether we run in a chroot() environment, by comparing our root directory with that of
 * PID 1.
 *
 * Returns > 0 if the two differ, 0 if they are the same or if $SYSTEMD_IGNORE_CHROOT is set to a
 * true value, or a negative errno-style error from the comparison. */
int running_in_chroot(void) {
        int same;

        if (getenv_bool("SYSTEMD_IGNORE_CHROOT") > 0)
                return 0;

        same = files_same("/proc/1/root", "/", 0);
        if (same < 0)
                return same;

        return !same;
}
872
3a6ce677
BR
873#if defined(__i386__) || defined(__x86_64__)
/* One CPU feature flag: the bit index within a CPUID result register and the name under which
 * has_cpu_with_flag() callers refer to it. */
struct cpuid_table_entry {
        uint32_t flag_bit; /* bit position; given_flag_in_set() tests (1 << flag_bit) */
        const char *name;  /* flag name compared against the caller's string */
};

/* Flags checked against EDX of CPUID leaf 1. */
static const struct cpuid_table_entry leaf1_edx[] = {
        {  0, "fpu" },
        {  1, "vme" },
        {  2, "de" },
        {  3, "pse" },
        {  4, "tsc" },
        {  5, "msr" },
        {  6, "pae" },
        {  7, "mce" },
        {  8, "cx8" },
        {  9, "apic" },
        { 11, "sep" },
        { 12, "mtrr" },
        { 13, "pge" },
        { 14, "mca" },
        { 15, "cmov" },
        { 16, "pat" },
        { 17, "pse36" },
        { 19, "clflush" },
        { 23, "mmx" },
        { 24, "fxsr" },
        { 25, "sse" },
        { 26, "sse2" },
        { 28, "ht" },
};

/* Flags checked against ECX of CPUID leaf 1. */
static const struct cpuid_table_entry leaf1_ecx[] = {
        {  0, "pni" },
        {  1, "pclmul" },
        {  3, "monitor" },
        {  9, "ssse3" },
        { 12, "fma3" },
        { 13, "cx16" },
        { 19, "sse4_1" },
        { 20, "sse4_2" },
        { 22, "movbe" },
        { 23, "popcnt" },
        { 25, "aes" },
        { 26, "xsave" },
        { 27, "osxsave" },
        { 28, "avx" },
        { 29, "f16c" },
        { 30, "rdrand" },
};

/* Flags checked against EBX of CPUID leaf 7. */
static const struct cpuid_table_entry leaf7_ebx[] = {
        {  3, "bmi1" },
        {  5, "avx2" },
        {  8, "bmi2" },
        { 18, "rdseed" },
        { 19, "adx" },
        { 29, "sha_ni" },
};

/* Flags checked against EDX of CPUID leaf 0x80000001. */
static const struct cpuid_table_entry leaf81_edx[] = {
        { 11, "syscall" },
        { 27, "rdtscp" },
        { 29, "lm" },
};

/* Flags checked against ECX of CPUID leaf 0x80000001. */
static const struct cpuid_table_entry leaf81_ecx[] = {
        {  0, "lahf_lm" },
        {  5, "abm" },
};

/* Flags checked against EDX of CPUID leaf 0x80000007. */
static const struct cpuid_table_entry leaf87_edx[] = {
        {  8, "constant_tsc" },
};
947
948static bool given_flag_in_set(const char *flag, const struct cpuid_table_entry *set, size_t set_size, uint32_t val) {
949 for (size_t i = 0; i < set_size; i++) {
950 if ((UINT32_C(1) << set[i].flag_bit) & val &&
951 streq(flag, set[i].name))
952 return true;
953 }
954 return false;
955}
956
957static bool real_has_cpu_with_flag(const char *flag) {
958 uint32_t eax, ebx, ecx, edx;
959
960 if (__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
961 if (given_flag_in_set(flag, leaf1_ecx, ELEMENTSOF(leaf1_ecx), ecx))
962 return true;
963
964 if (given_flag_in_set(flag, leaf1_edx, ELEMENTSOF(leaf1_edx), edx))
965 return true;
966 }
967
968 if (__get_cpuid(7, &eax, &ebx, &ecx, &edx)) {
969 if (given_flag_in_set(flag, leaf7_ebx, ELEMENTSOF(leaf7_ebx), ebx))
970 return true;
971 }
972
973 if (__get_cpuid(0x80000001U, &eax, &ebx, &ecx, &edx)) {
974 if (given_flag_in_set(flag, leaf81_ecx, ELEMENTSOF(leaf81_ecx), ecx))
975 return true;
976
977 if (given_flag_in_set(flag, leaf81_edx, ELEMENTSOF(leaf81_edx), edx))
978 return true;
979 }
980
981 if (__get_cpuid(0x80000007U, &eax, &ebx, &ecx, &edx))
982 if (given_flag_in_set(flag, leaf87_edx, ELEMENTSOF(leaf87_edx), edx))
983 return true;
984
985 return false;
986}
987#endif
988
/* Returns true if the CPU advertises the feature named `flag` via CPUID. */
bool has_cpu_with_flag(const char *flag) {
#if !defined(__i386__) && !defined(__x86_64__)
        /* CPUID is an x86 specific interface. Assume on all others that no CPUs have those flags. */
        return false;
#else
        return real_has_cpu_with_flag(flag);
#endif
}
997
6300502b
MP
/* Names for all VIRTUALIZATION_* values: VM types first, then container types. detect_vm() and
 * detect_container() log their results through virtualization_to_string(). */
static const char *const virtualization_table[_VIRTUALIZATION_MAX] = {
        [VIRTUALIZATION_NONE]            = "none",
        [VIRTUALIZATION_KVM]             = "kvm",
        [VIRTUALIZATION_AMAZON]          = "amazon",
        [VIRTUALIZATION_QEMU]            = "qemu",
        [VIRTUALIZATION_BOCHS]           = "bochs",
        [VIRTUALIZATION_XEN]             = "xen",
        [VIRTUALIZATION_UML]             = "uml",
        [VIRTUALIZATION_VMWARE]          = "vmware",
        [VIRTUALIZATION_ORACLE]          = "oracle",
        [VIRTUALIZATION_MICROSOFT]       = "microsoft",
        [VIRTUALIZATION_ZVM]             = "zvm",
        [VIRTUALIZATION_PARALLELS]       = "parallels",
        [VIRTUALIZATION_BHYVE]           = "bhyve",
        [VIRTUALIZATION_QNX]             = "qnx",
        [VIRTUALIZATION_ACRN]            = "acrn",
        [VIRTUALIZATION_POWERVM]         = "powervm",
        [VIRTUALIZATION_VM_OTHER]        = "vm-other",

        [VIRTUALIZATION_SYSTEMD_NSPAWN]  = "systemd-nspawn",
        [VIRTUALIZATION_LXC_LIBVIRT]     = "lxc-libvirt",
        [VIRTUALIZATION_LXC]             = "lxc",
        [VIRTUALIZATION_OPENVZ]          = "openvz",
        [VIRTUALIZATION_DOCKER]          = "docker",
        [VIRTUALIZATION_PODMAN]          = "podman",
        [VIRTUALIZATION_RKT]             = "rkt",
        [VIRTUALIZATION_WSL]             = "wsl",
        [VIRTUALIZATION_PROOT]           = "proot",
        [VIRTUALIZATION_POUCH]           = "pouch",
        [VIRTUALIZATION_CONTAINER_OTHER] = "container-other",
};

/* Provides the string lookup functions over the table above, including the
 * virtualization_to_string() used elsewhere in this file. */
DEFINE_STRING_TABLE_LOOKUP(virtualization, int);