]> git.proxmox.com Git - qemu-server.git/blame_incremental - PVE/QemuServer.pm
cloudinit: allow non-root users to set ciupgrade option
[qemu-server.git] / PVE / QemuServer.pm
... / ...
CommitLineData
1package PVE::QemuServer;
2
3use strict;
4use warnings;
5
6use Cwd 'abs_path';
7use Digest::SHA;
8use Fcntl ':flock';
9use Fcntl;
10use File::Basename;
11use File::Copy qw(copy);
12use File::Path;
13use File::stat;
14use Getopt::Long;
15use IO::Dir;
16use IO::File;
17use IO::Handle;
18use IO::Select;
19use IO::Socket::UNIX;
20use IPC::Open3;
21use JSON;
22use List::Util qw(first);
23use MIME::Base64;
24use POSIX;
25use Storable qw(dclone);
26use Time::HiRes qw(gettimeofday usleep);
27use URI::Escape;
28use UUID;
29
30use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31use PVE::CGroup;
32use PVE::CpuSet;
33use PVE::DataCenterConfig;
34use PVE::Exception qw(raise raise_param_exc);
35use PVE::Format qw(render_duration render_bytes);
36use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37use PVE::Mapping::PCI;
38use PVE::Mapping::USB;
39use PVE::INotify;
40use PVE::JSONSchema qw(get_standard_option parse_property_string);
41use PVE::ProcFSTools;
42use PVE::PBSClient;
43use PVE::RESTEnvironment qw(log_warn);
44use PVE::RPCEnvironment;
45use PVE::Storage;
46use PVE::SysFSTools;
47use PVE::Systemd;
48use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
49
50use PVE::QMPClient;
51use PVE::QemuConfig;
52use PVE::QemuServer::Helpers qw(min_version config_aware_timeout windows_version);
53use PVE::QemuServer::Cloudinit;
54use PVE::QemuServer::CGroup;
55use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
56use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
57use PVE::QemuServer::Machine;
58use PVE::QemuServer::Memory;
59use PVE::QemuServer::Monitor qw(mon_cmd);
60use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
61use PVE::QemuServer::USB;
62
# Optional SDN support: $have_sdn becomes 1 when PVE::Network::SDN::Zones can be loaded and
# stays undefined otherwise. A failing require is deliberately swallowed by the eval.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    1;
};
68
# Directory holding the EDK2 firmware images. The value already ends in '/', and every path
# below adds another '/', so the resulting paths contain '//' - this is kept exactly as is.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Build a two-element [ code image, vars image ] list of paths below $EDK2_FW_BASE.
my $edk2_image_pair = sub {
    my ($code_image, $vars_image) = @_;
    return [ map { "$EDK2_FW_BASE/$_" } ($code_image, $vars_image) ];
};

# Firmware image pairs, keyed by architecture and then by firmware variant name.
my $OVMF = {
    x86_64 => {
        '4m-no-smm'    => $edk2_image_pair->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.fd'),
        '4m-no-smm-ms' => $edk2_image_pair->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.ms.fd'),
        '4m'           => $edk2_image_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
        '4m-ms'        => $edk2_image_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
        default        => $edk2_image_pair->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
    },
    aarch64 => {
        default => $edk2_image_pair->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
    },
};
100
101my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
102
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
107
108cfs_register_file(
109 '/qemu-server/',
110 \&parse_vm_config,
111 \&write_vm_config
112);
113
114PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
115 description => "Some command save/restore state from this location.",
116 type => 'string',
117 maxLength => 128,
118 optional => 1,
119});
120
121PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
122 description => "Specifies the QEMU machine type.",
123 type => 'string',
124 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
125 maxLength => 40,
126 optional => 1,
127});
128
129# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
# Holds the node name once nodename() has looked it up.
my $nodename_cache;

# Return the local node name as reported by PVE::INotify::nodename().
# The lookup happens only while the cache is still undefined; later calls return the cached
# value.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
135
136my $watchdog_fmt = {
137 model => {
138 default_key => 1,
139 type => 'string',
140 enum => [qw(i6300esb ib700)],
141 description => "Watchdog type to emulate.",
142 default => 'i6300esb',
143 optional => 1,
144 },
145 action => {
146 type => 'string',
147 enum => [qw(reset shutdown poweroff pause debug none)],
148 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
149 optional => 1,
150 },
151};
152PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
153
154my $agent_fmt = {
155 enabled => {
156 description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
157 type => 'boolean',
158 default => 0,
159 default_key => 1,
160 },
161 fstrim_cloned_disks => {
162 description => "Run fstrim after moving a disk or migrating the VM.",
163 type => 'boolean',
164 optional => 1,
165 default => 0,
166 },
167 'freeze-fs-on-backup' => {
168 description => "Freeze/thaw guest filesystems on backup for consistency.",
169 type => 'boolean',
170 optional => 1,
171 default => 1,
172 },
173 type => {
174 description => "Select the agent type",
175 type => 'string',
176 default => 'virtio',
177 optional => 1,
178 enum => [qw(virtio isa)],
179 },
180};
181
182my $vga_fmt = {
183 type => {
184 description => "Select the VGA type.",
185 type => 'string',
186 default => 'std',
187 optional => 1,
188 default_key => 1,
189 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware)],
190 },
191 memory => {
192 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
193 type => 'integer',
194 optional => 1,
195 minimum => 4,
196 maximum => 512,
197 },
198};
199
200my $ivshmem_fmt = {
201 size => {
202 type => 'integer',
203 minimum => 1,
204 description => "The size of the file in MB.",
205 },
206 name => {
207 type => 'string',
208 pattern => '[a-zA-Z0-9\-]+',
209 optional => 1,
210 format_description => 'string',
211 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
212 },
213};
214
215my $audio_fmt = {
216 device => {
217 type => 'string',
218 enum => [qw(ich9-intel-hda intel-hda AC97)],
219 description => "Configure an audio device."
220 },
221 driver => {
222 type => 'string',
223 enum => ['spice', 'none'],
224 default => 'spice',
225 optional => 1,
226 description => "Driver backend for the audio device."
227 },
228};
229
230my $spice_enhancements_fmt = {
231 foldersharing => {
232 type => 'boolean',
233 optional => 1,
234 default => '0',
235 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
236 },
237 videostreaming => {
238 type => 'string',
239 enum => ['off', 'all', 'filter'],
240 default => 'off',
241 optional => 1,
242 description => "Enable video streaming. Uses compression for detected video streams."
243 },
244};
245
246my $rng_fmt = {
247 source => {
248 type => 'string',
249 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
250 default_key => 1,
251 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
252 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
253 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
254 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
255 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
256 ." a hardware RNG from the host.",
257 },
258 max_bytes => {
259 type => 'integer',
260 description => "Maximum bytes of entropy allowed to get injected into the guest every"
261 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
262 ." `0` to disable limiting (potentially dangerous!).",
263 optional => 1,
264
265 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
266 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
267 # reading from /dev/urandom
268 default => 1024,
269 },
270 period => {
271 type => 'integer',
272 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
273 ." the guest to retrieve another 'max_bytes' of entropy.",
274 optional => 1,
275 default => 1000,
276 },
277};
278
279my $meta_info_fmt = {
280 'ctime' => {
281 type => 'integer',
282 description => "The guest creation timestamp as UNIX epoch time",
283 minimum => 0,
284 optional => 1,
285 },
286 'creation-qemu' => {
287 type => 'string',
288 description => "The QEMU (machine) version from the time this VM was created.",
289 pattern => '\d+(\.\d+)+',
290 optional => 1,
291 },
292};
293
294my $confdesc = {
295 onboot => {
296 optional => 1,
297 type => 'boolean',
298 description => "Specifies whether a VM will be started during system bootup.",
299 default => 0,
300 },
301 autostart => {
302 optional => 1,
303 type => 'boolean',
304 description => "Automatic restart after crash (currently ignored).",
305 default => 0,
306 },
307 hotplug => {
308 optional => 1,
309 type => 'string', format => 'pve-hotplug-features',
310 description => "Selectively enable hotplug features. This is a comma separated list of"
311 ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
312 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
313 ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
314 ." windows > 7.",
315 default => 'network,disk,usb',
316 },
317 reboot => {
318 optional => 1,
319 type => 'boolean',
320 description => "Allow reboot. If set to '0' the VM exit on reboot.",
321 default => 1,
322 },
323 lock => {
324 optional => 1,
325 type => 'string',
326 description => "Lock/unlock the VM.",
327 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
328 },
329 cpulimit => {
330 optional => 1,
331 type => 'number',
332 description => "Limit of CPU usage.",
333 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
334 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
335 minimum => 0,
336 maximum => 128,
337 default => 0,
338 },
339 cpuunits => {
340 optional => 1,
341 type => 'integer',
342 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
343 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
344 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
345 ." weights of all the other running VMs.",
346 minimum => 1,
347 maximum => 262144,
348 default => 'cgroup v1: 1024, cgroup v2: 100',
349 },
350 memory => {
351 optional => 1,
352 type => 'integer',
353 description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
354 ." you use the balloon device.",
355 minimum => 16,
356 default => 512,
357 },
358 balloon => {
359 optional => 1,
360 type => 'integer',
361 description => "Amount of target RAM for the VM in MiB. Using zero disables the ballon driver.",
362 minimum => 0,
363 },
364 shares => {
365 optional => 1,
366 type => 'integer',
367 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
368 ." more memory this VM gets. Number is relative to weights of all other running VMs."
369 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
370 minimum => 0,
371 maximum => 50000,
372 default => 1000,
373 },
374 keyboard => {
375 optional => 1,
376 type => 'string',
377 description => "Keyboard layout for VNC server. This option is generally not required and"
378 ." is often better handled from within the guest OS.",
379 enum => PVE::Tools::kvmkeymaplist(),
380 default => undef,
381 },
382 name => {
383 optional => 1,
384 type => 'string', format => 'dns-name',
385 description => "Set a name for the VM. Only used on the configuration web interface.",
386 },
387 scsihw => {
388 optional => 1,
389 type => 'string',
390 description => "SCSI controller model",
391 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
392 default => 'lsi',
393 },
394 description => {
395 optional => 1,
396 type => 'string',
397 description => "Description for the VM. Shown in the web-interface VM's summary."
398 ." This is saved as comment inside the configuration file.",
399 maxLength => 1024 * 8,
400 },
401 ostype => {
402 optional => 1,
403 type => 'string',
404 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
405 description => "Specify guest operating system.",
406 verbose_description => <<EODESC,
407Specify guest operating system. This is used to enable special
408optimization/features for specific operating systems:
409
410[horizontal]
411other;; unspecified OS
412wxp;; Microsoft Windows XP
413w2k;; Microsoft Windows 2000
414w2k3;; Microsoft Windows 2003
415w2k8;; Microsoft Windows 2008
416wvista;; Microsoft Windows Vista
417win7;; Microsoft Windows 7
418win8;; Microsoft Windows 8/2012/2012r2
419win10;; Microsoft Windows 10/2016/2019
420win11;; Microsoft Windows 11/2022
421l24;; Linux 2.4 Kernel
422l26;; Linux 2.6 - 6.X Kernel
423solaris;; Solaris/OpenSolaris/OpenIndiania kernel
424EODESC
425 },
426 boot => {
427 optional => 1,
428 type => 'string', format => 'pve-qm-boot',
429 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
430 ." key or 'legacy=' is deprecated.",
431 },
432 bootdisk => {
433 optional => 1,
434 type => 'string', format => 'pve-qm-bootdisk',
435 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
436 pattern => '(ide|sata|scsi|virtio)\d+',
437 },
438 smp => {
439 optional => 1,
440 type => 'integer',
441 description => "The number of CPUs. Please use option -sockets instead.",
442 minimum => 1,
443 default => 1,
444 },
445 sockets => {
446 optional => 1,
447 type => 'integer',
448 description => "The number of CPU sockets.",
449 minimum => 1,
450 default => 1,
451 },
452 cores => {
453 optional => 1,
454 type => 'integer',
455 description => "The number of cores per socket.",
456 minimum => 1,
457 default => 1,
458 },
459 numa => {
460 optional => 1,
461 type => 'boolean',
462 description => "Enable/disable NUMA.",
463 default => 0,
464 },
465 hugepages => {
466 optional => 1,
467 type => 'string',
468 description => "Enable/disable hugepages memory.",
469 enum => [qw(any 2 1024)],
470 },
471 keephugepages => {
472 optional => 1,
473 type => 'boolean',
474 default => 0,
475 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
476 ." after VM shutdown and can be used for subsequent starts.",
477 },
478 vcpus => {
479 optional => 1,
480 type => 'integer',
481 description => "Number of hotplugged vcpus.",
482 minimum => 1,
483 default => 0,
484 },
485 acpi => {
486 optional => 1,
487 type => 'boolean',
488 description => "Enable/disable ACPI.",
489 default => 1,
490 },
491 agent => {
492 optional => 1,
493 description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
494 type => 'string',
495 format => $agent_fmt,
496 },
497 kvm => {
498 optional => 1,
499 type => 'boolean',
500 description => "Enable/disable KVM hardware virtualization.",
501 default => 1,
502 },
503 tdf => {
504 optional => 1,
505 type => 'boolean',
506 description => "Enable/disable time drift fix.",
507 default => 0,
508 },
509 localtime => {
510 optional => 1,
511 type => 'boolean',
512 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
513 ." the `ostype` indicates a Microsoft Windows OS.",
514 },
515 freeze => {
516 optional => 1,
517 type => 'boolean',
518 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
519 },
520 vga => {
521 optional => 1,
522 type => 'string', format => $vga_fmt,
523 description => "Configure the VGA hardware.",
524 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
525 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
526 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
527 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
528 ." display server. For win* OS you can select how many independent displays you want,"
529 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
530 ." using a serial device as terminal.",
531 },
532 watchdog => {
533 optional => 1,
534 type => 'string', format => 'pve-qm-watchdog',
535 description => "Create a virtual hardware watchdog device.",
536 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
537 ." action), the watchdog must be periodically polled by an agent inside the guest or"
538 ." else the watchdog will reset the guest (or execute the respective action specified)",
539 },
540 startdate => {
541 optional => 1,
542 type => 'string',
543 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
544 description => "Set the initial date of the real time clock. Valid format for date are:"
545 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
546 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
547 default => 'now',
548 },
549 startup => get_standard_option('pve-startup-order'),
550 template => {
551 optional => 1,
552 type => 'boolean',
553 description => "Enable/disable Template.",
554 default => 0,
555 },
556 args => {
557 optional => 1,
558 type => 'string',
559 description => "Arbitrary arguments passed to kvm.",
560 verbose_description => <<EODESCR,
561Arbitrary arguments passed to kvm, for example:
562
563args: -no-reboot -smbios 'type=0,vendor=FOO'
564
565NOTE: this option is for experts only.
566EODESCR
567 },
568 tablet => {
569 optional => 1,
570 type => 'boolean',
571 default => 1,
572 description => "Enable/disable the USB tablet device.",
573 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
574 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
575 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
576 ." may consider disabling this to save some context switches. This is turned off by"
577 ." default if you use spice (`qm set <vmid> --vga qxl`).",
578 },
579 migrate_speed => {
580 optional => 1,
581 type => 'integer',
582 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
583 minimum => 0,
584 default => 0,
585 },
586 migrate_downtime => {
587 optional => 1,
588 type => 'number',
589 description => "Set maximum tolerated downtime (in seconds) for migrations.",
590 minimum => 0,
591 default => 0.1,
592 },
593 cdrom => {
594 optional => 1,
595 type => 'string', format => 'pve-qm-ide',
596 typetext => '<volume>',
597 description => "This is an alias for option -ide2",
598 },
599 cpu => {
600 optional => 1,
601 description => "Emulated CPU type.",
602 type => 'string',
603 format => 'pve-vm-cpu-conf',
604 },
605 parent => get_standard_option('pve-snapshot-name', {
606 optional => 1,
607 description => "Parent snapshot name. This is used internally, and should not be modified.",
608 }),
609 snaptime => {
610 optional => 1,
611 description => "Timestamp for snapshots.",
612 type => 'integer',
613 minimum => 0,
614 },
615 vmstate => {
616 optional => 1,
617 type => 'string', format => 'pve-volume-id',
618 description => "Reference to a volume which stores the VM state. This is used internally"
619 ." for snapshots.",
620 },
621 vmstatestorage => get_standard_option('pve-storage-id', {
622 description => "Default storage for VM state volumes/files.",
623 optional => 1,
624 }),
625 runningmachine => get_standard_option('pve-qemu-machine', {
626 description => "Specifies the QEMU machine type of the running vm. This is used internally"
627 ." for snapshots.",
628 }),
629 runningcpu => {
630 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
631 ." internally for snapshots.",
632 optional => 1,
633 type => 'string',
634 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
635 format_description => 'QEMU -cpu parameter'
636 },
637 machine => get_standard_option('pve-qemu-machine'),
638 arch => {
639 description => "Virtual processor architecture. Defaults to the host.",
640 optional => 1,
641 type => 'string',
642 enum => [qw(x86_64 aarch64)],
643 },
644 smbios1 => {
645 description => "Specify SMBIOS type 1 fields.",
646 type => 'string', format => 'pve-qm-smbios1',
647 maxLength => 512,
648 optional => 1,
649 },
650 protection => {
651 optional => 1,
652 type => 'boolean',
653 description => "Sets the protection flag of the VM. This will disable the remove VM and"
654 ." remove disk operations.",
655 default => 0,
656 },
657 bios => {
658 optional => 1,
659 type => 'string',
660 enum => [ qw(seabios ovmf) ],
661 description => "Select BIOS implementation.",
662 default => 'seabios',
663 },
664 vmgenid => {
665 type => 'string',
666 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
667 format_description => 'UUID',
668 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
669 ." to disable explicitly.",
670 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
671 ." value identifier to the guest OS. This allows to notify the guest operating system"
672 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
673 ." execution or creation from a template). The guest operating system notices the"
674 ." change, and is then able to react as appropriate by marking its copies of"
675 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
676 ."Note that auto-creation only works when done through API/CLI create or update methods"
677 .", but not when manually editing the config file.",
678 default => "1 (autogenerated)",
679 optional => 1,
680 },
681 hookscript => {
682 type => 'string',
683 format => 'pve-volume-id',
684 optional => 1,
685 description => "Script that will be executed during various steps in the vms lifetime.",
686 },
687 ivshmem => {
688 type => 'string',
689 format => $ivshmem_fmt,
690 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
691 ." the host.",
692 optional => 1,
693 },
694 audio0 => {
695 type => 'string',
696 format => $audio_fmt,
697 description => "Configure a audio device, useful in combination with QXL/Spice.",
698 optional => 1
699 },
700 spice_enhancements => {
701 type => 'string',
702 format => $spice_enhancements_fmt,
703 description => "Configure additional enhancements for SPICE.",
704 optional => 1
705 },
706 tags => {
707 type => 'string', format => 'pve-tag-list',
708 description => 'Tags of the VM. This is only meta information.',
709 optional => 1,
710 },
711 rng0 => {
712 type => 'string',
713 format => $rng_fmt,
714 description => "Configure a VirtIO-based Random Number Generator.",
715 optional => 1,
716 },
717 meta => {
718 type => 'string',
719 format => $meta_info_fmt,
720 description => "Some (read-only) meta-information about this guest.",
721 optional => 1,
722 },
723 affinity => {
724 type => 'string', format => 'pve-cpuset',
725 description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
726 optional => 1,
727 },
728};
729
# Property-string schema for the 'cicustom' option: each key optionally names a volume whose
# content is passed to the guest via cloud-init instead of the generated data of that kind.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Two single-quoted pieces joined with '.'. The former version had a stray '"' plus
        # a line break and '."' inside one single-quoted literal, which leaked into the help
        # text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
761PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
762
763# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
# Schema of the cloud-init related VM options. Further down in this file the per-interface
# 'ipconfigN' entries are added and the whole table is merged into $confdesc.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        # the parenthesis around `ostype` was left unclosed in the former text
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.',
        default => 1,
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
820
821# what about other qemu settings ?
822#cpu => 'string',
823#machine => 'string',
824#fda => 'file',
825#fdb => 'file',
826#mtdblock => 'file',
827#sd => 'file',
828#pflash => 'file',
829#snapshot => 'bool',
830#bootp => 'file',
831##tftp => 'dir',
832##smb => 'dir',
833#kernel => 'file',
834#append => 'string',
835#initrd => 'file',
836##soundhw => 'string',
837
# Publish every option known to $confdesc at this point as the standard option
# 'pve-qm-<name>'.
for my $opt_name (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt_name", $confdesc->{$opt_name});
}
841
842my $MAX_NETS = 32;
843my $MAX_SERIAL_PORTS = 4;
844my $MAX_PARALLEL_PORTS = 3;
845my $MAX_NUMA = 8;
846
847my $numa_fmt = {
848 cpus => {
849 type => "string",
850 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
851 description => "CPUs accessing this NUMA node.",
852 format_description => "id[-id];...",
853 },
854 memory => {
855 type => "number",
856 description => "Amount of memory this NUMA node provides.",
857 optional => 1,
858 },
859 hostnodes => {
860 type => "string",
861 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
862 description => "Host NUMA nodes to use.",
863 format_description => "id[-id];...",
864 optional => 1,
865 },
866 policy => {
867 type => 'string',
868 enum => [qw(preferred bind interleave)],
869 description => "NUMA allocation policy.",
870 optional => 1,
871 },
872};
873PVE::JSONSchema::register_format('pve-qm-numanode', $numa_fmt);
874my $numadesc = {
875 optional => 1,
876 type => 'string', format => $numa_fmt,
877 description => "NUMA topology.",
878};
879PVE::JSONSchema::register_standard_option("pve-qm-numanode", $numadesc);
880
# Add the options numa0 .. numa($MAX_NUMA - 1); all of them share the same $numadesc schema.
$confdesc->{"numa$_"} = $numadesc for 0 .. $MAX_NUMA - 1;
884
# NIC models accepted by the 'model' property of the network device format.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];

# The same models as one space-separated string, in default (string) sort order.
my @nic_models_sorted = sort @$nic_model_list;
my $nic_model_list_txt = join(' ', @nic_models_sorted);
902
903my $net_fmt_bridge_descr = <<__EOD__;
904Bridge to attach the network device to. The Proxmox VE standard bridge
905is called 'vmbr0'.
906
907If you do not specify a bridge, we create a kvm user (NATed) network
908device, which provides DHCP and DNS services. The following addresses
909are used:
910
911 10.0.2.2 Gateway
912 10.0.2.3 DNS Server
913 10.0.2.4 SMB Server
914
915The DHCP server assign addresses to the guest starting from 10.0.2.15.
916__EOD__
917
918my $net_fmt = {
919 macaddr => get_standard_option('mac-addr', {
920 description => "MAC address. That address must be unique withing your network. This is"
921 ." automatically generated if not specified.",
922 }),
923 model => {
924 type => 'string',
925 description => "Network Card Model. The 'virtio' model provides the best performance with"
926 ." very low CPU overhead. If your guest does not support this driver, it is usually"
927 ." best to use 'e1000'.",
928 enum => $nic_model_list,
929 default_key => 1,
930 },
931 (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
932 bridge => get_standard_option('pve-bridge-id', {
933 description => $net_fmt_bridge_descr,
934 optional => 1,
935 }),
936 queues => {
937 type => 'integer',
938 minimum => 0, maximum => 64,
939 description => 'Number of packet queues to be used on the device.',
940 optional => 1,
941 },
942 rate => {
943 type => 'number',
944 minimum => 0,
945 description => "Rate limit in mbps (megabytes per second) as floating point number.",
946 optional => 1,
947 },
948 tag => {
949 type => 'integer',
950 minimum => 1, maximum => 4094,
951 description => 'VLAN tag to apply to packets on this interface.',
952 optional => 1,
953 },
954 trunks => {
955 type => 'string',
956 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
957 description => 'VLAN trunks to pass through this interface.',
958 format_description => 'vlanid[;vlanid...]',
959 optional => 1,
960 },
961 firewall => {
962 type => 'boolean',
963 description => 'Whether this interface should be protected by the firewall.',
964 optional => 1,
965 },
966 link_down => {
967 type => 'boolean',
968 description => 'Whether this interface should be disconnected (like pulling the plug).',
969 optional => 1,
970 },
971 mtu => {
972 type => 'integer',
973 minimum => 1, maximum => 65520,
974 description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
975 optional => 1,
976 },
977};
978
979my $netdesc = {
980 optional => 1,
981 type => 'string', format => $net_fmt,
982 description => "Specify network devices.",
983};
984
985PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
986
# Property-string format for the cloud-init 'ipconfig<N>' options. Each gateway
# key declares (via 'requires') that the address key of the same IP family must
# be present as well.
my $ipconfig_fmt = {
    # IPv4 address
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        default => 'dhcp',
        optional => 1,
    },
    # IPv4 gateway
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        requires => 'ip',
        optional => 1,
    },
    # IPv6 address
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        default => 'dhcp',
        optional => 1,
    },
    # IPv6 gateway
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        requires => 'ip6',
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry used for the cloud-init 'ipconfig<N>' config options: an optional
# property string in the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig descriptor itself. This previously passed $netdesc, so
# the 'pve-qm-ipconfig' standard option described network devices instead.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1040
# Add one 'net<N>' option and one cloud-init 'ipconfig<N>' option for every
# index below $MAX_NETS.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Copy every cloud-init option into the main config schema.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1049
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset and returns the short string
# form of the resulting set. If parsing fails, returns nothing when $noerr is
# set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # only the member list is needed, the count is discarded
    my (undef, $cpu_ids) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if (!$@) {
        return PVE::CpuSet->new($cpu_ids)->short_string();
    }

    return if $noerr;
    die "unable to parse cpuset option\n";
}
1063
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier for 'pve-volume-id-or-qm-path'.
#
# Accepts the keywords 'none' and 'cdrom' unchanged; anything else is handed to
# verify_volume_id_or_absolute_path() together with $noerr.
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1072
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# Values starting with '/' are returned unchanged. Everything else is checked
# against the 'pve-volume-id' format; on failure nothing is returned when
# $noerr is set, otherwise the check's error is re-thrown.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    return $checked if !$@;

    die $@ if !$noerr;
    return;
}
1086
# Schema entry used for the 'serial<N>' config options: either a host device
# path below /dev or the keyword 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<'EODESCR',
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1103
# Schema entry used for the 'parallel<N>' config options: a host parallel port
# device (/dev/parportN) or USB printer device (/dev/usb/lpN).
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<'EODESCR',
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1118
# Add one config option per slot for every indexed device type.
for my $i (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$i"} = $paralleldesc;
}

for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$i"} = $serialdesc;
}

for my $i (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$i"} = $PVE::QemuServer::PCI::hostpcidesc;
}

# The drive descriptors are already keyed by their option name.
$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

for my $i (0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$i"} = $PVE::QemuServer::USB::usbdesc;
}
1138
# Property-string format of the 'boot' option: the deprecated single-letter
# 'legacy' selector (default key) and the explicit device 'order' list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1173
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for 'pve-qm-bootdev'.
#
# A device is accepted when it is a valid drive name other than efidisk*/
# tpmstate*, or when it is a net<N>/usb<N>/hostpci<N> key that exists in
# $confdesc. Returns the device name on success; otherwise returns nothing if
# $noerr is set and dies if not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    return $dev if PVE::QemuServer::Drive::is_valid_drivename($dev) && !$special;

    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    return if $noerr;
    die "invalid boot device '$dev'\n";
}
1195
# Serialize a list of boot devices (array reference) into a 'boot' property
# string with an 'order=' entry. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" if scalar(@$devs) == 0;

    return PVE::JSONSchema::print_property_string({ order => join(';', @$devs) }, $boot_fmt);
}
1202
# Cached result of the KVM_GET_API_VERSION ioctl; 0 means "not queried yet".
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. The value is cached once a
# true value has been obtained. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1216
# Per-binary caches: the detected version string and the binary's mtime at the
# time of detection.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version reported by `$binary --version`.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and re-detected whenever the binary's mtime changes. If the
# version line cannot be parsed (or the command fails, which is only warned
# about) the string 'unknown' is returned.
#
# Dies if $binary cannot be stat()ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # stat() returns undef for a missing/inaccessible binary; fail with a clear
    # message instead of a method call on an undefined value
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Wrapper around PVE::QemuServer::Machine::extract_version() that falls back to
# kvm_user_version() when no $version is passed in.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1251
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1255
# True if $key has a defined entry in the VM config schema ($confdesc).
sub option_exists {
    my ($key) = @_;

    return defined($confdesc->{$key});
}
1260
# Cached result of get_cdrom_path(); undef until the first lookup.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none is, a warning is logged and the empty string is returned. The result
# (including the empty string) is cached for subsequent calls.
sub get_cdrom_path {
    return $cdrom_path if defined($cdrom_path);

    for my $suffix ('', '1', '2') {
        my $candidate = "/dev/cdrom$suffix";
        next if !-l $candidate;

        $cdrom_path = $candidate;
        last;
    }

    if (!defined($cdrom_path)) {
        log_warn("no physical CD-ROM available, ignoring");
        $cdrom_path = '';
    }

    return $cdrom_path;
}
1275
# Resolve the 'file' value of a CD-ROM drive to something usable as a path:
#   'cdrom'       -> result of get_cdrom_path()
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> resolved through PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1289
# try to convert old style file names to volume IDs
#
# 'none', 'cdrom', /dev/ paths and anything that already looks like
# 'storage:volume' are returned unchanged. Any other value containing a '/'
# yields nothing. A plain file name is mapped onto the 'local' storage, using
# the 'iso/' prefix when $media is 'cdrom' and the VM ID prefix otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $prefix = ($media && $media eq 'cdrom') ? 'local:iso' : "local:$vmid";

    return "$prefix/$file";
}
1308
# Check that the content type $vtype of a volume fits the drive's $media
# setting ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when
# $media is not set. Raises a parameter exception for option $opt on a
# mismatch and dies with "internal error" for any other $media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my $expected_for = {
        disk => 'images',
        cdrom => 'iso',
    };
    my $etype = $expected_for->{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1327
# Normalize $drive->{file} and $drive->{media} in place.
#
# A file value that is not 'cdrom'/'none', not a /dev/ path, not of the form
# 'storage:volume' and not purely numeric is treated as a filesystem path and
# converted to a volume ID; a parameter exception for $opt is raised if no
# storage matches. Media defaults to 'cdrom' for ISO volumes and for the
# 'cdrom'/'none' keywords.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    my $needs_no_conversion = $file =~ m/^(cdrom|none)$/
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/
        || $file =~ m/^\d+$/;

    # try to convert filesystem paths to volume IDs
    if (!$needs_no_conversion) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"})
            if !$vtype;
        $drive->{media} = 'cdrom' if !$drive->{media} && $vtype eq 'iso';
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1347
# Parse the 'hotplug' option into a hash reference of enabled features.
#
# '0' yields an empty hash, '1' is replaced by the schema default of the
# 'hotplug' option. Every list entry must be one of network, disk, cpu, memory,
# usb or cloudinit; anything else makes the function die.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my $res = {};
    for my $feature (PVE::Tools::split_list($data)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(network|disk|cpu|memory|usb|cloudinit)$/;

        $res->{$1} = 1;
    }

    return $res;
}
1366
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. On a parse error it
# returns nothing when $noerr is set, otherwise the parser's error is re-thrown.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() signals invalid input by dying and otherwise
    # always returns a (true) hash reference, so the error has to be caught
    # here for $noerr to have any effect
    eval { parse_hotplug_features($value) };
    return $value if !$@;

    return if $noerr;

    die $@;
}
1377
# Query a device through the Linux SCSI generic (sg) ioctl interface.
#
# $fh    - open file handle of the device
# $noerr - if true, return nothing on failure instead of dying
#
# Returns a hash reference with the keys type, removable, vendor, product and
# revision decoded from the 36 byte response buffer.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6 byte command: opcode 0x12 with an allocation length of 36 bytes
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # only the leading header fields are supplied, pack() zero-fills the rest
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    my @res = unpack($sg_io_hdr_t, $packet);
    # NOTE(review): per the sg.h field order these should be host_status and
    # driver_status - confirm against the header
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # reduce 'removable' to a boolean (bit 7) and 'type' to its low 5 bits
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1427
# Open $path read/write and run scsi_inquiry() on it without raising errors.
# Returns the inquiry result, or nothing if the path cannot be opened or the
# inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so that the path is never interpreted as part of
    # a two-argument open() specification
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1437
# Build the QEMU device string for the USB tablet. q35 machines and aarch64
# guests attach it to the 'ehci' bus, everything else to 'uhci'.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1453
# Build the QEMU device string for the USB keyboard. Only aarch64 guests get
# one; for any other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1461
# Return the drive's config key, i.e. its interface name directly followed by
# its index.
my sub get_drive_id {
    my ($drive) = @_;

    return $drive->{interface} . $drive->{index};
}
1466
# Build the QEMU '-device' string for a drive.
#
# Handles the virtio, scsi, ide and sata interfaces; 'usb' and any other
# interface make the function die. The bus address depends on the interface,
# the drive index and (for scsi) the configured controller type. Boot index
# and serial number are appended when set on the drive.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                $path = $drive->{file};
                # a passed-through host device may need a different device type
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi* (also the fallback when scsihw is unset) and pvscsi controllers
        # address drives by scsi-id, all others by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
        # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
        # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
        # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
        # were before. Move odd ones up by 2 where they don't clash.
        if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $drive->{interface} eq 'ide') {
            $controller += 2 * ($unit % 2);
            $unit = 0;
        }

        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            # the model is stored URI-escaped on the drive
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
        die "implement me";
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    # the serial is stored URI-escaped on the drive
    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1573
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi. Returns
# the value of the first 'InitiatorName=' line, or undef if there is none.
# Returns nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1587
# Decide whether io_uring may be used as the default AIO mode for a drive on
# the storage described by $scfg (may be undef). Returns 1 if it may, nothing
# if the storage type is known to be problematic.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
1604
# Tell whether a drive ends up with a direct cache mode.
#
# With an explicit cache setting this is the case for 'off', 'none' and
# 'directsync'. Without one it is the case for every non-CD-ROM drive, except
# when the storage is btrfs without the 'nocow' flag.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        return scalar($cache =~ /^(?:off|none|directsync)$/);
    }

    return 0 if drive_is_cdrom($drive);

    my $btrfs_with_cow = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};

    return $btrfs_with_cow ? 0 : 1;
}
1618
# Build the QEMU '-drive' option string for a drive.
#
# $storecfg - storage configuration used to resolve volume IDs
# $vmid     - VM ID, passed on when resolving CD-ROM paths
# $drive    - parsed drive definition
# $pbs_name - if set, name of the backing block node; the drive is then wrapped
#             in an 'alloc-track' format layer
# $io_uring - whether io_uring may be chosen as the default AIO mode
#
# Dies if a CD-ROM drive is combined with $pbs_name, or if $pbs_name is set and
# no image format is known.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through to QEMU under the same name
    my $opts = '';
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # throttling: config key suffix => QEMU option suffix; mbps values are
    # converted to bytes per second
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;

    if (!$drive->{aio}) {
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        # $detectzeroes is always one of 'off', 'unmap' or 'on' at this point
        $opts .= ",$dz_param=$detectzeroes";
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1744
# Build the QEMU '-blockdev' string for a read-only 'pbs' block node named
# $pbs_name. Repository, snapshot and archive are always emitted; namespace and
# keyfile only when set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @props = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @props, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @props, "snapshot=$pbs_conf->{snapshot}", "archive=$pbs_conf->{archive}";
    push @props, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @props);
}
1755
# Build the QEMU '-device' string for a network card.
#
# The 'virtio' model maps to the 'virtio-net-pci' device, every other model
# name is used as device name directly. Multi-queue, queue size and host MTU
# settings are only applied to VirtIO NICs. With $use_old_bios_files the
# matching PXE ROM file is selected explicitly.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=1024";
    }

    $tmpstr .= ",bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});

            # the value 1 means "use the bridge MTU" and is exempt from the range checks
            die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                if $mtu != 1 && $mtu < 576;
            die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                if $mtu != 1 && $mtu > $bridge_mtu;
            $mtu = $bridge_mtu if $mtu == 1;

            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my $romfile_for = {
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        };
        my $romfile = $romfile_for->{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1818
# Build the QEMU '-netdev' string for a network interface.
#
# NICs with a bridge become tap devices named tap<vmid>i<index> using the
# pve-bridge scripts (the hotplug variant if $hotplug is set); NICs without a
# bridge use user-mode networking. Dies if the index is not below $MAX_NETS or
# the interface name gets too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    my $i = $netid =~ m/^net(\d+)$/ ? int($1) : '';

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $is_virtio = $net->{model} eq 'virtio';

    my $vhostparam = '';
    $vhostparam = ',vhost=on' if is_native($arch) && kernel_has_vhost_net() && $is_virtio;

    my $vmname = $conf->{name} || "vm$vmid";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    my $netdev = $net->{bridge}
        ? "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam"
        : "type=user,id=$netid,hostname=$vmname";

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $is_virtio);

    return $netdev;
}
1856
# Maps the configured display type to the QEMU device name.
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the QEMU '-device' string for a display adapter.
#
# $vga    - parsed 'vga' option (type, memory)
# $id     - display number; undefined for the first display
# $qxlnum - if set, a QXL device is used regardless of the mapped type
#
# Dies if no device type can be determined, or if 'virtio-gl' is requested
# while the GL libraries or a DRM render node are missing.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # The alternation needs to be grouped as a whole: '^' and '$' bind
        # tighter than '|', so the ungrouped form only anchored 'l\d\d' at the
        # start and 'other' at the end.
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the standard VGA device on Windows guests that
    # do not use OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1932
# Parse a ';' separated list of numbers and ranges ("0;2-4") into an array
# reference of [ start, end ] pairs; 'end' is undef for a single number.
# Dies on malformed parts and on ranges whose end is below their start.
sub parse_number_sets {
    my ($set) = @_;

    my @ranges;
    for my $part (split(/;/, $set)) {
        my ($start, $end) = $part =~ /^\s*(\d+)(?:-(\d+))?\s*$/
            or die "invalid range: $part\n";

        die "invalid range: $part ($end < $start)\n" if defined($end) && $end < $start;

        push @ranges, [ $start, $end ];
    }

    return \@ranges;
}
1946
# Parse a 'numa<N>' property string. The 'cpus' and 'hostnodes' values, when
# present, are expanded with parse_number_sets().
sub parse_numa {
    my ($data) = @_;

    my $res = parse_property_string($numa_fmt, $data);

    for my $key (qw(cpus hostnodes)) {
        $res->{$key} = parse_number_sets($res->{$key}) if defined($res->{$key});
    }

    return $res;
}
1955
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'net<N>' property string. On a parse error a warning is emitted and
# nothing is returned. If the result has no MAC address, a random one with the
# datacenter's 'mac_prefix' is generated, unless $disable_mac_autogen is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if ($@) {
        warn $@;
        return;
    }

    return $res if defined($res->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $res;
}
1971
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfig<N>' property string. Emits a warning and returns nothing
# if parsing fails, if a gateway is given without an address of the same
# family, or if a gateway is combined with a non-literal address ('dhcp' for
# IPv4, anything not matching $IPV6RE for IPv6). If neither an IPv4 nor an
# IPv6 address is set, both default to 'dhcp'.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if ($@) {
        warn $@;
        return;
    }

    my ($ip, $gw, $ip6, $gw6) = $res->@{qw(ip gw ip6 gw6)};

    if ($gw && !$ip) {
        warn 'gateway specified without specifying an IP address';
        return;
    }
    if ($gw6 && !$ip6) {
        warn 'IPv6 gateway specified without specifying an IPv6 address';
        return;
    }
    if ($gw && $ip eq 'dhcp') {
        warn 'gateway specified together with DHCP';
        return;
    }
    if ($gw6 && $ip6 !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        warn "IPv6 gateway specified together with $ip6 address";
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$ip && !$ip6;

    return $res;
}
2006
# Serialize a parsed network device definition back into a 'net<N>' property
# string according to $net_fmt.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
2012
# Re-serialize every parsable 'net<N>' entry of $settings in place. Since
# parse_net() is called without disabling MAC auto-generation, entries lacking
# a MAC address get a random one. Unparsable entries are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
2023
# Return 1 if the owner reported by PVE::Storage::path() for $volid equals
# $vmid; otherwise return nothing.
# Values starting with '/' are rejected without a storage lookup, and a lookup
# that dies is silently treated as "no owner".
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # second element of the returned list is the owner
    my $owner = eval { (PVE::Storage::path($storecfg, $volid))[1] };

    return 1 if $owner && ($owner == $vmid);
    return;
}
2037
# Handle a drive that is no longer referenced by the configuration:
# - cloud-init drives are freed on the storage (failures are only warned
#   about) and the 'cloudinit' entry is dropped from $conf
# - other non-CD-ROM drives are registered as unused volume, but only if
#   vm_is_volid_owner() reports the volume as owned by $vmid
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2052
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string schema for the 'smbios1' option. Apart from 'uuid' and the
# 'base64' flag, all fields share one schema and differ only in their
# description, so they are generated from a small table.
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    (
        map {
            my ($field, $label) = @$_;
            ($field => {
                type => 'string',
                pattern => '[A-Za-z0-9+\/]+={0,2}',
                format_description => 'Base64 encoded string',
                description => "Set SMBIOS1 $label.",
                optional => 1,
            });
        } (
            [version => 'version'],
            [serial => 'serial number'],
            [manufacturer => 'manufacturer'],
            [product => 'product ID'],
            [sku => 'SKU string'],
            [family => 'family string'],
        )
    ),
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};
2110
# Parse an 'smbios1' property string against $smbios1_fmt.
# A parse error is only warned about; in that case undef is returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2118
# Serialize a parsed smbios1 hash back into its property string
# (counterpart of parse_smbios1, using the same $smbios1_fmt schema).
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2125
# Parse a 'watchdog' property string against $watchdog_fmt.
# A false $value gives an empty return; a parse error is only warned about
# and yields undef.
sub parse_watchdog {
    my ($value) = @_;

    $value or return;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2135
# Parse the 'agent' option of $conf against $agent_fmt.
# Returns an empty hash if the option is unset, cannot be parsed (a warning
# is emitted in that case) or if the agent is not enabled - other properties
# of a disabled agent are deliberately ignored.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_prop = $conf->{agent};
    return {} if !defined($agent_prop);

    my $agent = eval { parse_property_string($agent_fmt, $agent_prop) };
    warn $@ if $@;

    return $agent->{enabled} ? $agent : {};
}
2148
# Look up a single property $key of the guest agent configuration.
# Returns undef (explicitly, also in list context) if no 'agent' option is set;
# otherwise the value found in the hash returned by parse_guest_agent().
sub get_qga_key {
    my ($conf, $key) = @_;

    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2156
# Parse a 'vga' property string against $vga_fmt.
# A false $value gives an empty hash; a parse error is only warned about and
# yields undef.
sub parse_vga {
    my ($value) = @_;

    if (!$value) {
        return {};
    }

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2165
# Parse an 'rng' property string against $rng_fmt.
# A false $value gives an empty return; a parse error is only warned about
# and yields undef.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $rng;
}
2175
# Parse a 'meta' property string against $meta_info_fmt.
# A false $value gives an empty return; a parse error is only warned about
# and yields undef.
sub parse_meta_info {
    my ($value) = @_;

    $value or return;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2185
# Build the 'meta' property string for a new VM: it records the value of
# kvm_user_version() as 'creation-qemu' and the current epoch time as 'ctime'.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "" . int(time()), # stringified integer timestamp
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2197
# Check if we need to apply some handling for VMs that always use the latest
# machine version but had a machine version transition happen that affected HW
# such that, e.g., an OS config change would be required (we do not want to
# pin the machine version for non-windows OS types).
#
# Returns an array ref with extra QEMU command line arguments, or nothing if
# no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    return if defined($conf->{machine}) && $conf->{machine} !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or with unknown creation version)
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26');

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2222
# add JSON properties for create and set function
#
# Copies every option schema from $confdesc into $prop, except for a fixed
# set of internal options. With $with_disk_alloc set, drive options use the
# schema from $PVE::QemuServer::Drive::drivedesc_hash_with_alloc instead.
# Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %is_internal = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (grep { !$is_internal{$_} } keys %$confdesc) {
        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2248
# Properties that we can read from an OVF file
#
# Returns a hash ref with one optional string property per valid drive name
# plus the optional 'cores', 'memory' and 'name' properties.
sub json_ovf_properties {
    my $prop = {
        map {
            my $device = $_;
            ($device => {
                type => 'string',
                format => 'pve-volume-id-or-absolute-path',
                description => "Disk image that gets imported to $device",
                optional => 1,
            });
        } PVE::QemuServer::Drive::valid_drive_names()
    };

    $prop->{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop->{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop->{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return $prop;
}
2280
# return copy of $confdesc_cloudinit to generate documentation
# (a deep copy via dclone, so the result is independent of the original)
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2286
# Return a lookup hash (name => 1) containing all keys of
# $confdesc_cloudinit, 'name' and every 'netN' with N below $MAX_NETS.
sub cloudinit_pending_properties {
    my %pending = (name => 1);

    $pending{$_} = 1 for keys %$confdesc_cloudinit;
    $pending{"net$_"} = 1 for 0 .. ($MAX_NETS - 1);

    return \%pending;
}
2295
# Validate $value for the config option $key according to the 'type' found in
# $confdesc and return the normalized value:
# - boolean: 1 or 0 (also accepts on/yes/true and off/no/false)
# - integer: int() of an unsigned decimal number
# - number:  the value itself if it is an unsigned decimal, optionally with fraction
# - string:  checked against the option's 'format' if there is one, otherwise
#            one pair of surrounding double quotes is stripped
# Dies on unknown keys, undefined values, values containing CR/LF and on
# failed type checks.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key} or die "unknown setting '$key'\n";
    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
        } else {
            $value =~ s/^\"(.*)\"$/$1/;
        }
        return $value;
    }

    die "internal error"
}
2332
# Remove the volumes of VM $vmid and then its configuration.
#
# $skiplock          - skip the config lock check
# $replacement_conf  - if defined, written as new config instead of destroying it
# $purge_unreferenced - also free all 'images' volumes that vdisk_list() reports
#                       for $vmid, even if the config does not reference them
#
# Dies if the VM is a template and one of its base volumes is still used.
# Failures to free a single volume are only warned about.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    # the lock check is skipped on request or when the config holds a 'suspended' lock
    PVE::QemuConfig->check_lock($conf)
        if !$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended');

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    my $handled_volids = {}; # volumes we already tried to free
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $handled_volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $handled_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # saved VM state volumes of the snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        $remove_owned_drive->('vmstate', $drive) if defined($drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2405
# Parse the raw content of a VM configuration file.
#
# $filename - must end in '/qemu-server/<vmid>.conf'; only used to get the VMID
# $raw      - file content; an undefined value gives an empty return
# $strict   - if true, parse problems are fatal (die), otherwise only warned about
#
# Returns a hash ref with the settings of the main section plus:
#   digest    - SHA1 hex digest of $raw
#   snapshots - hash of snapshot sections, keyed by section name
#   pending   - settings of the [PENDING] section
#   cloudinit - settings of the [special:cloudinit] section (stored unvalidated)
# Comment lines ('#...') and 'description:' lines are collected into the
# 'description' of the section they appear in.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # hash of the section currently being filled
    my $descr;
    # store the description collected so far in the current section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # any other section header starts a snapshot
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like all other keys, so that a different option whose
            # value merely ends in 'snapstate: ...' is not mistaken for it
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                $key = 'ide2' if $key eq 'cdrom'; # 'cdrom' is an alias for 'ide2'
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the file of a drive to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2525
# Serialize a VM configuration hash into the raw config file format.
#
# $filename - not used by this function
# $conf     - configuration hash; it is normalized IN PLACE:
#             'snapstate' is dropped, 'cdrom' is moved to 'ide2', 'smp' is
#             converted to 'sockets', all values are passed through
#             check_type() and 'unusedN' entries whose volume is referenced by
#             a drive of the main or pending section are removed
#
# Returns the raw text: main section, then [PENDING] (if not empty), then
# [special:cloudinit] (if not empty and the VM has a cloud-init drive), then
# one section per snapshot in sorted order.
# Dies if a value fails its type check, if both 'cdrom' and 'ide2' are set, if
# 'delete' appears outside of the pending section or if a snapshot is named
# 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    my $used_volids = {};

    # type-check all values of one section and record the volumes used by its
    # drives (snapshot sections are not considered for the used volumes)
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # render one section: description as leading comment lines, followed by
    # the sorted 'key: value' lines
    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $section_conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out for the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $section_conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)) {
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2632
# Return a hash ref (option => default value) with the static defaults from
# our JSON schema configuration; options without a defined 'default' are
# left out.
sub load_defaults {
    my %defaults;

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults{$key} = $default if defined($default);
    }

    return \%defaults;
}
2646
# Return a hash ref { $vmid => { exists => 1 } } for every entry of the
# cluster-wide VM list that is of type 'qemu' and located on the node
# returned by nodename(). The result is empty if no VM list is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return {} if !$ids;

    my $local_node = nodename();
    my %local_vms;

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};
        next unless $entry->{node} && $entry->{node} eq $local_node;
        next unless $entry->{type} && $entry->{type} eq 'qemu';
        $local_vms{$vmid}->{exists} = 1;
    }

    return \%local_vms;
}
2662
# test if VM uses local resources (to prevent migration)
#
# $conf  - VM configuration hash
# $noerr - if true, do not die when local resources are found
#
# Collects the config keys of local resources (usbN, hostpciN, serialN,
# parallelN, ivshmem and the old 'hostusb'/'hostpci' syntax). USB devices using
# the 'spice' host and serial ports configured as 'socket' are not counted.
#
# In list context returns (\@loc_res, $mapped_res, $missing_mappings_by_node):
#   $mapped_res               - keys of usb/hostpci entries that use a mapping
#   $missing_mappings_by_node - per cluster node, the keys for which the
#                               mapping has no entry on that node
# In scalar context only \@loc_res is returned.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record $key for every node on which mapping $id has no entry
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # 'host' is not set for entries that only use a mapping, so check
            # that it exists before matching to avoid an uninitialized warning
            next if $entry->{host} && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2721
# check if used storages are available on all nodes (use by migrate)
#
# For every volume of $conf that parses as a storage volume ID, the storage
# must be enabled on the local node and on $node, and the content type of the
# volume must be configured for the storage. Dies otherwise.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2745
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and narrows the set down per volume:
# - disabled storage: no node remains
# - storage restricted to certain nodes: only those remain
# - non-shared storage: only the local node remains
# Returns a hash ref { $node => 1 }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_nodes = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete $nodehash->{$_} for grep { !$avail->{$_} } keys %$nodehash;
        } elsif (!$scfg->{shared}) {
            delete $nodehash->{$_} for grep { $_ ne $nodename } keys %$nodehash;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_nodes);

    return $nodehash
}
2779
# Determine, for every cluster node, which storages used by the volumes of
# $conf are not available there (storage disabled, or restricted to other
# nodes).
# Returns a hash ref keyed by node name; nodes with unavailable storages get
# an 'unavailable_storages' entry holding the sorted list of storage IDs.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $collect_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected_nodes;
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_unavailable);

    # convert the collected storage ID sets into sorted lists
    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages} or next;
        $node_info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2818
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns the result of PVE::QemuServer::Helpers::vm_running_locally($vmid).
# Unless $nocheck is set, it is first asserted that the config exists on $node.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't
    #   processed it yet
    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2832
# Return the VM list from config_list(), with the 'pid' added for every VM
# that has a '<vmid>.pid' file in the run directory and for which
# check_running() returns a PID.
# If the run directory cannot be opened, the plain list is returned.
sub vzlist {
    my $vzlist = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid) or next;
        $vzlist->{$vmid}->{pid} = $pid;
    }

    return $vzlist;
}
2850
# JSON schema of the properties describing the status of a single VM.
# All properties except 'vmid' and 'status' are optional.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => ['stopped', 'running'],
        description => "QEMU process status.",
    },
    maxmem => {
        type => 'integer',
        renderer => 'bytes',
        description => "Maximum memory in bytes.",
        optional => 1,
    },
    maxdisk => {
        type => 'integer',
        renderer => 'bytes',
        description => "Root disk size in bytes.",
        optional => 1,
    },
    name => {
        type => 'string',
        description => "VM name.",
        optional => 1,
    },
    qmpstatus => {
        type => 'string',
        description => "VM run state from the 'query-status' QMP monitor command.",
        optional => 1,
    },
    pid => {
        type => 'integer',
        description => "PID of running qemu process.",
        optional => 1,
    },
    uptime => {
        type => 'integer',
        renderer => 'duration',
        description => "Uptime.",
        optional => 1,
    },
    cpus => {
        type => 'number',
        description => "Maximum usable CPUs.",
        optional => 1,
    },
    lock => {
        type => 'string',
        description => "The current config lock, if any.",
        optional => 1,
    },
    tags => {
        type => 'string',
        description => "The current configured tags, if any",
        optional => 1,
    },
    'running-machine' => {
        type => 'string',
        description => "The currently running machine type (if running).",
        optional => 1,
    },
    'running-qemu' => {
        type => 'string',
        description => "The currently running QEMU version (if running).",
        optional => 1,
    },
};
2917
# CPU accounting sample ({ time, used, cpu }) per QEMU PID, kept between calls
# of vmstatus() to calculate the CPU usage from the difference
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# $opt_vmid - if set, only this VM is included in the result
# $full     - if true, also collect per-NIC counters and query the running
#             VMs via QMP (block stats, machine, version, balloon, status)
#
# Returns a hash ref keyed by VMID.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    my $MiB = 1024 * 1024;

    # static information from the configuration
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($pid) if $pid;

        # fixme: better status?
        $d->{status} = $pid ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0; # no info available
        $d->{maxdisk} = defined($size) ? $size : 0;

        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $MiB;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $MiB;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        # dynamic values start at zero and are filled in below
        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network counters of the tap devices; the host receives what the guest
    # sends, so 'receive' is accounted as netout and 'transmit' as netin
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/ or next;

        my $d = $res->{$vmid};
        next if !$d;

        my $counters = $netdev->{$dev};

        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    my $ctime = gettimeofday();

    # uptime, memory and CPU usage from /proc/<pid>/stat
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $sample = {
            time => $ctime,
            used => $used,
            cpu => 0,
        };

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first time we see this PID, nothing to compare against yet
            $last_proc_pid_stat->{$pid} = $sample;
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        unless ($dtime > 1000) {
            # interval too short, reuse the previously calculated value
            $d->{cpu} = $old->{cpu};
            next;
        }

        my $dutime = $used - $old->{used};

        $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
        $sample->{cpu} = $d->{cpu};
        $last_proc_pid_stat->{$pid} = $sample;
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        my ($read_total, $write_total) = (0, 0);

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};

            $read_total += $stats->{rd_bytes};
            $write_total += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }

        $res->{$vmid}->{diskread} = $read_total;
        $res->{$vmid}->{diskwrite} = $write_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};

        my $version = 'unknown';
        if (my $v = $data->{qemu}) {
            $version = "$v->{major}.$v->{minor}.$v->{micro}";
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    # VMs selected by $opt_vmid (all if unset)
    my @selected = grep { !($opt_vmid && ($_ ne $opt_vmid)) } keys %$list;

    for my $vmid (@selected) {
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@selected) {
        next if !$res->{$vmid}->{pid}; # not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status if QMP gave us no run state
    for my $vmid (@selected) {
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
3158
# Report whether the VM configuration defines at least one serial port.
#
# Looks at the keys 'serial0' .. 'serial<$MAX_SERIAL_PORTS - 1>' of $conf.
#
# Returns 1 as soon as one of them holds a true value, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $index (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$index"};
    }

    return 0;
}
3170
# Describe the audio device configured under "audio$id", if there is one.
#
# Parameters:
#   $conf - VM configuration hash
#   $id   - index of the audio entry to look up, defaults to 0
#
# Returns nothing (empty list / undef) when the entry is not defined.
# Otherwise returns a hash reference with:
#   dev        - the 'device' property of the parsed entry
#   dev_id     - "audiodev$id"
#   backend    - the 'driver' property, falling back to 'spice'
#   backend_id - "<backend>-backend$id"
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id = 0 if !defined($id);

    my $raw = $conf->{"audio$id"};
    defined($raw) or return;

    my $props = parse_property_string($audio_fmt, $raw);
    my $backend = defined($props->{driver}) ? $props->{driver} : 'spice';

    my %info = (
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $backend,
        backend_id => "$backend-backend${id}",
    );

    return \%info;
}
3188
# Build the QEMU command line arguments for an audio device.
#
# Parameters:
#   $audio           - hash reference as returned by conf_has_audio()
#                      (keys used: dev, dev_id, backend, backend_id)
#   $audiopciaddr    - PCI address suffix appended to the device string
#   $machine_version - machine version, used to decide whether the device is
#                      linked to its backend with an explicit 'audiodev='
#
# Returns an array reference of command line arguments: the '-device'
# entries followed by the matching '-audiodev' backend entry.
#
# Dies if the configured device is neither 'AC97' nor an '*intel-hda' one.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $devs = [];

    my $id = $audio->{dev_id};

    # the explicit backend link is only added for machine versions >= 4.2
    my $audiodev = "";
    if (min_version($machine_version, 4, 2)) {
        $audiodev = ",audiodev=$audio->{backend_id}";
    }

    if ($audio->{dev} eq 'AC97') {
        push @$devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        push @$devs, '-device', "$audio->{dev},id=${id}${audiopciaddr}";
        push @$devs, '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev";
        push @$devs, '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        die "unknown audio device '$audio->{dev}', implement me!";
    }

    push @$devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return $devs;
}
3214
# Return the runtime file locations used by the swtpm instance of a VM.
#
# Returns a hash reference with:
#   socket - path of the swtpm control socket
#   pid    - path of the swtpm PID file
sub get_tpm_paths {
    my ($vmid) = @_;

    my %paths = (
        socket => "/var/run/qemu-server/$vmid.swtpm",
        pid => "/var/run/qemu-server/$vmid.swtpm.pid",
    );

    return \%paths;
}
3222
# Append the QEMU arguments for an emulated TPM to @$devices.
#
# Does nothing unless the configuration has a 'tpmstate0' entry. Otherwise a
# socket chardev pointing at the VM's swtpm socket, an emulator tpmdev using
# that chardev and a 'tpm-tis' device are pushed onto @$devices.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    $conf->{tpmstate0} or return;

    my $socket_path = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket_path",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3234
# Start the swtpm daemon providing the emulated TPM of a VM.
#
# Parameters:
#   $storecfg  - storage configuration, used to map the state volume
#   $vmid      - ID of the VM
#   $tpmdrive  - 'tpmstate0' drive string from the VM configuration; nothing
#                is done (and nothing is returned) if it is not set
#   $migration - true when starting as migration target; swtpm_setup is
#                skipped in that case
#
# Returns the PID of the swtpm daemon as read from its PID file, so that it
# can be killed on error, or undef if the PID file contained no number.
#
# Dies if the PID file does not show up in time, or if one of the executed
# commands fails.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    # evaluate once and avoid 'uninitialized' warnings if no version is set
    my $is_tpm2 = defined($tpm->{version}) && $tpm->{version} eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    #
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        run_command($setup_cmd, outfunc => sub {
            # use the line handed to the callback rather than whatever the
            # regex capture variable $1 happens to contain at this point
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # return untainted PID of swtpm daemon so it can be killed on error; the
    # list assignment yields undef on a failed match instead of a stale $1
    my $pid_line = file_read_firstline($paths->{pid});
    my ($pid) = ($pid_line // '') =~ m/(\d+)/;
    return $pid;
}
3309
# Check whether a 'vga' configuration string selects a SPICE (qxl) display.
#
# Returns 0 if the parsed type is not 'qxl', 'qxl2', 'qxl3' or 'qxl4'.
# Otherwise returns the digit following 'qxl', or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $parsed = parse_vga($vga);
    my $type = $parsed->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3319
# Tell whether $arch is the same architecture as the one reported for the host.
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3324
# Return the architecture of a VM: the 'arch' value from its configuration if
# one is defined, the host architecture otherwise.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return $configured if defined($configured);

    return get_host_arch();
}
3329
# Machine type used for an architecture when the VM configuration does not
# request a specific one.
my $default_machines = {
    aarch64 => 'virt',
    x86_64 => 'pc',
};
3334
# Extract the "major.minor" part of a QEMU version string.
#
# Parameters:
#   $kvmversion - version string to inspect; if undefined, the result of
#                 kvm_user_version() is used instead
#
# Returns the leading "<major>.<minor>" of the version, or undef if the
# string does not start with such a pattern.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture through a list assignment: on a failed match this yields undef,
    # whereas reading $1 afterwards would return a stale capture from some
    # earlier, unrelated match.
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3341
# Turn an unversioned machine type into a version-pinned one.
#
# Parameters:
#   $machine      - 'pc', 'q35', 'virt' or a false value (treated like 'pc')
#   $base_version - preferred version to pin to
#   $kvmversion   - QEMU version passed on to the version helpers
#
# The machine is pinned to $base_version if that is defined and
# can_run_pve_machine_version() accepts it, otherwise to the version returned
# by get_installed_machine_version().
#
# Returns the pinned machine type. Any other machine type is returned
# untouched after emitting a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_version;
    $pin_version = get_installed_machine_version($kvmversion) if !$base_usable;

    # versioned machine name prefix for each known unversioned machine type
    my $versioned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    my $type = $machine || 'pc';
    if (my $prefix = $versioned_prefix->{$type}) {
        return "$prefix-$pin_version";
    }

    warn "unknown machine type '$machine', not touching that!\n";

    return $machine;
}
3363
# Determine the QEMU machine type to use for a VM.
#
# Parameters:
#   $conf            - VM configuration hash ('machine' and 'ostype' are read)
#   $forcemachine    - machine type overriding the configured one, if true
#   $arch            - guest architecture, defaults to 'x86_64' when needed
#   $add_pve_version - if true, make sure the result carries a '+pveN' suffix
#   $kvmversion      - QEMU version; kvm_user_version() is queried if it is
#                      undefined and needed
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    # done unless a pve-version is requested and still missing
    return $machine if !$add_pve_version || $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # a trailing '.pxe' has to stay at the very end of the machine type
    my $pxe_suffix = '';
    if ($machine =~ m/^(.*?)\.pxe$/) {
        $machine = $1;
        $pxe_suffix = '.pxe';
    }

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    return $machine . '+pve0' . $pxe_suffix;
}
3398
# Select the OVMF firmware images for a VM.
#
# Parameters:
#   $arch    - guest architecture, used as key into the $OVMF table
#   $efidisk - parsed EFI disk (may be undef); 'efitype' and
#              'pre-enrolled-keys' are read
#   $smm     - selects between the '4m' and '4m-no-smm' images
#
# The 'default' image pair is used unless the architecture is not aarch64 and
# the EFI disk has efitype '4m'.
#
# Returns the list ($ovmf_code, $ovmf_vars). Dies if the architecture has no
# entry in $OVMF or if one of the two image files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $wants_4m = $arch ne "aarch64"
        && defined($efidisk->{efitype})
        && $efidisk->{efitype} eq '4m';

    my $type = 'default';
    if ($wants_4m) {
        $type = $smm ? "4m" : "4m-no-smm";
        if ($efidisk->{'pre-enrolled-keys'}) {
            $type .= '-ms';
        }
    }

    my ($ovmf_code, $ovmf_vars) = $types->{$type}->@*;
    -f $ovmf_code or die "EFI base image '$ovmf_code' not found\n";
    -f $ovmf_vars or die "EFI vars image '$ovmf_vars' not found\n";

    return ($ovmf_code, $ovmf_vars);
}
3417
# QEMU binaries used for guest architectures that are not native to the host.
my $Arch2Qemu = {
    x86_64 => '/usr/bin/qemu-system-x86_64',
    aarch64 => '/usr/bin/qemu-system-aarch64',
};

# Return the path of the binary used to run a VM of the given architecture:
# '/usr/bin/kvm' for the native architecture, the matching entry of
# $Arch2Qemu otherwise. Dies if the architecture has no entry there.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3430
# Query the CPU flags supported on this host, separately for KVM and TCG.
#
# To obtain flags usable in a QEMU command line (-cpu element), array_intersect
# the result of this function with that of query_understood_cpu_flags. Both
# are needed because:
#
# a) query_understood_ also returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) does not actually return CPU
#    flags, but CPU settings - most of them being flags. Those settings (and
#    some flags, curiously) cannot be specified as a "-cpu" argument.
#
# This function needs to start up to 2 temporary VMs and is therefore rather
# expensive. The value can be obtained much cheaper from pmxcfs using
# PVE::Cluster::get_node_kv('cpuflags-$accel'), with $accel being 'kvm' or
# 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch = get_host_arch() if !defined($arch);
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    my $query_flags_for_accel = sub {
        my ($kvm) = @_;

        my @qemu_args = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@qemu_args, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my %enabled;
        eval {
            my $cmd_result = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $cmd_result->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $normalized = $name) =~ s/\.|-/_/g;
                $enabled{$normalized} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_flags_for_accel->(0) };
        if ($@) {
            warn "warning: failed querying supported tcg flags: $@\n";
        }

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_flags_for_accel->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3532
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the list of CPU flags recognized by QEMU for the host architecture.
#
# Returns an array reference with the whitespace separated entries of the
# flag file. Dies if the file for the host architecture does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    -e $filepath
        or die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";

    my $raw = file_get_contents($filepath);

    # splitting on ' ' skips leading whitespace, splits on whitespace runs and
    # drops trailing empty fields, so no separate trimming is required
    my @flags = split(' ', $raw);

    return \@flags;
}
3548
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns a true value if the VM uses SeaBIOS (explicitly, or because no bios
# is configured) and its display type is a serial port or 'none'. Returns
# nothing for 'virt' machine types.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    # there is no smm flag that could be disabled
    if ($machine =~ m/^virt/) {
        return;
    }

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3559
# Build the two pflash '-drive' option strings needed to boot with OVMF.
#
# Parameters:
#   $conf          - VM configuration hash ('efidisk0' is read)
#   $storecfg      - storage configuration
#   $vmid          - ID of the VM, used for the temporary efivars file name
#   $arch          - guest architecture
#   $q35           - passed to get_ovmf_files() as its third argument
#   $version_guard - code reference called as $version_guard->(4, 1, 2) to
#                    decide whether an explicit size is added
#
# Returns the list ($code_drive_str, $var_drive_str): the read-only firmware
# code drive and the drive holding the EFI variables.
#
# Dies if the EFI disk is not a storage volume and has no format set.
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);

    my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$efidisk) {
        # no EFI disk: work on a copy of the vars template instead
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $tmp_path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $tmp_path, -s $ovmf_vars);
        $var_drive_str .= ",format=raw,file=$tmp_path";
        $var_drive_str .= ",size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);
    } else {
        my $volid = $efidisk->{file};
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);

        my $path = $volid;
        my $format = $efidisk->{format};

        die "efidisk format must be specified\n" if !$storeid && !defined($format);

        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            if (!defined($format)) {
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                $format = qemu_img_format($scfg, $volname);
            }
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        if ($path =~ m/^rbd:/) {
            $var_drive_str .= ',cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }
        $var_drive_str .= ",format=$format,file=$path";

        if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
            $var_drive_str .= ",size=" . (-s $ovmf_vars);
        }
        if (drive_is_read_only($conf, $efidisk)) {
            $var_drive_str .= ',readonly=on';
        }
    }

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";

    return ($code_drive_str, $var_drive_str);
}
3599
3600sub config_to_command {
3601 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3602 $pbs_backing) = @_;
3603
3604 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3605 my $devices = [];
3606 my $bridges = {};
3607 my $ostype = $conf->{ostype};
3608 my $winversion = windows_version($ostype);
3609 my $kvm = $conf->{kvm};
3610 my $nodename = nodename();
3611
3612 my $arch = get_vm_arch($conf);
3613 my $kvm_binary = get_command_for_arch($arch);
3614 my $kvmver = kvm_user_version($kvm_binary);
3615
3616 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3617 $kvmver //= "undefined";
3618 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3619 }
3620
3621 my $add_pve_version = min_version($kvmver, 4, 1);
3622
3623 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3624 my $machine_version = extract_version($machine_type, $kvmver);
3625 $kvm //= 1 if is_native($arch);
3626
3627 $machine_version =~ m/(\d+)\.(\d+)/;
3628 my ($machine_major, $machine_minor) = ($1, $2);
3629
3630 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3631 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3632 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3633 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3634 ." please upgrade node '$nodename'\n"
3635 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3636 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3637 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3638 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3639 ." node '$nodename'\n";
3640 }
3641
3642 # if a specific +pve version is required for a feature, use $version_guard
3643 # instead of min_version to allow machines to be run with the minimum
3644 # required version
3645 my $required_pve_version = 0;
3646 my $version_guard = sub {
3647 my ($major, $minor, $pve) = @_;
3648 return 0 if !min_version($machine_version, $major, $minor, $pve);
3649 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3650 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3651 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3652 return 1;
3653 };
3654
3655 if ($kvm && !defined kvm_version()) {
3656 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3657 ." or enable in BIOS.\n";
3658 }
3659
3660 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3661 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3662 my $use_old_bios_files = undef;
3663 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3664
3665 my $cmd = [];
3666 if ($conf->{affinity}) {
3667 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3668 }
3669
3670 push @$cmd, $kvm_binary;
3671
3672 push @$cmd, '-id', $vmid;
3673
3674 my $vmname = $conf->{name} || "vm$vmid";
3675
3676 push @$cmd, '-name', "$vmname,debug-threads=on";
3677
3678 push @$cmd, '-no-shutdown';
3679
3680 my $use_virtio = 0;
3681
3682 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3683 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3684 push @$cmd, '-mon', "chardev=qmp,mode=control";
3685
3686 if (min_version($machine_version, 2, 12)) {
3687 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3688 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3689 }
3690
3691 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3692
3693 push @$cmd, '-daemonize';
3694
3695 if ($conf->{smbios1}) {
3696 my $smbios_conf = parse_smbios1($conf->{smbios1});
3697 if ($smbios_conf->{base64}) {
3698 # Do not pass base64 flag to qemu
3699 delete $smbios_conf->{base64};
3700 my $smbios_string = "";
3701 foreach my $key (keys %$smbios_conf) {
3702 my $value;
3703 if ($key eq "uuid") {
3704 $value = $smbios_conf->{uuid}
3705 } else {
3706 $value = decode_base64($smbios_conf->{$key});
3707 }
3708 # qemu accepts any binary data, only commas need escaping by double comma
3709 $value =~ s/,/,,/g;
3710 $smbios_string .= "," . $key . "=" . $value if $value;
3711 }
3712 push @$cmd, '-smbios', "type=1" . $smbios_string;
3713 } else {
3714 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3715 }
3716 }
3717
3718 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3719 my ($code_drive_str, $var_drive_str) =
3720 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3721 push $cmd->@*, '-drive', $code_drive_str;
3722 push $cmd->@*, '-drive', $var_drive_str;
3723 }
3724
3725 if ($q35) { # tell QEMU to load q35 config early
3726 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3727 if (min_version($machine_version, 4, 0)) {
3728 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3729 } else {
3730 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3731 }
3732 }
3733
3734 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3735 push @$cmd, $fixups->@*;
3736 }
3737
3738 if ($conf->{vmgenid}) {
3739 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3740 }
3741
3742 # add usb controllers
3743 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3744 $conf, $bridges, $arch, $machine_type, $machine_version);
3745 push @$devices, @usbcontrollers if @usbcontrollers;
3746 my $vga = parse_vga($conf->{vga});
3747
3748 my $qxlnum = vga_conf_has_spice($conf->{vga});
3749 $vga->{type} = 'qxl' if $qxlnum;
3750
3751 if (!$vga->{type}) {
3752 if ($arch eq 'aarch64') {
3753 $vga->{type} = 'virtio';
3754 } elsif (min_version($machine_version, 2, 9)) {
3755 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3756 } else {
3757 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3758 }
3759 }
3760
3761 # enable absolute mouse coordinates (needed by vnc)
3762 my $tablet = $conf->{tablet};
3763 if (!defined($tablet)) {
3764 $tablet = $defaults->{tablet};
3765 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3766 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3767 }
3768
3769 if ($tablet) {
3770 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3771 my $kbd = print_keyboarddevice_full($conf, $arch);
3772 push @$devices, '-device', $kbd if defined($kbd);
3773 }
3774
3775 my $bootorder = device_bootorder($conf);
3776
3777 # host pci device passthrough
3778 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3779 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
3780
3781 # usb devices
3782 my $usb_dev_features = {};
3783 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3784
3785 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3786 $conf, $usb_dev_features, $bootorder, $machine_version);
3787 push @$devices, @usbdevices if @usbdevices;
3788
3789 # serial devices
3790 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3791 my $path = $conf->{"serial$i"} or next;
3792 if ($path eq 'socket') {
3793 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3794 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3795 # On aarch64, serial0 is the UART device. QEMU only allows
3796 # connecting UART devices via the '-serial' command line, as
3797 # the device has a fixed slot on the hardware...
3798 if ($arch eq 'aarch64' && $i == 0) {
3799 push @$devices, '-serial', "chardev:serial$i";
3800 } else {
3801 push @$devices, '-device', "isa-serial,chardev=serial$i";
3802 }
3803 } else {
3804 die "no such serial device\n" if ! -c $path;
3805 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
3806 push @$devices, '-device', "isa-serial,chardev=serial$i";
3807 }
3808 }
3809
3810 # parallel devices
3811 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3812 if (my $path = $conf->{"parallel$i"}) {
3813 die "no such parallel device\n" if ! -c $path;
3814 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
3815 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3816 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3817 }
3818 }
3819
3820 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3821 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3822 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3823 push @$devices, @$audio_devs;
3824 }
3825
3826 add_tpm_device($vmid, $devices, $conf);
3827
3828 my $sockets = 1;
3829 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3830 $sockets = $conf->{sockets} if $conf->{sockets};
3831
3832 my $cores = $conf->{cores} || 1;
3833
3834 my $maxcpus = $sockets * $cores;
3835
3836 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3837
3838 my $allowed_vcpus = $cpuinfo->{cpus};
3839
3840 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3841
3842 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3843 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3844 for (my $i = 2; $i <= $vcpus; $i++) {
3845 my $cpustr = print_cpu_device($conf,$i);
3846 push @$cmd, '-device', $cpustr;
3847 }
3848
3849 } else {
3850
3851 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3852 }
3853 push @$cmd, '-nodefaults';
3854
3855 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3856
3857 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3858
3859 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3860
3861 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3862 push @$devices, '-device', print_vga_device(
3863 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3864
3865 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3866
3867 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3868 push @$cmd, '-vnc', "unix:$socket,password=on";
3869 } else {
3870 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3871 push @$cmd, '-nographic';
3872 }
3873
3874 # time drift fix
3875 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3876 my $useLocaltime = $conf->{localtime};
3877
3878 if ($winversion >= 5) { # windows
3879 $useLocaltime = 1 if !defined($conf->{localtime});
3880
3881 # use time drift fix when acpi is enabled
3882 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3883 $tdf = 1 if !defined($conf->{tdf});
3884 }
3885 }
3886
3887 if ($winversion >= 6) {
3888 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3889 push @$machineFlags, 'hpet=off';
3890 }
3891
3892 push @$rtcFlags, 'driftfix=slew' if $tdf;
3893
3894 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3895 push @$rtcFlags, "base=$conf->{startdate}";
3896 } elsif ($useLocaltime) {
3897 push @$rtcFlags, 'base=localtime';
3898 }
3899
3900 if ($forcecpu) {
3901 push @$cmd, '-cpu', $forcecpu;
3902 } else {
3903 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3904 }
3905
3906 PVE::QemuServer::Memory::config(
3907 $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
3908
3909 push @$cmd, '-S' if $conf->{freeze};
3910
3911 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3912
3913 my $guest_agent = parse_guest_agent($conf);
3914
3915 if ($guest_agent->{enabled}) {
3916 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3917 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3918
3919 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3920 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3921 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3922 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3923 } elsif ($guest_agent->{type} eq 'isa') {
3924 push @$devices, '-device', "isa-serial,chardev=qga0";
3925 }
3926 }
3927
3928 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3929 if ($rng && $version_guard->(4, 1, 2)) {
3930 check_rng_source($rng->{source});
3931
3932 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3933 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3934 my $limiter_str = "";
3935 if ($max_bytes) {
3936 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3937 }
3938
3939 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3940 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3941 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3942 }
3943
3944 my $spice_port;
3945
3946 if ($qxlnum || $vga->{type} =~ /^virtio/) {
3947 if ($qxlnum > 1) {
3948 if ($winversion){
3949 for (my $i = 1; $i < $qxlnum; $i++){
3950 push @$devices, '-device', print_vga_device(
3951 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3952 }
3953 } else {
3954 # assume other OS works like Linux
3955 my ($ram, $vram) = ("134217728", "67108864");
3956 if ($vga->{memory}) {
3957 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3958 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3959 }
3960 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3961 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3962 }
3963 }
3964
3965 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3966
3967 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3968 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3969 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3970
3971 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3972 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3973 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3974
3975 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3976 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3977
3978 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3979 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3980 if ($spice_enhancement->{foldersharing}) {
3981 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3982 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3983 }
3984
3985 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3986 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3987 if $spice_enhancement->{videostreaming};
3988
3989 push @$devices, '-spice', "$spice_opts";
3990 }
3991
3992 # enable balloon by default, unless explicitly disabled
3993 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3994 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3995 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3996 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3997 push @$devices, '-device', $ballooncmd;
3998 }
3999
4000 if ($conf->{watchdog}) {
4001 my $wdopts = parse_watchdog($conf->{watchdog});
4002 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
4003 my $watchdog = $wdopts->{model} || 'i6300esb';
4004 push @$devices, '-device', "$watchdog$pciaddr";
4005 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
4006 }
4007
4008 my $vollist = [];
4009 my $scsicontroller = {};
4010 my $ahcicontroller = {};
4011 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
4012
4013 # Add iscsi initiator name if available
4014 if (my $initiator = get_initiator_name()) {
4015 push @$devices, '-iscsi', "initiator-name=$initiator";
4016 }
4017
4018 PVE::QemuConfig->foreach_volume($conf, sub {
4019 my ($ds, $drive) = @_;
4020
4021 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
4022 check_volume_storage_type($storecfg, $drive->{file});
4023 push @$vollist, $drive->{file};
4024 }
4025
4026 # ignore efidisk here, already added in bios/fw handling code above
4027 return if $drive->{interface} eq 'efidisk';
4028 # similar for TPM
4029 return if $drive->{interface} eq 'tpmstate';
4030
4031 $use_virtio = 1 if $ds =~ m/^virtio/;
4032
4033 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
4034
4035 if ($drive->{interface} eq 'virtio'){
4036 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4037 }
4038
4039 if ($drive->{interface} eq 'scsi') {
4040
4041 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
4042
4043 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4044 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4045
4046 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
4047 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
4048
4049 my $iothread = '';
4050 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4051 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4052 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
4053 } elsif ($drive->{iothread}) {
4054 log_warn(
4055 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4056 );
4057 }
4058
4059 my $queues = '';
4060 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4061 $queues = ",num_queues=$drive->{queues}";
4062 }
4063
4064 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4065 if !$scsicontroller->{$controller};
4066 $scsicontroller->{$controller}=1;
4067 }
4068
4069 if ($drive->{interface} eq 'sata') {
4070 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
4071 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4072 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4073 if !$ahcicontroller->{$controller};
4074 $ahcicontroller->{$controller}=1;
4075 }
4076
4077 my $pbs_conf = $pbs_backing->{$ds};
4078 my $pbs_name = undef;
4079 if ($pbs_conf) {
4080 $pbs_name = "drive-$ds-pbs";
4081 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4082 }
4083
4084 my $drive_cmd = print_drive_commandline_full(
4085 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
4086
4087 # extra protection for templates, but SATA and IDE don't support it..
4088 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4089
4090 push @$devices, '-drive',$drive_cmd;
4091 push @$devices, '-device', print_drivedevice_full(
4092 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4093 });
4094
4095 for (my $i = 0; $i < $MAX_NETS; $i++) {
4096 my $netname = "net$i";
4097
4098 next if !$conf->{$netname};
4099 my $d = parse_net($conf->{$netname});
4100 next if !$d;
4101 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4102
4103 $use_virtio = 1 if $d->{model} eq 'virtio';
4104
4105 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4106
4107 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4108 push @$devices, '-netdev', $netdevfull;
4109
4110 my $netdevicefull = print_netdevice_full(
4111 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4112
4113 push @$devices, '-device', $netdevicefull;
4114 }
4115
4116 if ($conf->{ivshmem}) {
4117 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4118
4119 my $bus;
4120 if ($q35) {
4121 $bus = print_pcie_addr("ivshmem");
4122 } else {
4123 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4124 }
4125
4126 my $ivshmem_name = $ivshmem->{name} // $vmid;
4127 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4128
4129 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4130 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4131 .",size=$ivshmem->{size}M";
4132 }
4133
4134 # pci.4 is nested in pci.1
4135 $bridges->{1} = 1 if $bridges->{4};
4136
4137 if (!$q35) { # add pci bridges
4138 if (min_version($machine_version, 2, 3)) {
4139 $bridges->{1} = 1;
4140 $bridges->{2} = 1;
4141 }
4142 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4143 }
4144
4145 for my $k (sort {$b cmp $a} keys %$bridges) {
4146 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4147
4148 my $k_name = $k;
4149 if ($k == 2 && $legacy_igd) {
4150 $k_name = "$k-igd";
4151 }
4152 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4153 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4154
4155 if ($q35) { # add after -readconfig pve-q35.cfg
4156 splice @$devices, 2, 0, '-device', $devstr;
4157 } else {
4158 unshift @$devices, '-device', $devstr if $k > 0;
4159 }
4160 }
4161
4162 if (!$kvm) {
4163 push @$machineFlags, 'accel=tcg';
4164 }
4165
4166 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
4167
4168 my $machine_type_min = $machine_type;
4169 if ($add_pve_version) {
4170 $machine_type_min =~ s/\+pve\d+$//;
4171 $machine_type_min .= "+pve$required_pve_version";
4172 }
4173 push @$machineFlags, "type=${machine_type_min}";
4174
4175 push @$cmd, @$devices;
4176 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4177 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4178 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4179
4180 if (my $vmstate = $conf->{vmstate}) {
4181 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4182 push @$vollist, $vmstate;
4183 push @$cmd, '-loadstate', $statepath;
4184 print "activating and using '$vmstate' as vmstate\n";
4185 }
4186
4187 if (PVE::QemuConfig->is_template($conf)) {
4188 # needed to workaround base volumes being read-only
4189 push @$cmd, '-snapshot';
4190 }
4191
4192 # add custom args
4193 if ($conf->{args}) {
4194 my $aa = PVE::Tools::split_args($conf->{args});
4195 push @$cmd, @$aa;
4196 }
4197
4198 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
4199}
4200
# Sanity-check the host file used as entropy source for a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first line of the
# hw_random 'rng_current' sysfs file reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    -e $source
        or die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";

    my $current_rng_path = '/sys/devices/virtual/misc/hw_random/rng_current';
    if ($source eq '/dev/hwrng' && file_read_firstline($current_rng_path) eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current
        # cannot be changed to 'none' manually, so once the VM is past this point, it's no
        # longer an issue.
        my $msg = "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            . " '$current_rng_path' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            . " to the host.\n";
        die $msg;
    }
}
4217
# Ask the running VM for its SPICE port via 'query-spice'.
#
# Returns the 'tls-port' value if it is set (true), otherwise the 'port' value; dies if
# neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    for my $key ('tls-port', 'port') {
        return $spice_info->{$key} if $spice_info->{$key};
    }

    die "no spice port\n";
}
4225
# Collect the IDs of devices currently present in the running VM.
#
# Returns a hash reference keyed by device ID, filled from several monitor queries:
#  - 'query-pci':   every PCI device with a non-empty 'qdev_id'; for a PCI bridge the value is
#                   1 plus the number of devices directly behind it, otherwise 1
#  - 'query-block': every block device named 'drive-<id>' is recorded as '<id>'
#  - 'query-mice':  'tablet' if a 'QEMU HID Tablet' is reported
#  - 'qom-list' on /machine/peripheral: entries named usbN / usbredirdevN
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    for my $pcibus (@$res) {
        push @$devices_to_check, @{$pcibus->{devices}};
    }

    # walk the PCI tree level by level, descending into bridges
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            my $qdev_id = $d->{'qdev_id'};
            $devices->{$qdev_id} = 1 if $qdev_id;

            next if !$d->{'pci_bridge'} || !$d->{'pci_bridge'}->{devices};
            my $children = $d->{'pci_bridge'}->{devices};

            # only count children for bridges that have an ID, otherwise the count would end up
            # under an empty/undefined hash key
            $devices->{$qdev_id} += scalar(@$children) if $qdev_id;
            push @$to_check, @$children;
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    for my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    for my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb(?:redirdev)?\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4275
# Hotplug the device $deviceid into the running VM $vmid.
#
# $device is the parsed configuration of the device (unused for IDs that carry no own config,
# e.g. 'tablet', 'keyboard' or 'pci.N'). Returns 1 if the device is already present or was
# plugged; returns nothing if adding the netdev for a 'netN' device did not succeed. Dies on
# failure or if the device ID is not hot-pluggable.
#
# For drives and network devices the backend (drive/netdev) is added first; if adding or
# verifying the frontend device fails afterwards, the backend is removed again before the
# error is re-thrown.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # device_add is part of the guarded section, so that a failure there does not leave the
        # just added drive behind (same as for scsi disks below)
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the NIC device string is built for the PXE machine type, not for the $machine_type
        # passed in by the caller
        my $net_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($net_machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($net_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $net_machine_type, $machine_version);

        # device_add is part of the guarded section, so that a failure there does not leave the
        # just added netdev behind
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4372
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 if the device is not present (any more) or was removed. Dies if the device is one
# of the configured boot disks or if its ID is not of a hot-unpluggable kind.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    for my $bootdisk (@$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n" if $bootdisk eq $deviceid;
    }

    # request removal of the frontend device and wait until it is gone
    my $remove_and_wait = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        # removal is only requested here, not verified
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usbredirdev\d+$/ || $deviceid =~ m/^usb\d+$/) {
        $remove_and_wait->();
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_and_wait->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        $remove_and_wait->();
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_and_wait->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        $remove_and_wait->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4421
# Add a 'spicevmc' character device of type 'usbredir' with the given $id to the running VM
# (monitor command 'chardev-add').
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => {
            type => "usbredir",
        },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4435
# Hotplug a device described by a '<driver>,key=value,...' string (monitor command
# 'device_add').
#
# The string is split on every '=' and ',' into alternating keys and values, so neither keys
# nor values may contain those characters.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    # the string starts with the bare driver name, prefix it to get a uniform key=value list
    my %options = split(/[=,]/, "driver=$devicefull");

    mon_cmd($vmid, "device_add", %options);
}
4444
# Request removal of the device $deviceid from the running VM (monitor command 'device_del')
# and return the monitor's answer.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $result = mon_cmd($vmid, 'device_del', id => $deviceid);
    return $result;
}
4450
# If $device has 'iothread' set, make sure the object 'iothread-$deviceid' exists in the
# running VM, creating it when it is not listed yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
    }
}
4459
# If $device has 'iothread' set, remove the object 'iothread-$deviceid' from the running VM
# when it is currently listed.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    if ($device->{iothread}) {
        my $object_id = "iothread-$deviceid";
        my $existing = vm_iothreads_list($vmid);
        qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
    }
}
4468
# Create an object of QOM type $qomtype with ID $objectid in the running VM (monitor command
# 'object-add'). Always returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @object_args = (id => $objectid, 'qom-type' => $qomtype);
    mon_cmd($vmid, 'object-add', @object_args);

    return 1;
}
4476
# Delete the object with ID $objectid from the running VM (monitor command 'object-del').
# Always returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @object_args = (id => $objectid);
    mon_cmd($vmid, 'object-del', @object_args);

    return 1;
}
4484
# Add the drive backend for $device to the running VM using the human monitor command
# 'drive_add'. Returns 1 on success, dies with the monitor's answer otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double all backslashes, as the drive string gets embedded into a quoted HMP argument
    my $escaped = $drive =~ s/\\/\\\\/gr;

    my $answer = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$escaped\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $answer\n" if $answer !~ m/OK/s;

    return 1;
}
4499
# Remove the drive backend 'drive-$deviceid' from the running VM using the human monitor
# command 'drive_del'. Returns 1 if the answer is empty or reports the device as not found,
# dies with the monitor's answer otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $answer = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $answer =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($answer eq "" || $answer =~ m/Device \'.*?\' not found/s) {
        return 1;
    }

    die "deleting drive $deviceid failed : $answer\n";
}
4513
# Wait for a hotplugged device to show up in the VM's device list.
#
# The list is checked up to six times, sleeping one second after each unsuccessful check.
# Returns 1 as soon as $deviceid is listed, dies if it never is.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4525
4526
# Wait for an unplugged device to vanish from the VM's device list.
#
# The list is checked up to six times, sleeping one second after each unsuccessful check.
# Returns 1 as soon as $deviceid is no longer listed, dies if it stays listed.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4541
# Make sure the SCSI controller that the SCSI drive $device belongs to (as determined by
# scsihw_infos()) is present in the running VM, hotplugging it if it is not listed yet.
# Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$prefix$controller";

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$controller_id});

    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type);

    return 1;
}
4556
# Remove the SCSI controller belonging to the SCSI drive $opt, if it is not needed any more.
#
# With 'virtio-scsi-single' the controller 'virtioscsi<index>' of the drive is unplugged
# unconditionally. Otherwise 'scsihw<controller>' is only unplugged if no listed SCSI drive
# has an index below ($maxdev - 1) * ($controller + 1). Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $is_single = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($is_single) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    my $present = vm_devices_list($vmid);
    for my $name (keys %$present) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        next if $drive->{interface} ne 'scsi';

        # controller still has a drive attached, keep it
        return 1 if $drive->{index} < (($maxdev - 1) * ($controller + 1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4585
# Hotplug the PCI bridge required by the device ID $device, if there is one and it is not
# present in the running VM yet. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # the address itself is not needed here, print_pci_addr() is only called to have it record
    # the bridge used by the device in $bridges
    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # NOTE(review): if more than one bridge got recorded, the one picked here depends on the
    # hash iteration order (unchanged from the previous each() based loop) - confirm that
    # print_pci_addr() records at most one bridge per device.
    my $bridgeid = (keys %$bridges)[-1];
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # a bridge has no device configuration of its own; pass an explicit undef for the
        # $device parameter of vm_deviceplug(), so that $arch and $machine_type end up in
        # their proper positions
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4609
# Set the link state of the network device $device in the running VM (monitor command
# 'set_link'); a true $up means link up, a false one link down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4616
# Add the network backend for $deviceid to the running VM (monitor command 'netdev_add').
#
# The option string produced by print_netdev_full() is split on every '=' and ',' into
# alternating keys and values; 'vhost' is converted to a JSON boolean and 'queues' to a
# number before sending. Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4634
# Remove the network backend with ID $deviceid from the running VM (monitor command
# 'netdev_del').
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @netdev_args = (id => $deviceid);

    mon_cmd($vmid, 'netdev_del', @netdev_args);
}
4640
# Replace the USB device $deviceid of the running VM with the one described by $device.
#
# Does nothing if $device is not set. Otherwise the currently plugged device with that ID is
# unplugged, an 'xhci' controller is added if none is listed, and the new device is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    my $present = vm_devices_list($vmid);
    unless ($present->{xhci}) {
        my $xhci_addr = print_pci_addr("xhci", undef, $arch, $machine_type);
        my $xhci_device = PVE::QemuServer::USB::print_qemu_xhci_controller($xhci_addr);
        qemu_deviceadd($vmid, $xhci_device);
    }

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4660
# Change the number of vCPUs of the running VM $vmid to $vcpus.
#
# A false $vcpus means "all" (sockets * cores). vCPUs are plugged/unplugged one at a time;
# after each successful step the reported vCPU count is stored in $conf->{vcpus} and the
# configuration is written out. Dies if more vCPUs than sockets * cores are requested, if
# the running vCPU count does not match the configuration before plugging, or if unplugging
# is requested on a machine version older than 2.7.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until exactly $expected vCPUs are reported and
    # return the reported list. Raises a parameter exception with $errmsg for 'vcpus' when the
    # count still differs and the retry counter exceeds $max_retries.
    my $wait_for_vcpu_count = sub {
        my ($expected, $max_retries, $errmsg) = @_;

        my $retry = 0;
        while (1) {
            my $running = mon_cmd($vmid, "query-cpus-fast");
            return $running if scalar(@$running) == $expected;
            raise_param_exc({ vcpus => $errmsg }) if $retry > $max_retries;
            $retry++;
            sleep 1;
        }
    };

    # persist the vCPU count actually reported by QEMU
    my $record_vcpus = sub {
        my ($running) = @_;
        $conf->{vcpus} = scalar(@$running);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # unplug from the highest vCPU downwards
        for my $i (reverse(($vcpus + 1) .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$i");
            my $running = $wait_for_vcpu_count->($i - 1, 5, "error unplugging cpu$i");
            # update conf after each successful cpu unplug
            $record_vcpus->($running);
        }

        return;
    }

    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$currentrunningvcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for my $i (($currentvcpus + 1) .. $vcpus) {
            qemu_deviceadd($vmid, print_cpu_device($conf, $i));
            my $running = $wait_for_vcpu_count->($i, 10, "error hotplugging cpu$i");
            # update conf after each successful cpu hotplug
            $record_vcpus->($running);
        }
    } else {
        # older machine versions use the 'cpu-add' monitor command with zero-based IDs
        for my $i ($currentvcpus .. ($vcpus - 1)) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4735
# Apply I/O throttling limits to the block device $deviceid of VM $vmid (monitor command
# 'block_set_io_throttle'). Does nothing if the VM is not running.
#
# After $vmid and $deviceid, the limits are expected positionally in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Each value is truncated to an integer before it is sent.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # names of the monitor arguments, in the order of the positional parameters
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    mon_cmd(
        $vmid,
        "block_set_io_throttle",
        device => $deviceid,
        map { ($limit_names[$_] => int($limits[$_])) } 0 .. $#limit_names,
    );
}
4767
# Resize the volume $volid to $size via the storage layer and, if the VM is running, tell
# QEMU about the new size of $deviceid (monitor command 'block_resize').
#
# The size passed to QEMU is rounded up to the next multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    # number of bytes missing to the next multiple of 1024 (0 if already aligned)
    my $padding = (1024 - $size % 1024) % 1024;
    my $aligned_size = $size + $padding;

    mon_cmd(
        $vmid,
        "block_resize",
        device => $deviceid,
        size => int($aligned_size),
        timeout => 60,
    );
}
4788
# Create the snapshot $snap of the volume $volid.
#
# If the VM is running and do_snapshots_with_qemu() is true for the volume, QEMU creates the
# snapshot ('blockdev-snapshot-internal-sync'); otherwise the storage layer does.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if (!$running || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4800
# Delete the snapshot $snap of the volume $volid.
#
# The volume only counts as "in use" if the VM is running and one of the volumes of its
# current configuration is $volid. If it is in use and do_snapshots_with_qemu() is true for
# it, QEMU deletes the snapshot ('blockdev-snapshot-delete-internal-sync'); otherwise the
# storage layer does, getting the in-use state passed along.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # running alone is not enough, the volume must also be referenced by the config
        $in_use = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        });
    }

    if (!$in_use || !do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap);
    }
}
4822
# Set the migration capabilities of the running VM $vmid.
#
# Every capability reported by 'query-migrate-capabilities' is explicitly switched on or off:
# 'auto-converge' and 'xbzrle' are enabled, 'dirty-bitmaps' is enabled if
# 'query-proxmox-support' reports the matching feature ('pbs-dirty-bitmap-savevm' when
# $savevm is set, 'pbs-dirty-bitmap-migration' otherwise), everything else is disabled.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # errors of this query are ignored, the bitmap feature then counts as unsupported
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $enabled_cap = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported_capabilities = mon_cmd($vmid, "query-migrate-capabilities");

    my $cap_ref = [
        map {
            my $name = $_->{capability};
            {
                capability => $name,
                state => $enabled_cap->{$name} ? JSON::true : JSON::false,
            };
        } @$supported_capabilities
    ];

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => $cap_ref);
}
4853
# Call $func->($volid, $attr, @param) once for every volume referenced by $conf, looking at
# the current configuration, the pending changes and all snapshots (including unused
# volumes and the 'vmstate' volume).
#
# $attr is a hash with the properties gathered over all references to the volume:
#   cdrom                  - 1 only if every reference is a CD-ROM drive
#   replicate              - 1 if any reference has replication enabled (the default)
#   shared                 - 1 if any reference is marked shared
#   is_unused              - 1 if referenced by an 'unusedN' key
#   is_attached            - 1 if referenced by the current config (not pending, not a
#                            snapshot) while not flagged as unused
#   referenced_in_snapshot - hash of snapshot names referencing the volume
#   referenced_in_pending  - 1 if referenced in the pending section
#   size                   - first size seen for the volume
#   is_vmstate             - 1 if referenced by the 'vmstate' key
#   is_tpmstate            - 1 if referenced by the 'tpmstate0' key
#   drivename              - last valid drive name the volume was referenced by
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = ($volhash->{$volid} //= {});

        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        $attr->{replicate} //= 0;
        $attr->{replicate} = 1 if ($drive->{replicate} // 1);

        $attr->{shared} //= 0;
        $attr->{shared} = 1 if $drive->{shared};

        $attr->{is_unused} //= 0;
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $attr->{is_attached} //= 0;
        if (!$attr->{is_unused} && !defined($snapname) && !$pending) {
            $attr->{is_attached} = 1;
        }

        $attr->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname);

        $attr->{referenced_in_pending} = 1 if $pending;

        my $size = $drive->{size};
        $attr->{size} //= $size if $size;

        $attr->{is_vmstate} //= 0;
        $attr->{is_vmstate} = 1 if $key eq 'vmstate';

        $attr->{is_tpmstate} //= 0;
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current configuration
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);

    # pending changes
    if (defined($conf->{pending}) && %{$conf->{pending}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{pending}, $include_opts, $record_volume, undef, 1);
    }

    # snapshots
    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4918
# Options that vmconfig_hotplug_pending() moves from the pending section into the
# configuration right away.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    )
};

# all cloud-init options are handled the same way
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4937
4938# hotplug changes in [PENDING]
4939# $selection hash can be used to only apply specified options, for
4940# example: { cores => 1 } (only apply changed 'cores')
4941# $errors ref is used to return error messages
sub vmconfig_hotplug_pending {
    my ($vmid, $conf, $storecfg, $selection, $errors) = @_;

    # Tries to apply the entries of $conf->{pending} (including pending
    # deletions) to the VM without a restart.
    #
    # Flow, in order:
    #   1. options listed in $fast_plug_option are committed unconditionally
    #   2. pending deletions are processed (restricted by $selection, if given)
    #   3. pending additions/changes are processed (restricted by $selection)
    #   4. a pending cloud-init drive is handled last, after all other changes
    #   5. the xhci controller is unplugged if no usbN entry is left
    #
    # An option whose handler dies with "skip\n" simply stays pending; any
    # other error is stored in $errors->{$opt}. The config is written back
    # before returning.

    my $defaults = load_defaults();
    my $arch = get_vm_arch($conf);
    my $machine_type = get_vm_machine($conf, undef, $arch);

    # commit values which do not have any impact on running VM first
    # Note: those options cannot raise errors, so we do not care about
    # $selection and always apply them.

    my $add_error = sub {
        my ($opt, $msg) = @_;
        $errors->{$opt} = "hotplug problem - $msg";
    };

    my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();

    # Keeps $conf->{cloudinit} up to date for options listed in
    # $cloudinit_pending_properties: $ci->{$opt} holds the recorded old value
    # of a changed/removed option, $ci->{added} a comma separated list of
    # options that were newly added.
    my $cloudinit_record_changed = sub {
        my ($conf, $opt, $old, $new) = @_;
        return if !$cloudinit_pending_properties->{$opt};

        my $ci = ($conf->{cloudinit} //= {});

        my $recorded = $ci->{$opt};
        my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');

        if (defined($new)) {
            if (defined($old)) {
                # an existing value is being modified
                if (defined($recorded)) {
                    # the value was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value was changed multiple times, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added and is being changed, do nothing
                } else {
                    # the value is new, record it:
                    $ci->{$opt} = $old;
                }
            } else {
                # a new value is being added
                if (defined($recorded)) {
                    # it was already not in sync
                    if ($new eq $recorded) {
                        # a value is being reverted to the cloud-init state:
                        delete $ci->{$opt};
                        delete $added{$opt};
                    } else {
                        # the value had temporarily been removed, do nothing
                    }
                } elsif ($added{$opt}) {
                    # the value had been marked as added already, do nothing
                } else {
                    # the value is new, add it
                    $added{$opt} = 1;
                }
            }
        } elsif (!defined($old)) {
            # a non-existent value is being removed? ignore...
        } else {
            # a value is being deleted
            if (defined($recorded)) {
                # a value was already recorded, just keep it
            } elsif ($added{$opt}) {
                # the value was marked as added, remove it
                delete $added{$opt};
            } else {
                # a previously unrecorded value is being removed, record the old value:
                $ci->{$opt} = $old;
            }
        }

        my $added = join(',', sort keys %added);
        $ci->{added} = $added if length($added);
    };

    my $changes = 0;
    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        if ($fast_plug_option->{$opt}) {
            my $new = delete $conf->{pending}->{$opt};
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
            $conf->{$opt} = $new;
            $changes = 1;
        }
    }

    if ($changes) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }

    my $ostype = $conf->{ostype};
    my $version = extract_version($machine_type, get_running_qemu_version($vmid));
    my $hotplug_features = parse_hotplug_features($conf->{hotplug} // '1');
    my $usb_hotplug = $hotplug_features->{usb}
        && min_version($version, 7, 1)
        && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);

    my $cgroup = PVE::QemuServer::CGroup->new($vmid);
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});

    foreach my $opt (sort keys %$pending_delete_hash) {
        next if $selection && !$selection->{$opt};
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt eq 'hotplug') {
                # 'hotplug' may be unset in the config; treat that as "no memory
                # hotplug" instead of matching against an undefined value
                die "skip\n" if ($conf->{hotplug} // '') =~ /memory/;
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($defaults->{tablet}) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
                        if $arch eq 'aarch64';
                } else {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, undef);
            } elsif ($opt eq 'balloon') {
                # enable balloon device is not hotpluggable
                die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
                # here we reset the ballooning value to memory
                my $balloon = $conf->{memory} || $defaults->{memory};
                mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
            } elsif ($fast_plug_option->{$opt}) {
                # do nothing
            } elsif ($opt =~ m/^net(\d+)$/) {
                die "skip\n" if !$hotplug_features->{network};
                vm_deviceunplug($vmid, $conf, $opt);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
                vm_deviceunplug($vmid, $conf, $opt);
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^memory$/) {
                die "skip\n" if !$hotplug_features->{memory};
                PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
            } elsif ($opt eq 'cpuunits') {
                $cgroup->change_cpu_shares(undef);
            } elsif ($opt eq 'cpulimit') {
                $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
            } else {
                die "skip\n";
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            my $old = delete $conf->{$opt};
            $cloudinit_record_changed->($conf, $opt, $old, undef);
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        }
    }

    my $cloudinit_opt;
    foreach my $opt (keys %{$conf->{pending}}) {
        next if $selection && !$selection->{$opt};
        my $value = $conf->{pending}->{$opt};
        eval {
            if ($opt eq 'hotplug') {
                # switching memory hotplug on or off is not hotpluggable; an unset
                # current 'hotplug' value counts as "memory hotplug disabled"
                die "skip\n" if $value =~ /memory/ || ($conf->{hotplug} // '') =~ /memory/;
            } elsif ($opt eq 'tablet') {
                die "skip\n" if !$hotplug_features->{usb};
                if ($value == 1) {
                    vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
                    vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
                        if $arch eq 'aarch64';
                } elsif ($value == 0) {
                    vm_deviceunplug($vmid, $conf, 'tablet');
                    vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
                }
            } elsif ($opt =~ m/^usb(\d+)$/) {
                my $index = $1;
                die "skip\n" if !$usb_hotplug;
                # NOTE(review): a parse failure is swallowed by this inner eval
                my $d = eval { parse_property_string('pve-qm-usb', $value) };
                my $id = $opt;
                if ($d->{host} =~ m/^spice$/i) {
                    $id = "usbredirdev$index";
                }
                qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
            } elsif ($opt eq 'vcpus') {
                die "skip\n" if !$hotplug_features->{cpu};
                qemu_cpu_hotplug($vmid, $conf, $value);
            } elsif ($opt eq 'balloon') {
                # enable/disable ballooning device is not hotpluggable
                my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
                my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
                die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;

                # allow manual ballooning if shares is set to zero
                if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
                    my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
                    mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
                }
            } elsif ($opt =~ m/^net(\d+)$/) {
                # some changes can be done without hotplug
                vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif (is_valid_drivename($opt)) {
                die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
                # some changes can be done without hotplug
                my $drive = parse_drive($opt, $value);
                if (drive_is_cloudinit($drive)) {
                    $cloudinit_opt = [$opt, $drive];
                    # apply all the other changes first, then generate the cloudinit disk
                    die "skip\n";
                }
                vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                    $vmid, $opt, $value, $arch, $machine_type);
            } elsif ($opt =~ m/^memory$/) { #dimms
                die "skip\n" if !$hotplug_features->{memory};
                $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
            } elsif ($opt eq 'cpuunits') {
                my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
                $cgroup->change_cpu_shares($new_cpuunits);
            } elsif ($opt eq 'cpulimit') {
                my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
                $cgroup->change_cpu_quota($cpulimit, 100000);
            } elsif ($opt eq 'agent') {
                vmconfig_update_agent($conf, $opt, $value);
            } else {
                die "skip\n"; # skip non-hot-pluggable options
            }
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    if (defined($cloudinit_opt)) {
        my ($opt, $drive) = @$cloudinit_opt;
        my $value = $conf->{pending}->{$opt};
        eval {
            # generate the image against a config copy that already contains the new drive
            my $temp = {%$conf, $opt => $value};
            PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
            vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
                $vmid, $opt, $value, $arch, $machine_type);
        };
        if (my $err = $@) {
            $add_error->($opt, $err) if $err ne "skip\n";
        } else {
            $conf->{$opt} = $value;
            delete $conf->{pending}->{$opt};
        }
    }

    # unplug xhci controller if no usb device is left
    if ($usb_hotplug) {
        my $has_usb = 0;
        for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
            next if !defined($conf->{"usb$i"});
            $has_usb = 1;
            last;
        }
        if (!$has_usb) {
            vm_deviceunplug($vmid, $conf, 'xhci');
        }
    }

    PVE::QemuConfig->write_config($vmid, $conf);

    if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
        PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
    }
}
5222
# Frees the volume behind $drive if it may be removed.
#
# Only acts when $force is set or $key names an 'unused' entry, and never on
# cdrom drives. Returns 1 when the drive entry can be dropped from the config
# (the volume was freed, or it is not owned by this VM), nothing otherwise.
# Dies if the permission check fails or the volume is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
5245
# Handles removal of drive option $opt from $conf: with $force the caller's
# 'VM.Config.Disk' permission is checked and deallocation is attempted,
# otherwise the drive is handed to vmconfig_register_unused_drive.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # detach only
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5261
5262
5263
# Applies all pending changes of $conf directly to the config (cold plug).
#
# Pending deletions are handled first, then pending additions/changes. A
# failure for one option is stored in $errors->{$opt} and warned about; that
# option stays pending. The config is written once at the end. If a cloud-init
# drive was among the applied changes (and $skip_cloud_init is not set), the
# cloud-init config is applied afterwards.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $report_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};
        eval {
            if ($opt =~ m/^unused/) {
                die "internal error";
            } elsif (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            }
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }
        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # undef: decide below, 0: caller asked to skip generation
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        # a drive that gets replaced is kept around as unused volume
        eval {
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                my $current = parse_drive($opt, $conf->{$opt});
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $current);
            }
        };
        if (my $err = $@) {
            $report_failure->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    if ($generate_cloudinit && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
        # After successful generation and if there were changes to be applied, update the
        # config to drop the {cloudinit} entry.
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
5330
# Applies a changed network device setting ($opt => $value) to a running VM.
#
# If only bridge, tag, trunks, firewall, rate or link state differ, the
# existing tap interface is reconfigured and 1 is returned. Any other change
# of an existing device requires hot-unplugging it and plugging the new one.
# Dies with "skip\n" when that is needed but $hotplug is not enabled.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug = safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            my ($index) = $opt =~ m/net(\d+)/;
            die "internal error" if !defined($index);
            my $iface = "tap${vmid}i${index}";

            my $tap_changed = safe_string_ne($oldnet->{bridge}, $newnet->{bridge})
                || safe_num_ne($oldnet->{tag}, $newnet->{tag})
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($tap_changed) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5384
# Checks whether a pending guest agent change can be applied without restart.
#
# Dies with "skip\n" if the agent option is currently unset/false, or if any
# agent sub-option other than the ones in $hotplug_options differs between the
# current and the new value. Returns nothing otherwise.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # number of non-hotpluggable keys of $from whose value differs in $to
    my $cold_differences = sub {
        my ($from, $to) = @_;
        return scalar grep {
            !defined($hotplug_options->{$_}) && safe_string_ne($from->{$_}, $to->{$_})
        } keys %$from;
    };

    die "skip\n" if $cold_differences->($agent, $old_agent); # added/changed options
    die "skip\n" if $cold_differences->($old_agent, $agent); # removed options

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5407
# Applies a changed drive setting ($opt => $value) to a running VM.
#
# - existing cdrom: the medium is ejected and, if a path is resolved, the new
#   one inserted; returns 1
# - existing disk, same volume: dies with "skip\n" if a non-hotpluggable
#   property changed, otherwise updates the I/O throttle if needed; returns 1
# - existing disk, different volume: the old disk is unplugged and registered
#   as unused, then the new one is hotplugged
# - no existing drive: the new disk is hotplugged
# Hotplugging dies with "skip\n" if $hotplug is off or for ide/sata drives.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @cold_only = qw(aio discard iothread queues cache ssd ro);
            die "skip\n"
                if grep { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_only;

            # apply throttle
            my @rate_keys = qw(mbps mbps_rd mbps_wr);
            my @iops_keys = qw(iops iops_rd iops_wr);
            my @rate_max_keys = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max_keys = qw(iops_max iops_rd_max iops_wr_max);
            my @length_keys = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed = grep {
                safe_num_ne($drive->{$_}, $old_drive->{$_})
            } (@rate_keys, @iops_keys, @rate_max_keys, @iops_max_keys, @length_keys);

            if ($throttle_changed) {
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0)*1024*1024 } @rate_keys),
                    (map { $drive->{$_} || 0 } @iops_keys),
                    (map { ($drive->{$_} || 0)*1024*1024 } @rate_max_keys),
                    (map { $drive->{$_} || 0 } @iops_max_keys),
                    (map { $drive->{$_} || 1 } @length_keys),
                );
            }

            return 1;
        }

        # the volume changed
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    if ($drive->{file} !~ m|^/dev/.+|) {
        PVE::Storage::activate_volumes($storecfg, [$drive->{file}]);
    }

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5517
# Applies the cloud-init config for the VM's cloud-init drive and, if the VM
# is running, swaps the medium of that drive. Does nothing when the config
# contains no cloud-init drive. The config is written back when
# apply_cloudinit_config returns true.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    # if several cloud-init drives are configured, the last one visited wins
    my ($ci_key, $ci_drive);
    my $remember_cloudinit = sub {
        my ($ds, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($ci_key, $ci_drive) = ($ds, $drive);
    };
    PVE::QemuConfig->foreach_volume($conf, $remember_cloudinit);

    return if !$ci_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $ci_drive->{file});
        if ($path) {
            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$path");
        }
    }
}
5548
# called in locked context by incoming migration
#
# Collects the volumes of $conf that live on non-shared storages, skipping
# cdrom drives and tpmstate0. Returns a hash reference mapping the drive key
# to [$volid, $storeid, $volname, $drive, $use_existing], where $use_existing
# is 1 for volumes listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $ds eq 'tpmstate0';

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        return if PVE::Storage::storage_config($storecfg, $storeid)->{shared};

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local{$ds} = [$volid, $storeid, $volname, $drive, $use_existing];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local;
}
5575
# called in locked context by incoming migration
#
# Allocates target volumes for the entries of $source_volumes (drive key =>
# [$volid, $storeid, $volname, $drive, $use_existing, $format]).
#
# Returns a hash reference mapping each drive key to { drivestr, volid } of
# the volume to use on this node. Entries flagged $use_existing are not
# allocated again; they keep their volid and get 'replicated' set to 1.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $volname, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        # storage mapping + volname = regular migration
        # storage mapping + format = remote migration
        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. format of current volume
        # 3. default format of storage
        if (!$storagemap->{identity}) {
            $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);
            my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
            if (!$format || !grep { $format eq $_ } @$validFormats) {
                # Drop a requested format the target storage cannot provide, else the
                # '//=' fallback below would never kick in and the unsupported format
                # would be passed on to vdisk_alloc.
                undef $format;

                if ($volname) {
                    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                    my $fileFormat = qemu_img_format($scfg, $volname);
                    $format = $fileFormat
                        if grep { $fileFormat eq $_ } @$validFormats;
                }
                $format //= $defFormat;
            }
        } else {
            # can't happen for remote migration, so $volname is always defined
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }

        # $drive->{size} is divided by 1024 before being handed to vdisk_alloc
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # NOTE: $newdrive is the same hash as $drive, so the drive passed in via
        # $source_volumes is updated in place
        my $newdrive = $drive;
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;
        my $drivestr = print_drive($newdrive);
        $nbd->{$opt}->{drivestr} = $drivestr;
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5627
# Starts a VM while holding its config lock.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $allocated = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $allocated;

            # point the config at the volumes that will be used on this node
            for my $opt (keys %$allocated) {
                $conf->{$opt} = $allocated->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5671
5672
5673# params:
5674# statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5675# skiplock => 0/1, skip checking for config lock
5676# skiptemplate => 0/1, skip checking whether VM is template
5677# forcemachine => to force QEMU machine (rollback/migration)
5678# forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5679# timeout => in seconds
5680# paused => start VM in paused state (backup)
5681# resume => resume from hibernation
5682# pbs-backing => {
5683# sata0 => {
5684# repository
5685# snapshot
5686# keyfile
5687# archive
5688# },
5689# virtio2 => ...
5690# }
5691# migrate_opts:
5692# nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5693# migratedfrom => source node
5694# spice_ticket => used for spice migration, passed via tunnel/stdin
5695# network => CIDR of migration network
5696# type => secure/insecure - tunnel over encrypted connection or plain-text
5697# nbd_proto_version => int, 0 for TCP, 1 for UNIX
5698# replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5699# offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5700# contained in config
5701sub vm_start_nolock {
5702 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5703
5704 my $statefile = $params->{statefile};
5705 my $resume = $params->{resume};
5706
5707 my $migratedfrom = $migrate_opts->{migratedfrom};
5708 my $migration_type = $migrate_opts->{type};
5709
5710 my $res = {};
5711
5712 # clean up leftover reboot request files
5713 eval { clear_reboot_request($vmid); };
5714 warn $@ if $@;
5715
5716 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5717 vmconfig_apply_pending($vmid, $conf, $storecfg);
5718 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5719 }
5720
5721 # don't regenerate the ISO if the VM is started as part of a live migration
5722 # this way we can reuse the old ISO with the correct config
5723 if (!$migratedfrom) {
5724 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5725 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5726 # $conf->{cloudinit}, so we could just not do this?
5727 # But we do it above, so for now let's be consistent.
5728 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5729 }
5730 }
5731
5732 # override offline migrated volumes, conf is out of date still
5733 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5734 for my $key (sort keys $offline_volumes->%*) {
5735 my $parsed = parse_drive($key, $conf->{$key});
5736 $parsed->{file} = $offline_volumes->{$key};
5737 $conf->{$key} = print_drive($parsed);
5738 }
5739 }
5740
5741 my $defaults = load_defaults();
5742
5743 # set environment variable useful inside network script
5744 # for remote migration the config is available on the target node!
5745 if (!$migrate_opts->{remote_node}) {
5746 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5747 }
5748
5749 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5750
5751 my $forcemachine = $params->{forcemachine};
5752 my $forcecpu = $params->{forcecpu};
5753 if ($resume) {
5754 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5755 $forcemachine = $conf->{runningmachine};
5756 $forcecpu = $conf->{runningcpu};
5757 print "Resuming suspended VM\n";
5758 }
5759
5760 my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
5761 $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
5762
5763 my $migration_ip;
5764 my $get_migration_ip = sub {
5765 my ($nodename) = @_;
5766
5767 return $migration_ip if defined($migration_ip);
5768
5769 my $cidr = $migrate_opts->{network};
5770
5771 if (!defined($cidr)) {
5772 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5773 $cidr = $dc_conf->{migration}->{network};
5774 }
5775
5776 if (defined($cidr)) {
5777 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5778
5779 die "could not get IP: no address configured on local " .
5780 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5781
5782 die "could not get IP: multiple addresses configured on local " .
5783 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5784
5785 $migration_ip = @$ips[0];
5786 }
5787
5788 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5789 if !defined($migration_ip);
5790
5791 return $migration_ip;
5792 };
5793
5794 if ($statefile) {
5795 if ($statefile eq 'tcp') {
5796 my $migrate = $res->{migrate} = { proto => 'tcp' };
5797 $migrate->{addr} = "localhost";
5798 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5799 my $nodename = nodename();
5800
5801 if (!defined($migration_type)) {
5802 if (defined($datacenterconf->{migration}->{type})) {
5803 $migration_type = $datacenterconf->{migration}->{type};
5804 } else {
5805 $migration_type = 'secure';
5806 }
5807 }
5808
5809 if ($migration_type eq 'insecure') {
5810 $migrate->{addr} = $get_migration_ip->($nodename);
5811 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5812 }
5813
5814 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5815 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5816 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5817 push @$cmd, '-incoming', $migrate->{uri};
5818 push @$cmd, '-S';
5819
5820 } elsif ($statefile eq 'unix') {
5821 # should be the default for secure migrations, as an SSH TCP forward
5822 # tunnel is not deterministically ready in time and regularly fails
5823 # to set up, so use UNIX socket forwards instead
5824 my $migrate = $res->{migrate} = { proto => 'unix' };
5825 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5826 unlink $migrate->{addr};
5827
5828 $migrate->{uri} = "unix:$migrate->{addr}";
5829 push @$cmd, '-incoming', $migrate->{uri};
5830 push @$cmd, '-S';
5831
5832 } elsif (-e $statefile) {
5833 push @$cmd, '-loadstate', $statefile;
5834 } else {
5835 my $statepath = PVE::Storage::path($storecfg, $statefile);
5836 push @$vollist, $statefile;
5837 push @$cmd, '-loadstate', $statepath;
5838 }
5839 } elsif ($params->{paused}) {
5840 push @$cmd, '-S';
5841 }
5842
5843 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
5844
5845 my $pci_reserve_list = [];
5846 for my $device (values $pci_devices->%*) {
5847 next if $device->{mdev}; # we don't reserve for mdev devices
5848 push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
5849 }
5850
5851 # reserve all PCI IDs before actually doing anything with them
5852 PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
5853
5854 eval {
5855 my $uuid;
5856 for my $id (sort keys %$pci_devices) {
5857 my $d = $pci_devices->{$id};
5858 my ($index) = ($id =~ m/^hostpci(\d+)$/);
5859
5860 my $chosen_mdev;
5861 for my $dev ($d->{ids}->@*) {
5862 my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
5863 if ($d->{mdev}) {
5864 warn $@ if $@;
5865 $chosen_mdev = $info;
5866 last if $chosen_mdev; # if successful, we're done
5867 } else {
5868 die $@ if $@;
5869 }
5870 }
5871
5872 next if !$d->{mdev};
5873 die "could not create mediated device\n" if !defined($chosen_mdev);
5874
5875 # nvidia grid needs the uuid of the mdev as qemu parameter
5876 if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
5877 if (defined($conf->{smbios1})) {
5878 my $smbios_conf = parse_smbios1($conf->{smbios1});
5879 $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
5880 }
5881 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
5882 }
5883 }
5884 push @$cmd, '-uuid', $uuid if defined($uuid);
5885 };
5886 if (my $err = $@) {
5887 eval { cleanup_pci_devices($vmid, $conf) };
5888 warn $@ if $@;
5889 die $err;
5890 }
5891
5892 PVE::Storage::activate_volumes($storecfg, $vollist);
5893
5894
5895 my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
5896 eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
5897 eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
5898 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5899 # timeout should be more than enough here...
5900 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5901
5902 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5903
5904 my %run_params = (
5905 timeout => $statefile ? undef : $start_timeout,
5906 umask => 0077,
5907 noerr => 1,
5908 );
5909
5910 # when migrating, prefix QEMU output so other side can pick up any
5911 # errors that might occur and show the user
5912 if ($migratedfrom) {
5913 $run_params{quiet} = 1;
5914 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5915 }
5916
5917 my %systemd_properties = (
5918 Slice => 'qemu.slice',
5919 KillMode => 'process',
5920 SendSIGKILL => 0,
5921 TimeoutStopUSec => ULONG_MAX, # infinity
5922 );
5923
5924 if (PVE::CGroup::cgroup_mode() == 2) {
5925 $systemd_properties{CPUWeight} = $cpuunits;
5926 } else {
5927 $systemd_properties{CPUShares} = $cpuunits;
5928 }
5929
5930 if (my $cpulimit = $conf->{cpulimit}) {
5931 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5932 }
5933 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5934
5935 my $run_qemu = sub {
5936 PVE::Tools::run_fork sub {
5937 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5938
5939 my $tpmpid;
5940 if (my $tpm = $conf->{tpmstate0}) {
5941 # start the TPM emulator so QEMU can connect on start
5942 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5943 }
5944
5945 my $exitcode = run_command($cmd, %run_params);
5946 if ($exitcode) {
5947 if ($tpmpid) {
5948 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5949 kill 'TERM', $tpmpid;
5950 }
5951 die "QEMU exited with code $exitcode\n";
5952 }
5953 };
5954 };
5955
5956 if ($conf->{hugepages}) {
5957
5958 my $code = sub {
5959 my $hotplug_features =
5960 parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5961 my $hugepages_topology =
5962 PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
5963
5964 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5965
5966 PVE::QemuServer::Memory::hugepages_mount();
5967 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5968
5969 eval { $run_qemu->() };
5970 if (my $err = $@) {
5971 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5972 if !$conf->{keephugepages};
5973 die $err;
5974 }
5975
5976 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5977 if !$conf->{keephugepages};
5978 };
5979 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5980
5981 } else {
5982 eval { $run_qemu->() };
5983 }
5984
5985 if (my $err = $@) {
5986 # deactivate volumes if start fails
5987 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5988 warn $@ if $@;
5989 eval { cleanup_pci_devices($vmid, $conf) };
5990 warn $@ if $@;
5991
5992 die "start failed: $err";
5993 }
5994
5995 # re-reserve all PCI IDs now that we can know the actual VM PID
5996 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5997 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
5998 warn $@ if $@;
5999
6000 if (defined($res->{migrate})) {
6001 print "migration listens on $res->{migrate}->{uri}\n";
6002 } elsif ($statefile) {
6003 eval { mon_cmd($vmid, "cont"); };
6004 warn $@ if $@;
6005 }
6006
6007 #start nbd server for storage migration
6008 if (my $nbd = $migrate_opts->{nbd}) {
6009 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
6010
6011 my $migrate_storage_uri;
6012 # nbd_protocol_version > 0 for unix socket support
6013 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
6014 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
6015 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
6016 $migrate_storage_uri = "nbd:unix:$socket_path";
6017 $res->{migrate}->{unix_sockets} = [$socket_path];
6018 } else {
6019 my $nodename = nodename();
6020 my $localip = $get_migration_ip->($nodename);
6021 my $pfamily = PVE::Tools::get_host_address_family($nodename);
6022 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
6023
6024 mon_cmd($vmid, "nbd-server-start", addr => {
6025 type => 'inet',
6026 data => {
6027 host => "${localip}",
6028 port => "${storage_migrate_port}",
6029 },
6030 });
6031 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
6032 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
6033 }
6034
6035 my $block_info = mon_cmd($vmid, "query-block");
6036 $block_info = { map { $_->{device} => $_ } $block_info->@* };
6037
6038 foreach my $opt (sort keys %$nbd) {
6039 my $drivestr = $nbd->{$opt}->{drivestr};
6040 my $volid = $nbd->{$opt}->{volid};
6041
6042 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
6043
6044 mon_cmd(
6045 $vmid,
6046 "block-export-add",
6047 id => "drive-$opt",
6048 'node-name' => $block_node,
6049 writable => JSON::true,
6050 type => "nbd",
6051 name => "drive-$opt", # NBD export name
6052 );
6053
6054 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
6055 print "storage migration listens on $nbd_uri volume:$drivestr\n";
6056 print "re-using replicated volume: $opt - $volid\n"
6057 if $nbd->{$opt}->{replicated};
6058
6059 $res->{drives}->{$opt} = $nbd->{$opt};
6060 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
6061 }
6062 }
6063
6064 if ($migratedfrom) {
6065 eval {
6066 set_migration_caps($vmid);
6067 };
6068 warn $@ if $@;
6069
6070 if ($spice_port) {
6071 print "spice listens on port $spice_port\n";
6072 $res->{spice_port} = $spice_port;
6073 if ($migrate_opts->{spice_ticket}) {
6074 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6075 $migrate_opts->{spice_ticket});
6076 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6077 }
6078 }
6079
6080 } else {
6081 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6082 if !$statefile && $conf->{balloon};
6083
6084 foreach my $opt (keys %$conf) {
6085 next if $opt !~ m/^net\d+$/;
6086 my $nicconf = parse_net($conf->{$opt});
6087 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6088 }
6089 add_nets_bridge_fdb($conf, $vmid);
6090 }
6091
6092 if (!defined($conf->{balloon}) || $conf->{balloon}) {
6093 eval {
6094 mon_cmd(
6095 $vmid,
6096 'qom-set',
6097 path => "machine/peripheral/balloon0",
6098 property => "guest-stats-polling-interval",
6099 value => 2
6100 );
6101 };
6102 log_warn("could not set polling interval for ballooning - $@") if $@;
6103 }
6104
6105 if ($resume) {
6106 print "Resumed VM, removing state\n";
6107 if (my $vmstate = $conf->{vmstate}) {
6108 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6109 PVE::Storage::vdisk_free($storecfg, $vmstate);
6110 }
6111 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6112 PVE::QemuConfig->write_config($vmid, $conf);
6113 }
6114
6115 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6116
6117 return $res;
6118}
6119
# Returns the QEMU command line for VM $vmid as a single string.
#
# $storecfg: storage configuration, handed on to config_to_command()
# $vmid: ID of the VM whose configuration gets loaded
# $snapname: optional snapshot name - if set, the command line is built from
#   that snapshot's configuration instead of the current one
#
# Dies if $snapname is set but the configuration has no such snapshot.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);
    my %forced = (machine => undef, cpu => undef);

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        die "snapshot '$snapname' does not exist\n" if !defined($snap_conf);

        # machine and CPU recorded in the snapshot take precedence
        @forced{qw(machine cpu)} = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        $conf = $snap_conf;
    }

    my $defaults = load_defaults();
    my $qemu_cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forced{machine}, $forced{cpu});

    return PVE::Tools::cmd2string($qemu_cmd);
}
6145
# Sends a 'system_reset' monitor command to VM $vmid while holding the
# config lock.
#
# $vmid: ID of the VM to reset
# $skiplock: if true, the lock entry in the VM config is not checked
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        unless ($skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        return mon_cmd($vmid, "system_reset");
    };

    return PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6158
# Collects the volume IDs referenced by $conf that parse as storage volume
# IDs. Entries starting with '/' and entries for which
# PVE::Storage::parse_volume_id() yields no storage ID are left out.
#
# Returns: array reference with the collected volume IDs
sub get_vm_volumes {
    my ($conf) = @_;

    my @storage_volids;

    my $collect = sub {
        my ($volid) = @_;

        # absolute paths are skipped
        return if $volid =~ m|^/|;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        push @storage_volids, $volid if $storeid;
    };
    foreach_volid($conf, $collect);

    return \@storage_volids;
}
6176
# Removes the mediated devices belonging to the hostpciN entries of $conf and
# then drops the PCI reservation of VM $vmid.
#
# $vmid: ID of the VM
# $conf: VM configuration; only keys matching hostpci<N> are looked at
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys $conf->%*) {
        my ($index) = $opt =~ m/^hostpci(\d+)$/;
        next if !defined($index);

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $d = parse_hostpci($conf->{$opt});
        next if !$d->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $dev_sysfs_dir = "/sys/bus/mdev/devices/$uuid";

        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
        # out when we do it first. so wait for 10 seconds and then try it
        my $is_nvidia = $d->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/;
        sleep 10 if $is_nvidia;

        if (-e $dev_sysfs_dir) {
            PVE::SysFSTools::file_write("$dev_sysfs_dir/remove", "1");
        }
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6201
# Cleans up after a stopped VM. Any error raised on the way is only warned
# about, never propagated.
#
# $storecfg: storage configuration
# $vmid: ID of the VM
# $conf: VM configuration
# $keepActive: if true, volumes stay active and the TPM state volume mapped
# $apply_pending_changes: if true, pending config changes get applied at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            my $volumes = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $volumes);

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        for my $ext (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # just delete it for now, VMs which have this already open are
            # not affected, but new VMs will get a separate one. If this
            # becomes an issue we either add some sort of ref-counting or just
            # add a "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        if ($apply_pending_changes) {
            vmconfig_apply_pending($vmid, $conf, $storecfg);
        }
    };
    warn $@ if $@; # avoid errors - just warn
}
6239
# call only in locked context
#
# Stops the running VM $vmid, escalating step by step:
#  1. monitor command ('guest-shutdown' / 'system_powerdown' for $shutdown,
#     'quit' otherwise), then wait up to $timeout seconds (default 60)
#  2. with $force: SIGTERM, then wait up to 10 seconds
#  3. SIGKILL
# Without $force, a failed or timed out step 1 makes the function die.
#
# Returns early if the VM is not running. Unless $nocheck is set, the config
# is loaded, its lock is checked (unless $skiplock), the 'pre-stop' hookscript
# phase is executed and vm_stop_cleanup() runs once the VM is gone.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    unless ($nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;

        # fall back to the 'down' value of the startup option as timeout
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Polls check_running() once per second, at most $limit times. Returns
    # true if the poll budget was used up, false if the VM was seen stopped
    # before that.
    my $wait_timed_out = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$wait_timed_out->($timeout)) {
            # VM went away in time
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again
    if ($wait_timed_out->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6321
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# Stops VM $vmid. $force defaults to 1 when it is undefined and $shutdown is
# not set. With $migratedfrom, the process is sent SIGTERM directly and
# cleaned up using the config loaded for that node, without taking the
# config lock; otherwise _do_vm_stop() runs under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    if (!$shutdown && !defined($force)) {
        $force = 1;
    }

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    if ($pid) {
        kill 15, $pid;
    }

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
    return;
}
6342
# Reboots VM $vmid by registering a reboot request and then shutting the VM
# down under the config lock. Does nothing if the VM is not running.
#
# $vmid: ID of the VM
# $timeout: shutdown timeout, passed on to _do_vm_stop()
#
# On failure the reboot request is cleared again and the error re-raised.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6365
# note: if using the statestorage parameter, the caller has to check privileges
#
# Suspends VM $vmid.
#
# Without $includestate the VM is only paused via the 'stop' monitor command.
# With $includestate the VM state is saved to a newly allocated state volume
# and QEMU is told to quit afterwards; the config lock goes from 'suspending'
# to 'suspended' on success. If saving fails, the state volume is freed again
# but the 'suspending' lock is left in place.
#
# $skiplock: if true, the config lock entry is not checked
# $statestorage: storage for the state volume; if unset, one is picked via
#   find_vmstate_storage() and (for non-CLI callers) permission-checked here
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # shared between the two locked sections below
    my ($conf, $path, $storecfg, $vmstate);

    my $begin_suspend = sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $backup_running = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$backup_running) {
            PVE::QemuConfig->check_lock($conf);
        }

        die "cannot suspend to disk during backup\n"
            if $backup_running && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        unless ($statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);
            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    };
    PVE::QemuConfig->lock_config($vmid, $begin_suspend);

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll once per second until saving is no longer 'active'
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        my $finish_suspend = sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                if !PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        };
        PVE::QemuConfig->lock_config($vmid, $finish_suspend);
    }
}
6460
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
#
# Resumes VM $vmid under the config lock. Nothing is done if QEMU already
# reports the status 'running'. A 'suspended' status is resumed with
# 'system_wakeup' instead of 'cont', and a 'shutdown' status triggers a
# 'system_reset' before resuming.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    # Loads the VM config. With $nocheck a failed local load falls back to the
    # node recorded in the cluster VM list, and finally to another local
    # attempt after refreshing the cluster state.
    my $load_conf = sub {
        return PVE::QemuConfig->load_config($vmid) if !$nocheck;

        my $found = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
        return $found if !$@;

        my $vmlist = PVE::Cluster::get_vmlist();
        if (exists($vmlist->{ids}->{$vmid})) {
            my $node = $vmlist->{ids}->{$vmid}->{node};
            $found = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
        }
        return $found if $found;

        PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
        return PVE::QemuConfig->load_config($vmid); # last try on target node again
    };

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');
        my $conf = $load_conf->();

        my ($resume_cmd, $reset) = ('cont', 0);
        if (my $status = $res->{status}) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        # a 'backup' lock does not prevent resuming
        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        if ($resume_cmd eq 'cont') {
            add_nets_bridge_fdb($conf, $vmid);
        }

        mon_cmd($vmid, $resume_cmd);
    });
}
6511
# Sends the key (combination) $key to VM $vmid under the config lock.
#
# $skiplock is accepted but not evaluated here.
#
# Dies with the monitor's output if the command returns a non-empty string.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # the result is unused - presumably this only ensures that a config
        # for $vmid can be loaded (TODO confirm)
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6524
# Checks the network access of $authuser for every netN entry of $conf via
# PVE::GuestHelpers::check_vnet_access(). 'root@pam' is never checked.
#
# Returns 1 if no check raised an error.
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = sort grep { $_ =~ m/^net\d+$/ } keys $conf->%*;
    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        my ($bridge, $tag, $trunks) = $net->@{qw(bridge tag trunks)};
        PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
    }

    return 1;
}
6538
# Checks that $user may use the USB and PCI devices configured in $conf.
#
# For usbN and hostpciN entries:
#  - 'host' devices are reserved for 'root@pam' (a USB host of 'spice' is
#    allowed for everyone)
#  - 'mapping' devices require 'Mapping.Use' on the respective mapping path
#  - entries with neither are rejected
# Other config keys are ignored. Dies on the first violation.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    my $is_root = $user eq 'root@pam';

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
            my ($host, $mapping) = $usb->@{qw(host mapping)};

            if ($host) {
                die "only root can set '$opt' config for real devices\n"
                    if $host !~ m/^spice$/i && !$is_root;
            } elsif ($mapping) {
                $rpcenv->check_full($user, "/mapping/usb/$mapping", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
            my ($host, $mapping) = $pci->@{qw(host mapping)};

            if ($host) {
                die "only root can set '$opt' config for non-mapped devices\n" if !$is_root;
            } elsif ($mapping) {
                $rpcenv->check_full($user, "/mapping/pci/$mapping", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        }
    }
}
6565
# Runs the permission checks needed before restoring $conf as $user: first
# the bridge access check, then the USB/PCI mapping access check.
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);
    check_mapping_access(@check_args);
}
# vzdump restore implementation
6573
# Returns the name of the first entry listed by 'tar tf' for $archive, with
# the trailing newline removed.
#
# Dies if $archive is not a plain file, if tar cannot be started, or if the
# listing yields no (true) first line.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    unless (-f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $tar_pid = open(my $listing, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    # only the first line is of interest, so tar gets terminated right away
    my $first_entry = readline($listing);
    kill 15, $tar_pid;
    close $listing;

    die "ERROR: archive contaions no data\n" if !$first_entry;
    chomp $first_entry;

    return $first_entry;
}
6591
# Removes the temporary volumes recorded in $statfile after a failed restore.
#
# $storecfg: storage configuration, used to free storage-managed volumes
# $statfile: file whose lines end in 'vzdump:<name>:<volid>'
#
# Volume IDs starting with '/' are unlinked as plain files, all others are
# freed via PVE::Storage::vdisk_free(). Progress and failures are reported on
# STDERR; a failure for one volume does not stop the cleanup of the others.
# Nothing happens if $statfile cannot be opened.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' (not '||') so that the check applies to the
                        # result of unlink rather than to $volid
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6617
# Restores VM $vmid from $archive, dispatching on the archive format.
#
# An $archive of '-' is handed straight to restore_vma_archive(). Otherwise
# the format comes from $opts->{format} or, if that is undefined, from
# PVE::Storage::archive_info(); 'tar' archives go to restore_tar_archive(),
# everything else to restore_vma_archive() together with the detected
# compression.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $format = defined($opts->{format}) ? $opts->{format} : $info->{format};
    my $compression = $info->{compression};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $compression);
}
6635
# Helper to remove disks that will not be used after restore
#
# $storecfg: storage configuration
# $vmid: ID of the VM being restored
# $oldconf: configuration of the VM before the restore
# $virtdev_hash: drives contained in the backup, keyed by drive name
#
# Volumes of $oldconf that are owned by $vmid and get replaced by the restore
# are freed. Volumes that stay are kept, but lose their snapshots, and the
# state volumes of all old snapshots are freed. Errors are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_volid;

    my $cleanup_drive = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_volid{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };
    PVE::QemuConfig->foreach_volume($oldconf, $cleanup_drive);

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys %kept_volid) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6681
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage by $user
    my $assert_storage_usable = sub {
        my ($storeid, $scfg) = @_;

        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};

        if ($user ne 'root@pam') {
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
        }
    };

    my $storage_override = $options->{storage};

    my $virtdev_hash = {};
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            die "archive does not contain data for drive '$virtdev'\n"
                if !$devinfo->{$devname};

            # an explicitly requested storage wins, 'local' is the fallback
            $storeid = $storage_override if defined($storage_override);
            $storeid ||= 'local';
            $format ||= 'raw';

            my $info = $devinfo->{$devname};
            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $assert_storage_usable->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
            next;
        }

        next if $line !~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/;

        my $virtdev = $1;
        my $drive = parse_drive($virtdev, $2);
        next if !drive_is_cloudinit($drive);

        # cloudinit disks are not part of the archive, they get re-created
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $storage_override if defined($storage_override);
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

        $assert_storage_usable->($storeid, $scfg); # permission and content type check

        $virtdev_hash->{$virtdev} = {
            format => $format,
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return $virtdev_hash;
};
6749
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set, and its 'format' replaced
# by the storage default if the requested format is not valid for the storage.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # size divided by 1024, rounded up
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        # looked up once per device and reused for the cloudinit name below
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloudinit disks get an explicit name, everything else is
        # allocated with an undefined name
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as extension
            if ($scfg->{path}) {
                $name .= ".$d->{format}";
            }
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6793
# Rewrites a single line of a backed-up VM config for the restored VM and
# returns the resulting text (possibly empty, possibly several lines).
#
# $cookie: state shared across calls; $cookie->{netcount} is the index of the
#   next net device created from old 'vlanN' settings
# $map: { $virtdev => $volid } of the newly allocated volumes
# $line: the config line, including its trailing newline
# $unique: if true, MAC addresses and the smbios1 UUID get regenerated
#
# Dropped: qmdump/vzdump hint lines, 'lock', 'unusedN' and 'parent'.
# Drive lines with backup disabled are commented out; drives found in $map
# point to the new volume. A vmgenid other than '0' is always regenerated.
# Everything else is passed through unchanged.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # lines that never make it into the restored config
    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $out = '';

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;
            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $out .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
    } elsif (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);
        my $net = parse_net($netstr);
        # only replace an existing MAC address
        if ($net->{macaddr}) {
            $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
        }
        $netstr = print_net($net);
        $out .= "$id: $netstr\n";
    } elsif ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);
        if (defined($di->{backup}) && !$di->{backup}) {
            # drive was excluded from the backup
            $out .= "#$line";
        } elsif (my $new_volid = $map->{$virtdev}) {
            delete $di->{format}; # format can change on restore
            $di->{file} = $new_volid;
            $value = print_drive($di);
            $out .= "$virtdev: $value\n";
        } else {
            $out .= $line;
        }
    } elsif ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        $out .= "vmgenid: $vmgenid\n";
    } elsif (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($prefix, $raw_smbios1) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($raw_smbios1);
        $smbios1->{uuid} = $uuid_str;
        $out .= $prefix . print_smbios1($smbios1) . "\n";
    } else {
        $out .= $line;
    }

    return $out;
}
6862
# Deactivates all volumes recorded as 'volid' in the entries of
# $virtdev_hash. A failure is only printed to STDERR.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
6874
# Frees all volumes recorded as 'volid' in the entries of $virtdev_hash.
# Success and failure are reported per volume on STDERR; a failure does not
# stop the remaining volumes from being freed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    foreach my $entry (values $virtdev_hash->%*) {
        next if !$entry->{volid};
        my $volid = $entry->{volid};

        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        if ($@) {
            print STDERR "unable to cleanup '$volid' - $@";
        }
    }
};
6887
# Parse the raw backup config and overlay it with the given overrides.
#
# $filename        - passed through to parse_vm_config()
# $backup_conf_raw - raw config text taken from the backup
# $override_conf   - hash of top-level keys that replace the parsed values
#
# Returns the parsed config with all override keys applied.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);
    $merged->{$_} = $override_conf->{$_} for keys %$override_conf;

    return $merged;
}
6898
# List the 'images' volumes reported by PVE::Storage::vdisk_list() for $vmid
# and index them by volume ID. Entries lacking a volid or a size are ignored;
# every kept entry gets an additional 'path' key.
#
# Returns a hash reference: volid => volume info.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $per_storage = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %by_volid;
    for my $volumes (values %$per_storage) {
        for my $item (@$volumes) {
            next unless $item->{volid} && $item->{size};
            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $by_volid{$item->{volid}} = $item;
        }
    }

    return \%by_volid;
}
6915
# Synchronize the disk entries of $conf with the volumes found on storage.
#
# $vmid       - VM ID, used for messages and to skip 'vm-$vmid-state-' volumes
# $conf       - VM config, modified in place
# $volid_hash - volid => volume info (needs 'path' and 'size')
#
# Three passes are made:
#   1. refresh the size of every configured (non-cdrom) drive,
#   2. drop 'unusedX' entries whose volume is already referenced,
#   3. add volumes not referenced anywhere as new unused entries.
#
# Returns 1 if $conf was modified, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Multiple storages may be defined on the same path (alias), so two
    # different volids can point to the same real path. Track both the volid
    # and the path of everything that is referenced (used or unused).
    my %seen_volid;
    my %seen_path;

    # pass 1: update size info and remember what is in use
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;
        my $volume = $volid_hash->{$volid};

        $seen_volid{$volid} = 1;
        if ($volume) {
            my $path = $volume->{path};
            $seen_path{$path} = 1 if $path;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # pass 2: remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        my $already_used = $seen_volid{$volid} || ($path && $seen_path{$path});
        if ($already_used) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        # also catches duplicates among the unused entries themselves
        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    # pass 3: whatever is still unreferenced becomes a new unused entry
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6989
# Rescan storages and update the disk configuration of one or all VMs.
#
# $vmid   - VM to process; when undefined every VM from config_list() is done
# $nolock - when true, update the config without taking the config lock
# $dryrun - when true, changes are computed and printed but not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $update_one = sub {
        my ($id) = @_;

        my $conf = PVE::QemuConfig->load_config($id);

        PVE::QemuConfig->check_lock($conf);

        # only consider volumes owned by this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $id;
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($id, $conf, $vm_volids);

        PVE::QemuConfig->write_config($id, $conf) if $changes && !$dryrun;
    };

    # run the update either directly or under the VM's config lock
    my $process = $nolock
        ? $update_one
        : sub { my ($id) = @_; PVE::QemuConfig->lock_config($id, $update_one, $id) };

    if (defined($vmid)) {
        $process->($vmid);
    } else {
        for my $id (keys %{ config_list() }) {
            $process->($id);
        }
    }
}
7033
# Restore VM $vmid from a Proxmox Backup Server snapshot.
#
# $archive - volume ID of the backup (must parse as vtype 'backup', format 'pbs-vm')
# $user    - passed to the backup-hint parser and the permission check
# $options - hash; keys read here: 'live', 'unique', 'override_conf', 'pool'
#            (the whole hash is also handed to $parse_backup_hints)
#
# Flow: fetch index.json, qemu-server.conf and (if present) fw.conf into a
# temporary directory, allocate target volumes, restore each drive with
# 'pbs-restore' (for live-restore only efidisk0/tpmstate0), rewrite the config
# line by line, then write the merged config and rescan the VM. With
# $options->{live} the remaining drives are streamed by pbs_live_restore()
# while the 'create' lock is kept.
#
# On failure the temporary directory is removed, allocated volumes are
# deactivated and destroyed, and the original error is re-thrown.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};
    # the namespace arguments for pbs-restore are identical for every drive
    my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        # collect the size of every drive image listed in the index
        foreach my $info (@{$index->{files}}) {
            # dots are escaped so only a literal '.img.fidx' suffix matches
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the config: rewrite it for the restored VM
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes must stay active for the VM start
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7250
# Start VM $vmid paused with its disks backed by a PBS snapshot, then stream
# the data of all drives in $restored_disks into the target volumes.
#
# $conf           - VM config used to start the VM
# $restored_disks - hash keyed by device name ('drive-<confname>')
# $opts           - hash with 'repo', 'snapshot', 'keyfile' and optional 'namespace'
#
# The PBS_PASSWORD/PBS_FINGERPRINT environment variables needed by the QEMU
# PBS driver are expected to be set by the caller. On any error during start
# or streaming the VM is stopped and "live-restore failed" is thrown.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # fail loudly instead of silently reusing a stale capture from an
        # earlier match if the device name has an unexpected form
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7321
# Restore VM $vmid from a VMA archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $user    - passed to the backup-hint parser and the permission check
# $opts    - hash; keys read here: 'bwlimit', 'unique', 'override_conf', 'pool'
#            (the whole hash is also handed to $parse_backup_hints)
# $comp    - optional compression identifier handed to decompressor_info()
#
# A pipeline [cstream |] [decompressor |] 'vma extract' is run. Once vma has
# printed its header (the 'CTIME:' line), the target volumes are allocated
# and the device mapping is written to a FIFO that vma reads. Afterwards the
# rewritten config is merged, permission-checked and written, and the VM is
# rescanned. On failure allocated volumes are destroyed and the error is
# re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a stage to the pipeline; every later stage reads from stdin
    my $add_pipe = sub {
        my ($stage) = @_;
        push @$commands, $stage;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($stage);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                $add_pipe->(['cstream', '-t', $readlimit*1024, '--', $readfrom]);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $decompress = $info->{decompressor};
        push @$decompress, $readfrom;
        $add_pipe->($decompress);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $oldtimeout;
    my $timeout = 5;

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocate the target volumes, tell vma (via the FIFO) where each device
    # goes and build the rewritten config text
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # look up the restore bandwidth limit once per target storage
        for my $info (values %$virtdev_hash) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        for my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        for my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            my $limit = $storage_limits{$storeid};
            $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:" if $limit;

            # storages with 'sparseinit' do not need zeros written explicitly
            my $write_zeros = PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid) ? 0 : 1;

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm($timeout);

        # handles every output line of the extract pipeline
        my $parser = sub {
            my ($line) = @_;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation (set to 10 minutes, so
                # that we always timeout if something goes wrong)
                alarm(600);
                $print_devmap->();
                print $fifofh "done\n";
                my $tmp = $oldtimeout || 0;
                $oldtimeout = undef;
                alarm($tmp);
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7538
# Restore VM $vmid from a legacy tar based vzdump archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $user    - exported to the extract helper as VZDUMP_USER
# $opts    - hash; keys read here: 'override_conf' (must be empty), 'storage',
#            'pool', 'prealloc', 'info', 'unique'
#
# The archive is unpacked with tar, piping every member through
# /usr/lib/qemu-server/qmextract. The volume mapping is then read from
# 'qmrestore.stat' in the temporary directory, the config is rewritten line
# by line and written as the new VM config, and the VM is rescanned.
#
# The temporary directory is removed on success and on failure.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    # NOTE(review): the call below passes 1 and { lock => 'restore' }; confirm
    # the comment above still matches destroy_vm()'s current signature.
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # note: this only leaves the eval block, not the function
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        # the stat file lives inside $tmpdir, so clean up volumes first
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # do not leave the temporary directory behind on failure
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7651
# Call $func->($storeid) once for every distinct storage that holds a
# non-cdrom volume of $conf, in sorted storage ID order. Volumes whose ID
# cannot be parsed into a storage ID are ignored.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %storage_ids;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # second argument: do not die on unparsable volume IDs
        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $storage_ids{$sid} = $sid if $sid;
    });

    for my $sid (sort keys %storage_ids) {
        $func->($sid);
    }
}
7671
# storage types whose snapshots are done by QEMU itself
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether snapshots of $volid are taken by QEMU.
#
# Returns 1 for volumes on a storage type listed in $qemu_snap_storage
# (unless the storage has 'krbd' set) and for volumes whose ID ends in
# '.qcow2' or '.qed'; returns nothing otherwise, and always for a device ID
# containing 'tpmstate0'. Dies if the volume's storage is not configured.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    my $qemu_capable_storage = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};

    return 1 if $qemu_capable_storage || $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7694
# Check whether the QEMU guest agent of VM $vmid answers a 'guest-ping'
# (3 second timeout). Returns 1 if it does, 0 otherwise; on failure a warning
# is printed unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;

    return 1 if !$err;

    warn "QEMU Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7705
# Convert the volumes of VM $vmid into base volumes.
#
# $conf - VM config; drive entries are updated in place
# $disk - optional drive key; when set only that drive is converted
#
# Cdrom drives and volumes without the 'template' storage feature are
# skipped. The config is written after each converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return unless PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
7726
# Convert an 'iscsi://<portal>/<target>/<lun>' path into the comma separated
# 'file.driver=iscsi,...' option string (as used with qemu-img's
# --image-opts / --target-image-opts in this file).
#
# Dies if $path does not have the expected form.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my ($portal, $target, $lun) = ($1, $2, $3);

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7743
# Copy a volume with 'qemu-img convert'.
#
# $src_volid           - source volume ID, or a path to a file / block device
# $dst_volid           - destination volume ID (must be a valid volid)
# $size                - size used only for the progress output
# $snapname            - optional snapshot of the source to copy from
# $is_zero_initialized - when true (and the target is not iSCSI) the target
#                        is wrapped in the 'zeroinit:' driver
# $bwlimit             - optional rate limit, passed to qemu-img as '-r <n>K'
#
# The target must already exist (qemu-img is called with '-n'). Dies with
# "copy failed: ..." if qemu-img fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain file or block device; guess format from extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);
    push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn qemu-img's '(NN.NN/100%)' progress lines into readable output
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7826
# Determine the image format of $volname on the storage described by $scfg.
# The file extension decides, but only when the storage config has a 'path'
# set and the extension matches QEMU_FORMAT_RE; everything else is "raw".
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/ ? $1 : "raw";
}
7836
# Start a 'drive-mirror' job for "drive-$drive" of VM $vmid and monitor it.
#
# $dst_volid           - target volume ID, or an 'nbd:' URI
# $vmiddst, $completion, $qga - passed on to qemu_drive_mirror_monitor()
# $is_zero_initialized - wrap a non-NBD target in the 'zeroinit:' driver
# $jobs                - hash of tracked jobs; this drive's job is added to it
# $bwlimit             - optional limit in KB/s
# $src_bitmap          - optional dirty bitmap; switches to incremental sync
#
# If the job cannot be started, all jobs in $jobs are cancelled and
# "mirroring error: ..." is thrown.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7888
# Poll the block jobs of type $op for $vmid until they are ready (or gone) and then finish
# them as requested by $completion.
#
# Parameters:
#   $vmid       - VM whose monitor is queried via 'query-block-jobs'
#   $vmiddst    - target VM ID; if set and different from $vmid the jobs are cancelled (not
#                 completed) once ready, while the guest FS is frozen or the VM is suspended
#   $jobs       - hash reference keyed by block job ID; entries are deleted once finished
#   $completion - see below, defaults to 'complete'
#   $qga        - if true, a running guest agent is used for freeze/thaw instead of suspend
#   $op         - block job type to look at, defaults to "mirror"
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# On any error all jobs still listed in $jobs are cancelled and the function dies with
# "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed completion attempts ("cannot be completed"), used as retry limit
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # index the jobs QEMU currently reports for our job type by device/job ID
            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that disappeared without us completing it must have been cancelled
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only log progress until the job was seen as ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # separate name, so the job type in $op is not shadowed
                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $cmd_err = $@;
                        if ($cmd_err && $cmd_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($cmd_err) {
                            # any other failure must not be mistaken for a successful completion
                            die "$job_id: block job cannot be completed - $cmd_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best-effort cleanup, the original error is the one that gets reported
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "block job ($op) error: $err";
    }
}
8030
# Issue 'block-job-cancel' for every job ID that is a key of the $jobs hash reference and
# then poll 'query-block-jobs' once per second until none of them is reported anymore.
#
# Each entry gets {cancel} = 1 after the cancel command was sent and is deleted from $jobs
# once the job is gone, so $jobs is empty when this function returns.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        # a failing cancel command is swallowed here, the polling below decides when we are done
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    for (;;) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        for my $stat (@$stats) {
            $still_running{ $stat->{device} } = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
8061
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive (explicitly or by storage default) uses io_uring while the target
# storage does not allow io_uring by default. Does nothing if no drive-mirror is used.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    # Only relevant for drive-mirror. Also don't complain when not changing storage:
    # assume if it works for the source, it'll work for the target too.
    return if !$use_drive_mirror || $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    # an explicit aio setting on the drive wins over the storage default
    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    return if !$src_uses_io_uring;
    return if storage_allows_io_uring_default($dst_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n";
}
8088
# Clone a single drive to a newly allocated volume and return the resulting drive hash.
#
# Parameters:
#   $storecfg   - storage configuration
#   $source     - hash reference, keys read here: vmid, running, drivename, drive, snapname
#   $dest       - hash reference, keys read here: vmid, drivename, efisize, storage, format
#   $full       - if false, a linked clone is created via PVE::Storage::vdisk_clone(),
#                 otherwise a new volume is allocated and the data is copied
#   $newvollist - array reference, the new volume ID is pushed onto it
#   $jobs, $completion, $qga, $bwlimit - handed on to qemu_drive_mirror() and
#                 qemu_drive_mirror_monitor()
#
# For a full clone the data is copied with drive-mirror if the VM is running and no snapshot
# is cloned, with 'qemu-img dd' for a target EFI disk, and with qemu_img_convert() otherwise.
# Cloud-init drives only get a fresh volume, no data is copied.
#
# Returns a deep copy of the source drive with 'file' pointing to the new volume, 'format'
# removed, and 'size' set if it could be queried.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    # the target drive name is optional, avoid comparing undef with 'eq' below
    my $dst_name = $dst_drivename // '';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $is_cloudinit = drive_is_cloudinit($drive);

        my $name = undef;
        my $size = undef;
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_name eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_name eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # No data is copied for cloud-init drives.
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (defined($completion) && $completion eq 'complete' && scalar(keys %$jobs) > 0) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_name eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # failing to query the size of the new volume is not fatal, 'size' is simply left as is
    my $new_size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $new_size if defined($new_size);

    return $disk;
}
8208
# Ask the running QEMU instance of $vmid for its version via 'query-version' and return it
# as "<major>.<minor>" string.
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $version_info = mon_cmd($vmid, "query-version");
    my $qemu = $version_info->{qemu};

    return "$qemu->{major}.$qemu->{minor}";
}
8214
# Decide whether the old (non-EFI) PXE bios files should be used for $machine_type.
#
# Returns nothing for an empty machine type, otherwise the list
# ($use_old_bios_files, $machine_type), with a trailing '.pxe' stripped from the type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $stripped_type = $1;
        return (1, $stripped_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
8236
# Return the file size (as reported by -s) of the OVMF variables file that get_ovmf_files()
# selects for the given VM config.
#
# $efidisk is optional; if it is not defined it is parsed from $conf->{efidisk0} (if set).
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # only the second return value (the vars file) is of interest here
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    return -s $ovmf_vars;
}
8246
# Set the 'size' of the efidisk0 entry in $conf to the value of get_efivars_size() and write
# the re-printed drive string back into $conf. Does nothing if no efidisk0 is configured.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
8258
# Set the 'size' of the tpmstate0 entry in $conf to TPMSTATE_DISK_SIZE and write the
# re-printed drive string back into $conf.
#
# Does nothing if no tpmstate0 is configured, like update_efidisk_size() does for efidisk0;
# without this guard an undefined property string would be parsed and a new 'tpmstate0'
# entry would be assembled from it.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE();
    $conf->{tpmstate0} = print_drive($disk);
}
8266
# Allocate a new EFI vars volume on $storeid for $vmid and fill it with the content of the
# OVMF variables file selected by get_ovmf_files().
#
# Returns the list ($volid, $size / 1024), where $size is what
# PVE::Storage::volume_size_info() reports for the new volume.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    # only the second return value (the vars file) is needed
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $vars_size_b = -s $ovmf_vars;
    my $vars_size = PVE::Tools::convert_size($vars_size_b, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_size);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_size_b, undef, 0);

    my $size = PVE::Storage::volume_size_info($storecfg, $volid, 3);
    my $size_kb = $size / 1024;

    return ($volid, $size_kb);
}
8282
# Query the IO threads of $vmid via 'query-iothreads' and return a hash reference that maps
# each reported 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my $thread_id_of = {};
    for my $entry (@$res) {
        my $id = $entry->{id};
        $thread_id_of->{$id} = $entry->{"thread-id"};
    }

    return $thread_id_of;
}
8295
# Compute SCSI controller placement for $drive based on $conf->{scsihw}.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - 7 if scsihw is unset or starts with 'lsi', 1 for
#                        'virtio-scsi-single', 256 otherwise
#   $controller        - int($drive->{index} / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', "scsihw" otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    } else {
        $maxdev = 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8316
# Pick the disk format for a new volume on storage $storeid.
#
# If $format is not given, qemu_img_format() for $src_volname is used as hint, or the storage
# default format if there is no source volume name either. A format that is not in the list
# of valid formats of the storage is replaced by the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $supported = grep { $_ eq $format } @$validFormats;

    return $supported ? $format : $defFormat;
}
8336
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Determine the storage to use for the VM state. Order of preference:
#   1. $conf->{vmstatestorage} if set
#   2. a shared storage where the VM has at least one disk
#   3. a non-shared storage where the VM has at least one disk
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' (file based) is preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # the first candidate is kept unless a later one is file based
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
8360
# Generate a new UUID with the UUID module and return its string form as produced by
# UUID::unparse().
sub generate_uuid {
    my $uuid_bin;
    UUID::generate($uuid_bin);

    my $uuid_text;
    UUID::unparse($uuid_bin, $uuid_text);

    return $uuid_text;
}
8367
# Return a freshly generated UUID in the form "uuid=<uuid>".
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8371
# Send the 'nbd-server-stop' monitor command to the VM $vmid and return its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8377
# Create the (empty) reboot trigger file /run/qemu-server/<vmid>.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_path = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_path)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8384
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
#
# Returns the result of unlink(), i.e. a true value if the file was removed and 0 if it did
# not exist. Dies if removing failed for any reason other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8396
# Translate the legacy boot string (one letter per device class, e.g. 'c', 'd', 'n') into a
# hash reference mapping device names from $conf to boot indices.
#
# The legacy string is taken from $bootcfg->{legacy} or, if that is not set, from the default
# of the 'legacy' property in $boot_fmt. The letter at (1-based) position N gets base index
# N*100.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my $bootindex_hash = {};
    my $position = 0;
    for my $letter (split(//, $boot)) {
        $position++;
        $bootindex_hash->{$letter} = $position * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            # every cdrom drive gets an entry if 'd' is part of the boot string
            if ($bootindex_hash->{d}) {
                $bootorder->{$ds} = $bootindex_hash->{d};
                $bootindex_hash->{d} += 1;
            }
        } elsif ($bootindex_hash->{c}) {
            # of the other drives only the configured bootdisk gets an entry
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder->{$ds} = $bootindex_hash->{c} if $is_bootdisk;
            $bootindex_hash->{c} += 1;
        }
    });

    if ($bootindex_hash->{n}) {
        for my $net_index (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_index";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $bootindex_hash->{n};
            $bootindex_hash->{n} += 1;
        }
    }

    return $bootorder;
}
8436
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with, in this order and each only if present: the first hard
# disk, the first cdrom and the first configured network device of $conf.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $cdrom (0, 1) {
        my $first_disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $cdrom);
        push @devices, $first_disk if $first_disk;
    }

    # network
    my $first_net = first { $conf->{$_} } map { "net$_" } (0 .. $MAX_NETS - 1);
    push @devices, $first_net if defined($first_net);

    return \@devices;
}
8464
# Return a hash reference mapping device names to boot indices for the VM config $conf.
#
# Without a 'boot' property, or with one in legacy format, the result comes from
# bootorder_from_legacy(). With 'order=' the listed devices are numbered consecutively.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my $bootorder = {};
    if ($boot->{order}) {
        my $bootindex = 100; # start at 100 to allow user to insert devices before us with -args
        for my $dev (PVE::Tools::split_list($boot->{order})) {
            $bootorder->{$dev} = $bootindex;
            $bootindex++;
        }
    }

    return $bootorder;
}
8484
# Connect to the qmeventd socket and send the handshake that marks VM $vmid as backing up.
#
# The connection is attempted up to 5 times (25ms apart), retrying only on EINTR/EAGAIN;
# any other error, or running out of attempts, is fatal.
#
# Returns the socket handle, which the caller has to close when the inhibit is no longer
# required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $fh;

    while (1) {
        $attempts++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        my $retryable = $! == EINTR || $! == EAGAIN;
        if (!$retryable) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n";
        }
        if ($attempts > 4) {
            die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
                . "after $attempts retries\n";
        }
        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8512
8513# bash completion helper
8514
# Bash completion helper: return an array reference with the volume IDs of all backup
# archives whose format matches 'vma.<compressor>'.
#
# If the current value $cvalue already starts with '<storeid>:', only that storage is listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef if no storage prefix was typed yet
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
8538
# Return an array reference of VM IDs from vmstatus().
#
# $running undefined: all VMs (including templates)
# $running true:      only non-template VMs with status 'running'
# $running false:     only non-template VMs with a status other than 'running'
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $wanted = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    my @vmids = grep { $wanted->($idlist->{$_}) } keys %$idlist;

    return \@vmids;
};
8558
# Completion helper: all VM IDs, no filtering by state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8562
# Completion helper: IDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8566
# Completion helper: IDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8570
# Completion helper: return an array reference with the IDs of all storages that pass
# PVE::Storage::storage_check_enabled() and have the 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8585
# Completion helper: like complete_storage(), but the enabled check is done for the target
# node, which is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8603
# Return true if 'query-status' reports the status "paused" for VM $vmid.
#
# Errors (config not on this node, monitor command failing) are only warned about and lead
# to a false return value.
sub vm_is_paused {
    my ($vmid) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $qmpstatus && $qmpstatus->{status} eq "paused";
}
8613
# Check that the storage of volume $vol has the content type of that volume configured.
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    # the first value returned by parse_volname is the volume's content type
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8626
# Add the MAC address of every 'netN' device in $conf to the forwarding DB of its bridge.
#
# NICs without a MAC address or without a bridge are skipped (with a warning where noted
# below). With SDN available the SDN zone helper is used, otherwise the entry is only added
# for bridges that have a /sys/class/net/<bridge>/bridge directory.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($net_index) = $opt =~ m/^net(\d+)$/ or next;
        my $iface = "tap${vmid}i${net_index}";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my $mac = $net->{macaddr};
        if (!$mac) {
            # only worth a warning if the bridge port does not learn MACs on its own
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !$learning;
            next;
        }

        my $bridge = $net->{bridge};
        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8655
# Remove the MAC address of every 'netN' device in $conf from the forwarding DB of its bridge.
#
# NICs that cannot be parsed, have no MAC address or are not attached to a bridge are
# skipped; add_nets_bridge_fdb() never adds an entry for NICs without MAC or bridge. With
# SDN available the SDN zone helper is used, otherwise the entry is only removed for bridges
# that have a /sys/class/net/<bridge>/bridge directory.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # without a bridge there is no FDB to clean up, and an undefined value must neither
        # end up in the sysfs path below nor be handed to the SDN helper
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8674
86751;