]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
d/control: bump versioned dependencies for libpve-guest-common-perl
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use List::Util qw(first);
23 use MIME::Base64;
24 use POSIX;
25 use Storable qw(dclone);
26 use Time::HiRes qw(gettimeofday usleep);
27 use URI::Escape;
28 use UUID;
29
30 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31 use PVE::CGroup;
32 use PVE::CpuSet;
33 use PVE::DataCenterConfig;
34 use PVE::Exception qw(raise raise_param_exc);
35 use PVE::Format qw(render_duration render_bytes);
36 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37 use PVE::HA::Config;
38 use PVE::Mapping::PCI;
39 use PVE::Mapping::USB;
40 use PVE::INotify;
41 use PVE::JSONSchema qw(get_standard_option parse_property_string);
42 use PVE::ProcFSTools;
43 use PVE::PBSClient;
44 use PVE::RESTEnvironment qw(log_warn);
45 use PVE::RPCEnvironment;
46 use PVE::Storage;
47 use PVE::SysFSTools;
48 use PVE::Systemd;
49 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
50
51 use PVE::QMPClient;
52 use PVE::QemuConfig;
53 use PVE::QemuServer::Helpers qw(config_aware_timeout min_version windows_version);
54 use PVE::QemuServer::Cloudinit;
55 use PVE::QemuServer::CGroup;
56 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options get_cpu_bitness is_native_arch);
57 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
58 use PVE::QemuServer::Machine;
59 use PVE::QemuServer::Memory qw(get_current_memory);
60 use PVE::QemuServer::Monitor qw(mon_cmd);
61 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
62 use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
63 use PVE::QemuServer::USB;
64
# Probe for the optional SDN modules. $have_sdn ends up as 1 when both could be
# loaded and stays undef when either require fails (the error is swallowed).
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    require PVE::Network::SDN::Vnets;
    1;
};
71
72 my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';
73 my $OVMF = {
74 x86_64 => {
75 '4m-no-smm' => [
76 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
77 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
78 ],
79 '4m-no-smm-ms' => [
80 "$EDK2_FW_BASE/OVMF_CODE_4M.fd",
81 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
82 ],
83 '4m' => [
84 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
85 "$EDK2_FW_BASE/OVMF_VARS_4M.fd",
86 ],
87 '4m-ms' => [
88 "$EDK2_FW_BASE/OVMF_CODE_4M.secboot.fd",
89 "$EDK2_FW_BASE/OVMF_VARS_4M.ms.fd",
90 ],
91 # FIXME: These are legacy 2MB-sized images that modern OVMF doesn't support building
92 # anymore. How can we deprecate this sanely without breaking existing instances, or using
93 # older backups and snapshots?
94 default => [
95 "$EDK2_FW_BASE/OVMF_CODE.fd",
96 "$EDK2_FW_BASE/OVMF_VARS.fd",
97 ],
98 },
99 aarch64 => {
100 default => [
101 "$EDK2_FW_BASE/AAVMF_CODE.fd",
102 "$EDK2_FW_BASE/AAVMF_VARS.fd",
103 ],
104 },
105 };
106
107 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
108
109 # Note about locking: we use flock on the config file to protect against concurrent actions.
110 # Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
111 # 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
112 # But you can ignore this kind of lock with the --skiplock flag.
113
114 cfs_register_file(
115 '/qemu-server/',
116 \&parse_vm_config,
117 \&write_vm_config
118 );
119
120 PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
121 description => "Some command save/restore state from this location.",
122 type => 'string',
123 maxLength => 128,
124 optional => 1,
125 });
126
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;

# Return the local node name. The value is fetched from PVE::INotify::nodename()
# on the first call only and served from $nodename_cache afterwards.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
133
134 my $watchdog_fmt = {
135 model => {
136 default_key => 1,
137 type => 'string',
138 enum => [qw(i6300esb ib700)],
139 description => "Watchdog type to emulate.",
140 default => 'i6300esb',
141 optional => 1,
142 },
143 action => {
144 type => 'string',
145 enum => [qw(reset shutdown poweroff pause debug none)],
146 description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
147 optional => 1,
148 },
149 };
150 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
151
152 my $agent_fmt = {
153 enabled => {
154 description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
155 type => 'boolean',
156 default => 0,
157 default_key => 1,
158 },
159 fstrim_cloned_disks => {
160 description => "Run fstrim after moving a disk or migrating the VM.",
161 type => 'boolean',
162 optional => 1,
163 default => 0,
164 },
165 'freeze-fs-on-backup' => {
166 description => "Freeze/thaw guest filesystems on backup for consistency.",
167 type => 'boolean',
168 optional => 1,
169 default => 1,
170 },
171 type => {
172 description => "Select the agent type",
173 type => 'string',
174 default => 'virtio',
175 optional => 1,
176 enum => [qw(virtio isa)],
177 },
178 };
179
180 my $vga_fmt = {
181 type => {
182 description => "Select the VGA type.",
183 type => 'string',
184 default => 'std',
185 optional => 1,
186 default_key => 1,
187 enum => [qw(cirrus qxl qxl2 qxl3 qxl4 none serial0 serial1 serial2 serial3 std virtio virtio-gl vmware)],
188 },
189 memory => {
190 description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
191 type => 'integer',
192 optional => 1,
193 minimum => 4,
194 maximum => 512,
195 },
196 clipboard => {
197 description => 'Enable a specific clipboard. If not set, depending on the display type the'
198 .' SPICE one will be added. Migration with VNC clipboard is not yet supported!',
199 type => 'string',
200 enum => ['vnc'],
201 optional => 1,
202 },
203 };
204
205 my $ivshmem_fmt = {
206 size => {
207 type => 'integer',
208 minimum => 1,
209 description => "The size of the file in MB.",
210 },
211 name => {
212 type => 'string',
213 pattern => '[a-zA-Z0-9\-]+',
214 optional => 1,
215 format_description => 'string',
216 description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
217 },
218 };
219
220 my $audio_fmt = {
221 device => {
222 type => 'string',
223 enum => [qw(ich9-intel-hda intel-hda AC97)],
224 description => "Configure an audio device."
225 },
226 driver => {
227 type => 'string',
228 enum => ['spice', 'none'],
229 default => 'spice',
230 optional => 1,
231 description => "Driver backend for the audio device."
232 },
233 };
234
235 my $spice_enhancements_fmt = {
236 foldersharing => {
237 type => 'boolean',
238 optional => 1,
239 default => '0',
240 description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM."
241 },
242 videostreaming => {
243 type => 'string',
244 enum => ['off', 'all', 'filter'],
245 default => 'off',
246 optional => 1,
247 description => "Enable video streaming. Uses compression for detected video streams."
248 },
249 };
250
251 my $rng_fmt = {
252 source => {
253 type => 'string',
254 enum => ['/dev/urandom', '/dev/random', '/dev/hwrng'],
255 default_key => 1,
256 description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
257 ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
258 ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
259 ." still seeded from real entropy, and the bytes provided will most likely be mixed"
260 ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
261 ." a hardware RNG from the host.",
262 },
263 max_bytes => {
264 type => 'integer',
265 description => "Maximum bytes of entropy allowed to get injected into the guest every"
266 ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
267 ." `0` to disable limiting (potentially dangerous!).",
268 optional => 1,
269
270 # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
271 # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
272 # reading from /dev/urandom
273 default => 1024,
274 },
275 period => {
276 type => 'integer',
277 description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
278 ." the guest to retrieve another 'max_bytes' of entropy.",
279 optional => 1,
280 default => 1000,
281 },
282 };
283
284 my $meta_info_fmt = {
285 'ctime' => {
286 type => 'integer',
287 description => "The guest creation timestamp as UNIX epoch time",
288 minimum => 0,
289 optional => 1,
290 },
291 'creation-qemu' => {
292 type => 'string',
293 description => "The QEMU (machine) version from the time this VM was created.",
294 pattern => '\d+(\.\d+)+',
295 optional => 1,
296 },
297 };
298
299 my $confdesc = {
300 onboot => {
301 optional => 1,
302 type => 'boolean',
303 description => "Specifies whether a VM will be started during system bootup.",
304 default => 0,
305 },
306 autostart => {
307 optional => 1,
308 type => 'boolean',
309 description => "Automatic restart after crash (currently ignored).",
310 default => 0,
311 },
312 hotplug => {
313 optional => 1,
314 type => 'string', format => 'pve-hotplug-features',
315 description => "Selectively enable hotplug features. This is a comma separated list of"
316 ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
317 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
318 ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
319 ." windows > 7.",
320 default => 'network,disk,usb',
321 },
322 reboot => {
323 optional => 1,
324 type => 'boolean',
325 description => "Allow reboot. If set to '0' the VM exit on reboot.",
326 default => 1,
327 },
328 lock => {
329 optional => 1,
330 type => 'string',
331 description => "Lock/unlock the VM.",
332 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
333 },
334 cpulimit => {
335 optional => 1,
336 type => 'number',
337 description => "Limit of CPU usage.",
338 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
339 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
340 minimum => 0,
341 maximum => 128,
342 default => 0,
343 },
344 cpuunits => {
345 optional => 1,
346 type => 'integer',
347 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
348 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
349 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
350 ." weights of all the other running VMs.",
351 minimum => 1,
352 maximum => 262144,
353 default => 'cgroup v1: 1024, cgroup v2: 100',
354 },
355 memory => {
356 optional => 1,
357 type => 'string',
358 description => "Memory properties.",
359 format => $PVE::QemuServer::Memory::memory_fmt
360 },
361 balloon => {
362 optional => 1,
363 type => 'integer',
364 description => "Amount of target RAM for the VM in MiB. Using zero disables the ballon driver.",
365 minimum => 0,
366 },
367 shares => {
368 optional => 1,
369 type => 'integer',
370 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
371 ." more memory this VM gets. Number is relative to weights of all other running VMs."
372 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
373 minimum => 0,
374 maximum => 50000,
375 default => 1000,
376 },
377 keyboard => {
378 optional => 1,
379 type => 'string',
380 description => "Keyboard layout for VNC server. This option is generally not required and"
381 ." is often better handled from within the guest OS.",
382 enum => PVE::Tools::kvmkeymaplist(),
383 default => undef,
384 },
385 name => {
386 optional => 1,
387 type => 'string', format => 'dns-name',
388 description => "Set a name for the VM. Only used on the configuration web interface.",
389 },
390 scsihw => {
391 optional => 1,
392 type => 'string',
393 description => "SCSI controller model",
394 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
395 default => 'lsi',
396 },
397 description => {
398 optional => 1,
399 type => 'string',
400 description => "Description for the VM. Shown in the web-interface VM's summary."
401 ." This is saved as comment inside the configuration file.",
402 maxLength => 1024 * 8,
403 },
404 ostype => {
405 optional => 1,
406 type => 'string',
407 # NOTE: When extending, also consider extending `%guest_types` in `Import/ESXi.pm`.
408 enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
409 description => "Specify guest operating system.",
410 verbose_description => <<EODESC,
411 Specify guest operating system. This is used to enable special
412 optimization/features for specific operating systems:
413
414 [horizontal]
415 other;; unspecified OS
416 wxp;; Microsoft Windows XP
417 w2k;; Microsoft Windows 2000
418 w2k3;; Microsoft Windows 2003
419 w2k8;; Microsoft Windows 2008
420 wvista;; Microsoft Windows Vista
421 win7;; Microsoft Windows 7
422 win8;; Microsoft Windows 8/2012/2012r2
423 win10;; Microsoft Windows 10/2016/2019
424 win11;; Microsoft Windows 11/2022
425 l24;; Linux 2.4 Kernel
426 l26;; Linux 2.6 - 6.X Kernel
427 solaris;; Solaris/OpenSolaris/OpenIndiania kernel
428 EODESC
429 },
430 boot => {
431 optional => 1,
432 type => 'string', format => 'pve-qm-boot',
433 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
434 ." key or 'legacy=' is deprecated.",
435 },
436 bootdisk => {
437 optional => 1,
438 type => 'string', format => 'pve-qm-bootdisk',
439 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
440 pattern => '(ide|sata|scsi|virtio)\d+',
441 },
442 smp => {
443 optional => 1,
444 type => 'integer',
445 description => "The number of CPUs. Please use option -sockets instead.",
446 minimum => 1,
447 default => 1,
448 },
449 sockets => {
450 optional => 1,
451 type => 'integer',
452 description => "The number of CPU sockets.",
453 minimum => 1,
454 default => 1,
455 },
456 cores => {
457 optional => 1,
458 type => 'integer',
459 description => "The number of cores per socket.",
460 minimum => 1,
461 default => 1,
462 },
463 numa => {
464 optional => 1,
465 type => 'boolean',
466 description => "Enable/disable NUMA.",
467 default => 0,
468 },
469 hugepages => {
470 optional => 1,
471 type => 'string',
472 description => "Enable/disable hugepages memory.",
473 enum => [qw(any 2 1024)],
474 },
475 keephugepages => {
476 optional => 1,
477 type => 'boolean',
478 default => 0,
479 description => "Use together with hugepages. If enabled, hugepages will not not be deleted"
480 ." after VM shutdown and can be used for subsequent starts.",
481 },
482 vcpus => {
483 optional => 1,
484 type => 'integer',
485 description => "Number of hotplugged vcpus.",
486 minimum => 1,
487 default => 0,
488 },
489 acpi => {
490 optional => 1,
491 type => 'boolean',
492 description => "Enable/disable ACPI.",
493 default => 1,
494 },
495 agent => {
496 optional => 1,
497 description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
498 type => 'string',
499 format => $agent_fmt,
500 },
501 kvm => {
502 optional => 1,
503 type => 'boolean',
504 description => "Enable/disable KVM hardware virtualization.",
505 default => 1,
506 },
507 tdf => {
508 optional => 1,
509 type => 'boolean',
510 description => "Enable/disable time drift fix.",
511 default => 0,
512 },
513 localtime => {
514 optional => 1,
515 type => 'boolean',
516 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
517 ." the `ostype` indicates a Microsoft Windows OS.",
518 },
519 freeze => {
520 optional => 1,
521 type => 'boolean',
522 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
523 },
524 vga => {
525 optional => 1,
526 type => 'string', format => $vga_fmt,
527 description => "Configure the VGA hardware.",
528 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
529 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
530 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
531 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
532 ." display server. For win* OS you can select how many independent displays you want,"
533 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
534 ." using a serial device as terminal.",
535 },
536 watchdog => {
537 optional => 1,
538 type => 'string', format => 'pve-qm-watchdog',
539 description => "Create a virtual hardware watchdog device.",
540 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
541 ." action), the watchdog must be periodically polled by an agent inside the guest or"
542 ." else the watchdog will reset the guest (or execute the respective action specified)",
543 },
544 startdate => {
545 optional => 1,
546 type => 'string',
547 typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
548 description => "Set the initial date of the real time clock. Valid format for date are:"
549 ."'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
550 pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
551 default => 'now',
552 },
553 startup => get_standard_option('pve-startup-order'),
554 template => {
555 optional => 1,
556 type => 'boolean',
557 description => "Enable/disable Template.",
558 default => 0,
559 },
560 args => {
561 optional => 1,
562 type => 'string',
563 description => "Arbitrary arguments passed to kvm.",
564 verbose_description => <<EODESCR,
565 Arbitrary arguments passed to kvm, for example:
566
567 args: -no-reboot -smbios 'type=0,vendor=FOO'
568
569 NOTE: this option is for experts only.
570 EODESCR
571 },
572 tablet => {
573 optional => 1,
574 type => 'boolean',
575 default => 1,
576 description => "Enable/disable the USB tablet device.",
577 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
578 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
579 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
580 ." may consider disabling this to save some context switches. This is turned off by"
581 ." default if you use spice (`qm set <vmid> --vga qxl`).",
582 },
583 migrate_speed => {
584 optional => 1,
585 type => 'integer',
586 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
587 minimum => 0,
588 default => 0,
589 },
590 migrate_downtime => {
591 optional => 1,
592 type => 'number',
593 description => "Set maximum tolerated downtime (in seconds) for migrations.",
594 minimum => 0,
595 default => 0.1,
596 },
597 cdrom => {
598 optional => 1,
599 type => 'string', format => 'pve-qm-ide',
600 typetext => '<volume>',
601 description => "This is an alias for option -ide2",
602 },
603 cpu => {
604 optional => 1,
605 description => "Emulated CPU type.",
606 type => 'string',
607 format => 'pve-vm-cpu-conf',
608 },
609 parent => get_standard_option('pve-snapshot-name', {
610 optional => 1,
611 description => "Parent snapshot name. This is used internally, and should not be modified.",
612 }),
613 snaptime => {
614 optional => 1,
615 description => "Timestamp for snapshots.",
616 type => 'integer',
617 minimum => 0,
618 },
619 vmstate => {
620 optional => 1,
621 type => 'string', format => 'pve-volume-id',
622 description => "Reference to a volume which stores the VM state. This is used internally"
623 ." for snapshots.",
624 },
625 vmstatestorage => get_standard_option('pve-storage-id', {
626 description => "Default storage for VM state volumes/files.",
627 optional => 1,
628 }),
629 runningmachine => get_standard_option('pve-qemu-machine', {
630 description => "Specifies the QEMU machine type of the running vm. This is used internally"
631 ." for snapshots.",
632 }),
633 runningcpu => {
634 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
635 ." internally for snapshots.",
636 optional => 1,
637 type => 'string',
638 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
639 format_description => 'QEMU -cpu parameter'
640 },
641 machine => get_standard_option('pve-qemu-machine'),
642 arch => {
643 description => "Virtual processor architecture. Defaults to the host.",
644 optional => 1,
645 type => 'string',
646 enum => [qw(x86_64 aarch64)],
647 },
648 smbios1 => {
649 description => "Specify SMBIOS type 1 fields.",
650 type => 'string', format => 'pve-qm-smbios1',
651 maxLength => 512,
652 optional => 1,
653 },
654 protection => {
655 optional => 1,
656 type => 'boolean',
657 description => "Sets the protection flag of the VM. This will disable the remove VM and"
658 ." remove disk operations.",
659 default => 0,
660 },
661 bios => {
662 optional => 1,
663 type => 'string',
664 enum => [ qw(seabios ovmf) ],
665 description => "Select BIOS implementation.",
666 default => 'seabios',
667 },
668 vmgenid => {
669 type => 'string',
670 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
671 format_description => 'UUID',
672 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
673 ." to disable explicitly.",
674 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
675 ." value identifier to the guest OS. This allows to notify the guest operating system"
676 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
677 ." execution or creation from a template). The guest operating system notices the"
678 ." change, and is then able to react as appropriate by marking its copies of"
679 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
680 ."Note that auto-creation only works when done through API/CLI create or update methods"
681 .", but not when manually editing the config file.",
682 default => "1 (autogenerated)",
683 optional => 1,
684 },
685 hookscript => {
686 type => 'string',
687 format => 'pve-volume-id',
688 optional => 1,
689 description => "Script that will be executed during various steps in the vms lifetime.",
690 },
691 ivshmem => {
692 type => 'string',
693 format => $ivshmem_fmt,
694 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
695 ." the host.",
696 optional => 1,
697 },
698 audio0 => {
699 type => 'string',
700 format => $audio_fmt,
701 description => "Configure a audio device, useful in combination with QXL/Spice.",
702 optional => 1
703 },
704 spice_enhancements => {
705 type => 'string',
706 format => $spice_enhancements_fmt,
707 description => "Configure additional enhancements for SPICE.",
708 optional => 1
709 },
710 tags => {
711 type => 'string', format => 'pve-tag-list',
712 description => 'Tags of the VM. This is only meta information.',
713 optional => 1,
714 },
715 rng0 => {
716 type => 'string',
717 format => $rng_fmt,
718 description => "Configure a VirtIO-based Random Number Generator.",
719 optional => 1,
720 },
721 meta => {
722 type => 'string',
723 format => $meta_info_fmt,
724 description => "Some (read-only) meta-information about this guest.",
725 optional => 1,
726 },
727 affinity => {
728 type => 'string', format => 'pve-cpuset',
729 description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
730 optional => 1,
731 },
732 };
733
# Property-string schema for the `cicustom` option. Every key is optional and
# takes a volume ID ('pve-volume-id') pointing at a custom cloud-init file.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both parts must use the same quote character: with a single-quoted first
        # part, a `"` followed by `."` on the next line would become literal text
        # inside the description instead of a string concatenation.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
766
# Schema of the cloud-init related VM config options.
# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        # The parenthesis after `ostype` is closed here so the rendered help text is balanced.
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.',
        default => 1,
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
824
825 # what about other qemu settings ?
826 #cpu => 'string',
827 #machine => 'string',
828 #fda => 'file',
829 #fdb => 'file',
830 #mtdblock => 'file',
831 #sd => 'file',
832 #pflash => 'file',
833 #snapshot => 'bool',
834 #bootp => 'file',
835 ##tftp => 'dir',
836 ##smb => 'dir',
837 #kernel => 'file',
838 #append => 'string',
839 #initrd => 'file',
840 ##soundhw => 'string',
841
# Publish every option currently present in $confdesc as a standard option named
# "pve-qm-<key>". This runs before the numaN keys are added just below, so those
# are not registered here.
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}

# Upper bounds for the numbered netN / serialN / parallelN style options.
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;

# All numaN options (N = 0 .. MAX_NUMA - 1) share the same description object.
$confdesc->{"numa$_"} = $PVE::QemuServer::Memory::numadesc
    for 0 .. $PVE::QemuServer::Memory::MAX_NUMA - 1;
853
# NIC models accepted by the `model` key of the network format.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];
# Space separated, string-sorted rendering of the list above.
my $nic_model_list_txt = join(' ', sort @$nic_model_list);
871
872 my $net_fmt_bridge_descr = <<__EOD__;
873 Bridge to attach the network device to. The Proxmox VE standard bridge
874 is called 'vmbr0'.
875
876 If you do not specify a bridge, we create a kvm user (NATed) network
877 device, which provides DHCP and DNS services. The following addresses
878 are used:
879
880 10.0.2.2 Gateway
881 10.0.2.3 DNS Server
882 10.0.2.4 SMB Server
883
884 The DHCP server assign addresses to the guest starting from 10.0.2.15.
885 __EOD__
886
887 my $net_fmt = {
888 macaddr => get_standard_option('mac-addr', {
889 description => "MAC address. That address must be unique withing your network. This is"
890 ." automatically generated if not specified.",
891 }),
892 model => {
893 type => 'string',
894 description => "Network Card Model. The 'virtio' model provides the best performance with"
895 ." very low CPU overhead. If your guest does not support this driver, it is usually"
896 ." best to use 'e1000'.",
897 enum => $nic_model_list,
898 default_key => 1,
899 },
900 (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
901 bridge => get_standard_option('pve-bridge-id', {
902 description => $net_fmt_bridge_descr,
903 optional => 1,
904 }),
905 queues => {
906 type => 'integer',
907 minimum => 0, maximum => 64,
908 description => 'Number of packet queues to be used on the device.',
909 optional => 1,
910 },
911 rate => {
912 type => 'number',
913 minimum => 0,
914 description => "Rate limit in mbps (megabytes per second) as floating point number.",
915 optional => 1,
916 },
917 tag => {
918 type => 'integer',
919 minimum => 1, maximum => 4094,
920 description => 'VLAN tag to apply to packets on this interface.',
921 optional => 1,
922 },
923 trunks => {
924 type => 'string',
925 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
926 description => 'VLAN trunks to pass through this interface.',
927 format_description => 'vlanid[;vlanid...]',
928 optional => 1,
929 },
930 firewall => {
931 type => 'boolean',
932 description => 'Whether this interface should be protected by the firewall.',
933 optional => 1,
934 },
935 link_down => {
936 type => 'boolean',
937 description => 'Whether this interface should be disconnected (like pulling the plug).',
938 optional => 1,
939 },
940 mtu => {
941 type => 'integer',
942 minimum => 1, maximum => 65520,
943 description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
944 optional => 1,
945 },
946 };
947
948 my $netdesc = {
949 optional => 1,
950 type => 'string', format => $net_fmt,
951 description => "Specify network devices.",
952 };
953
954 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
955
# Property-string format of the 'ipconfigN' options: an address and a gateway
# for each of IPv4 and IPv6. A gateway may only be given together with the
# address of the same family ('requires').
my $ipconfig_fmt = {
    ip => {
        description => 'IPv4 address in CIDR format.',
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        default => 'dhcp',
        optional => 1,
    },
    gw => {
        description => 'Default gateway for IPv4 traffic.',
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        requires => 'ip',
        optional => 1,
    },
    ip6 => {
        description => 'IPv6 address in CIDR format.',
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        default => 'dhcp',
        optional => 1,
    },
    gw6 => {
        description => 'Default gateway for IPv6 traffic.',
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        requires => 'ip6',
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry for the cloud-init 'ipconfigN' options: an optional property
# string in the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig description itself; the previous code registered the
# network device description ($netdesc) under this name by mistake.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1009
# Add one 'netN' option to the config schema and one 'ipconfigN' option to the
# cloud-init schema for every index below $MAX_NETS.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Copy every cloud-init option into the main config schema.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1018
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset and returns the short string
# form of the resulting set. If parsing fails, returns nothing when $noerr is
# set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # only the member list is needed, the count is discarded
    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if (my $err = $@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($members);
    return $cpuset->short_string();
}
1032
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier for 'pve-volume-id-or-qm-path'.
#
# Accepts the special values 'none' and 'cdrom' unchanged; everything else is
# delegated to verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    my $is_special = grep { $volid eq $_ } qw(none cdrom);
    return $volid if $is_special;

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1041
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# A value starting with '/' is returned unchanged. Anything else must pass the
# 'pve-volume-id' format check, whose result is returned. On a failed check,
# returns nothing when $noerr is set and re-throws the error otherwise.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    if ($volid !~ m|^/|) {
        my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
        if (my $err = $@) {
            return if $noerr;
            die $err;
        }
        return $checked;
    }

    return $volid;
}
1055
# Schema entry for a serial port option: either a host device below /dev/ or
# the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => 'Create a serial device inside the VM (n is 0 to 3)',
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1072
# Schema entry for a parallel port option: a host parport or USB printer
# device node.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => 'Map host parallel devices (n is 0 to 2).',
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1087
# Add the indexed device options (parallelN, serialN, hostpciN, usbN) and all
# drive options to the config schema.
$confdesc->{"parallel$_"} = $paralleldesc for (0 .. $MAX_PARALLEL_PORTS - 1);

$confdesc->{"serial$_"} = $serialdesc for (0 .. $MAX_SERIAL_PORTS - 1);

for my $i (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$i"} = $PVE::QemuServer::PCI::hostpcidesc;
}

# the drive descriptions are already keyed by their option name
$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

for my $i (0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$i"} = $PVE::QemuServer::USB::usbdesc;
}
1107
# Property-string format of the 'boot' option: the deprecated 'legacy' letter
# list (default key) and the explicit device 'order'.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => '[acdn]{1,4}',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => 'device[;device...]',
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1142
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
#
# Accepts every valid drive name except efidisk*/tpmstate*, plus any netN,
# usbN or hostpciN key that exists in the config schema. Returns $dev when
# accepted; otherwise returns nothing if $noerr is set, or dies.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special) {
        return $dev;
    }

    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1164
# Render a list of boot devices as an 'order=dev1;dev2;...' property string.
# Returns the empty string for an empty list.
sub print_bootorder {
    my ($devs) = @_;

    return "" unless scalar(@$devs);

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1171
# cached result of the KVM_GET_API_VERSION ioctl
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. The value is cached once it
# is true. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1185
# per-binary caches: detected version string and the binary's mtime at the
# time of detection
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of a QEMU binary as printed by '<binary> --version'.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and re-detected whenever the binary's mtime changes.
# Returns 'unknown' if the version line could not be parsed. Dies if the
# binary cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # File::stat's stat() returns undef on failure; fail with a clear message
    # instead of a "Can't call method" error on the mtime access below
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    # best effort: a failing command only produces a warning and leaves the
    # cached value at 'unknown'
    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};

}
# Wrapper around PVE::QemuServer::Machine::extract_version() that falls back
# to kvm_user_version() when no $version is passed.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1220
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1224
# True if $key has a defined entry in the VM config schema.
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};
    return defined($desc);
}
1229
# cached result of get_cdrom_path(); '' means "no physical drive found"
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none is, log a warning and return ''. The result is cached either way.
sub get_cdrom_path {

    if (!defined($cdrom_path)) {
        for my $suffix ('', '1', '2') {
            my $candidate = "/dev/cdrom$suffix";
            next if !-l $candidate;
            $cdrom_path = $candidate;
            last;
        }

        if (!defined($cdrom_path)) {
            log_warn("no physical CD-ROM available, ignoring");
            $cdrom_path = '';
        }
    }

    return $cdrom_path;
}
1244
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'           -> the physical drive as found by get_cdrom_path()
#   'none'            -> ''
#   an absolute path  -> returned unchanged
#   anything else     -> treated as a volume ID and resolved via PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1258
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/... paths and values that already look like
# 'storage:volume' are returned unchanged. Any other name containing a '/'
# is rejected (returns nothing). A plain file name is mapped to
# 'local:iso/<file>' for cdrom media and to 'local:<vmid>/<file>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $subdir = ($media && $media eq 'cdrom') ? 'iso' : $vmid;
    return "local:$subdir/$file";
}
1277
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when $media
# is not set. Raises a parameter exception for option $opt on a mismatch and
# dies with an internal error for an unknown media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my $expected_type_for = {
        disk => 'images',
        cdrom => 'iso',
    };

    my $etype = $expected_type_for->{$media};
    die "internal error" if !defined($etype);

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1296
# Normalize $drive in place: convert a filesystem path in $drive->{file} to a
# volume ID and default $drive->{media} to 'cdrom' where that is implied.
#
# Values that are 'cdrom'/'none', a /dev/... path, already of the form
# 'storage:volume', or purely numeric are not converted. Raises a parameter
# exception for option $opt if a path cannot be mapped to any storage.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    # try to convert filesystem paths to volume IDs

    my $keep_as_is = $drive->{file} =~ m/^(cdrom|none)$/
        || $drive->{file} =~ m|^/dev/.+|
        || $drive->{file} =~ m/^([^:]+):(.+)$/
        || $drive->{file} =~ m/^\d+$/;

    if (!$keep_as_is) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    if (!$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/) {
        $drive->{media} = 'cdrom';
    }
}
1316
# Parse the 'hotplug' option into a hash { feature => 1, ... }.
#
# '0' yields an empty hash, '1' selects the schema default list. Dies on any
# feature other than network, disk, cpu, memory, usb or cloudinit.
sub parse_hotplug_features {
    my ($data) = @_;

    my $enabled = {};

    return $enabled if $data eq '0';

    my $list = $data eq '1' ? $confdesc->{hotplug}->{default} : $data;

    for my $feature (PVE::Tools::split_list($list)) {
        $feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled->{$1} = 1;
    }

    return $enabled;
}
1335
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. Otherwise returns
# nothing when $noerr is set, or dies with the parser's error message.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() dies on invalid input, so it has to run inside
    # an eval for $noerr to be honoured at all
    my $parsed = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if $parsed;

    return if $noerr;

    die $err if $err;
    die "unable to parse hotplug option\n";
}
1346
# Die if the VNC clipboard is requested together with a vga type whose name
# does not start with std, cirrus, vmware, virtio or qxl. An unset clipboard
# or vga type passes.
sub assert_clipboard_config {
    my ($vga) = @_;

    my $clipboard = $vga->{'clipboard'};
    my $wants_vnc_clipboard = $clipboard && $clipboard eq 'vnc';

    if ($wants_vnc_clipboard && $vga->{type} && $vga->{type} !~ m/^(std|cirrus|vmware|virtio|qxl)/) {
        die "vga type $vga->{type} is not compatible with VNC clipboard\n";
    }
}
1361
# Build the QEMU device string for the USB tablet. It is attached to the ehci
# bus on q35 machines and on aarch64, and to the uhci bus otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=${usbbus}.0,port=1";
}
1377
# Build the QEMU device string for the USB keyboard. Only aarch64 gets one;
# for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    return $arch eq 'aarch64'
        ? "usb-kbd,id=keyboard,bus=ehci.0,port=2"
        : ();
}
1385
# Drive identifier: interface name directly followed by the index, e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;

    my ($interface, $index) = ($drive->{interface}, $drive->{index});
    return $interface . $index;
}
1390
# Build the QEMU '-device' argument for a guest drive.
#
# Supported interfaces are virtio, scsi, ide and sata; 'usb' and anything else
# die. The device is linked to its backend via 'drive=drive-<id>' and gets
# 'id=<id>', where <id> is the interface name followed by the index.
# bootindex and serial are appended for all interfaces when set.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $machine_version = extract_version($machine_type, kvm_user_version());
        my $device_type = PVE::QemuServer::Drive::get_scsi_device_type(
            $drive, $storecfg, $machine_version);

        # default, lsi* and pvscsi controllers address drives by scsi-id;
        # all other controllers use a fixed scsi-id with the drive index as lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$device_type,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$device_type,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($device_type eq 'block' || $device_type eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

        # only scsi-hd and scsi-cd support passing vendor and product information
        if ($device_type eq 'hd' || $device_type eq 'cd') {
            if (my $vendor = $drive->{vendor}) {
                $device .= ",vendor=$vendor";
            }
            if (my $product = $drive->{product}) {
                $device .= ",product=$product";
            }
        }

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
        # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
        # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
        # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
        # were before. Move odd ones up by 2 where they don't clash.
        if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $drive->{interface} eq 'ide') {
            $controller += 2 * ($unit % 2);
            $unit = 0;
        }

        my $device_type = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$device_type";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($device_type eq 'hd') {
            # the model is URI-unescaped before it is passed on
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    # the serial is URI-unescaped before it is passed on
    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1486
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
# Returns the value of the first 'InitiatorName = ...' line, undef if the file
# has no such line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1500
# Decide whether io_uring may be used as the default aio mode for a drive on
# the storage described by $scfg. Returns 1 if allowed and nothing otherwise;
# a missing storage config is always allowed.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    return 1 if !$scfg;

    my $type = $scfg->{type};

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    return if $type eq 'lvm';

    # io_uring causes problems when used with CIFS since kernel 5.15
    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
    return if $type eq 'cifs';

    return 1;
}
1517
# Tell whether a drive ends up with a direct (O_DIRECT style) cache mode.
#
# With an explicit cache setting this is true for off, none and directsync.
# Without one it is true unless the drive is a CD-ROM or lives on a btrfs
# storage that does not have 'nocow' set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        my $is_direct = $cache =~ /^(?:off|none|directsync)$/;
        return $is_direct;
    }

    return 0 if drive_is_cdrom($drive);

    my $btrfs_with_cow = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
    return $btrfs_with_cow ? 0 : 1;
}
1531
# Build the QEMU '-drive' argument for a guest drive.
#
# $live_restore_name, when set, names the backing node of a live restore: the
# drive is then wrapped in an 'alloc-track' node (not allowed for CD-ROM
# drives). $io_uring allows io_uring as default aio mode where the storage
# permits it.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $live_restore_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options that are passed through to QEMU unchanged
    my $opts = join(
        '',
        map { ",$_=$drive->{$_}" }
        grep { defined($drive->{$_}) }
        qw(heads secs cyls trans media cache rerror werror aio discard)
    );

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        $opts .= ",snapshot=" . ($drive->{snapshot} ? 'on' : 'off');
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        my $readonly = $drive->{ro} ? 'on' : 'off';
        $opts .= ",readonly=$readonly";
    }

    # throttling limits for total, read and write; the mbps values are
    # converted to bytes per second
    for my $direction (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$direction;

        my @limits = (
            # [ drive key, QEMU option name, value is given in MiB ]
            ["mbps$dir", "bps$qmpname", 1],
            ["mbps${dir}_max", "bps$qmpname-max", 1],
            ["bps${dir}_max_length", "bps$qmpname-max-length", 0],
            ["iops${dir}", "iops$qmpname", 0],
            ["iops${dir}_max", "iops$qmpname-max", 0],
            ["iops${dir}_max_length", "iops$qmpname-max-length", 0],
        );

        for my $limit (@limits) {
            my ($key, $qemu_name, $is_mib) = @$limit;
            my $v = $drive->{$key} or next;
            $v = int($v*1024*1024) if $is_mib;
            $opts .= ",throttling.$qemu_name=$v";
        }
    }

    if ($live_restore_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;

    if (!$drive->{aio}) {
        my $aio;
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $aio = 'io_uring';
        } else {
            # aio native works only with O_DIRECT
            $aio = $cache_direct ? 'native' : 'threads';
        }
        $opts .= ",aio=$aio";
    }

    if (!drive_is_cdrom($drive)) {
        # 'on' used to be our default with discard not being specified
        my $detectzeroes = 'on';
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $live_restore_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    $opts .= ",backing=$live_restore_name,auto-remove=on" if $live_restore_name;

    my $file_param;
    if (!$live_restore_name) {
        $file_param = "file";
    } elsif ($is_rbd) {
        $file_param = "file.filename";
    } else {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param = "file.file.filename";
    }

    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1657
# Build the blockdev option string for a read-only 'pbs' driver node named
# $pbs_name. repository, snapshot and archive are always emitted; namespace
# and keyfile only when set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @parts, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @parts, "snapshot=$pbs_conf->{snapshot}", "archive=$pbs_conf->{archive}";
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1668
# Build the QEMU '-device' argument for a guest NIC.
#
# The 'virtio' model maps to 'virtio-net-pci'; any other model name is used as
# the device name as-is. Dies if an explicit MTU is below 576 or above the
# bridge MTU; an MTU of 1 means "use the bridge MTU".
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $devstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $devstr .= ",vectors=$vectors,mq=on";
        $devstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $devstr .= ",rx_queue_size=1024,tx_queue_size=256";
    }

    if ($net->{bootindex}) {
        $devstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                $mtu = $bridge_mtu;
            } else {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n"
                    if $mtu < 576;
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n"
                    if $mtu > $bridge_mtu;
            }
            $devstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # PXE ROM file per device name; other devices get no romfile option
        my $romfile_for = {
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        };
        my $romfile = $romfile_for->{$device};
        $devstr .= ",romfile=$romfile" if $romfile;
    }

    return $devstr;
}
1731
# Build the QEMU '-netdev' argument for a guest NIC.
#
# $netid must have the form 'netN' with N below $MAX_NETS; the host interface
# is named 'tap<vmid>iN'. With a bridge configured a tap backend using the
# pve-bridge scripts is created (the hotplug variant of the up script if
# $hotplug is set), otherwise a user-mode backend. Dies on an invalid net id
# or a too long interface name.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    my $i;
    if ($netid =~ m/^net(\d+)$/) {
        $i = int($1);
    }

    # an id that does not match 'netN' must be rejected as well; it used to
    # slip through the numeric check and produced the interface name "tap<vmid>i"
    die "got strange net id '$netid'\n" if !defined($i) || $i >= $MAX_NETS;

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    # vhost is only enabled for virtio NICs on the native architecture
    my $vhostparam = '';
    if (is_native_arch($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1769
# maps the configured vga type to the QEMU display device name
my $vga_map = {
    cirrus => 'cirrus-vga',
    std => 'VGA',
    virtio => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
    vmware => 'vmware-svga',
};
1777
# Build the QEMU '-device' argument for a display adapter.
#
# The device type is looked up in $vga_map; a true $qxlnum selects a qxl
# device instead ('qxl-vga' when $id is false, 'qxl' otherwise). $id is
# appended to the device id 'vga'. Dies if no device type can be determined,
# or if the libraries or the DRM render node needed for 'virtio-gl' are
# missing.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # the alternation is grouped so that both anchors apply to each
        # alternative (it used to read /^(?:l\d\d)|(?:other)$/)
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no device-type for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the standard VGA device on Windows guests
    # unless the VM uses OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1845
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'netN' property string into a hash. On a parse error the error is
# emitted as a warning and nothing is returned. If no MAC address is given, a
# random one with the datacenter's MAC prefix is generated, unless
# $disable_mac_autogen is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $res if defined($res->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $res;
}
1861
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfigN' property string into a hash. Parse errors and
# inconsistent gateway settings are emitted as warnings and make the function
# return nothing. If neither an IPv4 nor an IPv6 address is set, both default
# to 'dhcp'.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }

    if (defined($problem)) {
        warn $problem;
        return;
    }

    if (!$res->{ip} && !$res->{ip6}) {
        return { ip => 'dhcp', ip6 => 'dhcp' };
    }

    return $res;
}
1896
# Render a parsed network device hash as a property string in $net_fmt.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1902
# Re-render every 'netN' entry of $settings through parse_net()/print_net(),
# in place. Entries that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1913
# Return 1 if the owner reported by PVE::Storage::path() for $volid equals
# $vmid. Returns nothing for absolute paths, when the lookup fails, or when
# the owner differs.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # errors from the storage layer are ignored; $owner then stays undefined
    my (undef, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);

    return;
}
1927
# Handle a drive that is no longer referenced by the VM configuration.
#
# A cloud-init drive is freed right away (failures only produce a warning) and
# the 'cloudinit' key is removed from $conf. Any other non-CD-ROM volume is
# added to the config as unused volume, but only if the VM owns it.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
1942
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
my $smbios1_fmt = do {
    # all free-text fields share the same schema and differ only in their
    # description
    my $text_property = sub {
        my ($description) = @_;
        return {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description,
            optional => 1,
        };
    };

    my $fmt = {
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        version => $text_property->("Set SMBIOS1 version."),
        serial => $text_property->("Set SMBIOS1 serial number."),
        manufacturer => $text_property->("Set SMBIOS1 manufacturer."),
        product => $text_property->("Set SMBIOS1 product ID."),
        sku => $text_property->("Set SMBIOS1 SKU string."),
        family => $text_property->("Set SMBIOS1 family string."),
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };

    $fmt;
};
2000
# Parse an 'smbios1' property string according to $smbios1_fmt.
#
# Parse errors are not fatal: they are emitted as a warning and the (then undefined) result is
# returned.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2008
# Serialize an smbios1 hash back into its property string form, using $smbios1_fmt as schema.
sub print_smbios1 {
    my $smbios1 = shift;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}

# make the schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2015
# Parse a 'watchdog' property string according to $watchdog_fmt.
#
# Returns nothing for an empty/false $value. Parse errors are only warned about, in which case
# the result is undefined.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2025
# Parse the 'agent' option of a VM config according to $agent_fmt.
#
# Always returns a hash reference: an empty one if the option is not set or the agent is not
# enabled, the parsed properties otherwise. Parse errors are only warned about.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent = $conf->{agent};
    return {} unless defined($agent);

    my $res = eval { parse_property_string($agent_fmt, $agent) };
    if (my $err = $@) {
        warn $err;
    }

    # a disabled agent hides all other properties that might be set
    return $res->{enabled} ? $res : {};
}
2038
# Look up a single property ($key) of the guest agent configuration.
#
# Returns undef if no 'agent' option is configured; otherwise the value found in the result of
# parse_guest_agent(), which is empty for a disabled agent.
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        my $agent = parse_guest_agent($conf);
        return $agent->{$key};
    }

    return undef;
}
2046
# Parse a 'vga' property string according to $vga_fmt.
#
# An empty/false $value yields an empty hash reference. Parse errors are only warned about, in
# which case the result is undefined.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $parsed = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2055
# Parse an RNG device property string according to $rng_fmt.
#
# Returns nothing for an empty/false $value. Parse errors are only warned about, in which case
# the result is undefined.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2065
# Parse a 'meta' property string according to $meta_info_fmt.
#
# Returns nothing for an empty/false $value. Parse errors are only warned about, in which case
# the result is undefined.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2075
# Build the 'meta' property string for a newly created VM.
#
# Records the value of kvm_user_version() as 'creation-qemu' and the current time (integer
# epoch seconds, stringified) as 'ctime'. Takes no arguments - for now it is not allowed to
# override any value.
sub new_meta_info_string {
    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => '' . int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2087
# Return extra QEMU command line arguments (as array reference) needed for VMs that always use
# the latest machine version, but were created before a machine version transition that
# affected the HW such that, e.g., an OS config change would be required (we do not want to pin
# the machine version for non-windows OS types). Returns nothing if no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);
    my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});

    # only non-versioned machine types are affected
    return if defined($machine_conf->{type}) && $machine_conf->{type} !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or with unknown creation version) are affected
    return if defined($meta->{'creation-qemu'}) && min_version($meta->{'creation-qemu'}, 6, 1);

    # handle snapshot-rollback/migrations: a forced version older than 6.1 needs no change
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $is_linux = $conf->{ostype} && $conf->{ostype} eq 'l26';
    return if !($q35 && $is_linux);

    # this changed to default-on in QEMU 6.1 for q35 machines, it will mess with the PCI slot
    # view and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2113
# Add the JSON schema properties for the create and set API calls to $prop and return it.
#
# All options from $confdesc are added, except a few internal ones. If $with_disk_alloc is set,
# drive options use the schema variant that allows allocating new disks.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    # options that must not be exposed through these properties
    my %skip = map { ($_ => 1) } qw(parent snaptime vmstate runningmachine runningcpu meta);

    for my $opt (keys %$confdesc) {
        next if $skip{$opt};

        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2139
# Properties that we can read from an OVF file: one optional disk image per valid drive name,
# plus the 'cores', 'memory' and 'name' settings.
sub json_ovf_properties {
    my %prop = map {
        ($_ => {
            type => 'string',
            format => 'pve-volume-id-or-absolute-path',
            description => "Disk image that gets imported to $_",
            optional => 1,
        })
    } PVE::QemuServer::Drive::valid_drive_names();

    $prop{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return \%prop;
}
2171
# Return a deep copy of $confdesc_cloudinit (used to generate documentation), so that callers
# cannot modify the original schema.
sub cloudinit_config_properties {
    my $properties = dclone($confdesc_cloudinit);
    return $properties;
}
2177
# Return a hash reference whose keys are the option names relevant for pending cloud-init
# changes: all keys of $confdesc_cloudinit, 'name' and net0..net($MAX_NETS-1). All values are 1.
sub cloudinit_pending_properties {
    my $p = {
        # The parentheses are required: 'keys' is a named unary operator, so without them the
        # trailing "name => 1" pair would become part of map's input list and yield a bogus
        # '1' key in the result.
        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
        name => 1,
    };
    $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
    return $p;
}
2186
# Validate $value against the type of config option $key (as described by $confdesc) and return
# the normalized value.
#
# - boolean: 1/on/yes/true => 1, 0/off/no/false => 0 (case insensitive)
# - integer: unsigned decimal digits only, returned as integer
# - number: unsigned decimal with optional fraction, returned unchanged
# - string: checked against the option's format if it has one, otherwise a pair of enclosing
#   double quotes is stripped
#
# Dies for unknown keys, undefined values, values containing a line feed or carriage return and
# for failed type checks.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value eq '1' || $value =~ m/^(on|yes|true)$/i;
        return 0 if $value eq '0' || $value =~ m/^(off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        $value =~ s/^\"(.*)\"$/$1/;
        return $value;
    }

    die "internal error"
}
2223
# Destroy the VM $vmid: free the volumes it owns, clean up its network interface IPAM entries
# (best effort) and finally either remove its config or, if $replacement_conf is defined, write
# that one in its place.
#
# Unless $skiplock is set or the config has the 'suspended' lock, the config lock is checked
# first. Templates are refused as long as one of their base volumes is used by a linked clone.
# With $purge_unreferenced, all image volumes listed for $vmid on the storages are freed too,
# not only those referenced in the config.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf)
        unless $skiplock || PVE::QemuConfig->has_lock($conf, 'suspended');

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    my $handled_volids = {}; # volumes we already tried to free
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $handled_volids->{$volid};

        # never touch volumes owned by another guest
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $handled_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    # state volumes of snapshots
    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $remove_owned_drive->('vmstate', $drive);
    }

    # volumes only referenced by pending changes
    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            if (my $err = $@) {
                warn $err;
            }
        });
    }

    eval { delete_ifaces_ipams_ips($conf, $vmid) };
    if (my $err = $@) {
        warn $err;
    }

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2299
# Parse the raw content ($raw) of the VM config file $filename into a config hash.
#
# The result contains the options of the current config at the top level, plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options from the [PENDING] section
#   cloudinit - options from the [special:cloudinit] section (stored without validation)
#   snapshots - one hash per snapshot section, keyed by section name
#
# Returns nothing if $raw is undefined and dies if $filename does not look like
# '.../qemu-server/<vmid>.conf'. Problems with single lines are fatal if $strict is set and only
# warned about otherwise.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        die $msg if $strict;
        warn $msg;
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # the section hash that is currently being filled
    my $section = '';

    # comment lines and 'description:' entries are collected and stored on section change/EOF
    my $descr;
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };

    for my $line (split(/\n/, $raw)) {
        next if $line =~ m/^\s*$/;

        # section headers
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored like all other keys, so that values of other options which merely end
            # in 'snapstate: ...' are not mistaken for a snapstate entry
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # store drives with their file converted by filename_to_volume_id()
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2419
# Serialize the config hash $conf into the raw text of a VM config file and return it.
#
# Before serializing, $conf is normalized in place: 'cdrom' is converted to 'ide2', the legacy
# 'smp' option is replaced by 'sockets', all values are validated with check_type() and
# 'unusedX' entries for volumes that are used by a drive of the current or pending config are
# dropped. Dies on conflicting or invalid options.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    my $used_volids = {};

    # validates and normalizes all options of one config section and records the volumes used by
    # drives (snapshot sections are not considered for the latter)
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # renders one section: the description as leading comment lines, then all options sorted by
    # key
    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $section_conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written out for the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $section_conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)) {
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2526
# Return a hash reference with the default value of every config option that defines one.
# We use the static defaults from our JSON schema configuration ($confdesc).
sub load_defaults {
    my %defaults;

    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        $defaults{$key} = $default if defined($default);
    }

    return \%defaults;
}
2540
# List the guests of type 'qemu' that the cluster VM list assigns to the local node.
# Returns a hash reference of the form { $vmid => { exists => 1 } }.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};

    unless ($vmlist && $vmlist->{ids}) {
        return $res;
    }

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $d = $ids->{$vmid};

        my $is_local = $d->{node} && $d->{node} eq $nodename;
        next if !$is_local;

        my $is_qemu = $d->{type} && $d->{type} eq 'qemu';
        next if !$is_qemu;

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2556
# Test if the VM uses local resources (to prevent migration).
#
# Dies with "VM uses local resources" if any are found, unless $noerr is set. In list context
# returns (\@loc_res, $mapped_res, $missing_mappings_by_node):
#   - the config keys using local resources,
#   - the usbN/hostpciN keys that use a resource mapping,
#   - per cluster node, the keys whose mapping has no entry for that node.
# In scalar context only the reference to the first list is returned.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { ($_ => []) } @$nodelist };

    # record $key for each node that has no entry for the mapping $id
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;

        for my $node (@$nodelist) {
            my $entry = $type eq 'pci' ? PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node)
                : $type eq 'usb' ? PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node)
                : undef;

            next if @$entry;
            push @{$missing_mappings_by_node->{$node}}, $key;
        }
    };

    # old syntax
    for my $legacy_key (qw(hostusb hostpci)) {
        push @loc_res, $legacy_key if $conf->{$legacy_key};
    }

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    for my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # USB devices with host 'spice' are not counted as local resource
            next if $entry->{host} && $entry->{host} =~ m/^spice$/i;
            if (my $mapping = $entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $mapping);
                push @$mapped_res, $k;
            }
        }

        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if (my $mapping = $entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $mapping);
                push @$mapped_res, $k;
            }
        }

        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');

        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    if (@loc_res && !$noerr) {
        die "VM uses local resources\n";
    }

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2615
# Check that the storages used by the volumes of $conf are enabled on the local node as well as
# on $node (used by migrate), and that each storage allows the content type of the volume.
# Dies if a check fails.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        # only volumes with a storage ID can be checked
        my ($sid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2639
# List the nodes where all VM images are available (used by the has_feature API).
#
# Starts with all cluster nodes and narrows the set down per volume: a disabled storage removes
# all nodes, a storage restricted to certain nodes removes all others and a non-shared storage
# leaves only the local node. Returns a hash reference with the remaining node names as keys.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { ($_ => 1) } @$nodelist };
    my $nodename = nodename();

    my $restrict_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete @$nodehash{ grep { !$avail->{$_} } keys %$nodehash };
        } elsif (!$scfg->{shared}) {
            delete @$nodehash{ grep { $_ ne $nodename } keys %$nodehash };
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_by_volume);

    return $nodehash
}
2673
# Determine, for every cluster node, which storages used by the volumes of $conf are not
# available there (storage disabled, or restricted to other nodes).
#
# Returns a hash reference keyed by node name. Nodes with unavailable storages get the sorted
# list of storage IDs as 'unavailable_storages', all other nodes map to an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { ($_ => {}) } @$nodelist };

    my $collect_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected_nodes;
        if ($scfg->{disable}) {
            @affected_nodes = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
        }

        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected_nodes;
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_unavailable);

    # convert the collected storage ID sets into sorted lists
    for my $node (values %$nodehash) {
        my $unavail = $node->{unavailable_storages};
        next if !$unavail;
        $node->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2712
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns the result of vm_running_locally() for $vmid, after asserting that its config exists
# on $node unless $nocheck is set.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config file might still
    # or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't processed it yet
    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2726
# Return the list of local VMs as produced by config_list(), with the 'pid' added for each VM
# that has a PID file in the run directory and is reported as running by check_running().
sub vzlist {
    my $vzlist = config_list();

    # without a readable run directory there is nothing to add
    my $fd = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$fd;

    while (defined(my $de = $fd->read)) {
        if ($de =~ m/^(\d+)\.pid$/) {
            my $vmid = $1;
            next if !defined($vzlist->{$vmid});

            my $pid = check_running($vmid);
            $vzlist->{$vmid}->{pid} = $pid if $pid;
        }
    }

    return $vzlist;
}
2744
# JSON schema properties describing the per-VM status entries.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => ['stopped', 'running'],
        description => "QEMU process status.",
    },
    maxmem => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Maximum memory in bytes.",
    },
    maxdisk => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Root disk size in bytes.",
    },
    name => {
        type => 'string',
        optional => 1,
        description => "VM name.",
    },
    qmpstatus => {
        type => 'string',
        optional => 1,
        description => "VM run state from the 'query-status' QMP monitor command.",
    },
    pid => {
        type => 'integer',
        optional => 1,
        description => "PID of running qemu process.",
    },
    uptime => {
        type => 'integer',
        renderer => 'duration',
        optional => 1,
        description => "Uptime.",
    },
    cpus => {
        type => 'number',
        optional => 1,
        description => "Maximum usable CPUs.",
    },
    lock => {
        type => 'string',
        optional => 1,
        description => "The current config lock, if any.",
    },
    tags => {
        type => 'string',
        optional => 1,
        description => "The current configured tags, if any",
    },
    'running-machine' => {
        type => 'string',
        optional => 1,
        description => "The currently running machine type (if running).",
    },
    'running-qemu' => {
        type => 'string',
        optional => 1,
        description => "The currently running QEMU version (if running).",
    },
};
2811
# Last CPU accounting sample per QEMU process, keyed by PID. Kept between calls of vmstatus() so
# that the CPU usage can be derived from the difference of two samples.
my $last_proc_pid_stat;

# Get VM status information for all local VMs, or only for $opt_vmid if given.
#
# This must be fast and should not block if $full is false: then only config data and /proc
# statistics are used. We only query KVM using QMP if $full is true (this can be slow).
# Returns a hash reference with one status hash per VM ID.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();
    my $list = vzlist();
    my $defaults = load_defaults();
    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);
    my $cpucount = $cpuinfo->{cpus} || 1;

    # the VMs we have to report on
    my @vmids = grep { !$opt_vmid || $_ eq $opt_vmid } keys %$list;

    # static information from the VM configs, all counters start at zero
    for my $vmid (@vmids) {
        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = {
            vmid => int($vmid),
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => 0, # no info available
            uptime => 0,
            cpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
        };
        $d->{pid} = int($pid) if $pid;

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{maxdisk} = defined($size) ? $size : 0;

        # sockets * cores, capped by the host CPU count, unless 'vcpus' is configured
        my $sockets = $conf->{sockets} || $defaults->{sockets};
        my $cores = $conf->{cores} || $defaults->{cores};
        my $cpus = $sockets * $cores;
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = get_current_memory($conf->{memory}) * (1024 * 1024);

        if (my $balloon = $conf->{balloon}) {
            $d->{balloon_min} = $balloon * (1024 * 1024);
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # Network traffic, summed up over the tap devices of each VM. Note that the 'receive' counter
    # of a tap device is accounted as 'netout' of the VM and 'transmit' as 'netin'.
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $d = $res->{$1};
        next if !$d;

        my $stats = $netdev->{$dev};
        $d->{netout} += $stats->{receive};
        $d->{netin} += $stats->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($stats->{receive});
            $d->{nics}->{$dev}->{netin} = int($stats->{transmit});
        }
    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage of running VMs from /proc/<pid>/stat
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this process, no usage can be calculated yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time passed since the last sample, re-use the old value
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my @running_vmids = grep { $res->{$_}->{pid} } @vmids;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];
        my ($totalrdbytes, $totalwrbytes) = (0, 0);

        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $totalrdbytes += $stats->{rd_bytes};
            $totalwrbytes += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }

        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;

        my $data = $resp->{'return'} // {};
        my $v = $data->{qemu};

        $res->{$vmid}->{'running-qemu'} = $v
            ? $v->{major} . "." . $v->{minor} . "." . $v->{micro}
            : 'unknown';
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be the last command
        # (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    $qmpclient->queue_cmd($_, $statuscb, 'query-status') for @running_vmids;

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@running_vmids) {
        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status if QMP gave us no run state
    for my $vmid (@vmids) {
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
3051
# Return 1 if any of the serial0..serial($MAX_SERIAL_PORTS-1) options is set (to a true value)
# in $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
3063
# Return a description of the audio device "audio$id" ($id defaults to 0) from $conf, or nothing
# if it is not configured.
#
# The returned hash reference contains the device model ('dev'), the audio backend ('backend',
# the configured driver or 'spice' if none is set) and the IDs to use for both of them.
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return unless defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $driver = $props->{driver} // 'spice';

    my $audio_info = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };

    return $audio_info;
}
3081
# Generate the QEMU command line arguments for an audio device.
#
# $audio is a hash as returned by conf_has_audio(), $audiopciaddr the already formatted PCI
# address suffix that gets appended to the device string. For machine versions >= 4.2 the
# devices are explicitly linked to the audio backend via the 'audiodev' property.
#
# Returns an array reference of arguments: the '-device' entries followed by the '-audiodev'
# backend definition. Dies for unknown device models.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $devs = [];

    my $id = $audio->{dev_id};
    my $audiodev = min_version($machine_version, 4, 2) ? ",audiodev=$audio->{backend_id}" : "";

    if ($audio->{dev} eq 'AC97') {
        push @$devs, '-device', "AC97,id=${id}${audiopciaddr}$audiodev";
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        push @$devs, '-device', "$audio->{dev},id=${id}${audiopciaddr}";
        push @$devs, '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev";
        push @$devs, '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev";
    } else {
        die "unknown audio device '$audio->{dev}', implement me!";
    }

    push @$devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return $devs;
}
3107
# Return the runtime file locations of the swtpm instance belonging to $vmid
# as a hash reference with the keys 'socket' (control socket) and 'pid'
# (PID file).
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket,
        pid => "$socket.pid",
    };
}
3115
# Append the QEMU arguments for an emulated TPM to @$devices.
#
# Nothing is added unless $conf has a 'tpmstate0' entry. The TPM is wired up
# as a socket chardev pointing at the swtpm socket of $vmid, a 'tpmdev'
# emulator backend using that chardev, and a 'tpm-tis' device on top.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket_path = get_tpm_paths($vmid)->{socket};

    push @$devices, (
        "-chardev", "socket,id=tpmchar,path=$socket_path",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev",
    );
}
3127
# Prepare the TPM state (unless migrating) and launch the swtpm emulator
# daemon for a VM.
#
# Parameters:
#   $storecfg  - storage configuration, used to resolve storage volumes
#   $vmid      - VM ID, used to derive the socket, PID file and log paths
#   $tpmdrive  - raw 'tpmstate0' property string; nothing happens if unset
#   $migration - true for an incoming migration: the state is received from
#                the remote side, so swtpm_setup is skipped
#
# Returns the untainted PID of the swtpm daemon so that callers can kill it on
# error, undef if the PID file did not contain a number, or nothing if
# $tpmdrive is unset. Dies if the PID file does not show up in time; errors
# raised by run_command propagate to the caller.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $tpm = parse_drive("tpmstate0", $tpmdrive);

    # volumes that parse as a storage volume ID are resolved through
    # PVE::Storage::map_volume(), anything else is used verbatim
    my $state;
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    # an unset version is treated as "not v2.0", but without an undef warning
    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        # The callback gets each output line as its argument. Do not rely on
        # $1 here: it would only hold the line if a regex capture of the
        # caller happened to leak into this closure.
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;

    # same as above: print the line handed to the callback, one per row
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # Return the untainted PID of the swtpm daemon so it can be killed on
    # error. Capture into a lexical: after a failed match $1 would still hold
    # whatever an earlier, unrelated match captured.
    my $first_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $first_line =~ m/(\d+)/;
    return $pid;
}
3202
# Determine how many SPICE (qxl) displays the 'vga' property string $vga
# requests.
#
# Returns 0 if the parsed type is unset or is not one of qxl, qxl2, qxl3 or
# qxl4; otherwise the numeric suffix, with plain 'qxl' counting as 1.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $parsed = parse_vga($vga);
    my $type = $parsed->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3212
# Return the architecture of a VM: the 'arch' entry of $conf if it is defined,
# the result of get_host_arch() otherwise.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return $configured if defined($configured);

    return get_host_arch();
}
3217
# Machine type to fall back to per guest architecture, for when no explicit
# machine type is given.
my $default_machines = {
    'x86_64' => 'pc',
    'aarch64' => 'virt',
};
3222
# Extract the "<major>.<minor>" part of a QEMU version string.
#
# $kvmversion defaults to the result of kvm_user_version(). Returns the leading
# "<major>.<minor>" of the version, or undef if the string does not start
# with one.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture into a lexical instead of returning $1: after a failed match $1
    # still holds the capture of some earlier, unrelated match.
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3229
# Turn a generic machine type into a version-pinned one.
#
# The version used is $base_version if it is defined and
# PVE::QemuServer::Machine::can_run_pve_machine_version() accepts it for
# $kvmversion, otherwise the result of get_installed_machine_version().
# An unset $machine or 'pc' becomes "pc-i440fx-<version>", 'q35' becomes
# "pc-q35-<version>" and 'virt' becomes "virt-<version>". Any other value is
# returned unchanged after a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my $prefix_of = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    # an unset machine type is handled like 'pc'
    my $prefix = $prefix_of->{ $machine || 'pc' };
    return "$prefix-$pin_version" if defined($prefix);

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3251
# Resolve the QEMU machine type string for a VM.
#
# $forcemachine takes precedence over the type parsed from the 'machine'
# property of $conf. An unset or generic type (pc, q35, virt) is resolved
# further:
#   - for Windows guests (windows_version() of 'ostype' is true) it is pinned
#     via windows_get_pinned_machine_version() with base version 5.1,
#   - a type that is still unset becomes the default for $arch ('x86_64' if
#     $arch is undefined),
#   - with $add_pve_version set, "+pve<N>" is appended, N being the result of
#     PVE::QemuServer::Machine::get_pve_version() for $kvmversion (which
#     defaults to kvm_user_version()).
# With $add_pve_version set, any type still lacking a "+pve<N>" suffix gets
# "+pve0"; a trailing ".pxe" is kept at the very end.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});
    my $machine = $forcemachine || $machine_conf->{type};

    my $is_generic = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_generic) {
        $kvmversion = kvm_user_version() if !defined($kvmversion);

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch = 'x86_64' if !defined($arch);
        $machine = $default_machines->{$arch} if !$machine;

        if ($add_pve_version) {
            my $pvever = PVE::QemuServer::Machine::get_pve_version($kvmversion);
            $machine .= "+pve$pvever";
        }
    }

    # done if no pve-version is wanted or the type already carries one
    if (!$add_pve_version || $machine =~ m/\+pve\d+?(?:\.pxe)?$/) {
        return $machine;
    }

    # temporarily strip a '.pxe' suffix so that the pve-version goes before it
    my $pxe_suffix = '';
    if ($machine =~ m/^(.*?)\.pxe$/) {
        $machine = $1;
        $pxe_suffix = '.pxe';
    }

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    return $machine . '+pve0' . $pxe_suffix;
}
3287
# Select the OVMF firmware images to use for a VM.
#
# Parameters:
#   $arch    - guest architecture, must have an entry in $OVMF
#   $efidisk - parsed efidisk (may be undef); its 'efitype' and
#              'pre-enrolled-keys' properties are only consulted on x86_64
#   $smm     - on x86_64 with efitype '4m': true selects the '4m' images,
#              false the '4m-no-smm' ones
#
# Returns the list (code image path, vars image path). Dies if the
# architecture is unknown or one of the two images is not a regular file.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $images = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$images;

    my $variant = 'default';
    if ($arch eq 'x86_64') {
        my $efitype = $efidisk->{efitype};
        if (defined($efitype) && $efitype eq '4m') {
            $variant = '4m';
            $variant .= '-no-smm' if !$smm;
            $variant .= '-ms' if $efidisk->{'pre-enrolled-keys'};
        }
        # TODO: log_warn about use of legacy images for x86_64 with Proxmox VE 9
    }

    my ($ovmf_code, $ovmf_vars) = @{ $images->{$variant} };

    -f $ovmf_code or die "EFI base image '$ovmf_code' not found\n";
    -f $ovmf_vars or die "EFI vars image '$ovmf_vars' not found\n";

    return ($ovmf_code, $ovmf_vars);
}
3310
# Emulator binaries for guest architectures that are not native to the host.
my $Arch2Qemu = {
    'aarch64' => '/usr/bin/qemu-system-aarch64',
    'x86_64' => '/usr/bin/qemu-system-x86_64',
};

# Return the binary used to run guests of architecture $arch: '/usr/bin/kvm'
# if is_native_arch() reports the architecture as native, the matching entry
# of $Arch2Qemu otherwise. Dies for architectures without such an entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native_arch($arch)) {
        return '/usr/bin/kvm';
    }

    my $binary = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$binary;

    return $binary;
}
3323
# The results of query_supported_cpu_flags and query_understood_cpu_flags must
# be intersected (array_intersect) before they are used for a QEMU command
# line ('-cpu' element), because:
#
# a) query_understood_ also returns flags the host cannot use, and
# b) query_supported_ (or rather the underlying QMP call) returns CPU
#    settings, not CPU flags. Most of those settings are flags, but the rest
#    (and, curiously, some flags) cannot be passed as a "-cpu" argument.
#
# query_supported_ has to start up to 2 temporary VMs, which makes it rather
# expensive. The value it returns is available much cheaper from pmxcfs via
# PVE::Cluster::get_node_kv('cpuflags-$accel'), $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, which populates pmxcfs automatically.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# as kvm and tcg machines support different flags. 'kvm' is only queried when
# kvm_version() is defined; an entry is undef if its query failed (a warning
# is emitted in that case).
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch = get_host_arch() if !defined($arch);
    my $machine = $default_machines->{$arch};

    my $result = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $have_kvm = defined(kvm_version());
    my $binary = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Starts a temporary (frozen) VM with vmid -1, asks it via QMP for the
    # expansion of the 'host' CPU model and stops it again. Returns the sorted
    # list of enabled properties as array reference.
    my $query_accel = sub {
        my ($use_kvm) = @_;

        my @qemu_cmd = (
            $binary,
            '-machine', $machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_cmd, '-accel', 'tcg' if !$use_kvm;

        my $rc = run_command(\@qemu_cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my $enabled = {};
        eval {
            my $cmd_result = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $cmd_result->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                $name =~ s/\.|-/_/g;
                $enabled->{$name} = 1;
            }
        };
        my $query_err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $query_err if $query_err;

        return [ sort keys %$enabled ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $result->{tcg} = eval { $query_accel->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($have_kvm) {
            $result->{kvm} = eval { $query_accel->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $result;
}
3425
# The list of understood CPU flags is written to a file in this directory when
# 'pve-qemu' is compiled.
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the CPU flags QEMU understands for the host architecture.
#
# Returns an array reference with the whitespace-separated entries of the
# flag file for get_host_arch(). Dies if that file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # trim first, so that splitting cannot yield an empty leading entry
    $content =~ s/^\s+|\s+$//g;

    return [ split(/\s+/, $content) ];
}
3441
# SMM is no longer off by default since pve-qemu commit
# 277d33454f77ec1d1e0bc04e37621e4dd2424b67, yet smm=off seems to be required
# when SeaBIOS is combined with a serial display.
#
# Returns a true value if the BIOS is SeaBIOS (unset 'bios' counts as SeaBIOS)
# and the display type is 'serial<N>' or 'none'. Returns nothing for 'virt'
# machines and a false value in all other cases.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    # there is no smm flag that could be disabled
    if ($machine =~ m/^virt/) {
        return;
    }

    my $bios = $conf->{bios};
    my $is_seabios = !defined($bios) || $bios eq 'seabios';

    return $is_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3452
# Build the two '-drive' option strings needed to boot a VM with OVMF.
#
# Parameters:
#   $conf          - VM configuration; 'efidisk0' is used if present
#   $storecfg      - storage configuration, used to resolve the efidisk volume
#   $vmid          - VM ID, used for the path of the temporary vars image
#   $arch          - guest architecture, handed to get_ovmf_files()
#   $q35           - handed to get_ovmf_files() as its third argument
#   $version_guard - code reference taking (major, minor, pve); the explicit
#                    'size=' option is only emitted if it returns true for
#                    (4, 1, 2)
#
# Returns the list (code drive string, vars drive string). Without an efidisk
# the vars image is copied to /tmp and a warning is logged. Dies if the
# efidisk is not a storage volume and has no format set.
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $d = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
    my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$d) {
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $path, -s $ovmf_vars);
        $var_drive_str .= ",format=raw,file=$path";
        if ($version_guard->(4, 1, 2)) {
            $var_drive_str .= ",size=" . (-s $ovmf_vars);
        }
        return ($code_drive_str, $var_drive_str);
    }

    my $volid = $d->{file};
    my $path = $volid;
    my $format = $d->{format};

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    if (!$storeid && !defined($format)) {
        die "efidisk format must be specified\n";
    }

    if ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        if (!defined($format)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }
    }

    # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
    if ($path =~ m/^rbd:/) {
        $var_drive_str .= ',cache=writeback';
        $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
    }
    $var_drive_str .= ",format=$format,file=$path";

    # only consult the version guard for raw images
    if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
        $var_drive_str .= ",size=" . (-s $ovmf_vars);
    }
    if (drive_is_read_only($conf, $d)) {
        $var_drive_str .= ',readonly=on';
    }

    return ($code_drive_str, $var_drive_str);
}
3492
3493 sub config_to_command {
3494 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3495 $live_restore_backing) = @_;
3496
3497 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3498 my $devices = [];
3499 my $bridges = {};
3500 my $ostype = $conf->{ostype};
3501 my $winversion = windows_version($ostype);
3502 my $kvm = $conf->{kvm};
3503 my $nodename = nodename();
3504
3505 my $machine_conf = PVE::QemuServer::Machine::parse_machine($conf->{machine});
3506
3507 my $arch = get_vm_arch($conf);
3508 my $kvm_binary = get_command_for_arch($arch);
3509 my $kvmver = kvm_user_version($kvm_binary);
3510
3511 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3512 $kvmver //= "undefined";
3513 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3514 }
3515
3516 my $add_pve_version = min_version($kvmver, 4, 1);
3517
3518 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3519 my $machine_version = extract_version($machine_type, $kvmver);
3520 $kvm //= 1 if is_native_arch($arch);
3521
3522 $machine_version =~ m/(\d+)\.(\d+)/;
3523 my ($machine_major, $machine_minor) = ($1, $2);
3524
3525 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3526 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3527 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3528 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3529 ." please upgrade node '$nodename'\n"
3530 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3531 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3532 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3533 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3534 ." node '$nodename'\n";
3535 }
3536
3537 # if a specific +pve version is required for a feature, use $version_guard
3538 # instead of min_version to allow machines to be run with the minimum
3539 # required version
3540 my $required_pve_version = 0;
3541 my $version_guard = sub {
3542 my ($major, $minor, $pve) = @_;
3543 return 0 if !min_version($machine_version, $major, $minor, $pve);
3544 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3545 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3546 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3547 return 1;
3548 };
3549
3550 if ($kvm && !defined kvm_version()) {
3551 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3552 ." or enable in BIOS.\n";
3553 }
3554
3555 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3556 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3557 my $use_old_bios_files = undef;
3558 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3559
3560 my $cmd = [];
3561 if ($conf->{affinity}) {
3562 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3563 }
3564
3565 push @$cmd, $kvm_binary;
3566
3567 push @$cmd, '-id', $vmid;
3568
3569 my $vmname = $conf->{name} || "vm$vmid";
3570
3571 push @$cmd, '-name', "$vmname,debug-threads=on";
3572
3573 push @$cmd, '-no-shutdown';
3574
3575 my $use_virtio = 0;
3576
3577 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3578 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3579 push @$cmd, '-mon', "chardev=qmp,mode=control";
3580
3581 if (min_version($machine_version, 2, 12)) {
3582 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3583 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3584 }
3585
3586 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3587
3588 push @$cmd, '-daemonize';
3589
3590 if ($conf->{smbios1}) {
3591 my $smbios_conf = parse_smbios1($conf->{smbios1});
3592 if ($smbios_conf->{base64}) {
3593 # Do not pass base64 flag to qemu
3594 delete $smbios_conf->{base64};
3595 my $smbios_string = "";
3596 foreach my $key (keys %$smbios_conf) {
3597 my $value;
3598 if ($key eq "uuid") {
3599 $value = $smbios_conf->{uuid}
3600 } else {
3601 $value = decode_base64($smbios_conf->{$key});
3602 }
3603 # qemu accepts any binary data, only commas need escaping by double comma
3604 $value =~ s/,/,,/g;
3605 $smbios_string .= "," . $key . "=" . $value if $value;
3606 }
3607 push @$cmd, '-smbios', "type=1" . $smbios_string;
3608 } else {
3609 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3610 }
3611 }
3612
3613 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3614 die "OVMF (UEFI) BIOS is not supported on 32-bit CPU types\n"
3615 if !$forcecpu && get_cpu_bitness($conf->{cpu}, $arch) == 32;
3616
3617 my ($code_drive_str, $var_drive_str) =
3618 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3619 push $cmd->@*, '-drive', $code_drive_str;
3620 push $cmd->@*, '-drive', $var_drive_str;
3621 }
3622
3623 if ($q35) { # tell QEMU to load q35 config early
3624 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3625 if (min_version($machine_version, 4, 0)) {
3626 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3627 } else {
3628 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3629 }
3630 }
3631
3632 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3633 push @$cmd, $fixups->@*;
3634 }
3635
3636 if ($conf->{vmgenid}) {
3637 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3638 }
3639
3640 # add usb controllers
3641 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3642 $conf, $bridges, $arch, $machine_type, $machine_version);
3643 push @$devices, @usbcontrollers if @usbcontrollers;
3644 my $vga = parse_vga($conf->{vga});
3645
3646 my $qxlnum = vga_conf_has_spice($conf->{vga});
3647 $vga->{type} = 'qxl' if $qxlnum;
3648
3649 if (!$vga->{type}) {
3650 if ($arch eq 'aarch64') {
3651 $vga->{type} = 'virtio';
3652 } elsif (min_version($machine_version, 2, 9)) {
3653 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3654 } else {
3655 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3656 }
3657 }
3658
3659 # enable absolute mouse coordinates (needed by vnc)
3660 my $tablet = $conf->{tablet};
3661 if (!defined($tablet)) {
3662 $tablet = $defaults->{tablet};
3663 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3664 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3665 }
3666
3667 if ($tablet) {
3668 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3669 my $kbd = print_keyboarddevice_full($conf, $arch);
3670 push @$devices, '-device', $kbd if defined($kbd);
3671 }
3672
3673 my $bootorder = device_bootorder($conf);
3674
3675 # host pci device passthrough
3676 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3677 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
3678
3679 # usb devices
3680 my $usb_dev_features = {};
3681 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3682
3683 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3684 $conf, $usb_dev_features, $bootorder, $machine_version);
3685 push @$devices, @usbdevices if @usbdevices;
3686
3687 # serial devices
3688 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3689 my $path = $conf->{"serial$i"} or next;
3690 if ($path eq 'socket') {
3691 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3692 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3693 # On aarch64, serial0 is the UART device. QEMU only allows
3694 # connecting UART devices via the '-serial' command line, as
3695 # the device has a fixed slot on the hardware...
3696 if ($arch eq 'aarch64' && $i == 0) {
3697 push @$devices, '-serial', "chardev:serial$i";
3698 } else {
3699 push @$devices, '-device', "isa-serial,chardev=serial$i";
3700 }
3701 } else {
3702 die "no such serial device\n" if ! -c $path;
3703 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
3704 push @$devices, '-device', "isa-serial,chardev=serial$i";
3705 }
3706 }
3707
3708 # parallel devices
3709 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3710 if (my $path = $conf->{"parallel$i"}) {
3711 die "no such parallel device\n" if ! -c $path;
3712 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
3713 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3714 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3715 }
3716 }
3717
3718 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3719 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3720 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3721 push @$devices, @$audio_devs;
3722 }
3723
3724 # Add a TPM only if the VM is not a template,
3725 # to support backing up template VMs even if the TPM disk is write-protected.
3726 add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf));
3727
3728 my $sockets = 1;
3729 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3730 $sockets = $conf->{sockets} if $conf->{sockets};
3731
3732 my $cores = $conf->{cores} || 1;
3733
3734 my $maxcpus = $sockets * $cores;
3735
3736 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3737
3738 my $allowed_vcpus = $cpuinfo->{cpus};
3739
3740 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3741
3742 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3743 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3744 for (my $i = 2; $i <= $vcpus; $i++) {
3745 my $cpustr = print_cpu_device($conf, $arch, $i);
3746 push @$cmd, '-device', $cpustr;
3747 }
3748
3749 } else {
3750
3751 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3752 }
3753 push @$cmd, '-nodefaults';
3754
3755 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3756
3757 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3758
3759 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3760
3761 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3762 push @$devices, '-device', print_vga_device(
3763 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3764
3765 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3766
3767 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3768 push @$cmd, '-vnc', "unix:$socket,password=on";
3769 } else {
3770 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3771 push @$cmd, '-nographic';
3772 }
3773
3774 # time drift fix
3775 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3776 my $useLocaltime = $conf->{localtime};
3777
3778 if ($winversion >= 5) { # windows
3779 $useLocaltime = 1 if !defined($conf->{localtime});
3780
3781 # use time drift fix when acpi is enabled
3782 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3783 $tdf = 1 if !defined($conf->{tdf});
3784 }
3785 }
3786
3787 if ($winversion >= 6) {
3788 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3789 push @$machineFlags, 'hpet=off';
3790 }
3791
3792 push @$rtcFlags, 'driftfix=slew' if $tdf;
3793
3794 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3795 push @$rtcFlags, "base=$conf->{startdate}";
3796 } elsif ($useLocaltime) {
3797 push @$rtcFlags, 'base=localtime';
3798 }
3799
3800 if ($forcecpu) {
3801 push @$cmd, '-cpu', $forcecpu;
3802 } else {
3803 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3804 }
3805
3806 PVE::QemuServer::Memory::config(
3807 $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
3808
3809 push @$cmd, '-S' if $conf->{freeze};
3810
3811 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3812
3813 my $guest_agent = parse_guest_agent($conf);
3814
3815 if ($guest_agent->{enabled}) {
3816 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3817 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3818
3819 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3820 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3821 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3822 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3823 } elsif ($guest_agent->{type} eq 'isa') {
3824 push @$devices, '-device', "isa-serial,chardev=qga0";
3825 }
3826 }
3827
3828 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3829 if ($rng && $version_guard->(4, 1, 2)) {
3830 check_rng_source($rng->{source});
3831
3832 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3833 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3834 my $limiter_str = "";
3835 if ($max_bytes) {
3836 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3837 }
3838
3839 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3840 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3841 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3842 }
3843
3844 my $spice_port;
3845
3846 assert_clipboard_config($vga);
3847 my $is_spice = $qxlnum || $vga->{type} =~ /^virtio/;
3848
3849 if ($is_spice || ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc')) {
3850 if ($qxlnum > 1) {
3851 if ($winversion){
3852 for (my $i = 1; $i < $qxlnum; $i++){
3853 push @$devices, '-device', print_vga_device(
3854 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3855 }
3856 } else {
3857 # assume other OS works like Linux
3858 my ($ram, $vram) = ("134217728", "67108864");
3859 if ($vga->{memory}) {
3860 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3861 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3862 }
3863 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3864 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3865 }
3866 }
3867
3868 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3869
3870 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3871 if ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc') {
3872 push @$devices, '-chardev', 'qemu-vdagent,id=vdagent,name=vdagent,clipboard=on';
3873 } else {
3874 push @$devices, '-chardev', 'spicevmc,id=vdagent,name=vdagent';
3875 }
3876 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3877
3878 if ($is_spice) {
3879 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3880 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3881 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3882
3883 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3884 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3885
3886 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3887 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3888 if ($spice_enhancement->{foldersharing}) {
3889 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3890 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3891 }
3892
3893 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3894 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3895 if $spice_enhancement->{videostreaming};
3896 push @$devices, '-spice', "$spice_opts";
3897 }
3898 }
3899
3900 # enable balloon by default, unless explicitly disabled
3901 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3902 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3903 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3904 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3905 push @$devices, '-device', $ballooncmd;
3906 }
3907
3908 if ($conf->{watchdog}) {
3909 my $wdopts = parse_watchdog($conf->{watchdog});
3910 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3911 my $watchdog = $wdopts->{model} || 'i6300esb';
3912 push @$devices, '-device', "$watchdog$pciaddr";
3913 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3914 }
3915
3916 my $vollist = [];
3917 my $scsicontroller = {};
3918 my $ahcicontroller = {};
3919 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3920
3921 # Add iscsi initiator name if available
3922 if (my $initiator = get_initiator_name()) {
3923 push @$devices, '-iscsi', "initiator-name=$initiator";
3924 }
3925
3926 PVE::QemuConfig->foreach_volume($conf, sub {
3927 my ($ds, $drive) = @_;
3928
3929 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3930 check_volume_storage_type($storecfg, $drive->{file});
3931 push @$vollist, $drive->{file};
3932 }
3933
3934 # ignore efidisk here, already added in bios/fw handling code above
3935 return if $drive->{interface} eq 'efidisk';
3936 # similar for TPM
3937 return if $drive->{interface} eq 'tpmstate';
3938
3939 $use_virtio = 1 if $ds =~ m/^virtio/;
3940
3941 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3942
3943 if ($drive->{interface} eq 'virtio'){
3944 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3945 }
3946
3947 if ($drive->{interface} eq 'scsi') {
3948
3949 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3950
3951 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3952 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3953
3954 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3955 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3956
3957 my $iothread = '';
3958 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3959 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3960 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3961 } elsif ($drive->{iothread}) {
3962 log_warn(
3963 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
3964 );
3965 }
3966
3967 my $queues = '';
3968 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
3969 $queues = ",num_queues=$drive->{queues}";
3970 }
3971
3972 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
3973 if !$scsicontroller->{$controller};
3974 $scsicontroller->{$controller}=1;
3975 }
3976
3977 if ($drive->{interface} eq 'sata') {
3978 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3979 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
3980 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
3981 if !$ahcicontroller->{$controller};
3982 $ahcicontroller->{$controller}=1;
3983 }
3984
3985 my $live_restore = $live_restore_backing->{$ds};
3986 my $live_blockdev_name = undef;
3987 if ($live_restore) {
3988 $live_blockdev_name = $live_restore->{name};
3989 push @$devices, '-blockdev', $live_restore->{blockdev};
3990 }
3991
3992 my $drive_cmd = print_drive_commandline_full(
3993 $storecfg, $vmid, $drive, $live_blockdev_name, min_version($kvmver, 6, 0));
3994
3995 # extra protection for templates, but SATA and IDE don't support it..
3996 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
3997
3998 push @$devices, '-drive',$drive_cmd;
3999 push @$devices, '-device', print_drivedevice_full(
4000 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4001 });
4002
4003 for (my $i = 0; $i < $MAX_NETS; $i++) {
4004 my $netname = "net$i";
4005
4006 next if !$conf->{$netname};
4007 my $d = parse_net($conf->{$netname});
4008 next if !$d;
4009 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4010
4011 $use_virtio = 1 if $d->{model} eq 'virtio';
4012
4013 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4014
4015 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4016 push @$devices, '-netdev', $netdevfull;
4017
4018 my $netdevicefull = print_netdevice_full(
4019 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4020
4021 push @$devices, '-device', $netdevicefull;
4022 }
4023
4024 if ($conf->{ivshmem}) {
4025 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4026
4027 my $bus;
4028 if ($q35) {
4029 $bus = print_pcie_addr("ivshmem");
4030 } else {
4031 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4032 }
4033
4034 my $ivshmem_name = $ivshmem->{name} // $vmid;
4035 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4036
4037 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4038 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4039 .",size=$ivshmem->{size}M";
4040 }
4041
4042 # pci.4 is nested in pci.1
4043 $bridges->{1} = 1 if $bridges->{4};
4044
4045 if (!$q35) { # add pci bridges
4046 if (min_version($machine_version, 2, 3)) {
4047 $bridges->{1} = 1;
4048 $bridges->{2} = 1;
4049 }
4050 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4051 }
4052
4053 for my $k (sort {$b cmp $a} keys %$bridges) {
4054 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4055
4056 my $k_name = $k;
4057 if ($k == 2 && $legacy_igd) {
4058 $k_name = "$k-igd";
4059 }
4060 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4061 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4062
4063 if ($q35) { # add after -readconfig pve-q35.cfg
4064 splice @$devices, 2, 0, '-device', $devstr;
4065 } else {
4066 unshift @$devices, '-device', $devstr if $k > 0;
4067 }
4068 }
4069
4070 if (!$kvm) {
4071 push @$machineFlags, 'accel=tcg';
4072 }
4073
4074 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
4075
4076 my $machine_type_min = $machine_type;
4077 if ($add_pve_version) {
4078 $machine_type_min =~ s/\+pve\d+$//;
4079 $machine_type_min .= "+pve$required_pve_version";
4080 }
4081 push @$machineFlags, "type=${machine_type_min}";
4082
4083 PVE::QemuServer::Machine::assert_valid_machine_property($conf, $machine_conf);
4084
4085 if (my $viommu = $machine_conf->{viommu}) {
4086 if ($viommu eq 'intel') {
4087 unshift @$devices, '-device', 'intel-iommu,intremap=on,caching-mode=on';
4088 push @$machineFlags, 'kernel-irqchip=split';
4089 } elsif ($viommu eq 'virtio') {
4090 push @$devices, '-device', 'virtio-iommu-pci';
4091 }
4092 }
4093
4094 push @$cmd, @$devices;
4095 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4096 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4097 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4098
4099 if (my $vmstate = $conf->{vmstate}) {
4100 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4101 push @$vollist, $vmstate;
4102 push @$cmd, '-loadstate', $statepath;
4103 print "activating and using '$vmstate' as vmstate\n";
4104 }
4105
4106 if (PVE::QemuConfig->is_template($conf)) {
4107 # needed to workaround base volumes being read-only
4108 push @$cmd, '-snapshot';
4109 }
4110
4111 # add custom args
4112 if ($conf->{args}) {
4113 my $aa = PVE::Tools::split_args($conf->{args});
4114 push @$cmd, @$aa;
4115 }
4116
4117 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
4118 }
4119
# Sanity-check the host file that should back a VirtIO RNG device.
#
# $source - path of the entropy source on the host (e.g. '/dev/hwrng')
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first line of the
# kernel's 'rng_current' sysfs file reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n"
        if ! -e $source;

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    # NOTE(review): file_read_firstline() presumably yields undef for a missing or empty file -
    # confirm against PVE::Tools. Fall back to '' so the comparison cannot trigger an
    # 'uninitialized value' warning; an unreadable state is simply treated as "not 'none'".
    if ($source eq '/dev/hwrng' && (file_read_firstline($rng_current) // '') eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
    }
}
4136
# Query the running VM for its SPICE port via the 'query-spice' monitor command.
#
# Returns the 'tls-port' value if it is set, otherwise the 'port' value.
# Dies with "no spice port" if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4144
# Collect the IDs of devices currently present in the running VM.
#
# The result is a hash reference keyed by device ID with true values. It is assembled from
# four monitor queries: PCI devices (including those behind bridges), block devices named
# 'drive-<id>', the HID tablet (reported as key 'tablet') and USB peripherals.
sub vm_devices_list {
    my ($vmid) = @_;

    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my $present = {};

    # start with the devices sitting directly on each reported bus
    my $pending = [];
    for my $bus (@$pci_buses) {
        push @$pending, @{$bus->{devices}};
    }

    # then descend through bridges one level at a time
    while (@$pending) {
        my $next_level = [];
        for my $dev (@$pending) {
            my $id = $dev->{'qdev_id'};
            $present->{$id} = 1 if $id;

            my $bridge = $dev->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            # a bridge's entry additionally counts the devices attached behind it
            $present->{$id} += scalar(@{$bridge->{devices}});
            push @$next_level, @{$bridge->{devices}};
        }
        $pending = $next_level;
    }

    my $block_devs = mon_cmd($vmid, 'query-block');
    for my $blk (@$block_devs) {
        $present->{$1} = 1 if $blk->{device} =~ m/^drive-(\S+)/;
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    $present->{tablet} = 1 if first { $_->{name} eq 'QEMU HID Tablet' } @$mice;

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $entry (@$peripherals) {
        my $name = $entry->{name};
        $present->{$name} = 1 if $name =~ m/^usb(?:redirdev)?\d+$/;
    }

    return $present;
}
4194
# Hot-plug one device into the running VM.
#
# Parameters:
#   $storecfg     - storage configuration, forwarded to the drive helpers
#   $conf         - VM configuration hash
#   $vmid         - ID of the VM
#   $deviceid     - ID of the device to plug; selects the branch below ('tablet', 'keyboard',
#                   usbredirdevN, usbN, virtioN, virtioscsiN/scsihwN, scsiN, netN, pci.N)
#   $device       - parsed device settings; only read by the USB, drive, SCSI controller and
#                   network branches
#   $arch         - forwarded to the print_* helpers
#   $machine_type - forwarded to the print_* helpers
#
# Returns 1 if the device is already listed by vm_devices_list() or was plugged. Returns an
# empty value if qemu_netdevadd() reports failure. Dies for unsupported device IDs and
# whenever one of the helpers dies.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # nothing to do if the VM already reports a device with this ID
    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        # the chardev has to exist before the device referencing it is added
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, $devicefull);
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            # roll back the drive added above, then re-raise the original error
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        # 'virtio-scsi-single' is a config-level name, the QEMU device used for it is virtio-scsi-pci
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings are only applied to 'virtioscsiN' controllers
        if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if($deviceid =~ m/^virtioscsi(\d+)$/ && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        # make sure the controller for this disk exists before attaching the disk
        qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            # roll back the drive added above, then re-raise the original error
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # note: this shadows the $machine_type parameter for the rest of this branch
        my $machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $machine_type, $machine_version);
        qemu_deviceadd($vmid, $netdevicefull);
        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # roll back the netdev added above, then re-raise the original error
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        # PCI bridges are only hot-plugged here for non-q35 machines
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4291
# fixme: this should raise exceptions on error!
#
# Hot-unplug one device from the running VM.
#
# Returns 1 if the device is not listed by vm_devices_list() or was removed. Dies if the
# device is one of the configured boot disks or if its ID is of an unsupported kind.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    die "can't unplug bootdisk '$deviceid'\n" if grep {$_ eq $deviceid} @$bootdisks;

    # device_del followed by waiting until the device is really gone
    my $remove_verified = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        # these are removed without verification
        qemu_devicedel($vmid, $deviceid);
    } elsif (
        $deviceid =~ m/^usb(?:redirdev)?\d+$/
        || $deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/
    ) {
        $remove_verified->();
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_verified->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_verified->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single_controller;
    } elsif ($deviceid =~ m/^net\d+$/) {
        $remove_verified->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4340
# Add a 'spicevmc' character device of type 'usbredir' with the given ID to the VM.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => {
            type => "usbredir",
        },
    };

    return mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4354
# Create the 'iothread-<deviceid>' object for a device that has 'iothread' enabled, unless
# the VM already lists an iothread object with that ID.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4363
# Remove the 'iothread-<deviceid>' object of a device that has 'iothread' enabled, if the VM
# currently lists an iothread object with that ID.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4372
# Attach the backing drive for $device to the running VM through the HMP 'drive_add' command.
#
# Returns 1 on success, dies with the monitor's reply otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # flag handed to print_drive_commandline_full(), true when the running QEMU is >= 6.0
    my $running_version = get_running_qemu_version($vmid);
    my $qemu_6_or_newer = min_version($running_version, 6, 0);

    my $drive_opts = print_drive_commandline_full($storecfg, $vmid, $device, undef, $qemu_6_or_newer);
    # backslashes are doubled because the string is embedded in a quoted HMP argument
    $drive_opts =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_opts\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
4387
# Detach the backing drive 'drive-<deviceid>' from the running VM through HMP 'drive_del'.
#
# Returns 1 if the monitor reply is empty or reports the device as not found, dies otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    my $removed = $reply eq "" || $reply =~ m/Device \'.*?\' not found/s;
    die "deleting drive $deviceid failed : $reply\n" if !$removed;

    return 1;
}
4401
# Wait until the device shows up in vm_devices_list().
#
# Checks up to six times, sleeping one second after every unsuccessful check.
# Returns 1 once the device is listed, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4413
4414
# Wait until the device has disappeared from vm_devices_list().
#
# Checks up to six times, sleeping one second after every unsuccessful check.
# Returns 1 once the device is gone, dies if it is still listed afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # need to verify that the device is correctly removed as device_del
    # is async and empty return is not reliable
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4429
# Make sure the SCSI controller that $device belongs to exists in the running VM, hot-plugging
# it if vm_devices_list() does not list it yet. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # only the controller number and ID prefix are needed here
    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type)
        if !defined($present->{$controller_id});

    return 1;
}
4444
# Remove the SCSI controller belonging to drive option $opt once it is no longer needed.
#
# With 'virtio-scsi-single' the controller 'virtioscsi<index>' of the drive is unplugged
# directly. Otherwise 'scsihw<controller>' is only unplugged if no SCSI drive that is still
# present in the VM falls below the index limit computed from scsihw_infos(). Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);

    my $present = vm_devices_list($vmid);
    for my $present_id (keys %{$present}) {
        next if !is_valid_drivename($present_id);

        my $drive = parse_drive($present_id, $conf->{$present_id});
        next if $drive->{interface} ne 'scsi';

        # keep the controller while another SCSI drive is still below the index limit
        return 1 if $drive->{index} < (($maxdev-1)*($controller+1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4473
# Hot-plug the PCI bridge that the device with ID $device sits behind, if there is one and the
# running VM does not list it yet.
#
# Parameters:
#   $storecfg, $conf, $vmid - forwarded to vm_deviceplug()
#   $device                 - ID of the device whose PCI address is looked up
#   $arch, $machine_type    - forwarded to print_pci_addr() and vm_deviceplug()
#
# Always returns 1; dies if one of the helpers dies.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # print_pci_addr() is called only to have it record the needed bridge in $bridges
    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    my $bridgeid;
    for my $id (keys %$bridges) {
        $bridgeid = $id;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() expects ($storecfg, $conf, $vmid, $deviceid, $device, $arch,
        # $machine_type). A bridge has no device settings, so pass undef in that slot;
        # leaving it out would shift $arch and $machine_type into the wrong parameters.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4497
# Set the link state of network device $device through the 'set_link' monitor command;
# a true $up means link up, a false one link down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    return mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4504
# Add the host-side network backend for $deviceid to the running VM via 'netdev_add'.
#
# The option string produced by print_netdev_full() is split on '=' and ',' into key/value
# pairs; 'vhost' is converted to a JSON boolean and 'queues' to a number before sending.
# Always returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev_str = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %qmp_args = split(/[=,]/, $netdev_str);

    my $vhost = $qmp_args{vhost};
    $qmp_args{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($vhost))
        if defined($vhost);

    my $queues = $qmp_args{queues};
    $qmp_args{queues} = $queues + 0 if defined($queues);

    mon_cmd($vmid, "netdev_add", %qmp_args);

    return 1;
}
4522
# Remove the network backend with ID $deviceid from the running VM via 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    return mon_cmd($vmid, "netdev_del", id => $deviceid);
}
4528
# Replace the USB device $deviceid of the running VM: unplug the existing one, add an xhci
# controller if the VM does not list one, then plug the new device. Does nothing when
# $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # check if xhci controller is necessary and available
    my $present = vm_devices_list($vmid);
    if (!$present->{xhci}) {
        my $xhci_addr = print_pci_addr("xhci", undef, $arch, $machine_type);
        my $xhci_device = PVE::QemuServer::USB::print_qemu_xhci_controller($xhci_addr);
        qemu_deviceadd($vmid, $xhci_device);
    }

    # add the new one
    return vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4548
# Bring the number of vCPUs of the running VM to $vcpus by hot-plugging or hot-unplugging.
#
# Parameters:
#   $vmid  - ID of the VM
#   $conf  - VM configuration hash; 'vcpus' is updated and written back after every
#            successful plug/unplug step on machine versions >= 2.7
#   $vcpus - target vCPU count; a false value means "all" (sockets * cores)
#
# Dies if $vcpus exceeds sockets * cores, if unplugging is requested on a machine version
# below 2.7, or if (when plugging) the running vCPU count differs from the configured one.
# Raises a parameter exception for 'vcpus' when a single plug/unplug step does not show up
# in 'query-cpus-fast' within the retry limit.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus = $maxcpus if !$vcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # fewer vCPUs requested than configured: unplug from the highest index downwards
    if ($vcpus < $currentvcpus) {

        if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {

            for (my $i = $currentvcpus; $i > $vcpus; $i--) {
                qemu_devicedel($vmid, "cpu$i");
                my $retry = 0;
                my $currentrunningvcpus = undef;
                # poll once per second until the VM reports one vCPU less
                while (1) {
                    $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
                    last if scalar(@{$currentrunningvcpus}) == $i-1;
                    raise_param_exc({ vcpus => "error unplugging cpu$i" }) if $retry > 5;
                    $retry++;
                    sleep 1;
                }
                #update conf after each successful cpu unplug
                $conf->{vcpus} = scalar(@{$currentrunningvcpus});
                PVE::QemuConfig->write_config($vmid, $conf);
            }
        } else {
            die "cpu hot-unplugging requires qemu version 2.7 or higher\n";
        }

        return;
    }

    # plugging only starts from a state where VM and configuration agree
    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$currentrunningvcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        my $arch = get_vm_arch($conf);

        for (my $i = $currentvcpus+1; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf, $arch, $i);
            qemu_deviceadd($vmid, $cpustr);

            my $retry = 0;
            my $currentrunningvcpus = undef;
            # poll once per second until the VM reports the new vCPU
            while (1) {
                $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
                last if scalar(@{$currentrunningvcpus}) == $i;
                raise_param_exc({ vcpus => "error hotplugging cpu$i" }) if $retry > 10;
                sleep 1;
                $retry++;
            }
            #update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$currentrunningvcpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {

        # machine versions below 2.7 use the 'cpu-add' command, without verification
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4624
# Apply I/O throttling limits to block device $deviceid of a running VM.
#
# After $vmid and $deviceid the limits are expected positionally, in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every value is truncated to an integer before it is sent. Does nothing if the VM is not
# running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # monitor argument names, in the same order as the positional values
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    return mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4656
# Resize volume $volid on the storage layer and, if the VM is running, tell QEMU about the
# new size of block device $deviceid. The size reported to QEMU is rounded up to the next
# multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    my $remainder = $size % 1024;
    $size = $size + (1024 - $remainder) if $remainder;

    return mon_cmd(
        $vmid,
        "block_resize",
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );
}
4677
# Create snapshot $snap of volume $volid. QEMU takes the snapshot itself if the VM is running
# and do_snapshots_with_qemu() agrees; otherwise the storage layer does it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap)
        if !$running || !do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
}
4689
# Delete snapshot $snap of volume $volid. If the VM is running and the volume is attached to
# it, QEMU deletes the snapshot when do_snapshots_with_qemu() agrees; in all other cases the
# storage layer does it, being told whether the volume is attached to a running VM.
sub qemu_volume_snapshot_delete {
    my ($vmid, $storecfg, $volid, $snap) = @_;

    my $attached_deviceid;

    if (check_running($vmid)) {
        # look up the drive the volume is attached as, if any
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $attached_deviceid = "drive-$ds" if $drive->{file} eq $volid;
        });
    }

    my $delete_via_qemu = $attached_deviceid
        && do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid);

    if (!$delete_via_qemu) {
        return PVE::Storage::volume_snapshot_delete(
            $storecfg, $volid, $snap, $attached_deviceid ? 1 : undef);
    }

    return mon_cmd(
        $vmid,
        'blockdev-snapshot-delete-internal-sync',
        device => $attached_deviceid,
        name => $snap,
    );
}
4716
# Set the migration capabilities of the running VM.
#
# Every capability reported by 'query-migrate-capabilities' is set explicitly: on if the
# table below enables it, off otherwise. 'dirty-bitmaps' follows what 'query-proxmox-support'
# reports for the savevm ($savevm true) or migration case; a failing support query counts as
# "not supported".
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my $wanted = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    };

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $wanted->{$name} ? JSON::true : JSON::false,
        };
    } @$supported;

    return mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4747
# Call $func once for every volume ID referenced by the VM configuration, including its
# pending section and all snapshots.
#
# $func is invoked as $func->($volid, $attr, @param), where $attr is a hash reference with the
# flags gathered over all references of that volume: cdrom, replicate, shared, is_unused,
# is_attached, is_vmstate, is_tpmstate, and - where applicable - referenced_in_snapshot,
# referenced_in_pending, size and drivename.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $test_volid = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = ($volhash->{$volid} //= {});

        # 'cdrom' stays set only if every reference to the volume is a cdrom drive
        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        # the remaining flags start out cleared and are raised by any matching reference
        $attr->{$_} //= 0 for qw(replicate shared is_unused is_attached is_vmstate is_tpmstate);

        $attr->{replicate} = 1 if $drive->{replicate} // 1;
        $attr->{shared} = 1 if $drive->{shared};
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;
        $attr->{is_attached} = 1
            if !$attr->{is_unused} && !defined($snapname) && !$pending;

        $attr->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname);
        $attr->{referenced_in_pending} = 1 if $pending;

        # the first non-zero size seen wins
        $attr->{size} //= $drive->{size} if $drive->{size};

        $attr->{is_vmstate} = 1 if $key eq 'vmstate';
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current configuration
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid);

    # pending changes
    my $pending_conf = $conf->{pending};
    PVE::QemuConfig->foreach_volume_full($pending_conf, $include_opts, $test_volid, undef, 1)
        if defined($pending_conf) && %$pending_conf;

    # snapshots
    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $test_volid, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4812
# Options marked as "fast plug": the fixed set below plus every cloud-init option.
my $fast_plug_option = {
    map { ($_ => 1) } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    )
};

$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4831
4832 # hotplug changes in [PENDING]
4833 # $selection hash can be used to only apply specified options, for
4834 # example: { cores => 1 } (only apply changed 'cores')
4835 # $errors ref is used to return error messages
4836 sub vmconfig_hotplug_pending {
4837 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4838
4839 my $defaults = load_defaults();
4840 my $arch = get_vm_arch($conf);
4841 my $machine_type = get_vm_machine($conf, undef, $arch);
4842
4843 # commit values which do not have any impact on running VM first
4844 # Note: those option cannot raise errors, we we do not care about
4845 # $selection and always apply them.
4846
4847 my $add_error = sub {
4848 my ($opt, $msg) = @_;
4849 $errors->{$opt} = "hotplug problem - $msg";
4850 };
4851
4852 my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
4853
4854 my $cloudinit_record_changed = sub {
4855 my ($conf, $opt, $old, $new) = @_;
4856 return if !$cloudinit_pending_properties->{$opt};
4857
4858 my $ci = ($conf->{cloudinit} //= {});
4859
4860 my $recorded = $ci->{$opt};
4861 my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');
4862
4863 if (defined($new)) {
4864 if (defined($old)) {
4865 # an existing value is being modified
4866 if (defined($recorded)) {
4867 # the value was already not in sync
4868 if ($new eq $recorded) {
4869 # a value is being reverted to the cloud-init state:
4870 delete $ci->{$opt};
4871 delete $added{$opt};
4872 } else {
4873 # the value was changed multiple times, do nothing
4874 }
4875 } elsif ($added{$opt}) {
4876 # the value had been marked as added and is being changed, do nothing
4877 } else {
4878 # the value is new, record it:
4879 $ci->{$opt} = $old;
4880 }
4881 } else {
4882 # a new value is being added
4883 if (defined($recorded)) {
4884 # it was already not in sync
4885 if ($new eq $recorded) {
4886 # a value is being reverted to the cloud-init state:
4887 delete $ci->{$opt};
4888 delete $added{$opt};
4889 } else {
4890 # the value had temporarily been removed, do nothing
4891 }
4892 } elsif ($added{$opt}) {
4893 # the value had been marked as added already, do nothing
4894 } else {
4895 # the value is new, add it
4896 $added{$opt} = 1;
4897 }
4898 }
4899 } elsif (!defined($old)) {
4900 # a non-existent value is being removed? ignore...
4901 } else {
4902 # a value is being deleted
4903 if (defined($recorded)) {
4904 # a value was already recorded, just keep it
4905 } elsif ($added{$opt}) {
4906 # the value was marked as added, remove it
4907 delete $added{$opt};
4908 } else {
4909 # a previously unrecorded value is being removed, record the old value:
4910 $ci->{$opt} = $old;
4911 }
4912 }
4913
4914 my $added = join(',', sort keys %added);
4915 $ci->{added} = $added if length($added);
4916 };
4917
4918 my $changes = 0;
4919 foreach my $opt (keys %{$conf->{pending}}) { # add/change
4920 if ($fast_plug_option->{$opt}) {
4921 my $new = delete $conf->{pending}->{$opt};
4922 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
4923 $conf->{$opt} = $new;
4924 $changes = 1;
4925 }
4926 }
4927
4928 if ($changes) {
4929 PVE::QemuConfig->write_config($vmid, $conf);
4930 }
4931
4932 my $ostype = $conf->{ostype};
4933 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
4934 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
4935 my $usb_hotplug = $hotplug_features->{usb}
4936 && min_version($version, 7, 1)
4937 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
4938
4939 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
4940 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
4941
4942 foreach my $opt (sort keys %$pending_delete_hash) {
4943 next if $selection && !$selection->{$opt};
4944 my $force = $pending_delete_hash->{$opt}->{force};
4945 eval {
4946 if ($opt eq 'hotplug') {
4947 die "skip\n" if ($conf->{hotplug} =~ /(cpu|memory)/);
4948 } elsif ($opt eq 'tablet') {
4949 die "skip\n" if !$hotplug_features->{usb};
4950 if ($defaults->{tablet}) {
4951 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
4952 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
4953 if $arch eq 'aarch64';
4954 } else {
4955 vm_deviceunplug($vmid, $conf, 'tablet');
4956 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
4957 }
4958 } elsif ($opt =~ m/^usb(\d+)$/) {
4959 my $index = $1;
4960 die "skip\n" if !$usb_hotplug;
4961 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
4962 vm_deviceunplug($vmid, $conf, $opt);
4963 } elsif ($opt eq 'vcpus') {
4964 die "skip\n" if !$hotplug_features->{cpu};
4965 qemu_cpu_hotplug($vmid, $conf, undef);
4966 } elsif ($opt eq 'balloon') {
4967 # enable balloon device is not hotpluggable
4968 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
4969 # here we reset the ballooning value to memory
4970 my $balloon = get_current_memory($conf->{memory});
4971 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
4972 } elsif ($fast_plug_option->{$opt}) {
4973 # do nothing
4974 } elsif ($opt =~ m/^net(\d+)$/) {
4975 die "skip\n" if !$hotplug_features->{network};
4976 vm_deviceunplug($vmid, $conf, $opt);
4977 if($have_sdn) {
4978 my $net = PVE::QemuServer::parse_net($conf->{$opt});
4979 PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name});
4980 }
4981 } elsif (is_valid_drivename($opt)) {
4982 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
4983 vm_deviceunplug($vmid, $conf, $opt);
4984 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
4985 } elsif ($opt =~ m/^memory$/) {
4986 die "skip\n" if !$hotplug_features->{memory};
4987 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf);
4988 } elsif ($opt eq 'cpuunits') {
4989 $cgroup->change_cpu_shares(undef);
4990 } elsif ($opt eq 'cpulimit') {
4991 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
4992 } else {
4993 die "skip\n";
4994 }
4995 };
4996 if (my $err = $@) {
4997 &$add_error($opt, $err) if $err ne "skip\n";
4998 } else {
4999 my $old = delete $conf->{$opt};
5000 $cloudinit_record_changed->($conf, $opt, $old, undef);
5001 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
5002 }
5003 }
5004
5005 my $cloudinit_opt;
5006 foreach my $opt (keys %{$conf->{pending}}) {
5007 next if $selection && !$selection->{$opt};
5008 my $value = $conf->{pending}->{$opt};
5009 eval {
5010 if ($opt eq 'hotplug') {
5011 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
5012 die "skip\n" if ($value =~ /cpu/) || ($value !~ /cpu/ && $conf->{hotplug} =~ /cpu/);
5013 } elsif ($opt eq 'tablet') {
5014 die "skip\n" if !$hotplug_features->{usb};
5015 if ($value == 1) {
5016 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5017 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5018 if $arch eq 'aarch64';
5019 } elsif ($value == 0) {
5020 vm_deviceunplug($vmid, $conf, 'tablet');
5021 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5022 }
5023 } elsif ($opt =~ m/^usb(\d+)$/) {
5024 my $index = $1;
5025 die "skip\n" if !$usb_hotplug;
5026 my $d = eval { parse_property_string('pve-qm-usb', $value) };
5027 my $id = $opt;
5028 if ($d->{host} =~ m/^spice$/i) {
5029 $id = "usbredirdev$index";
5030 }
5031 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
5032 } elsif ($opt eq 'vcpus') {
5033 die "skip\n" if !$hotplug_features->{cpu};
5034 qemu_cpu_hotplug($vmid, $conf, $value);
5035 } elsif ($opt eq 'balloon') {
5036 # enabling/disabling the ballooning device is not hotpluggable
5037 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5038 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5039 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5040
5041 # allow manual ballooning if shares is set to zero
5042 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5043 my $memory = get_current_memory($conf->{memory});
5044 my $balloon = $conf->{pending}->{balloon} || $memory;
5045 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5046 }
5047 } elsif ($opt =~ m/^net(\d+)$/) {
5048 # some changes can be done without hotplug
5049 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5050 $vmid, $opt, $value, $arch, $machine_type);
5051 } elsif (is_valid_drivename($opt)) {
5052 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5053 # some changes can be done without hotplug
5054 my $drive = parse_drive($opt, $value);
5055 if (drive_is_cloudinit($drive)) {
5056 $cloudinit_opt = [$opt, $drive];
5057 # apply all the other changes first, then generate the cloudinit disk
5058 die "skip\n";
5059 }
5060 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5061 $vmid, $opt, $value, $arch, $machine_type);
5062 } elsif ($opt =~ m/^memory$/) { #dimms
5063 die "skip\n" if !$hotplug_features->{memory};
5064 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $value);
5065 } elsif ($opt eq 'cpuunits') {
5066 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5067 $cgroup->change_cpu_shares($new_cpuunits);
5068 } elsif ($opt eq 'cpulimit') {
5069 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5070 $cgroup->change_cpu_quota($cpulimit, 100000);
5071 } elsif ($opt eq 'agent') {
5072 vmconfig_update_agent($conf, $opt, $value);
5073 } else {
5074 die "skip\n"; # skip non-hot-pluggable options
5075 }
5076 };
5077 if (my $err = $@) {
5078 &$add_error($opt, $err) if $err ne "skip\n";
5079 } else {
5080 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
5081 $conf->{$opt} = $value;
5082 delete $conf->{pending}->{$opt};
5083 }
5084 }
5085
5086 if (defined($cloudinit_opt)) {
5087 my ($opt, $drive) = @$cloudinit_opt;
5088 my $value = $conf->{pending}->{$opt};
5089 eval {
5090 my $temp = {%$conf, $opt => $value};
5091 PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
5092 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5093 $vmid, $opt, $value, $arch, $machine_type);
5094 };
5095 if (my $err = $@) {
5096 &$add_error($opt, $err) if $err ne "skip\n";
5097 } else {
5098 $conf->{$opt} = $value;
5099 delete $conf->{pending}->{$opt};
5100 }
5101 }
5102
5103 # unplug xhci controller if no usb device is left
5104 if ($usb_hotplug) {
5105 my $has_usb = 0;
5106 for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
5107 next if !defined($conf->{"usb$i"});
5108 $has_usb = 1;
5109 last;
5110 }
5111 if (!$has_usb) {
5112 vm_deviceunplug($vmid, $conf, 'xhci');
5113 }
5114 }
5115
5116 PVE::QemuConfig->write_config($vmid, $conf);
5117
5118 if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
5119 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
5120 }
5121 }
5122
# Free the volume behind a drive entry, if that is allowed.
#
# Only acts when $force is set or $key names an 'unused*' entry, and never on
# drives for which drive_is_cdrom($drive, 1) is true. Requires
# 'Datastore.AllocateSpace' on the volume's storage and dies if the volume is
# still referenced elsewhere.
#
# Returns 1 if the entry may be dropped from the config (volume freed, or the
# VM does not own the volume); returns nothing if the drive was left alone.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    my $removable = $force || $key =~ /^unused/;
    return if !$removable;
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5145
# Handle removal of the drive configured under $opt.
#
# With $force the caller needs 'VM.Config.Disk' on the VM and the volume is
# handed to try_deallocate_drive(); without it the drive is only registered
# as an unused volume via vmconfig_register_unused_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    # looked up unconditionally, exactly like before, even if only detaching
    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # plain detach: keep the volume around as 'unusedN'
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5161
5162
5163
# Apply all pending changes of a VM config ("cold plug").
#
# First processes the pending deletions, then moves every remaining pending
# value into the main config section, and finally writes the config once.
# Failures of single options are stored in the $errors hash (keyed by option
# name) and warned about; the affected option then stays pending.
#
# Unless $skip_cloud_init is set, the cloud-init config is applied at the end
# if one of the applied options was a cloud-init drive.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # cold plug

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};

        eval {
            die "internal error" if $opt =~ m/^unused/;

            # nothing configured under this key, so nothing to clean up
            return if !defined($conf->{$opt});

            if (is_valid_drivename($opt)) {
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($opt =~ m/^net\d+$/ && $have_sdn) {
                my $net = PVE::QemuServer::parse_net($conf->{$opt});
                # releasing the SDN IPs is best-effort only
                eval { PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}) };
                warn if $@;
            }
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0 means "never generate", undef means "not needed so far"
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure

        eval {
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                # the currently configured drive gets replaced, keep its volume as unused
                my $current = parse_drive($opt, $conf->{$opt});
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $current);
            } elsif (defined($conf->{pending}->{$opt}) && $opt =~ m/^net\d+$/ && $have_sdn) {
                my $new_net = PVE::QemuServer::parse_net($conf->{pending}->{$opt});

                if ($conf->{$opt}) {
                    my $old_net = PVE::QemuServer::parse_net($conf->{$opt});

                    my $moved = defined($old_net->{bridge})
                        && defined($old_net->{macaddr})
                        && (
                            safe_string_ne($old_net->{bridge}, $new_net->{bridge})
                            || safe_string_ne($old_net->{macaddr}, $new_net->{macaddr})
                        );

                    PVE::Network::SDN::Vnets::del_ips_from_mac($old_net->{bridge}, $old_net->{macaddr}, $conf->{name})
                        if $moved;
                }

                #fixme: reuse ip if mac change && same bridge
                PVE::Network::SDN::Vnets::add_next_free_cidr($new_net->{bridge}, $conf->{name}, $new_net->{macaddr}, $vmid, undef, 1);
            }
        };
        if (my $err = $@) {
            $record_failure->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $pending_drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($pending_drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    if ($generate_cloudinit && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
        # After successful generation and if there were changes to be applied, update the
        # config to drop the {cloudinit} entry.
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
5252
# Apply a changed or new network device ($opt => $value) to a running VM.
#
# If a device is already configured and only bridge, tag, trunks, firewall,
# rate or link state differ, the existing tap interface is updated in place
# and 1 is returned. Any other difference needs the device to be unplugged
# and plugged again, which is only done if $hotplug is set; otherwise the
# sub dies with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug = safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !$newnet->{bridge}
            || !$oldnet->{bridge}; # bridge/nat mode change

        if (!$needs_replug) {
            my ($index) = $opt =~ m/net(\d+)/ or die "internal error";
            my $iface = "tap${vmid}i${index}";

            my $bridge_changed = safe_string_ne($oldnet->{bridge}, $newnet->{bridge});
            # a bridge or vlan change is signalled to the guest by toggling the link
            my $relink = $bridge_changed || safe_num_ne($oldnet->{tag}, $newnet->{tag});

            if (
                $relink
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall})
            ) {
                PVE::Network::tap_unplug($iface);

                #set link_down in guest if bridge or vlan change to notify guest (dhcp renew for example)
                qemu_set_link_status($vmid, $opt, 0) if $relink;

                if ($bridge_changed && $have_sdn) {
                    PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name});
                    PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
                }

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }

                #set link_up in guest if bridge or vlan change to notify guest (dhcp renew for example)
                qemu_set_link_status($vmid, $opt, 1) if $relink;
            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);

        PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name})
            if $have_sdn;
    }

    # from here on the device has to be plugged (again)
    die "skip\n" if !$hotplug;

    if ($have_sdn) {
        PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
        PVE::Network::SDN::Vnets::add_dhcp_mapping($newnet->{bridge}, $newnet->{macaddr}, $vmid, $conf->{name});
    }

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5339
# Decide whether a changed guest agent property string can be applied live.
#
# Dies with "skip\n" if no agent is configured yet, or if any sub-option
# other than the hot-pluggable ones (currently only 'fstrim_cloned_disks')
# was added, changed or removed. Returns nothing if the change is acceptable.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my %hotpluggable = (fstrim_cloned_disks => 1);

    my $current = parse_guest_agent($conf);
    my $wanted = parse_guest_agent({$opt => $value});

    # the first pass catches added/changed options, the second pass removed ones
    for my $direction ([$wanted, $current], [$current, $wanted]) {
        my ($from, $against) = @$direction;

        for my $name (keys %$from) {
            next if defined($hotpluggable{$name});
            die "skip\n" if safe_string_ne($from->{$name}, $against->{$name});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5362
# Apply a changed or new drive ($opt => $value) to a running VM.
#
# For an already configured drive:
#   - the media type must stay the same, otherwise the sub dies
#   - CD-ROM drives get their medium ejected/changed, returns 1
#   - same volume: dies with "skip\n" if an option changed that cannot be
#     applied live, otherwise updates the I/O throttle if needed, returns 1
#   - different volume: the old disk is unplugged and registered as unused
#     (needs $hotplug), then the new one is plugged like a new drive
# New drives are hot-plugged, unless $hotplug is off or the key matches
# ide/sata, in which case the sub dies with "skip\n".
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        die "unable to change media type\n"
            if ($drive->{media} || 'disk') ne ($old_drive->{media} || 'disk');

        if (drive_is_cdrom($old_drive)) { # cdrom
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @cold_only = qw(aio discard iothread queues product cache ssd vendor ro);
            die "skip\n"
                if first { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @cold_only;

            # throttle settings, grouped in the order the throttle call expects them
            my @mbps = qw(mbps mbps_rd mbps_wr);
            my @iops = qw(iops iops_rd iops_wr);
            my @mbps_max = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max = qw(iops_max iops_rd_max iops_wr_max);
            my @max_length = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );
            my @throttle_keys = (@mbps, @iops, @mbps_max, @iops_max, @max_length);

            # apply throttle
            if (first { safe_num_ne($drive->{$_}, $old_drive->{$_}) } @throttle_keys) {
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @mbps),
                    (map { $drive->{$_} || 0 } @iops),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @mbps_max),
                    (map { $drive->{$_} || 0 } @iops_max),
                    (map { $drive->{$_} || 1 } @max_length),
                );
            }

            return 1;
        }

        # a different volume was assigned to this slot
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5474
# Regenerate the cloud-init image of a VM and, if the VM is running, swap the
# medium in the cloud-init drive for the regenerated one.
#
# Does nothing if the config has no cloud-init drive. If several volumes are
# cloud-init drives, the last one visited by foreach_volume is used.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($ci_key, $ci_drive);
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($key, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($ci_key, $ci_drive) = ($key, $drive);
    });

    return if !$ci_drive;

    # the config is only written again if apply_cloudinit_config reports success
    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $image_path = PVE::Storage::path($storecfg, $ci_drive->{file});
        if ($image_path) {
            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$image_path");
        }
    }
}
5505
# called in locked context by incoming migration
#
# Collects the volumes of $conf that live on non-shared storage. CD-ROM
# drives, 'tpmstate0' and drives without a volume are left out.
#
# Returns a hash reference mapping the drive key to
#   [$volid, $storeid, $drive, $use_existing, $format]
# where $use_existing is 1 for volumes listed in $replicated_volumes.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my %local_volumes;

    my $collect = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        # volumes on shared storage are reachable from the target as they are
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        my $format = qemu_img_format($scfg, $volname);

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes{$ds} = [$volid, $storeid, $drive, $use_existing, $format];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return \%local_volumes;
}
5534
# called in locked context by incoming migration
#
# Prepares the target volumes for the entries of $source_volumes, a hash of
#   drive key => [$volid, $storeid, $drive, $use_existing, $format]
# as produced by vm_migrate_get_nbd_disks().
#
# Entries with $use_existing set (replicated volumes) are taken over as they
# are. For all others the storage ID is mapped through $storagemap and a new
# volume of the same size is allocated there.
#
# Returns a hash reference of drive key => { drivestr, volid }, with an
# additional 'replicated => 1' for re-used volumes. The drive hashes inside
# $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);

        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. default format of storage
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        $format = $defFormat if !$format || !grep { $format eq $_ } $validFormats->@*;

        # vdisk_alloc is handed the drive size divided by 1024
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # Work on a copy: assigning the reference would rewrite 'format' and 'file' of the
        # caller's drive hash. Only top-level keys change, so a shallow copy is enough.
        my $newdrive = { %$drive, format => $format, file => $newvolid };

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5570
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Takes the config lock, runs the pre-start checks and hands over to
# vm_start_nolock. Sets $params->{resume} as a side effect. If a backup lock
# is held while the VM is already running, the VM is resumed in the existing
# QEMU instance instead.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $source_node = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $source_node);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $suspended = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $backup_locked = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $source_node);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $backup_locked && $running;

        PVE::QemuConfig->check_lock($conf) if !$params->{skiplock} && !$suspended;

        $params->{resume} = $suspended || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $migrate_opts->{replicated_volumes});
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the freshly allocated target volumes
            $conf->{$_} = $nbd->{$_}->{drivestr} for keys %$nbd;
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5614
5615
5616 # params:
5617 # statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5618 # skiplock => 0/1, skip checking for config lock
5619 # skiptemplate => 0/1, skip checking whether VM is template
5620 # forcemachine => to force QEMU machine (rollback/migration)
5621 # forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5622 # timeout => in seconds
5623 # paused => start VM in paused state (backup)
5624 # resume => resume from hibernation
5625 # live-restore-backing => {
5626 # sata0 => {
5627 # name => blockdev-name,
5628 # blockdev => "arg to the -blockdev command instantiating device named 'name'",
5629 # },
5630 # virtio2 => ...
5631 # }
5632 # migrate_opts:
5633 # nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5634 # migratedfrom => source node
5635 # spice_ticket => used for spice migration, passed via tunnel/stdin
5636 # network => CIDR of migration network
5637 # type => secure/insecure - tunnel over encrypted connection or plain-text
5638 # nbd_proto_version => int, 0 for TCP, 1 for UNIX
5639 # replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5640 # offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5641 # contained in config
5642 sub vm_start_nolock {
5643 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5644
5645 my $statefile = $params->{statefile};
5646 my $resume = $params->{resume};
5647
5648 my $migratedfrom = $migrate_opts->{migratedfrom};
5649 my $migration_type = $migrate_opts->{type};
5650
5651 my $res = {};
5652
5653 # clean up leftover reboot request files
5654 eval { clear_reboot_request($vmid); };
5655 warn $@ if $@;
5656
5657 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5658 vmconfig_apply_pending($vmid, $conf, $storecfg);
5659 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5660 }
5661
5662 # don't regenerate the ISO if the VM is started as part of a live migration
5663 # this way we can reuse the old ISO with the correct config
5664 if (!$migratedfrom) {
5665 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5666 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5667 # $conf->{cloudinit}, so we could just not do this?
5668 # But we do it above, so for now let's be consistent.
5669 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5670 }
5671 }
5672
5673 # override offline migrated volumes, conf is out of date still
5674 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5675 for my $key (sort keys $offline_volumes->%*) {
5676 my $parsed = parse_drive($key, $conf->{$key});
5677 $parsed->{file} = $offline_volumes->{$key};
5678 $conf->{$key} = print_drive($parsed);
5679 }
5680 }
5681
5682 my $defaults = load_defaults();
5683
5684 # set environment variable useful inside network script
5685 # for remote migration the config is available on the target node!
5686 if (!$migrate_opts->{remote_node}) {
5687 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5688 }
5689
5690 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5691
5692 my $forcemachine = $params->{forcemachine};
5693 my $forcecpu = $params->{forcecpu};
5694 if ($resume) {
5695 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5696 $forcemachine = $conf->{runningmachine};
5697 $forcecpu = $conf->{runningcpu};
5698 print "Resuming suspended VM\n";
5699 }
5700
5701 my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
5702 $conf, $defaults, $forcemachine, $forcecpu, $params->{'live-restore-backing'});
5703
5704 my $migration_ip;
5705 my $get_migration_ip = sub {
5706 my ($nodename) = @_;
5707
5708 return $migration_ip if defined($migration_ip);
5709
5710 my $cidr = $migrate_opts->{network};
5711
5712 if (!defined($cidr)) {
5713 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5714 $cidr = $dc_conf->{migration}->{network};
5715 }
5716
5717 if (defined($cidr)) {
5718 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5719
5720 die "could not get IP: no address configured on local " .
5721 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5722
5723 die "could not get IP: multiple addresses configured on local " .
5724 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5725
5726 $migration_ip = @$ips[0];
5727 }
5728
5729 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5730 if !defined($migration_ip);
5731
5732 return $migration_ip;
5733 };
5734
5735 if ($statefile) {
5736 if ($statefile eq 'tcp') {
5737 my $migrate = $res->{migrate} = { proto => 'tcp' };
5738 $migrate->{addr} = "localhost";
5739 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5740 my $nodename = nodename();
5741
5742 if (!defined($migration_type)) {
5743 if (defined($datacenterconf->{migration}->{type})) {
5744 $migration_type = $datacenterconf->{migration}->{type};
5745 } else {
5746 $migration_type = 'secure';
5747 }
5748 }
5749
5750 if ($migration_type eq 'insecure') {
5751 $migrate->{addr} = $get_migration_ip->($nodename);
5752 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5753 }
5754
5755 # see #4501: port reservation should be done close to usage - tell QEMU where to listen
5756 # via QMP later
5757 push @$cmd, '-incoming', 'defer';
5758 push @$cmd, '-S';
5759
5760 } elsif ($statefile eq 'unix') {
5761 # should be the default for secure migrations, as an SSH TCP forward
5762 # tunnel is not deterministically ready and regularly fails
5763 # to set up in time, so use UNIX socket forwards
5764 my $migrate = $res->{migrate} = { proto => 'unix' };
5765 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5766 unlink $migrate->{addr};
5767
5768 $migrate->{uri} = "unix:$migrate->{addr}";
5769 push @$cmd, '-incoming', $migrate->{uri};
5770 push @$cmd, '-S';
5771
5772 } elsif (-e $statefile) {
5773 push @$cmd, '-loadstate', $statefile;
5774 } else {
5775 my $statepath = PVE::Storage::path($storecfg, $statefile);
5776 push @$vollist, $statefile;
5777 push @$cmd, '-loadstate', $statepath;
5778 }
5779 } elsif ($params->{paused}) {
5780 push @$cmd, '-S';
5781 }
5782
5783 my $memory = get_current_memory($conf->{memory});
5784 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $memory, $resume);
5785
5786 my $pci_reserve_list = [];
5787 for my $device (values $pci_devices->%*) {
5788 next if $device->{mdev}; # we don't reserve for mdev devices
5789 push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
5790 }
5791
5792 # reserve all PCI IDs before actually doing anything with them
5793 PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
5794
5795 eval {
5796 my $uuid;
5797 for my $id (sort keys %$pci_devices) {
5798 my $d = $pci_devices->{$id};
5799 my ($index) = ($id =~ m/^hostpci(\d+)$/);
5800
5801 my $chosen_mdev;
5802 for my $dev ($d->{ids}->@*) {
5803 my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
5804 if ($d->{mdev}) {
5805 warn $@ if $@;
5806 $chosen_mdev = $info;
5807 last if $chosen_mdev; # if successful, we're done
5808 } else {
5809 die $@ if $@;
5810 }
5811 }
5812
5813 next if !$d->{mdev};
5814 die "could not create mediated device\n" if !defined($chosen_mdev);
5815
5816 # nvidia grid needs the uuid of the mdev as qemu parameter
5817 if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
5818 if (defined($conf->{smbios1})) {
5819 my $smbios_conf = parse_smbios1($conf->{smbios1});
5820 $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
5821 }
5822 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
5823 }
5824 }
5825 push @$cmd, '-uuid', $uuid if defined($uuid);
5826 };
5827 if (my $err = $@) {
5828 eval { cleanup_pci_devices($vmid, $conf) };
5829 warn $@ if $@;
5830 die $err;
5831 }
5832
5833 PVE::Storage::activate_volumes($storecfg, $vollist);
5834
5835
5836 my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
5837 eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
5838 eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
5839 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5840 # timeout should be more than enough here...
5841 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5842
5843 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5844
5845 my %run_params = (
5846 timeout => $statefile ? undef : $start_timeout,
5847 umask => 0077,
5848 noerr => 1,
5849 );
5850
5851 # when migrating, prefix QEMU output so other side can pick up any
5852 # errors that might occur and show the user
5853 if ($migratedfrom) {
5854 $run_params{quiet} = 1;
5855 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5856 }
5857
5858 my %systemd_properties = (
5859 Slice => 'qemu.slice',
5860 KillMode => 'process',
5861 SendSIGKILL => 0,
5862 TimeoutStopUSec => ULONG_MAX, # infinity
5863 );
5864
5865 if (PVE::CGroup::cgroup_mode() == 2) {
5866 $systemd_properties{CPUWeight} = $cpuunits;
5867 } else {
5868 $systemd_properties{CPUShares} = $cpuunits;
5869 }
5870
5871 if (my $cpulimit = $conf->{cpulimit}) {
5872 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5873 }
5874 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5875
5876 my $run_qemu = sub {
5877 PVE::Tools::run_fork sub {
5878 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5879
5880 my $tpmpid;
5881 if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
5882 # start the TPM emulator so QEMU can connect on start
5883 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5884 }
5885
5886 my $exitcode = run_command($cmd, %run_params);
5887 if ($exitcode) {
5888 if ($tpmpid) {
5889 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5890 kill 'TERM', $tpmpid;
5891 }
5892 die "QEMU exited with code $exitcode\n";
5893 }
5894 };
5895 };
5896
5897 if ($conf->{hugepages}) {
5898
5899 my $code = sub {
5900 my $hotplug_features =
5901 parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5902 my $hugepages_topology =
5903 PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
5904
5905 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5906
5907 PVE::QemuServer::Memory::hugepages_mount();
5908 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5909
5910 eval { $run_qemu->() };
5911 if (my $err = $@) {
5912 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5913 if !$conf->{keephugepages};
5914 die $err;
5915 }
5916
5917 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5918 if !$conf->{keephugepages};
5919 };
5920 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5921
5922 } else {
5923 eval { $run_qemu->() };
5924 }
5925
5926 if (my $err = $@) {
5927 # deactivate volumes if start fails
5928 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5929 warn $@ if $@;
5930 eval { cleanup_pci_devices($vmid, $conf) };
5931 warn $@ if $@;
5932
5933 die "start failed: $err";
5934 }
5935
5936 # re-reserve all PCI IDs now that we can know the actual VM PID
5937 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5938 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
5939 warn $@ if $@;
5940
5941 if (defined(my $migrate = $res->{migrate})) {
5942 if ($migrate->{proto} eq 'tcp') {
5943 my $nodename = nodename();
5944 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5945 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5946 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5947 mon_cmd($vmid, "migrate-incoming", uri => $migrate->{uri});
5948 }
5949 print "migration listens on $migrate->{uri}\n";
5950 } elsif ($statefile) {
5951 eval { mon_cmd($vmid, "cont"); };
5952 warn $@ if $@;
5953 }
5954
5955 #start nbd server for storage migration
5956 if (my $nbd = $migrate_opts->{nbd}) {
5957 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
5958
5959 my $migrate_storage_uri;
5960 # nbd_protocol_version > 0 for unix socket support
5961 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
5962 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
5963 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
5964 $migrate_storage_uri = "nbd:unix:$socket_path";
5965 $res->{migrate}->{unix_sockets} = [$socket_path];
5966 } else {
5967 my $nodename = nodename();
5968 my $localip = $get_migration_ip->($nodename);
5969 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5970 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
5971
5972 mon_cmd($vmid, "nbd-server-start", addr => {
5973 type => 'inet',
5974 data => {
5975 host => "${localip}",
5976 port => "${storage_migrate_port}",
5977 },
5978 });
5979 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
5980 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
5981 }
5982
5983 my $block_info = mon_cmd($vmid, "query-block");
5984 $block_info = { map { $_->{device} => $_ } $block_info->@* };
5985
5986 foreach my $opt (sort keys %$nbd) {
5987 my $drivestr = $nbd->{$opt}->{drivestr};
5988 my $volid = $nbd->{$opt}->{volid};
5989
5990 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
5991
5992 mon_cmd(
5993 $vmid,
5994 "block-export-add",
5995 id => "drive-$opt",
5996 'node-name' => $block_node,
5997 writable => JSON::true,
5998 type => "nbd",
5999 name => "drive-$opt", # NBD export name
6000 );
6001
6002 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
6003 print "storage migration listens on $nbd_uri volume:$drivestr\n";
6004 print "re-using replicated volume: $opt - $volid\n"
6005 if $nbd->{$opt}->{replicated};
6006
6007 $res->{drives}->{$opt} = $nbd->{$opt};
6008 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
6009 }
6010 }
6011
6012 if ($migratedfrom) {
6013 eval {
6014 set_migration_caps($vmid);
6015 };
6016 warn $@ if $@;
6017
6018 if ($spice_port) {
6019 print "spice listens on port $spice_port\n";
6020 $res->{spice_port} = $spice_port;
6021 if ($migrate_opts->{spice_ticket}) {
6022 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6023 $migrate_opts->{spice_ticket});
6024 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6025 }
6026 }
6027
6028 } else {
6029 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6030 if !$statefile && $conf->{balloon};
6031
6032 foreach my $opt (keys %$conf) {
6033 next if $opt !~ m/^net\d+$/;
6034 my $nicconf = parse_net($conf->{$opt});
6035 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6036 }
6037 add_nets_bridge_fdb($conf, $vmid);
6038 }
6039
6040 if (!defined($conf->{balloon}) || $conf->{balloon}) {
6041 eval {
6042 mon_cmd(
6043 $vmid,
6044 'qom-set',
6045 path => "machine/peripheral/balloon0",
6046 property => "guest-stats-polling-interval",
6047 value => 2
6048 );
6049 };
6050 log_warn("could not set polling interval for ballooning - $@") if $@;
6051 }
6052
6053 if ($resume) {
6054 print "Resumed VM, removing state\n";
6055 if (my $vmstate = $conf->{vmstate}) {
6056 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6057 PVE::Storage::vdisk_free($storecfg, $vmstate);
6058 }
6059 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6060 PVE::QemuConfig->write_config($vmid, $conf);
6061 }
6062
6063 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6064
6065 my ($current_machine, $is_deprecated) =
6066 PVE::QemuServer::Machine::get_current_qemu_machine($vmid);
6067 if ($is_deprecated) {
6068 log_warn(
6069 "current machine version '$current_machine' is deprecated - see the documentation and ".
6070 "change to a newer one",
6071 );
6072 }
6073
6074 return $res;
6075 }
6076
# Build the QEMU command for a VM and return it as a single string.
#
# $storecfg - storage configuration, handed through to config_to_command()
# $vmid     - ID of the VM whose configuration gets loaded
# $snapname - optional; when set, the command is built from that snapshot's
#             configuration section instead of the current configuration
#
# Dies if $snapname is given but no such snapshot exists in the configuration.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        if (!defined($snap_conf)) {
            die "snapshot '$snapname' does not exist\n";
        }

        # a snapshot may carry machine and CPU overrides
        ($forcemachine, $forcecpu) = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        $conf = $snap_conf;
    }

    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, load_defaults(), $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6102
# Send the 'system_reset' monitor command to a VM while holding its config lock.
#
# $vmid     - ID of the VM
# $skiplock - when true, the configuration's lock state is not checked
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        if (!$skiplock) {
            PVE::QemuConfig->check_lock($conf);
        }

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6115
# Collect the volume IDs referenced by a VM configuration.
#
# Walks the configuration with foreach_volid() and keeps every volume ID that is not
# an absolute path and for which PVE::Storage::parse_volume_id() yields a storage ID.
#
# Returns an array reference of volume IDs.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid) = @_;

        # skip absolute paths
        return if $volid =~ m|^/|;

        # second argument set: parse without raising an error (presumably 'noerr')
        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    };
    foreach_volid($conf, $collect);

    return \@volids;
}
6133
# Clean up host PCI passthrough state of a VM.
#
# For every 'hostpciN' entry configured as mediated device (mdev), remove the mdev
# through sysfs if it still exists. Finally drop the PCI reservation of the VM.
#
# $vmid - ID of the VM
# $conf - VM configuration hash
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^hostpci(\d+)$/;
        my $index = $1;

        my $mdev_uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $sysfs_dir = "/sys/bus/mdev/devices/$mdev_uuid";

        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
        # out when we do it first. so wait for up to 10 seconds and then try it manually
        if ($device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/ && -e $sysfs_dir) {
            my $waited = 0;
            while (-e $sysfs_dir && $waited < 10) {
                sleep 1;
                $waited++;
            }
            print "waited $waited seconds for mediated device driver finishing clean up\n";
        }

        if (-e $sysfs_dir) {
            print "actively clean up mediated device with UUID $mdev_uuid\n";
            PVE::SysFSTools::file_write("$sysfs_dir/remove", "1");
        }
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6166
# Release host-side resources after a VM has been stopped.
#
# $storecfg              - storage configuration
# $vmid                  - ID of the VM
# $conf                  - VM configuration hash
# $keepActive            - when true, volumes are neither deactivated nor unmapped
# $apply_pending_changes - when true, vmconfig_apply_pending() is called at the end
#
# Any error raised during the cleanup is turned into a warning.
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        for my $ext (qw(mon qmp pid vnc qga)) {
            unlink "/var/run/qemu-server/${vmid}.$ext";
        }

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # just delete it for now; VMs which already have it open are not affected, but
            # new VMs will get a separate one. If this becomes an issue we either add some
            # sort of ref-counting or a "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6204
# Stop or shut down a VM - call only in locked context.
#
# $storecfg   - storage configuration, used for the cleanup afterwards
# $vmid       - ID of the VM
# $skiplock   - when true, the configuration's lock state is not checked
# $nocheck    - when true, the configuration is not loaded at all (so no lock check, no
#               'pre-stop' hookscript and no cleanup happen here)
# $timeout    - seconds to wait for the VM to exit; for a shutdown it falls back to the
#               'down' value of the startup option, then to 60
# $shutdown   - when true, request a guest shutdown instead of sending 'quit'
# $force      - when true, signal the process if the command fails or times out
# $keepActive - handed through to vm_stop_cleanup()
#
# Returns without doing anything if check_running() reports no PID. Dies if the command
# failed or timed out and $force is not set.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # poll once per second until the VM is gone; true if the limit was hit first
    my $timed_out_waiting = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if (!$timed_out_waiting->($timeout)) {
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($timed_out_waiting->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6286
# Stop a VM.
#
# Note: $nocheck skips the tests that need the VM configuration file. That is required
# when migrating VMs to other nodes, where the files were already moved.
# Note: $keepActive is set in vzdump stop mode, where volumes need to stay active.
#
# When $migratedfrom is set, the process is sent SIGTERM and cleaned up using the
# configuration loaded for that node, without taking the config lock. Otherwise the
# work is done by _do_vm_stop() under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop (no shutdown) is forceful unless the caller said otherwise
    if (!defined($force) && !$shutdown) {
        $force = 1;
    }

    if ($migratedfrom) {
        my $pid = check_running($vmid, $nocheck, $migratedfrom);
        kill 15, $pid if $pid;

        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);
        return;
    }

    my $stop_locked = sub {
        _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
    };
    PVE::QemuConfig->lock_config($vmid, $stop_locked);
}
6307
# Reboot a VM by registering a reboot request and then shutting the VM down, all while
# holding the config lock.
#
# $vmid    - ID of the VM
# $timeout - shutdown timeout handed to _do_vm_stop()
#
# On any error the reboot request is cleared again and the error is re-raised.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6330
# Suspend a VM, either in place ('stop' monitor command) or to disk.
#
# note: if using the statestorage parameter, the caller has to check privileges
#
# $vmid         - ID of the VM
# $skiplock     - when true, the configuration's lock state is not checked
# $includestate - when true, the VM state is saved to a volume and the VM is quit
# $statestorage - storage for the state volume; when unset one is looked up via
#                 find_vmstate_storage() and, outside the CLI, permission-checked
#
# Suspending to disk runs in three steps: under the config lock the 'suspending' lock
# is set and the state volume prepared, then the state is saved without holding the
# lock, and finally - under the lock again - the VM is quit and marked 'suspended'.
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        if (!$skiplock && !$is_backing_up) {
            PVE::QemuConfig->check_lock($conf);
        }

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();
        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);
            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll once per second until saving completed or failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};
                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }
                if ($status eq 'failed' && $state->{error}) {
                    die "query-savevm failed with error '$state->{error}'\n";
                }
                die "query-savevm returned status '$status'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            if (!PVE::QemuConfig->has_lock($conf, 'suspending')) {
                die "lock changed unexpectedly\n";
            }

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6425
# Resume a paused or suspended VM while holding its config lock.
#
# $nocheck is set when called as part of a migration - in this context the location of
# the config file (source or target node) is not deterministic, since migration cannot
# wait for pmxcfs to process the rename. The lock check is skipped in that case.
#
# $vmid     - ID of the VM
# $skiplock - when true, the configuration's lock state is not checked
#
# Does nothing if the VM already reports the status 'running'.
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $status_info = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $resume_cmd = 'cont';
        my $reset = 0;
        if (my $status = $status_info->{status}) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        # a 'backup' lock does not prevent resuming
        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        if ($reset) {
            # required if a VM shuts down during a backup and we get a resume
            # request before the backup finishes for example
            mon_cmd($vmid, "system_reset");
        }

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6476
# Send a key to a VM through the human monitor 'sendkey' command, holding the config lock.
#
# $vmid     - ID of the VM
# $skiplock - accepted, but not used by this sub
# $key      - key name, interpolated into the monitor command as is
#
# Dies with the monitor's output if that output is not empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # the loaded config is not used further - presumably this only ensures that
        # the VM configuration exists; TODO confirm
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6489
# Check that a user may use the bridges referenced by all 'netN' entries of a config.
#
# $rpcenv   - RPC environment, handed to PVE::GuestHelpers::check_vnet_access()
# $authuser - user to check; 'root@pam' passes without any check
# $conf     - VM configuration hash
#
# Returns 1. Errors are expected to be raised by check_vnet_access().
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = grep { m/^net\d+$/ } sort keys $conf->%*;

    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        my @vnet_args = $net->@{qw(bridge tag trunks)};
        PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, @vnet_args);
    }

    return 1;
}
6503
# Check that a user may use the USB and PCI devices referenced by a configuration.
#
# For 'usbN' and 'hostpciN' entries: directly specified host devices are reserved for
# 'root@pam' (USB 'spice' entries are exempt), mapped devices require 'Mapping.Use' on
# the respective mapping path.
#
# $rpcenv - RPC environment used for the permission check
# $user   - user to check
# $conf   - VM configuration hash
#
# Dies if an entry sets neither 'host' nor 'mapping', or if access is not allowed.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
            my $host = $usb->{host};
            if ($host) {
                die "only root can set '$opt' config for real devices\n"
                    if $user ne 'root@pam' && $host !~ m/^spice$/i;
                next;
            }
            die "either 'host' or 'mapping' must be set.\n" if !$usb->{mapping};

            $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
            if ($pci->{host}) {
                die "only root can set '$opt' config for non-mapped devices\n" if $user ne 'root@pam';
                next;
            }
            die "either 'host' or 'mapping' must be set.\n" if !$pci->{mapping};

            $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
        }
    }
}
6530
# Run the permission checks needed before restoring a configuration: first bridge
# access, then USB/PCI mapping access.
#
# $rpcenv - RPC environment used for the checks
# $user   - user to check
# $conf   - configuration hash to check
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);
    return check_mapping_access(@check_args);
}
6537 # vzdump restore implementation
6538
# Return the name of the first entry listed by 'tar tf' for an archive.
#
# $archive - path to the archive file
#
# Only the first line of the listing is read; the tar process is then sent SIGTERM.
# Dies if the file does not exist, tar cannot be started, or nothing was listed.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    if (! -f $archive) {
        die "ERROR: file '$archive' does not exist\n";
    }

    # try to detect archive type first
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    my $first_entry = <$fh>;
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contaions no data\n" if !$first_entry;
    chomp $first_entry;

    return $first_entry;
}
6556
# Remove the temporary volumes recorded in a restore stat file.
#
# $storecfg - storage configuration, used to free storage-managed volumes
# $statfile - path of the stat file; each line is expected to end in
#             'vzdump:<name>:<volid>'
#
# Volumes given as absolute path are unlinked, all others are freed through
# PVE::Storage::vdisk_free(). Progress and failures are reported on STDERR; a failure
# for one volume does not stop the cleanup of the remaining ones. Nothing is cleaned
# up if the stat file cannot be opened.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # use low-precedence 'or' so that the check applies to the result
                        # of unlink and not to its argument
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6582
# Restore a VM from a backup archive, dispatching on the archive format.
#
# $archive - archive path; '-' is handed straight to restore_vma_archive()
# $vmid    - ID of the VM to restore
# $user    - user on whose behalf the restore runs
# $opts    - restore options; $opts->{format} overrides the detected format
#
# Returns whatever the format specific restore function returns.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};

    # 'tar' archives get their own handler, everything else is treated as VMA
    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $info->{compression});
}
6600
# Helper to remove disks that will not be used after restore
#
# $storecfg:     Storage configuration
# $vmid:         ID of the VM being restored
# $oldconf:      the VM configuration that gets replaced by the restore
# $virtdev_hash: devices that get restored, keyed by drive name
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_volids;

    my $handle_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        # only touch volumes owned by this VM
        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_volids{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };
    PVE::QemuConfig->foreach_volume($oldconf, $handle_volume);

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $snap = $oldconf->{snapshots}->{$snapname};

        if (my $vmstate = $snap->{vmstate}) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys %kept_volids) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6646
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage by this user
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;

        if (!$scfg->{content}->{images}) {
            die "Content type 'images' is not available on storage '$storeid'\n";
        }
        if ($user ne 'root@pam') {
            $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
        }
    };

    my %virtdevs;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);
            die "archive does not contain data for drive '$virtdev'\n"
                if !$devinfo->{$devname};

            # an explicitly requested storage wins over the one from the hint
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            my $dev = $devinfo->{$devname};
            $dev->@{qw(devname virtdev format storeid)} = ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdevs{$virtdev} = $dev;
            next;
        }

        # cloud-init drives are not part of the hints, detect them from the config lines
        next if $line !~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/;
        my $virtdev = $1;
        my $drive = parse_drive($virtdev, $2);
        next if !drive_is_cloudinit($drive);

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $options->{storage} if defined($options->{storage});
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

        $check_storage->($storeid, $scfg); # permission and content type check

        $virtdevs{$virtdev} = {
            format => $format,
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return \%virtdevs;
};
6714
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its new volume ID stored in 'volid'; its 'format' is
# replaced by the storage's default format if the storage does not support it.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # round up to full units of 1024 (presumably bytes -> KiB; confirm against
        # what PVE::Storage::vdisk_alloc() expects)
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6758
# Transform one line of a backed-up VM configuration for the restored VM.
#
# $cookie - state shared between calls; $cookie->{netcount} is the index used for (and
#           incremented by) network devices converted from old 'vlanN' lines
# $map    - { $virtdev => $volid } of newly allocated volumes
# $line   - the configuration line, including its trailing newline
# $unique - when true, MAC addresses and the smbios1 UUID get regenerated
#
# Returns the text to write into the new configuration, which is the empty string for
# lines that get dropped.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # backup hints, locks, unused disks and snapshot parents are never carried over
    return '' if $line =~ m/^(?:\#qmdump\#|\#vzdump\#|lock:|unused\d+:|parent:)/;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if ($unique && $line =~ m/^(net\d+):\s*(\S+)\s*$/) {
        my ($id, $netstr) = ($1, $2);

        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};

        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);

        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup get commented out
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume are kept as they are
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if ($unique && $line =~ m/^(smbios1: )(.*)/) {
        my ($prefix, $smbios_str) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios_str);
        $smbios1->{uuid} = $uuid_str;

        return $prefix . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6827
# Deactivate every volume recorded (under 'volid') in $virtdev_hash. A failure is only printed
# to STDERR and never propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids;
    for my $dev (values $virtdev_hash->%*) {
        my $volid = $dev->{volid} or next;
        push @volids, $volid;
    }

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    my $err = $@;
    print STDERR $err if $err;
};
6839
# Free every volume recorded (under 'volid') in $virtdev_hash. Each volume is handled on its
# own, so a failure to free one of them is reported on STDERR but does not stop the cleanup of
# the remaining ones.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    for my $dev (values $virtdev_hash->%*) {
        my $volid = $dev->{volid} or next; # entry without an allocated volume
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' successfully removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6852
# Parse the raw config text from a backup and layer the entries of $override_conf on top of
# it; an override always replaces the value from the backup.
#
# Returns the resulting config hash reference.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);
    $merged->{$_} = $override_conf->{$_} for keys $override_conf->%*;

    return $merged;
}
6863
# List the 'images' volumes known to the storage layer (restricted to $vmid if given) and
# index them by volume ID.
#
# Entries lacking a volume ID or a non-zero size are ignored. Each returned entry additionally
# gets a 'path' key holding the result of PVE::Storage::path() for the volume.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $listing = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $items (values %$listing) {
        for my $item (@$items) {
            next if !$item->{volid} || !$item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volid_hash->{$item->{volid}} = $item;
        }
    }

    return $volid_hash;
}
6880
# Bring the disk related entries of $conf in line with the volumes listed in $volid_hash
# (volume ID => volume info with at least 'size' and 'path'):
#   1. update the size of configured drives,
#   2. drop 'unusedX' entries whose volume is referenced elsewhere,
#   3. add volumes that are not referenced at all as new unused entries.
#
# $conf is modified in place. Returns a true value if anything was changed.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $prefix = "VM $vmid";
    my $changes;

    # Note: it is allowed to define multiple storages with same path (alias), so
    # we need to check both 'volid' and real 'path' (two different volid can point
    # to the same path).
    my $referenced = {}; # used and unused disks, by volid
    my $referencedpath = {}; # the same, by real path

    # update size info
    my $sync_size = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file} or return;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $referenced->{$volid} = 1;
        my $path = $volume ? $volume->{path} : undef;
        $referencedpath->{$path} = 1 if $path;

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) =
            PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        if (defined($updated)) {
            $changes = 1;
            $conf->{$opt} = print_drive($updated);
            print "$prefix ($opt): $msg\n";
        }
    };
    PVE::QemuConfig->foreach_volume($conf, $sync_size);

    # remove 'unusedX' entry if volume is used
    my $prune_unused = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file} or return;

        my $volume = $volid_hash->{$volid};
        my $path = $volume ? $volume->{path} : undef;

        if ($referenced->{$volid} || ($path && $referencedpath->{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $referenced->{$volid} = 1;
        $referencedpath->{$path} = 1 if $path;
    };
    PVE::QemuConfig->foreach_unused_volume($conf, $prune_unused);

    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/ || $referenced->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        # a missing path is not expected, the check is just to be sure
        next if !$path || $referencedpath->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $referencedpath->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6954
# Rescan the storages and update the disk entries of one VM ($vmid defined) or of all VMs
# returned by config_list() ($vmid undefined).
#
# Parameters:
#   $vmid   - VM to update, or undef for all VMs
#   $nolock - if true, the config is updated without taking the config lock
#   $dryrun - if true, changes are computed (and printed) but the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only consider the volumes owned by this VM
        my %vm_volids;
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next if !$info->{vmid} || $info->{vmid} != $vmid;
            $vm_volids{$volid} = $info;
        }

        my $changes = update_disk_config($vmid, $conf, \%vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    my @targets;
    if (defined($vmid)) {
        @targets = ($vmid);
    } else {
        my $vmlist = config_list();
        @targets = keys %$vmlist;
    }

    for my $target (@targets) {
        if ($nolock) {
            $updatefn->($target);
        } else {
            PVE::QemuConfig->lock_config($target, $updatefn, $target);
        }
    }
}
6998
# Restore the Proxmox Backup Server snapshot referenced by the volume ID $archive as VM $vmid.
#
# Parameters:
#   $archive - volume ID of the backup; it must parse as vtype 'backup' with format 'pbs-vm'
#   $vmid    - ID of the VM to create or overwrite
#   $user    - user on whose behalf the restore runs; passed on to $parse_backup_hints and
#              check_restore_permissions()
#   $options - hash reference; the keys read directly here are 'live', 'unique',
#              'override_conf' and 'pool' (the whole hash is also given to $parse_backup_hints)
#
# Dies on failure. If the restore of the disks fails, the volumes allocated so far get
# deactivated and destroyed again before the error is re-thrown.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};
    # the namespace does not change between drives, so build the arguments only once
    my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if the VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are escaped so that only real '<name>.img.fidx' archives match
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config =
            scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed-up config, this time to generate the new config
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes stay active, as the VM gets started below
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7215
# Start VM $vmid in paused state with its disks backed by a Proxmox Backup Server snapshot,
# then stream the data of all $restored_disks into the target volumes while the VM runs.
#
# Parameters:
#   $vmid           - ID of the VM to start
#   $conf           - VM config; must contain an entry for every restored drive
#   $storecfg       - storage configuration
#   $restored_disks - hash reference keyed by device name ('drive-<config key>')
#   $opts           - hash reference with 'repo', 'snapshot' and optionally 'keyfile' and
#                     'namespace'
#
# Dies with "live-restore failed" after stopping the VM if anything goes wrong once the
# restore was started.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $live_restore_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale capture from an earlier match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' for live-restore\n";

        my $pbs_conf = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_conf->{keyfile} = $opts->{keyfile}
            if defined($opts->{keyfile}) && -e $opts->{keyfile};
        $pbs_conf->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";

        my $pbs_name = "drive-${confname}-pbs";
        $live_restore_backing->{$confname} = {
            name => $pbs_name,
            blockdev => print_pbs_blockdev($pbs_conf, $pbs_name),
        };
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock(
            $storecfg,
            $vmid,
            $conf,
            { paused => 1, 'live-restore-backing' => $live_restore_backing },
            {},
        );

        # prevent shutdowns from qmeventd when the VM powers off from the inside
        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7293
# Inspired by pbs live-restore, this restores with the disks being available as files.
# Theoretically this can also be used to quick-start a full-clone vm if the
# disks are all available as files.
#
# The mapping should provide a format and a path by config entry, such as
# `{ scsi0 => { format => <qcow2|raw|...>, path => "/path/to/file" }, sata1 => ... }`
#
# This is used when doing a `create` call with the `--live-import` parameter,
# where the disks get an `import-from=` property. The non-live part is
# therefore already handled in the `$create_disks()` call happening in the
# `create` api call
#
# Parameters:
#   $mapping         - hash reference as described above; only the formats raw, qcow2 and vmdk
#                      are accepted and efidisk/tpmstate drives are rejected
#   $vmid            - ID of the VM to start
#   $conf            - VM config; must contain every drive named in $mapping
#   $restore_options - currently not used by this function
#
# Dies on an invalid mapping, or with "live-restore failed" after stopping the VM if the
# import itself fails. On success the 'import' lock is removed from the VM config.
sub live_import_from_files {
    my ($mapping, $vmid, $conf, $restore_options) = @_;

    my $live_restore_backing = {};
    for my $dev (keys %$mapping) {
        die "disk not supported for live-restoring: '$dev'\n"
            if !is_valid_drivename($dev) || $dev =~ /^(?:efidisk|tpmstate)/;

        die "mapping contains disk '$dev' which does not exist in the config\n"
            if !exists($conf->{$dev});

        my $info = $mapping->{$dev};
        my ($format, $path) = $info->@{qw(format path)};
        die "missing path for '$dev' mapping\n" if !$path;
        die "missing format for '$dev' mapping\n" if !$format;
        die "invalid format '$format' for '$dev' mapping\n"
            if !grep { $format eq $_ } qw(raw qcow2 vmdk);

        # NOTE(review): $path is interpolated as-is into the option string - confirm that
        # paths containing a comma cannot reach this point.
        $live_restore_backing->{$dev} = {
            name => "drive-$dev-restore",
            blockdev => "driver=$format,node-name=drive-$dev-restore"
                . ",read-only=on"
                . ",file.driver=file,file.filename=$path"
        };
    }

    my $storecfg = PVE::Storage::config();
    eval {

        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        vm_start_nolock(
            $storecfg,
            $vmid,
            $conf,
            { paused => 1, 'live-restore-backing' => $live_restore_backing },
            {},
        );

        # prevent shutdowns from qmeventd when the VM powers off from the inside
        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from the source image file to the target disk for
        # every vol, this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> source image] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$live_restore_backing) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "drive-$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices\n";

        for my $ds (sort keys %$live_restore_backing) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "drive-$ds-restore");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }

    PVE::QemuConfig->remove_lock($vmid, "import");
}
7380
# Restore VM $vmid from a VMA archive.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to create or overwrite
#   $user    - user on whose behalf the restore runs; passed on to $parse_backup_hints and
#              check_restore_permissions()
#   $opts    - hash reference; the keys read directly here are 'bwlimit', 'unique',
#              'override_conf' and 'pool' (the whole hash is also given to $parse_backup_hints)
#   $comp    - compression of the archive as understood by
#              PVE::Storage::decompressor_info(), false if the archive is not compressed
#
# The archive is fed through a pipeline (optional rate limit, optional decompressor,
# 'vma extract'). The device mapping is passed to 'vma extract' through a FIFO once the config
# contained in the archive got extracted.
#
# Dies on failure; volumes allocated up to that point are destroyed again.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline; everything added later reads from its output
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $decompressor = $info->{decompressor}
            or die "no decompressor available for '$comp' compressed vma archive\n";
        # build a new list instead of pushing onto the array owned by the storage layer
        $add_pipe->([ $decompressor->@*, $readfrom ]);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    # get rid of a stale leftover first (just like for $tmpdir) and fail early if the FIFO
    # cannot be created - nothing got allocated at this point yet
    unlink $mapfifo;
    POSIX::mkfifo($mapfifo, 0600) or die "unable to create fifo '$mapfifo' - $!\n";
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocates the target volumes, writes the device mapping to the FIFO and generates the
    # new config; called once the archive header was read
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    my $oldtimeout;

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation
                alarm($oldtimeout || 0);
                $oldtimeout = undef;
                $print_devmap->();
                print $fifofh "done\n";
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    # restore the previous alarm if the header was never read completely
    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7593
# Restore VM $vmid from a legacy tar based vzdump archive.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to create or overwrite
#   $user    - user on whose behalf the restore runs (exported as VZDUMP_USER)
#   $opts    - hash reference; the keys read here are 'override_conf' (must be empty),
#              'storage', 'pool', 'prealloc', 'info' and 'unique'
#
# The disks are extracted by the external 'qmextract' helper, which gets called by tar for
# every archive member. Dies on failure, after cleaning up what was extracted so far.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, so with 'info' set the (still empty)
        # $new_conf_raw gets written to the config file below - confirm that this is intended.
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file is not needed anymore once the cleanup ran, so do not leave the
        # temporary directory behind on errors either
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7706
# Call $func once, with the storage ID as its only argument, for every storage referenced by
# a non-CD-ROM volume of $conf. The storage IDs are visited in sorted order.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %seen;

    my $collect = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # the volume may be something else than a storage volume ID, hence noerr
        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $seen{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $storeid (sort keys %seen) {
        $func->($storeid);
    }
}
7726
# storage types whose snapshots can be handled by QEMU itself
my $qemu_snap_storage = {
    rbd => 1,
};

# Tell whether snapshots of $volid, attached as $deviceid, are done by QEMU.
#
# Returns 1 for qcow2/qed volumes and for storage types listed in $qemu_snap_storage that do
# not have 'krbd' set, and nothing otherwise - in particular for the TPM state. Dies if the
# storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name}
        // die "could not find storage '$storage_name'\n";

    my $handled_by_storage_type = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};

    return 1 if $handled_by_storage_type || $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7749
# Ping the QEMU guest agent of VM $vmid with a timeout of 3 seconds.
#
# Returns 1 if the agent answered and 0 otherwise. A warning is emitted on failure unless
# $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;

    return 1 if !$err;

    warn "QEMU Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7760
# Convert the volumes of VM $vmid into base volumes. If $disk is given, only that drive is
# converted; CD-ROM drives and volumes without the 'template' storage feature are skipped.
#
# The config is written after every converted volume.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7781
# Convert an 'iscsi://<portal>/<target>/<lun>' path into the equivalent option string using
# QEMU's iscsi driver with a raw format on top.
#
# Dies if $path does not have the expected form.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my $portal = $1;
        my $target = $2;
        my $lun = $3;

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7798
# Copy a source volume or image file into an existing destination volume by
# running '/usr/bin/qemu-img convert -p -n'.
#
# Parameters:
#   $src_volid           - source volume ID, or path of an existing file or
#                          block device
#   $dst_volid           - destination volume ID (must parse as a volume ID)
#   $size                - total size, only used for the progress output
#   $snapname            - optional snapshot name to read from
#   $is_zero_initialized - if true and the target is not iSCSI, the target
#                          path is prefixed with 'zeroinit:'
#   $bwlimit             - optional rate limit, passed as '-r ${bwlimit}K'
#
# Dies if source or destination cannot be resolved, and with
# "copy failed: ..." if the qemu-img command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # plain file or block device: derive the format from the extension, if any
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);
    push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn qemu-img's "(NN.NN/100%)" progress lines into a human readable message
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7881
# Return the image format to use with QEMU for $volname on storage $scfg.
#
# For storages that have a 'path' set or are of type 'esxi', the format is the
# file extension of $volname if it matches
# $PVE::QemuServer::Drive::QEMU_FORMAT_RE. In every other case "raw" is
# returned.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    # FIXME: this entire function is kind of weird given that `parse_volname`
    # also already gives us a format?
    my $is_path_storage = $scfg->{path} || $scfg->{type} eq 'esxi';
    return "raw" if !$is_path_storage;

    return $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/ ? $1 : "raw";
}
7895
# Start a 'drive-mirror' monitor command for device "drive-$drive" of VM $vmid
# and then wait for it through qemu_drive_mirror_monitor().
#
# A $dst_volid starting with 'nbd:' is used as target verbatim (format "nbd").
# Otherwise the target path and format are resolved through the storage layer,
# with a 'zeroinit:' prefix if $is_zero_initialized is true.
#
# If $src_bitmap is defined, the mirror runs in 'incremental' sync mode with
# that bitmap. If $bwlimit is defined, 'speed' is set to $bwlimit * 1024.
#
# The job is registered in $jobs (a fresh hash is used if none is passed). If
# starting the mirror fails, the registered jobs are cancelled and the
# function dies with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};
    $jobs->{"drive-$drive"} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        if ($is_zero_initialized) {
            $qemu_target = "zeroinit:$dst_path";
        } else {
            $qemu_target = $dst_path;
        }
    }

    my $opts = {
        timeout => 10,
        device => "drive-$drive",
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    my $start_err = $@;
    if ($start_err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7947
# Poll the block jobs listed in $jobs (hash keyed by job/device ID) once per
# second and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# Only jobs whose type equals $op (default "mirror") are looked at. Finished
# jobs are removed from $jobs. If $vmiddst is set and differs from $vmid, the
# jobs are cancelled (not completed) once all of them are ready; around that
# cancellation the guest filesystem is frozen if $qga is true and the agent
# answers, otherwise the VM is suspended.
#
# On any error the remaining jobs are cancelled (errors of the cancellation
# itself are ignored) and the function dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts "cannot be completed" answers, used as retry limit
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that vanished without us completing it
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # stop repeating the status line once the job was seen as ready
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job...\n";

                        # named differently from $op so the job type stays visible here
                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $cmd_err = $@;
                        if ($cmd_err && $cmd_err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($cmd_err) {
                            # any other failure must not be reported as success
                            die "$job_id: block job cannot be completed - $cmd_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
8089
# Cancel all block jobs listed in $jobs (hash keyed by job/device ID) of VM
# $vmid and wait until none of them is reported by 'query-block-jobs' anymore.
#
# Errors of the individual 'block-job-cancel' commands are ignored. Jobs are
# removed from $jobs as they disappear; the job list is polled once per second.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        $still_running{$_->{device}} = $_ for @$stats;

        my @gone = grep {
            defined($jobs->{$_}->{cancel}) && !defined($still_running{$_})
        } keys %$jobs;

        for my $job_id (@gone) {
            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !%$jobs;

        sleep 1;
    }
}
8120
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Does nothing unless $use_drive_mirror is true and source and target storage
# differ. Dies if the source drive uses io_uring (explicit 'aio' setting, or
# the storage default when 'aio' is not set) while
# storage_allows_io_uring_default() is false for the target storage.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    return unless $use_drive_mirror;

    # Don't complain when not changing storage.
    # Assume if it works for the source, it'll work for the target too.
    return if $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n"
        if $src_uses_io_uring && !storage_allows_io_uring_default($dst_scfg, $cache_direct);
}
8147
# Create a linked or full clone of a single drive and return the drive hash
# describing the new volume.
#
# Parameters:
#   $storecfg   - storage configuration
#   $source     - hash with the keys vmid, running, drivename, drive, snapname
#   $dest       - hash with the keys vmid, drivename, efisize, storage, format
#   $full       - true for a full copy, false for a linked clone
#   $newvollist - array reference, every newly created volume ID is appended
#   $jobs, $completion, $qga - passed on to the drive-mirror helpers
#   $bwlimit    - passed on to qemu_drive_mirror() / qemu_img_convert()
#
# A full clone uses drive-mirror if the VM is running, a source drive name is
# given, no snapshot is selected and the source is not an 'unused' drive.
# Otherwise the data is copied with qemu-img ('dd' for EFI disks, 'convert'
# for everything else). Cloud-init drives only get a new volume, no data copy.
#
# Returns a deep copy of the source drive hash with 'format' removed, 'file'
# set to the new volume ID and, for drives that are not 'unused', 'size' set
# if it could be queried.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    # Both drive names are treated as optional further down, so guard the string
    # operations on them to avoid 'uninitialized value' warnings.
    my $unused = defined($src_drivename) && $src_drivename =~ /^unused/;
    my $dst_is_efidisk = defined($dst_drivename) && $dst_drivename eq 'efidisk0';
    my $dst_is_tpmstate = defined($dst_drivename) && $dst_drivename eq 'tpmstate0';

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname && !$unused;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_is_efidisk) {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_is_tpmstate) {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if (defined($completion) && ($completion eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
            goto no_data_clone;
        }

        my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
        if ($use_drive_mirror) {
            qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                $completion, $qga, $bwlimit);
        } else {
            if ($dst_is_efidisk) {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

no_data_clone:
    # size query is best effort, a failure just leaves 'size' as copied from the source
    my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size) && !$unused;

    return $disk;
}
8268
# Query the running QEMU process of VM $vmid with 'query-version' and return
# its version as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $reply = mon_cmd($vmid, "query-version");
    my $qemu = $reply->{qemu};

    return "$qemu->{major}.$qemu->{minor}";
}
8274
# Decide whether the old (non-EFI) PXE BIOS files have to be used for a machine
# type.
#
# Returns nothing if $machine_type is not set. Otherwise returns the list
# ($use_old_bios_files, $machine_type): a machine type with a '.pxe' suffix
# gives a true flag together with the type stripped of that suffix; for any
# other type the flag is true if its version is below 2.4.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        return (1, $1);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
8296
# Return the file size (as reported by -s) of the OVMF vars file selected by
# get_ovmf_files() for this VM config.
#
# If $efidisk is not passed, it is parsed from $conf->{efidisk0} when that is
# set.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    return -s $ovmf_vars;
}
8306
# Set the 'size' of the efidisk0 entry in $conf to the value returned by
# get_efivars_size(). Does nothing if $conf has no efidisk0. Returns nothing.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
8318
# Set the 'size' of the tpmstate0 entry in $conf to
# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE and store the re-printed drive
# string back into $conf.
sub update_tpmstate_size {
    my ($conf) = @_;

    my $tpm_drive = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $tpm_drive->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;

    $conf->{tpmstate0} = print_drive($tpm_drive);
}
8326
# Allocate a new EFI vars volume on storage $storeid for VM $vmid and fill it
# with the OVMF vars file selected by get_ovmf_files().
#
# Returns the list ($volid, $size / 1024), where $size is the value reported
# by PVE::Storage::volume_size_info() for the new volume.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my $ovmf_vars = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $vars_bytes = -s $ovmf_vars;
    my $vars_kb = PVE::Tools::convert_size($vars_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($ovmf_vars, $volid, $vars_bytes, undef, 0);

    # scalar context on purpose: only the size is wanted
    my $size = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $size/1024);
}
8342
# Return a hash reference mapping the 'id' of every entry returned by the
# 'query-iothreads' monitor command of VM $vmid to that entry's 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $reply = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    $thread_id_of{ $_->{id} } = $_->{"thread-id"} for @$reply;

    return \%thread_id_of;
}
8355
# Work out on which SCSI controller a drive ends up.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev            - 7 if 'scsihw' is unset or starts with 'lsi', 1 for
#                        'virtio-scsi-single', 256 otherwise
#   $controller        - int($drive->{index} / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', else "scsihw"
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    } else {
        $maxdev = 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8376
# Pick the disk format for a new volume on storage $storeid.
#
# If $format is not given, the format derived from $src_volname by
# qemu_img_format() is used as a hint; without $src_volname the storage's
# default format is returned directly. A format that is not among the valid
# formats of the storage is replaced by the storage's default format.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        # nothing to derive a hint from
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $is_supported = grep { $_ eq $format } @$validFormats;
    return $is_supported ? $format : $defFormat;
}
8396
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage used for the VM state. Order of preference:
#   1. $conf->{vmstatestorage}, if set
#   2. a shared storage used by the VM
#   3. a non-shared storage used by the VM
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' replaces one picked earlier.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    # first, return storage from conf if set
    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $prefer = $scfg->{path}; # prefer file based storage

        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $prefer;
        } else {
            $local = $sid if !$local || $prefer;
        }
    });

    # second, use shared storage where VM has at least one disk
    # third, use local storage where VM has at least one disk
    # fall back to local storage
    return $shared // $local // 'local';
}
8420
# Generate a UUID with UUID::generate() and return the string form produced
# by UUID::unparse().
sub generate_uuid {
    my $binary_uuid;
    my $uuid_text;

    UUID::generate($binary_uuid);
    UUID::unparse($binary_uuid, $uuid_text);

    return $uuid_text;
}
8427
# Return a freshly generated UUID in the form "uuid=<uuid>".
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8431
# Send the 'nbd-server-stop' monitor command (with 'timeout => 25') to VM
# $vmid and return whatever mon_cmd() returns.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd(
        $vmid,
        'nbd-server-stop',
        timeout => 25,
    );
}
8437
# Create (or truncate) the file /run/qemu-server/$vmid.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8444
# Remove the file /run/qemu-server/$vmid.reboot.
#
# Returns the result of unlink(), i.e. a true value if the file was removed
# and a false value if it did not exist. Dies if unlink() failed with any
# error other than ENOENT.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8456
# Build a boot order hash (device name => boot index) from the legacy boot
# string, taken from $bootcfg->{legacy} or else the default in $boot_fmt.
#
# The letter at position N (starting at 1) of that string gets the base index
# N * 100. Volumes are then assigned as follows:
#   'd' - every CD-ROM drive, with increasing indices
#   'c' - only the drive named in $conf->{bootdisk}; the index still advances
#         for every other non-CD-ROM volume
#   'n' - every configured net device, with increasing indices
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my $bootindex_hash = {};
    my @letters = split(//, $boot);
    for my $pos (0 .. $#letters) {
        $bootindex_hash->{$letters[$pos]} = ($pos + 1) * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom($drive, 1)) {
            if ($bootindex_hash->{d}) {
                $bootorder->{$ds} = $bootindex_hash->{d};
                $bootindex_hash->{d} += 1;
            }
        } elsif ($bootindex_hash->{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder->{$ds} = $bootindex_hash->{c} if $is_bootdisk;
            $bootindex_hash->{c} += 1;
        }
    });

    if ($bootindex_hash->{n}) {
        for my $idx (0 .. $MAX_NETS - 1) {
            my $netname = "net$idx";
            next unless $conf->{$netname};
            $bootorder->{$netname} = $bootindex_hash->{n};
            $bootindex_hash->{n} += 1;
        }
    }

    return $bootorder;
}
8496
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference holding, in this order and only if present: the
# first hard disk, the first CD-ROM drive and the first configured net device.
sub get_default_bootdevices {
    my ($conf) = @_;

    # harddisk
    my $harddisk = PVE::QemuServer::Drive::resolve_first_disk($conf, 0);

    # cdrom
    my $cdrom = PVE::QemuServer::Drive::resolve_first_disk($conf, 1);

    # network
    my $netdev = first { $conf->{$_} } map { "net$_" } (0 .. $MAX_NETS - 1);

    return [ grep { $_ } ($harddisk, $cdrom, $netdev) ];
}
8524
# Return the boot order hash (device name => boot index) for a VM config.
#
# Without a 'boot' property, or if the parsed property is undefined or has
# 'legacy' set, the result comes from bootorder_from_legacy(). With an 'order'
# list, the listed devices are numbered starting at 100. Otherwise the hash is
# empty.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        my $legacy_order = bootorder_from_legacy($conf, $boot);
        return $legacy_order;
    }

    my %bootorder;
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        $bootorder{$_} = $index++ for PVE::Tools::split_list($boot->{order});
    }

    return \%bootorder;
}
8544
# Connect to the qmeventd socket at /var/run/qmeventd.sock and send the
# 'vzdump' handshake for VM $vmid.
#
# A failed connection attempt is retried after 25ms as long as the error is
# EINTR or EAGAIN; the function dies on any other error, or once more than
# four attempts were made.
#
# Returns the socket handle, which the caller closes later.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $attempts = 0;
    my $fh;

    while (1) {
        $attempts++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8572
8573 # bash completion helper
8574
# Completion helper: return an array reference with the volume IDs of all
# backups whose format matches 'vma.<compressor>'.
#
# If $cvalue starts with '<storeid>:', only that storage is listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next unless $item->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next unless defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8598
# Completion helper: return an array reference with the IDs reported by
# vmstatus().
#
# $running undefined: all IDs.
# $running true:      IDs that are not templates and have status 'running'.
# $running false:     IDs that are not templates and do not have status 'running'.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $is_wanted = sub {
        my ($d) = @_;

        return 1 if !defined($running);
        return 0 if $d->{template};

        my $is_running = $d->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    return [ grep { $is_wanted->($idlist->{$_}) } keys %$idlist ];
};
8618
# Completion helper: all IDs, without any status filter.
sub complete_vmid {
    return $complete_vmid_full->();
}
8622
# Completion helper: IDs of non-template guests whose status is not 'running'.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8626
# Completion helper: IDs of non-template guests whose status is 'running'.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8630
# Completion helper: return an array reference with the IDs of all storages
# that pass PVE::Storage::storage_check_enabled() and have the 'images'
# content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        my $sid = $_;
        PVE::Storage::storage_check_enabled($cfg, $sid, undef, 1)
            && $ids->{$sid}->{content}->{images};
    } keys %$ids;

    return \@usable;
}
8645
# Completion helper: like complete_storage(), but the enabled check is done
# for the target node, which is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my ($targetnode) = $all_args->@[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        my $sid = $_;
        PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1)
            && $ids->{$sid}->{content}->{images};
    } keys %$ids;

    return \@usable;
}
8663
# Check whether VM $vmid is paused, based on the 'query-status' monitor
# command.
#
# Returns a true value if the reported status is "paused" or "prelaunch", or
# "suspended" when $include_suspended is true. Returns a false value otherwise,
# including when the status could not be queried (the error is then emitted
# as a warning).
sub vm_is_paused {
    my ($vmid, $include_suspended) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    warn "$@\n" if $@;

    return $qmpstatus if !$qmpstatus;

    my $status = $qmpstatus->{status};
    return $status eq "paused"
        || $status eq "prelaunch"
        || ($include_suspended && $status eq "suspended");
}
8677
# Assert that the storage of volume $vol has the volume's content type
# enabled. Returns 1 on success, dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8690
# Add the MAC address of every netN device in $conf to the forwarding database
# of its bridge, using the tap interface name "tap<vmid>i<N>".
#
# Devices that cannot be parsed are skipped. A device without a MAC is skipped
# too, with a warning if the interface's 'brport/learning' sysfs file reads as
# false. A device without a bridge is skipped with a warning.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($index) = $opt =~ m/^net(\d+)$/ or next;
        my $iface = "tap${vmid}i${index}";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my $mac = $net->{macaddr};
        unless ($mac) {
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !$learning;
            next;
        }

        my $bridge = $net->{bridge};
        unless ($bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge);
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac);
        }
    }
}
8719
# Remove the MAC address of every netN device in $conf from the forwarding
# database of its bridge, using the tap interface name "tap<vmid>i<N>".
# Devices that cannot be parsed or have no MAC address are skipped.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($index) = $opt =~ m/^net(\d+)$/ or next;
        my $iface = "tap${vmid}i${index}";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;
        my $bridge = $net->{bridge};

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge);
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac);
        }
    }
}
8738
# For every netN device in $conf, call
# PVE::Network::SDN::Vnets::add_next_free_cidr() with the device's bridge and
# MAC address, the VM name and $vmid. Errors of that call are only warned
# about. Does nothing if $have_sdn is false.
sub create_ifaces_ipams_ips {
    my ($conf, $vmid) = @_;

    return if !$have_sdn;

    for my $opt (grep { m/^net(\d+)$/ } keys %$conf) {
        my $net = PVE::QemuServer::parse_net($conf->{$opt});
        eval { PVE::Network::SDN::Vnets::add_next_free_cidr($net->{bridge}, $conf->{name}, $net->{macaddr}, $vmid, undef, 1) };
        warn $@ if $@;
    }
}
8753
# For every netN device in $conf, call
# PVE::Network::SDN::Vnets::del_ips_from_mac() with the device's bridge and
# MAC address and the VM name. Errors of that call are only warned about.
# Does nothing if $have_sdn is false.
sub delete_ifaces_ipams_ips {
    my ($conf, $vmid) = @_;

    return if !$have_sdn;

    for my $opt (grep { m/^net(\d+)$/ } keys %$conf) {
        my $net = PVE::QemuServer::parse_net($conf->{$opt});
        eval { PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}) };
        warn $@ if $@;
    }
}
8767
8768 1;