]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
8d0ed22cc5d016a56a9c4c2abe0b67a422948f0a
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use List::Util qw(first);
23 use MIME::Base64;
24 use POSIX;
25 use Storable qw(dclone);
26 use Time::HiRes qw(gettimeofday usleep);
27 use URI::Escape;
28 use UUID;
29
30 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31 use PVE::CGroup;
32 use PVE::CpuSet;
33 use PVE::DataCenterConfig;
34 use PVE::Exception qw(raise raise_param_exc);
35 use PVE::Format qw(render_duration render_bytes);
36 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37 use PVE::HA::Config;
38 use PVE::Mapping::PCI;
39 use PVE::Mapping::USB;
40 use PVE::INotify;
41 use PVE::JSONSchema qw(get_standard_option parse_property_string);
42 use PVE::ProcFSTools;
43 use PVE::PBSClient;
44 use PVE::RESTEnvironment qw(log_warn);
45 use PVE::RPCEnvironment;
46 use PVE::Storage;
47 use PVE::SysFSTools;
48 use PVE::Systemd;
49 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
50
51 use PVE::QMPClient;
52 use PVE::QemuConfig;
53 use PVE::QemuServer::Helpers qw(config_aware_timeout min_version windows_version);
54 use PVE::QemuServer::Cloudinit;
55 use PVE::QemuServer::CGroup;
56 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options get_cpu_bitness is_native_arch);
57 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
58 use PVE::QemuServer::Machine;
59 use PVE::QemuServer::Memory qw(get_current_memory);
60 use PVE::QemuServer::Monitor qw(mon_cmd);
61 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
62 use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
63 use PVE::QemuServer::USB;
64
# SDN support is optional: the flag is only set to 1 when both SDN modules could be loaded,
# otherwise it stays undefined.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    require PVE::Network::SDN::Vnets;
    1;
};
71
# Directory holding the EDK2 firmware images.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware image pairs (code image first, variables image second), keyed by architecture and
# then by image variant.
my $OVMF = {
    x86_64 => {
        '4m-no-smm' => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE_4M.fd OVMF_VARS_4M.fd)
        ],
        '4m-no-smm-ms' => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE_4M.fd OVMF_VARS_4M.ms.fd)
        ],
        '4m' => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.fd)
        ],
        '4m-ms' => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE_4M.secboot.fd OVMF_VARS_4M.ms.fd)
        ],
        # FIXME: These are legacy 2MB-sized images that modern OVMF does not support building
        # anymore. How can we deprecate this sanely without breaking existing instances, or the
        # use of older backups and snapshots?
        default => [
            map { "$EDK2_FW_BASE/$_" } qw(OVMF_CODE.fd OVMF_VARS.fd)
        ],
    },
    aarch64 => {
        default => [
            map { "$EDK2_FW_BASE/$_" } qw(AAVMF_CODE.fd AAVMF_VARS.fd)
        ],
    },
};
106
# Host CPU information, read once when this module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.

# Hook the parser/writer pair for files below '/qemu-server/' into the cluster file system layer.
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);

PVE::JSONSchema::register_standard_option(
    'pve-qm-stateuri',
    {
        type => 'string',
        maxLength => 128,
        optional => 1,
        description => "Some command save/restore state from this location.",
    },
);

PVE::JSONSchema::register_standard_option(
    'pve-qemu-machine',
    {
        type => 'string',
        pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
        maxLength => 40,
        optional => 1,
        description => "Specifies the QEMU machine type.",
    },
);
134
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;

# Return the node name as reported by PVE::INotify::nodename(). The first defined result is kept
# in $nodename_cache and reused by all later calls.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
141
# Property-string schema for the 'watchdog' option, registered as format 'pve-qm-watchdog'.
my $watchdog_fmt = {
    model => {
        type => 'string',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
        description => "Watchdog type to emulate.",
    },
    action => {
        type => 'string',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
159
# Property-string schema for the 'agent' option (QEMU Guest Agent settings).
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        default => 0,
        default_key => 1,
        description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Run fstrim after moving a disk or migrating the VM.",
    },
    'freeze-fs-on-backup' => {
        type => 'boolean',
        default => 1,
        optional => 1,
        description => "Freeze/thaw guest filesystems on backup for consistency.",
    },
    type => {
        type => 'string',
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
        description => "Select the agent type",
    },
};
187
# Property-string schema for the 'vga' option (display type, memory and clipboard).
my $vga_fmt = {
    type => {
        type => 'string',
        enum => [
            qw(cirrus qxl qxl2 qxl3 qxl4 none),
            qw(serial0 serial1 serial2 serial3),
            qw(std virtio virtio-gl vmware),
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
        description => "Select the VGA type.",
    },
    memory => {
        type => 'integer',
        minimum => 4,
        maximum => 512,
        optional => 1,
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
    },
    clipboard => {
        type => 'string',
        enum => ['vnc'],
        optional => 1,
        description => 'Enable a specific clipboard. If not set, depending on the display type the'
            .' SPICE one will be added. Migration with VNC clipboard is not yet supported!',
    },
};
212
# Property-string schema for the 'ivshmem' option (inter-VM shared memory).
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        minimum => 1,
        description => "The size of the file in MB.",
    },
    name => {
        type => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        format_description => 'string',
        optional => 1,
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
    },
};
227
# Property-string schema for the 'audio0' option.
my $audio_fmt = {
    device => {
        type => 'string',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
        description => "Configure an audio device.",
    },
    driver => {
        type => 'string',
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
        description => "Driver backend for the audio device.",
    },
};
242
# Property-string schema for the 'spice_enhancements' option.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        default => '0',
        optional => 1,
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
    },
    videostreaming => {
        type => 'string',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
        description => "Enable video streaming. Uses compression for detected video streams.",
    },
};
258
# Property-string schema for the 'rng0' option (VirtIO random number generator).
my $rng_fmt = {
    source => {
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
        description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
            ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
            ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
            ." still seeded from real entropy, and the bytes provided will most likely be mixed"
            ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
            ." a hardware RNG from the host.",
    },
    max_bytes => {
        type => 'integer',
        optional => 1,
        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
        description => "Maximum bytes of entropy allowed to get injected into the guest every"
            ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
            ." `0` to disable limiting (potentially dangerous!).",
    },
    period => {
        type => 'integer',
        optional => 1,
        default => 1000,
        description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
            ." the guest to retrieve another 'max_bytes' of entropy.",
    },
};
291
# Property-string schema for the 'meta' option (meta-information about the guest).
my $meta_info_fmt = {
    ctime => {
        type => 'integer',
        minimum => 0,
        optional => 1,
        description => "The guest creation timestamp as UNIX epoch time",
    },
    'creation-qemu' => {
        type => 'string',
        pattern => '\d+(\.\d+)+',
        optional => 1,
        description => "The QEMU (machine) version from the time this VM was created.",
    },
};
306
# Schema of the VM configuration options, keyed by option name. Each value is a JSON-schema style
# description (type, default, description, ...) of one option. More entries (numaN, netN and the
# cloud-init options) are added to this hash by later top-level code in this file.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
            ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
            ." windows > 7.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exit on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 1,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'string',
        description => "Memory properties.",
        format => $PVE::QemuServer::Memory::memory_fmt
    },
    balloon => {
        optional => 1,
        type => 'integer',
        # fixed typo: "ballon" -> "balloon"
        description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. This option is generally not required and"
            ." is often better handled from within the guest OS.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        # NOTE: When extending, also consider extending `%guest_types` in `Import/ESXi.pm`.
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        # fixed typo in the last list entry: "OpenIndiania" -> "OpenIndiana"
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        # fixed duplicated word: "will not not be deleted"
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        # fixed: the two string parts were joined without a separating space ("are:'now'")
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -smbios 'type=0,vendor=FOO'

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        # fixed article: "a audio device" -> "an audio device"
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        description => "Some (read-only) meta-information about this guest.",
        optional => 1,
    },
    affinity => {
        type => 'string', format => 'pve-cpuset',
        description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
        optional => 1,
    },
};
741
# Property-string schema for the cloud-init 'cicustom' option. Each key names one kind of
# cloud-init data and takes a volume ID ('pve-volume-id') of a custom file.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # The two parts must be separate single-quoted strings joined with '.'; a stray double
        # quote here would end up literally in the description text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Make the cicustom schema available by name; the 'cicustom' option below refers to it.
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);

# Schema of the cloud-init related configuration options, keyed by option name.
# any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        # fixed: the parenthesis opened before `ostype` was never closed
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.',
        default => 1,
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
832
833 # what about other qemu settings ?
834 #cpu => 'string',
835 #machine => 'string',
836 #fda => 'file',
837 #fdb => 'file',
838 #mtdblock => 'file',
839 #sd => 'file',
840 #pflash => 'file',
841 #snapshot => 'bool',
842 #bootp => 'file',
843 ##tftp => 'dir',
844 ##smb => 'dir',
845 #kernel => 'file',
846 #append => 'string',
847 #initrd => 'file',
848 ##soundhw => 'string',
849
# Expose every option that is in $confdesc at this point as standard option "pve-qm-<name>".
for my $opt (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt", $confdesc->{$opt});
}

# Upper bounds for the indexed device options.
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;

# All numaN options share the same description object.
for my $idx (0 .. $PVE::QemuServer::Memory::MAX_NUMA - 1) {
    $confdesc->{"numa$idx"} = $PVE::QemuServer::Memory::numadesc;
}
861
# Network card models accepted by the 'model' key of the network device options.
my $nic_model_list = [
    qw(e1000 e1000-82540em e1000-82544gc e1000-82545em e1000e),
    qw(i82551 i82557b i82559er),
    qw(ne2k_isa ne2k_pci),
    qw(pcnet rtl8139 virtio vmxnet3),
];
# The same models as one space separated, sorted string.
my $nic_model_list_txt = join(' ', sort @$nic_model_list);
879
# Long description of the 'bridge' key of the network device options.
my $net_fmt_bridge_descr = <<__EOD__;
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.
__EOD__

# Property-string schema for a network device (the netN options).
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        # fixed typo: "withing" -> "within"
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # Every model name is also accepted as a key: it is an alias for 'model', and its value is
    # an alias for 'macaddr'.
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};

# Option description used for every netN option, also available as standard option "pve-qm-net".
my $netdesc = {
    optional => 1,
    type => 'string', format => $net_fmt,
    description => "Specify network devices.",
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
963
# Property-string schema for the cloud-init IP configuration, registered as format
# 'pve-qm-ipconfig'. A gateway key requires the address key of the same IP family.
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        default => 'dhcp',
        optional => 1,
        description => 'IPv4 address in CIDR format.',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        requires => 'ip',
        optional => 1,
        description => 'Default gateway for IPv4 traffic.',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        default => 'dhcp',
        optional => 1,
        description => 'IPv6 address in CIDR format.',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        requires => 'ip6',
        optional => 1,
        description => 'Default gateway for IPv6 traffic.',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema entry used for every cloud-init `ipconfigN` config key: an optional
# property string in the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must carry the ipconfig schema ($ipconfigdesc), not the
# network device schema ($netdesc), so that users of the 'pve-qm-ipconfig'
# standard option get the matching format and description.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1017
# One `netN` option and one cloud-init `ipconfigN` option per possible NIC.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$idx"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
}

# Every cloud-init option is also a regular VM config option.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1026
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);

# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset and returns the short string
# form of the resulting set. If parsing fails, returns nothing when $noerr is
# set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # parse_cpuset returns (count, members); only the members are needed here
    my (undef, $cpu_members) = eval { PVE::CpuSet::parse_cpuset($set_text) };
    if (my $err = $@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($cpu_members);
    return $cpuset->short_string();
}
1040
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);

# Format verifier for 'pve-volume-id-or-qm-path'.
#
# Accepts the keywords 'none' and 'cdrom' unchanged; everything else is
# delegated to verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1049
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);

# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# A value starting with '/' is returned as-is. Anything else is checked against
# the 'pve-volume-id' format and the checked value is returned. On a failed
# check, returns nothing if $noerr is set, otherwise re-throws the error.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    # absolute path: nothing to verify
    return $volid if index($volid, '/') == 0;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $checked;
}
1063
# Schema entry shared by all `serialN` config keys. The value is either a host
# device path below /dev or the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<"EODESCR",
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1080
# Schema entry shared by all `parallelN` config keys. Only /dev/parportN and
# /dev/usb/lpN host devices are accepted.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<"EODESCR",
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1095
# Register the indexed device options (parallelN, serialN, hostpciN, usbN) and
# all drive options in the VM config schema.
for my $idx (0 .. $MAX_PARALLEL_PORTS - 1) {
    $confdesc->{"parallel$idx"} = $paralleldesc;
}

for my $idx (0 .. $MAX_SERIAL_PORTS - 1) {
    $confdesc->{"serial$idx"} = $serialdesc;
}

for my $idx (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$idx"} = $PVE::QemuServer::PCI::hostpcidesc;
}

{
    # drive keys are taken over from the Drive module under their own names
    my $drivedesc = $PVE::QemuServer::Drive::drivedesc_hash;
    $confdesc->{$_} = $drivedesc->{$_} for keys %$drivedesc;
}

for my $idx (0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$idx"} = $PVE::QemuServer::USB::usbdesc;
}
1115
# Property-string format of the `boot` option: the deprecated `legacy` letter
# list (also the default key) and the newer explicit `order` device list.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n). Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<"EODESC",
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1150
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
#
# Accepts any valid drive name except efidisk*/tpmstate*, and any netN, usbN
# or hostpciN key that exists in the config schema. Returns $dev when valid;
# otherwise returns nothing if $noerr is set and dies if not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    # EFI disks and TPM state are drives, but never bootable
    my $is_efi_or_tpm = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_efi_or_tpm) {
        return $dev;
    }

    for my $base (qw(net usb hostpci)) {
        return $dev if $dev =~ m/^$base\d+$/ && $confdesc->{$dev};
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1172
# Serialize a list of boot devices into a `boot` property string
# (order=dev1;dev2;...). An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    return "" unless scalar(@$devs);

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1179
# Cached result of the KVM_GET_API_VERSION ioctl (false until queried).
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. A true result is cached.
# Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1193
# Per-binary caches: the parsed version string and the binary's mtime at the
# time the version was read.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of a QEMU binary as printed by `<binary> --version`.
#
# $binary - path to the QEMU binary; defaults to the command for the host arch.
#
# The result is cached per binary and re-read only when the binary's mtime
# changes. Returns 'unknown' if the version line cannot be parsed or running
# the binary fails (the failure is only warned about). Dies if the binary
# cannot be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # File::stat's stat() returns undef on failure; fail with a clear message
    # instead of an opaque "Can't call method mtime on an undefined value".
    my $st = stat($binary);
    die "unable to stat QEMU binary '$binary' - $!\n" if !$st;
    my $mtime = $st->mtime;

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary}
        if $kvm_user_version->{$binary} && $cachedmtime == $mtime;

    # pessimistic default, overwritten by the output parser below
    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $mtime;

    my $code = sub {
        my $line = shift;
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that falls
# back to the installed QEMU binary's version when $version is not defined.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1228
# True if /dev/vhost-net exists and is a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1232
# True if $key is a known VM config option, i.e. has an entry in $confdesc.
sub option_exists {
    my ($key) = @_;

    my $schema = $confdesc->{$key};
    return defined($schema);
}
1237
# Cached result of get_cdrom_path(); undef until the first lookup.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none is found, a warning is logged and the empty string is returned.
# The result (including the empty string) is cached.
sub get_cdrom_path {
    return $cdrom_path if defined($cdrom_path);

    for my $suffix ('', '1', '2') {
        my $candidate = "/dev/cdrom$suffix";
        next if !-l $candidate;
        $cdrom_path = $candidate;
        last;
    }

    if (!defined($cdrom_path)) {
        log_warn("no physical CD-ROM available, ignoring");
        $cdrom_path = '';
    }

    return $cdrom_path;
}
1252
# Resolve the `file` value of a CD-ROM drive to a path:
#   'cdrom'       -> the host's physical CD-ROM device (see get_cdrom_path)
#   'none'        -> empty string
#   absolute path -> returned unchanged
#   anything else -> treated as a volume ID and resolved via PVE::Storage::path
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1266
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/... paths and values that already look like
# "storage:volume" are returned unchanged. Other values containing a slash
# cannot be converted and yield nothing. Plain file names are mapped onto the
# 'local' storage: "local:iso/<file>" for cdrom media, "local:<vmid>/<file>"
# otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    # any other path cannot be mapped to a volume on 'local'
    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    return $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";
}
1285
# Check that the storage content type $vtype matches the drive's $media
# ('disk' expects 'images', 'cdrom' expects 'iso'). Does nothing when $media is
# not set. Raises a parameter exception keyed by $opt on a mismatch and dies
# with "internal error" for an unknown media value.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype_for = (
        disk => 'images',
        cdrom => 'iso',
    );
    die "internal error" if !exists($expected_vtype_for{$media});
    my $etype = $expected_vtype_for{$media};

    if ($vtype ne $etype) {
        raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
    }

    return;
}
1304
# Normalize $drive->{file} in place: try to convert filesystem paths to volume
# IDs.
#
# Values that are 'cdrom'/'none', a /dev/... path, already of the form
# "storage:volume", or purely numeric are left alone. Any other value is looked
# up via PVE::Storage::path_to_volume_id; a parameter exception keyed by $opt is
# raised if no storage matches. $drive->{media} defaults to 'cdrom' for ISO
# volumes and for 'cdrom'/'none'.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};
    my $needs_lookup = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    if ($needs_lookup) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    if (!$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/) {
        $drive->{media} = 'cdrom';
    }
}
1324
# Parse the `hotplug` option into a hash reference { feature => 1, ... }.
#
# '0' yields an empty hash, '1' is replaced by the schema default of the
# hotplug option. Dies on any feature name outside of
# network|disk|cpu|memory|usb|cloudinit.
sub parse_hotplug_features {
    my ($data) = @_;

    my %enabled;
    return \%enabled if $data eq '0';

    my $list = $data eq '1' ? $confdesc->{hotplug}->{default} : $data;

    for my $feature (PVE::Tools::split_list($list)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(network|disk|cpu|memory|usb|cloudinit)$/;
        $enabled{$1} = 1;
    }

    return \%enabled;
}
1343
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. Otherwise returns
# nothing when $noerr is set, or dies with the parser's error message.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    # parse_hotplug_features() dies on invalid input, so it has to be wrapped
    # in eval for $noerr to have any effect.
    my $features = eval { parse_hotplug_features($value) };
    my $err = $@;

    return $value if !$err && $features;

    return if $noerr;

    # keep the specific parser message when there is one
    die $err if $err;
    die "unable to parse hotplug option\n";
}
1354
# Die if the VNC clipboard is requested together with a VGA type whose name
# does not start with std, cirrus, vmware, virtio or qxl. Configurations
# without clipboard=vnc or without an explicit type are accepted.
sub assert_clipboard_config {
    my ($vga) = @_;

    my $clipboard = $vga->{'clipboard'};
    return if !$clipboard || $clipboard ne 'vnc';

    my $type = $vga->{type};
    return if !$type;

    my $clipboard_regex = qr/^(std|cirrus|vmware|virtio|qxl)/;
    return if $type =~ $clipboard_regex;

    die "vga type $vga->{type} is not compatible with VNC clipboard\n";
}
1369
# Build the device string for the USB tablet. q35 machines and aarch64 guests
# attach it to the ehci bus, everything else to uhci.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($is_q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1385
# Build the device string for the USB keyboard. Only aarch64 guests get one;
# for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1393
# Drive identifier as used in device/drive IDs: interface name directly
# followed by the index, e.g. "scsi0".
my sub get_drive_id {
    my ($drive) = @_;

    return join('', $drive->{interface}, $drive->{index});
}
1398
# Build the device property string for a drive, depending on its interface:
#   virtio   -> virtio-blk-pci on the PCI address computed for the drive
#   scsi     -> scsi-<devicetype> on the controller reported by scsihw_infos()
#   ide/sata -> ide-hd / ide-cd on the matching IDE or AHCI bus
# Dies for 'usb' (not implemented) and any other interface. Bootindex and
# (URI-unescaped) serial are appended when set.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $iface = $drive->{interface};
    my $drive_id = get_drive_id($drive);
    my $device = '';

    if ($iface eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        if ($drive->{iothread}) {
            $device .= ",iothread=iothread-$drive_id";
        }
    } elsif ($iface eq 'scsi') {
        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $machine_version = extract_version($machine_type, kvm_user_version());
        my $devicetype = PVE::QemuServer::Drive::get_scsi_devicetype(
            $drive, $storecfg, $machine_version);

        my $bus = "$controller_prefix$controller.0";
        my $scsihw = $conf->{scsihw};

        # default, lsi* and pvscsi controllers address drives by scsi-id, all
        # other controllers use a fixed scsi-id 0 and the drive index as LUN
        if (!$scsihw || $scsihw =~ m/^lsi/ || $scsihw eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$bus,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$bus,channel=0,scsi-id=0,lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        my $is_disk_type = $devicetype eq 'block' || $devicetype eq 'hd';
        if ($drive->{ssd} && $is_disk_type) {
            $device .= ",rotation_rate=1";
        }
        if ($drive->{wwn}) {
            $device .= ",wwn=$drive->{wwn}";
        }

        # only scsi-hd and scsi-cd support passing vendor and product information
        if ($devicetype eq 'hd' || $devicetype eq 'cd') {
            my $vendor = $drive->{vendor};
            $device .= ",vendor=$vendor" if $vendor;
            my $product = $drive->{product};
            $device .= ",product=$product" if $product;
        }
    } elsif ($iface eq 'ide' || $iface eq 'sata') {
        my $is_sata = $iface eq 'sata';
        my $maxdev = $is_sata ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
        # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
        # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
        # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
        # were before. Move odd ones up by 2 where they don't clash.
        if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && !$is_sata) {
            $controller += 2 * ($unit % 2);
            $unit = 0;
        }

        my $is_cdrom_media = $drive->{media} && $drive->{media} eq 'cdrom';
        my $devicetype = $is_cdrom_media ? "cd" : "hd";

        my $bus = $is_sata ? "ahci$controller.$unit" : "ide.$controller,unit=$unit";
        $device = "ide-$devicetype,bus=$bus,drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                my $unescaped_model = URI::Escape::uri_unescape($model);
                $device .= ",model=$unescaped_model";
            }
            $device .= ",rotation_rate=1" if $drive->{ssd};
        }
        if ($drive->{wwn}) {
            $device .= ",wwn=$drive->{wwn}";
        }
    } elsif ($iface eq 'usb') {
        die "implement me";
    } else {
        die "unsupported interface type";
    }

    if ($drive->{bootindex}) {
        $device .= ",bootindex=$drive->{bootindex}";
    }

    if (my $serial = $drive->{serial}) {
        my $unescaped_serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$unescaped_serial";
    }

    return $device;
}
1494
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
# Returns the value of the first `InitiatorName = ...` line, undef if the file
# has no such line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi');
    return if !$fh;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1508
# Decide whether io_uring may be used as the default aio mode for a drive on
# the given storage. Returns 1 if allowed (also when no storage config is
# given), nothing otherwise.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    return 1 if !$scfg;

    my $type = $scfg->{type};

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    return if $type eq 'lvm';

    # io_uring causes problems when used with CIFS since kernel 5.15
    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
    return if $type eq 'cifs';

    return 1;
}
1525
# Tell whether a drive ends up with a direct (O_DIRECT style) cache mode.
#
# With an explicit cache setting this is true for off, none and directsync.
# Without one it is true for every non-cdrom drive, except on btrfs storages
# that do not have `nocow` set.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        my $is_direct_mode = $cache =~ /^(?:off|none|directsync)$/;
        return $is_direct_mode;
    }

    return 0 if drive_is_cdrom($drive);

    my $btrfs_with_cow = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
    return $btrfs_with_cow ? 0 : 1;
}
1539
# Build the comma-separated drive option string for a VM drive.
#
# $storecfg          - storage configuration
# $vmid              - VM ID (used to resolve ISO paths)
# $drive             - parsed drive hash
# $live_restore_name - optional backing node name; when set the drive is set up
#                      with format=alloc-track on top of that backing node
# $io_uring          - whether io_uring may be picked as default aio mode
#
# Dies if a cdrom drive is combined with a live restore, or if a live-restore
# drive has no determinable format.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $live_restore_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    # resolve the path and, for non-cdrom drives, a default format
    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with a live restore image\n" if $live_restore_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # options are collected as "key=value" items and joined at the very end
    my @opts;

    # drive properties that are passed through under the same name
    for my $o (qw(heads secs cyls trans media cache rerror werror aio discard)) {
        push @opts, "$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        push @opts, "snapshot=" . ($drive->{snapshot} ? 'on' : 'off');
    }

    # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
    if (defined($drive->{ro})) {
        push @opts, "readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # throttling limits: total, read and write variants of each setting
    for my $type (['', '-total'], ['_rd', '-read'], ['_wr', '-write']) {
        my ($dir, $qmpname) = @$type;

        my $mbps = $drive->{"mbps$dir"};
        push @opts, "throttling.bps${qmpname}=" . int($mbps*1024*1024) if $mbps;

        my $mbps_max = $drive->{"mbps${dir}_max"};
        push @opts, "throttling.bps${qmpname}-max=" . int($mbps_max*1024*1024) if $mbps_max;

        my $bps_max_length = $drive->{"bps${dir}_max_length"};
        push @opts, "throttling.bps${qmpname}-max-length=$bps_max_length" if $bps_max_length;

        my $iops = $drive->{"iops${dir}"};
        push @opts, "throttling.iops${qmpname}=$iops" if $iops;

        my $iops_max = $drive->{"iops${dir}_max"};
        push @opts, "throttling.iops${qmpname}-max=$iops_max" if $iops_max;

        my $iops_max_length = $drive->{"iops${dir}_max_length"};
        push @opts, "throttling.iops${qmpname}-max-length=$iops_max_length" if $iops_max_length;
    }

    if ($live_restore_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        push @opts, "format=alloc-track", "file.driver=$format";
    } elsif ($format) {
        push @opts, "format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    push @opts, "cache=none" if !$drive->{cache} && $cache_direct;

    # pick a default aio mode when none is configured
    if (!$drive->{aio}) {
        my $aio;
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $aio = 'io_uring';
        } elsif ($cache_direct) {
            # aio native works only with O_DIRECT
            $aio = 'native';
        } else {
            $aio = 'threads';
        }
        push @opts, "aio=$aio";
    }

    if (!drive_is_cdrom($drive)) {
        # 'on' used to be our default with discard not being specified
        my $detectzeroes = 'on';
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard} && $drive->{discard} eq 'on') {
            $detectzeroes = 'unmap';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $live_restore_name ? "file.detect-zeroes" : "detect-zeroes";
        push @opts, "$dz_param=$detectzeroes";
    }

    my $file_param = "file";
    if ($live_restore_name) {
        push @opts, "backing=$live_restore_name", "auto-remove=on";

        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    return $pathinfo . join(',', "if=none", "id=drive-$drive_id", @opts);
}
1665
# Build the read-only 'pbs' blockdev option string named $pbs_name from
# $pbs_conf. repository, snapshot and archive are always emitted; namespace
# and keyfile only when set.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @parts, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @parts, "snapshot=$pbs_conf->{snapshot}";
    push @parts, "archive=$pbs_conf->{archive}";
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1676
# Build the device property string for a guest NIC.
#
# The 'virtio' model is emitted as virtio-net-pci, all other models under
# their own name. Multiqueue, queue sizes and host_mtu are VirtIO-only
# additions. Dies if the requested MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $model = $net->{model};
    my $device = $model eq 'virtio' ? 'virtio-net-pci' : $model;

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $devstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    # further "key=value" properties, appended in order at the end
    my @extra;

    if ($net->{queues} && $net->{queues} > 1 && $net->{model} eq 'virtio') {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        push @extra, "vectors=$vectors", "mq=on";
        push @extra, "packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $net->{model} eq 'virtio') {
        push @extra, "rx_queue_size=1024", "tx_queue_size=256";
    }

    push @extra, "bootindex=$net->{bootindex}" if $net->{bootindex};

    if (my $mtu = $net->{mtu}) {
        if ($net->{model} eq 'virtio' && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # the special value 1 means: use the MTU of the bridge
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            push @extra, "host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        # PXE ROM file for each device type that has one
        my %romfile_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $romfile_for{$device};
        push @extra, "romfile=$romfile" if $romfile;
    }

    return join(',', $devstr, @extra);
}
1739
# Build the netdev (host side) option string for a guest NIC.
#
# $netid must be of the form "net<N>" with N below $MAX_NETS. With a bridge
# configured a tap device "tap<vmid>i<N>" with the pve-bridge scripts is used
# (pve-bridge-hotplug when $hotplug is set), otherwise user-mode networking.
# vhost is enabled for VirtIO NICs on native architectures when
# /dev/vhost-net is available.
#
# Dies on an invalid $netid or when the tap interface name would be too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # Reject IDs that are not "net<N>" outright; comparing an empty index
    # numerically would only warn and let a malformed tap name through.
    die "got strange net id '$netid'\n" if $netid !~ m/^net(\d+)$/;
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= ${MAX_NETS};

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native_arch($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    # multiqueue is only set up for VirtIO NICs
    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1777
# Maps the configured `vga` type to the display device name used for it.
my $vga_map = {
    cirrus => 'cirrus-vga',
    std => 'VGA',
    vmware => 'vmware-svga',
    virtio => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};
1785
# Build the device property string for a display device.
#
# $vga     - parsed vga hash (type, memory)
# $id      - index of the display; a false value means the first one
# $qxlnum  - when true, a QXL device is emitted ('qxl-vga' for the first
#            display, 'qxl' for further ones) regardless of $vga->{type}
# $machine - machine type, passed on to the PCI address helpers
#
# Dies if no device type can be determined, or if 'virtio-gl' is requested
# without the required GL libraries or DRM render node.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # The alternation is grouped so that both anchors apply to each
        # alternative (an ungrouped pattern would read as `^l\d\d` OR `other$`).
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the plain VGA device on Windows guests unless
    # the VM uses OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1853
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a `netN` property string. On a parse error the error is warned about
# and nothing is returned. If no MAC address was given, a random one is
# generated using the datacenter's mac_prefix, unless $disable_mac_autogen is
# set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1869
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an `ipconfigN` property string. Parse errors and inconsistent
# address/gateway combinations are warned about and yield nothing. If neither
# an IPv4 nor an IPv6 address is configured, DHCP is returned for both.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # the first matching inconsistency is reported, if any
    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }
    if (defined($problem)) {
        warn $problem;
        return;
    }

    if (!$res->{ip} && !$res->{ip6}) {
        return { ip => 'dhcp', ip6 => 'dhcp' };
    }

    return $res;
}
1904
# Serialize a parsed net hash back into a `netN` property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1910
# Re-serialize every `netN` entry of $settings (in place) after running it
# through parse_net(), which fills in a MAC address where none is set. Entries
# that fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    my @net_keys = grep { $_ =~ m/^net(\d+)$/ } keys %$settings;
    for my $opt (@net_keys) {
        my $net = parse_net($settings->{$opt});
        if ($net) {
            $settings->{$opt} = print_net($net);
        }
    }
}
1921
# Return 1 if the storage layer reports $vmid as the owner of $volid.
# Absolute paths, volumes without an owner and volumes whose lookup fails
# yield nothing.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    # absolute paths are not storage volumes
    return if $volid =~ m|^/|;

    # lookup errors are deliberately ignored and treated as "not the owner"
    my (undef, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1935
# Handle a drive that is no longer referenced by the config:
#  - cloud-init drives are freed right away (errors are only warned about) and
#    the `cloudinit` config section is dropped
#  - other non-cdrom drives are recorded as unused volume if the VM owns them
#  - cdrom drives are ignored
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
1950
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string format of the `smbios1` option. All text fields share one
# schema and differ only in their description.
my $smbios1_fmt = do {
    my $fmt = {
        uuid => {
            type => 'string',
            pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
            format_description => 'UUID',
            description => "Set SMBIOS1 UUID.",
            optional => 1,
        },
        base64 => {
            type => 'boolean',
            description => 'Flag to indicate that the SMBIOS values are base64 encoded',
            optional => 1,
        },
    };

    my %description_for = (
        version => "Set SMBIOS1 version.",
        serial => "Set SMBIOS1 serial number.",
        manufacturer => "Set SMBIOS1 manufacturer.",
        product => "Set SMBIOS1 product ID.",
        sku => "Set SMBIOS1 SKU string.",
        family => "Set SMBIOS1 family string.",
    );
    for my $field (keys %description_for) {
        $fmt->{$field} = {
            type => 'string',
            pattern => '[A-Za-z0-9+\/]+={0,2}',
            format_description => 'Base64 encoded string',
            description => $description_for{$field},
            optional => 1,
        };
    }

    $fmt;
};
2008
# Parse an smbios1 property string against $smbios1_fmt.
# Parse errors are only warned about; undef is returned in that case.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2016
# Serialize a parsed smbios1 hash back into its property-string form.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $property_string = PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
    return $property_string;
}

# make the schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2023
# Parse a watchdog property string against $watchdog_fmt.
# Returns nothing for an empty/false value; parse errors are only warned
# about and result in undef.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2033
# Parse the 'agent' option of $conf against $agent_fmt.
# Returns an empty hash if the option is unset, cannot be parsed (a warning
# is emitted) or the agent is not enabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $agent_value = $conf->{agent};
    return {} if !defined($agent_value);

    my $parsed = eval { parse_property_string($agent_fmt, $agent_value) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $parsed->{enabled} ? $parsed : {};
}
2046
# Look up a single property of the parsed guest agent configuration.
# Yields undef if no 'agent' option is set at all.
sub get_qga_key {
    my ($conf, $key) = @_;

    # explicit undef keeps the result a single value in list context too
    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2054
# Parse a vga property string against $vga_fmt.
# An empty/false value yields an empty hash; parse errors are only warned
# about and result in undef.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $parsed = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2063
# Parse an rng property string against $rng_fmt.
# Returns nothing for an empty/false value; parse errors are only warned
# about and result in undef.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($rng_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2073
# Parse a meta property string against $meta_info_fmt.
# Returns nothing for an empty/false value; parse errors are only warned
# about and result in undef.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $parsed = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $parsed;
}
2083
# Build a fresh meta property string recording the value of
# kvm_user_version() as 'creation-qemu' and the current epoch as 'ctime'.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "" . int(time()), # stored as string
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2095
# Returns extra QEMU arguments (array ref) needed for VMs that always run the
# latest machine version but were created before a machine version transition
# that affected the virtual hardware; returns nothing if no fixup applies.
#
# We do not want to pin the machine version for non-windows OS types, so the
# affected behaviour is switched off explicitly instead.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are of interest
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or with unknown creation version)
    my $created_with = $meta->{'creation-qemu'};
    return if defined($created_with) && min_version($created_with, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    return if !PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $ostype = $conf->{ostype};
    return if !$ostype || $ostype ne 'l26';

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2120
# Add the JSON schema properties used by the create and set functions to
# $prop and return it. Options that are only tracked internally are left out.
# With $with_disk_alloc set, drive options use the schema variant from
# $PVE::QemuServer::Drive::drivedesc_hash_with_alloc.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    my %skip = map { $_ => 1 } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (keys %$confdesc) {
        next if $skip{$opt};

        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2146
# Properties that we can read from an OVF file: one optional disk image
# property per valid drive name, plus 'cores', 'memory' and 'name'.
sub json_ovf_properties {
    my %prop;

    for my $drive_name (PVE::QemuServer::Drive::valid_drive_names()) {
        $prop{$drive_name} = {
            type => 'string',
            format => 'pve-volume-id-or-absolute-path',
            description => "Disk image that gets imported to $drive_name",
            optional => 1,
        };
    }

    %prop = (
        %prop,
        cores => {
            type => 'integer',
            description => "The number of CPU cores.",
            optional => 1,
        },
        memory => {
            type => 'integer',
            description => "Amount of RAM for the VM in MB.",
            optional => 1,
        },
        name => {
            type => 'string',
            description => "Name of the VM.",
            optional => 1,
        },
    );

    return \%prop;
}
2178
# Hand out a deep copy of $confdesc_cloudinit (used to generate
# documentation), so that callers cannot modify the original schema.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2184
# Returns a hash ref whose keys are the option names tracked as cloud-init
# relevant: all keys of $confdesc_cloudinit, 'name', and net0..net($MAX_NETS-1).
# Every value is 1.
sub cloudinit_pending_properties {
    my $p = {
        # the parentheses are required: 'keys' is a named unary operator, so
        # without them "name => 1" would become part of map's input list and
        # the result would contain a bogus '1' key
        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
        name => 1,
    };
    $p->{"net$_"} = 1 for 0..($MAX_NETS-1);
    return $p;
}
2193
# Validate $value against the type declared for $key in $confdesc and return
# the normalized value:
# - boolean: 1 or 0 (accepts 1/on/yes/true and 0/off/no/false)
# - integer: int() of an all-digit string
# - number:  the value itself if it is digits with an optional fraction
# - string:  checked against the declared format if there is one, otherwise
#            one pair of surrounding double quotes is stripped
# Dies for unknown keys, undefined values, values containing CR/LF and values
# not matching the type.
sub check_type {
    my ($key, $value) = @_;

    die "unknown setting '$key'\n" if !$confdesc->{$key};

    my $type = $confdesc->{$key}->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    die "internal error" if $type ne 'string';

    if (my $fmt = $confdesc->{$key}->{format}) {
        PVE::JSONSchema::check_format($fmt, $value);
        return $value;
    }

    $value =~ s/^\"(.*)\"$/$1/;
    return $value;
}
2230
# Remove the volumes of VM $vmid and then its configuration.
#
# - unless $skiplock is set or the config holds a 'suspended' lock, the
#   config lock is checked first
# - for templates, dies if a base volume is still used by a linked clone
# - volumes referenced by the config (including unused ones and vmstate),
#   by snapshot vmstates and by the pending section are freed if they are
#   owned by $vmid; failures to free a volume only produce a warning
# - with $purge_unreferenced, every 'images' volume that the storage layer
#   lists for $vmid is freed as well
# - finally the config is replaced by $replacement_conf if that is defined,
#   and destroyed otherwise
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf)
        if !$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended');

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    my $handled_volids = {}; # volumes we already tried to free
    my $remove_owned_drive = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $handled_volids->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !($path && $owner && $owner == $vmid);

        $handled_volids->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $include_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $remove_owned_drive);

    for my $snap (values %{$conf->{snapshots}}) {
        my $vmstate = $snap->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $remove_owned_drive->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $remove_owned_drive);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        my $free_volume = sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        };
        PVE::Storage::foreach_volid($vmdisks, $free_volume);
    }

    eval { delete_ifaces_ipams_ips($conf, $vmid) };
    warn $@ if $@;

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2306
# Parse the raw content of a VM config file into a hash.
#
# Parameters:
#   $filename - must end in /qemu-server/<vmid>.conf, the VMID is taken from it
#   $raw      - file content; nothing is returned if it is undefined
#   $strict   - if true, parse errors die, otherwise they only warn
#
# The returned hash holds the options of the main section plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options of the [PENDING] section
#   cloudinit - options of the [special:cloudinit] section (stored unvalidated)
#   snapshots - one hash per [<name>] section, keyed by section name
#
# '#' comment lines and 'description:' lines are collected and stored as
# 'description' of the section they belong to. A 'cdrom' option is stored as
# 'ide2'. Drive options are re-printed with their file name mapped through
# filename_to_volume_id().
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the section currently being filled
    my $conf = $res;
    my $descr;
    # flush the description collected so far into the current section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # any other section header starts a snapshot section
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at the start of the line, so that the value of another
            # option which merely ends in "snapstate: <state>" (e.g. 'args')
            # is not mistaken for the snapshot state
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2426
# Serialize $conf into the raw text of a VM config file and return it.
#
# $conf is normalized in place first: 'snapstate' is dropped from the main
# section, 'cdrom' is moved to 'ide2' (dies if both are set), the legacy 'smp'
# option is folded into 'sockets', every option value is passed through
# check_type() (dies on failure) and 'unusedX' entries whose volume is used by
# a drive of the main or pending section are removed.
#
# The output consists of the main section followed by [PENDING] (if it has
# any keys), [special:cloudinit] (if it has any keys and
# PVE::QemuConfig->has_cloudinit() agrees) and one section per snapshot in
# sorted order.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = delete $conf->{smp};
        delete $conf->{cores};
    }

    my $used_volids = {};

    # keys that are no config options and thus not type checked
    my %structural_key = map { $_ => 1 }
        qw(digest description snapshots snapstate pending cloudinit);

    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        for my $key (keys %$cref) {
            next if $structural_key{$key};

            if ($key eq 'delete') {
                die "propertry 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }

            my $value = eval { check_type($key, $cref->{$key}) };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            # volumes referenced by snapshots do not count as used
            next if $snapname || !is_valid_drivename($key);

            my $drive = parse_drive($key, $value);
            $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    for my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    for my $key (grep { m/^unused/ } keys %$conf) {
        delete $conf->{$key} if $used_volids->{$conf->{$key}};
    }

    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my $out = '';

        # add description as comment to top of file
        my $descr = $section_conf->{description};
        if (defined($descr)) {
            if ($descr) {
                for my $descr_line (split(/\n/, $descr)) {
                    $out .= '#' . PVE::Tools::encode_text($descr_line) . "\n";
                }
            } elsif ($pending) {
                $out .= "#\n";
            }
        }

        for my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $out .= "$key: $section_conf->{$key}\n";
        }

        return $out;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)) {
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    for my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2533
# Collect the static defaults from our JSON schema configuration: returns a
# hash ref mapping every option of $confdesc that declares a defined
# 'default' to that default value.
sub load_defaults {
    my %defaults;

    for my $opt (keys %$confdesc) {
        my $default = $confdesc->{$opt}->{default};
        next if !defined($default);

        $defaults{$opt} = $default;
    }

    return \%defaults;
}
2547
# Returns a hash ref with an entry { exists => 1 } for every guest from the
# cluster VM list that is of type 'qemu' and located on the local node.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $found = {};
    return $found if !$vmlist || !$vmlist->{ids};

    my $ids = $vmlist->{ids};
    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        next unless $entry->{node} && $entry->{node} eq $nodename;
        next unless $entry->{type} && $entry->{type} eq 'qemu';

        $found->{$vmid}->{exists} = 1;
    }

    return $found;
}
2563
# test if VM uses local resources (to prevent migration)
#
# Parameters:
#   $conf  - VM configuration hash
#   $noerr - if false, die as soon as any local resource is found
#
# Returns, in list context:
#   \@loc_res                 - config keys of local resources
#   $mapped_res               - config keys of usb/hostpci entries using a mapping
#   $missing_mappings_by_node - node name => keys whose mapping has no entry
#                               for that node
# In scalar context only \@loc_res is returned.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record $key as missing on every node that has no entry for mapping $id
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # 'host' is not set for entries that only use a mapping, so guard
            # against matching on an undefined value
            next if defined($entry->{host}) && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2622
# check if used storages are available on all nodes (use by migrate)
#
# For every volume of $conf that parses as a storage volume ID, the storage
# must be enabled locally and on $node, and the locally checked storage
# configuration must list the content type of the volume; dies otherwise.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2646
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and narrows the set down per storage-backed
# volume: a disabled storage leaves no node, a storage restricted to certain
# nodes leaves only those, and a non-shared storage leaves only the local
# node. Returns a hash ref with the remaining node names as keys.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $narrow_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        if ($storeid) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{disable}) {
                $nodehash = {};
            } elsif (my $avail = $scfg->{nodes}) {
                delete @{$nodehash}{ grep { !$avail->{$_} } keys %$nodehash };
            } elsif (!$scfg->{shared}) {
                delete @{$nodehash}{ grep { $_ ne $nodename } keys %$nodehash };
            }
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $narrow_by_volume);

    return $nodehash;
}
2680
# Determine per cluster node which storages used by the volumes of $conf are
# not available there. Returns a hash ref keyed by node name; a node with
# problems gets an 'unavailable_storages' entry holding the sorted list of
# storage IDs, all other nodes map to an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $record_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        if ($storeid) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

            # a disabled storage is missing everywhere, a node-restricted
            # one on all nodes it is not restricted to
            my @affected_nodes;
            if ($scfg->{disable}) {
                @affected_nodes = keys %$nodehash;
            } elsif (my $avail = $scfg->{nodes}) {
                @affected_nodes = grep { !$avail->{$_} } keys %$nodehash;
            }

            $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected_nodes;
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $record_unavailable);

    # turn the collected storage ID sets into sorted lists
    for my $node_info (values %$nodehash) {
        my $unavail = $node_info->{unavailable_storages};
        next if !$unavail;

        $node_info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash;
}
2719
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns the result of vm_running_locally() for $vmid. Unless $nocheck is
# set, first asserts that the config exists on $node.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't yet
    #   processed it yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2733
# Returns the result of config_list(), with a 'pid' added to every listed VM
# that has a <vmid>.pid file in the run directory and for which
# check_running() returns a PID.
sub vzlist {
    my $vzlist = config_list();

    my $run_dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$run_dir;

    while (defined(my $entry = $run_dir->read)) {
        next unless $entry =~ m/^(\d+)\.pid$/;
        my $vmid = $1;

        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2751
# Schema describing properties of the per-VM status hash.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => [qw(stopped running)],
        description => "QEMU process status.",
    },
    maxmem => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Maximum memory in bytes.",
    },
    maxdisk => {
        type => 'integer',
        renderer => 'bytes',
        optional => 1,
        description => "Root disk size in bytes.",
    },
    name => {
        type => 'string',
        optional => 1,
        description => "VM name.",
    },
    qmpstatus => {
        type => 'string',
        optional => 1,
        description => "VM run state from the 'query-status' QMP monitor command.",
    },
    pid => {
        type => 'integer',
        optional => 1,
        description => "PID of running qemu process.",
    },
    uptime => {
        type => 'integer',
        renderer => 'duration',
        optional => 1,
        description => "Uptime.",
    },
    cpus => {
        type => 'number',
        optional => 1,
        description => "Maximum usable CPUs.",
    },
    lock => {
        type => 'string',
        optional => 1,
        description => "The current config lock, if any.",
    },
    tags => {
        type => 'string',
        optional => 1,
        description => "The current configured tags, if any",
    },
    'running-machine' => {
        type => 'string',
        optional => 1,
        description => "The currently running machine type (if running).",
    },
    'running-qemu' => {
        type => 'string',
        optional => 1,
        description => "The currently running QEMU version (if running).",
    },
};
2818
# CPU accounting sample (time, used ticks, last computed cpu value) per PID,
# kept between vmstatus() calls to compute the CPU usage over time.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Returns a hash ref keyed by VMID, restricted to $opt_vmid if that is set.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    # static information derived from the config
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);
        my $listed_pid = $list->{$vmid}->{pid};

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($listed_pid) if $listed_pid;

        # fixme: better status?
        $d->{status} = $listed_pid ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{disk} = 0; # no info available
        $d->{maxdisk} = $size // 0;

        my $sockets = $conf->{sockets} || $defaults->{sockets};
        my $cores = $conf->{cores} || $defaults->{cores};
        $d->{cpus} = $sockets * $cores;
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = get_current_memory($conf->{memory})*(1024*1024);

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        # counters start at zero and get filled in below where possible
        $d->{$_} = 0 for qw(uptime cpu mem netout netin diskread diskwrite);

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network counters from the tap devices; what the tap device receives is
    # accounted as outgoing guest traffic and vice versa
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        next unless $dev =~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;

        my $d = $res->{$vmid};
        next if !$d;

        my $counters = $netdev->{$dev};

        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    my $ctime = gettimeofday();

    # uptime, memory and CPU usage from /proc for running VMs
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, no usage can be computed yet
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # interval too short, reuse the previously computed value
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes += $blockstat->{stats}->{rd_bytes};
            $totalwrbytes += $blockstat->{stats}->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where no QMP status is available
    for my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
3058
# Returns 1 if any of the serial0..serial($MAX_SERIAL_PORTS-1) options of
# $conf is set to a true value, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3070
# Returns a description of audio device $id (default 0) of $conf, or nothing
# if no such device is configured. The result holds the device model ('dev'),
# the backend driver ('backend', defaults to 'spice') and the IDs to use for
# both ('dev_id', 'backend_id').
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio_value = $conf->{"audio$id"};
    return if !defined($audio_value);

    my $props = parse_property_string($audio_fmt, $audio_value);
    my $driver = $props->{driver} // 'spice';

    my $audio = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };

    return $audio;
}
3088
# Build the QEMU command line arguments (array ref) for the audio device
# described by $audio (as returned by conf_has_audio). $audiopciaddr is
# appended verbatim to the device string. The explicit 'audiodev' link to the
# backend is only added if min_version($machine_version, 4, 2) holds.
# Dies for device models other than AC97 and *intel-hda.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};
    my $dev = $audio->{dev};

    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @devs;
    if ($dev eq 'AC97') {
        @devs = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($dev =~ /intel\-hda$/) {
        # the HDA controller itself plus two codecs attached to its bus
        @devs = (
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @devs, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@devs;
}
3114
# Return the runtime file paths used by the swtpm instance of a VM.
#
# The result is a hash reference with the keys 'socket' (control socket) and
# 'pid' (PID file), both located below /var/run/qemu-server.
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket_path = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket_path,
        pid => "$socket_path.pid",
    };
}
3122
# Append the QEMU arguments for an emulated TPM to $devices.
#
# Nothing is added unless the config has a 'tpmstate0' entry. The TPM is
# wired up as: a socket chardev pointing at the VM's swtpm socket, an
# 'emulator' tpmdev using that chardev and a 'tpm-tis' device on top.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3134
# Start the swtpm emulator daemon that backs the TPM of a VM.
#
# $storecfg  - storage configuration, used to map storage-backed state volumes
# $vmid      - VM ID; determines the socket, PID file and log file paths
# $tpmdrive  - 'tpmstate0' property string; nothing happens if it is false
# $migration - if true, swtpm_setup is skipped (state comes from the remote)
#
# Returns the PID of the swtpm daemon as read from its PID file, so the caller
# can kill it on error. Only the captured digits are returned (which also
# untaints the value); undef is returned if the PID file holds no number.
# Dies if the PID file does not show up in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    # an unset version must not trigger 'uninitialized' warnings below
    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';

    my $paths = get_tpm_paths($vmid);

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        # The output line is passed as argument to the callback. Do not rely on
        # $1, which only reflects whatever regex happened to match last.
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;

    # As for swtpm_setup above: print the line that was handed in, terminated
    # by a newline like the setup output.
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print "$line\n";
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # Return the untainted PID of the swtpm daemon so it can be killed on
    # error. Capture in list context, so a failed match yields undef instead
    # of a stale $1 left over from an unrelated earlier match.
    my $firstline = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $firstline =~ m/(\d+)/;
    return $pid;
}
3209
# Tell whether a 'vga' property string selects a SPICE (qxl) display.
#
# Returns 0 for non-qxl types. For 'qxl' it returns 1, for 'qxl2', 'qxl3' and
# 'qxl4' the trailing digit.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $type = parse_vga($vga)->{type};
    return 0 if !$type;

    if ($type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3219
# Return the architecture of a VM: the 'arch' config value if it is defined,
# otherwise the architecture of the host.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return defined($configured) ? $configured : get_host_arch();
}
3224
# Default (unversioned) QEMU machine type per guest architecture. Used as the
# fallback when no machine type is configured or forced.
my $default_machines = {
    x86_64 => 'pc',
    aarch64 => 'virt',
};
3229
# Return the "major.minor" part of a QEMU version string.
#
# $kvmversion - version string such as "8.1.5"; if undefined, the result of
#               kvm_user_version() is used instead.
#
# Returns the leading "<digits>.<digits>" of the version, or undef if the
# string does not start with such a prefix.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture in list context: on a failed match this yields undef rather than
    # a stale $1 left over from an earlier, unrelated match.
    my ($major_minor) = $kvmversion =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3236
# Turn an unversioned machine type into a version-pinned one.
#
# $machine      - 'pc', 'q35', 'virt' or a false value (treated like 'pc')
# $base_version - preferred version to pin to; the installed machine version
#                 is used instead if this is undefined or cannot be run
#                 according to can_run_pve_machine_version()
# $kvmversion   - QEMU version passed on to the version helpers
#
# Returns e.g. "pc-i440fx-<version>", "pc-q35-<version>" or "virt-<version>".
# Any other machine type is returned unchanged after a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $usable_base = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
    my $pin_version = $usable_base
        ? $base_version
        : get_installed_machine_version($kvmversion);

    my $versioned_prefix = {
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    };

    my $prefix = $versioned_prefix->{ $machine || 'pc' };
    return "$prefix-$pin_version" if defined($prefix);

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3258
# Determine the QEMU machine type string for a VM.
#
# $conf            - VM config ('machine' and 'ostype' are read)
# $forcemachine    - if true, overrides the configured machine type
# $arch            - guest architecture, defaults to 'x86_64' when needed
# $add_pve_version - if true, make sure the result carries a '+pve<N>' suffix
# $kvmversion      - QEMU version, defaults to kvm_user_version() when needed
#
# Returns the resulting machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    # nothing left to do if no pve-version is wanted or one is already present
    return $machine if !$add_pve_version || $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump.
    # A trailing '.pxe' has to stay at the very end.
    if ($machine =~ m/^(.*?)\.pxe$/) {
        return "${1}+pve0.pxe";
    }

    return "${machine}+pve0";
}
3293
# Look up the OVMF firmware images to use for a VM.
#
# $arch    - guest architecture, selects the image set from $OVMF
# $efidisk - parsed efidisk drive (may be undef); 'efitype' and
#            'pre-enrolled-keys' are read for x86_64
# $smm     - whether the SMM capable 4m image should be used
#
# Returns the list (code image path, vars image path). Dies if the
# architecture is unknown or one of the image files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $type = 'default';
    if ($arch eq 'x86_64') {
        my $efitype = $efidisk->{efitype};
        if (defined($efitype) && $efitype eq '4m') {
            my $smm_suffix = $smm ? '' : '-no-smm';
            my $keys_suffix = $efidisk->{'pre-enrolled-keys'} ? '-ms' : '';
            $type = "4m${smm_suffix}${keys_suffix}";
        }
        # TODO: log_warn about use of legacy images for x86_64 with Proxmox VE 9
    }

    my ($ovmf_code, $ovmf_vars) = @{ $types->{$type} };
    for my $image (
        [ 'base', $ovmf_code ],
        [ 'vars', $ovmf_vars ],
    ) {
        my ($kind, $path) = @$image;
        die "EFI $kind image '$path' not found\n" if ! -f $path;
    }

    return ($ovmf_code, $ovmf_vars);
}
3316
# QEMU system emulator binary per guest architecture.
my $Arch2Qemu = {
    aarch64 => '/usr/bin/qemu-system-aarch64',
    x86_64 => '/usr/bin/qemu-system-x86_64',
};

# Return the binary used to run a VM of the given architecture:
# '/usr/bin/kvm' for the native architecture, otherwise the matching
# qemu-system-* binary. Dies for architectures without a known emulator.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native_arch($arch)) {
        return '/usr/bin/kvm';
    }

    my $emulator = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$emulator;

    return $emulator;
}
3329
# To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
# to use in a QEMU command line (-cpu element), first array_intersect the result
# of query_supported_ with query_understood_. This is necessary because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# query_supported_ needs to start up to 2 temporary VMs and is therefore rather
# expensive. If you need the value returned from this, you can get it much
# cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
# $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# Parameters:
#   $arch - architecture to query, defaults to the host architecture. Dies for
#           'aarch64'.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' entry is only
# set if kvm_version() is defined. A failed query only triggers a warning and
# leaves the respective entry undefined.
#
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();
    my $default_machine = $default_machines->{$arch};

    my $flags = {};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n" if
        $arch eq "aarch64";

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    # fake VM ID used for the temporary VM (QMP socket, PID file, config lock)
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command
    #
    # Takes a single boolean: if false, '-accel tcg' is added to the command.
    # Returns a sorted array reference of flag names, dies on error.
    my $query_supported_run_qemu = sub {
        my ($kvm) = @_;

        # NOTE: intentionally shadows the outer $flags, this one is per run
        my $flags = {};
        my $cmd = [
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize'
        ];

        if (!$kvm) {
            push @$cmd, '-accel', 'tcg';
        }

        # noerr => 1: get the exit code back and handle it below
        my $rc = run_command($cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        # errors are collected here and re-thrown only after the VM was stopped
        eval {
            my $cmd_result = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $cmd_result->{model}->{props};
            foreach my $prop (keys %$props) {
                # only keep properties whose value stringifies to '1'
                next if $props->{$prop} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                $prop =~ s/\.|-/_/g;
                $flags->{$prop} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %$flags ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    # Both runs happen while holding the config lock of the fake VM ID.
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_supported_run_qemu->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_supported_run_qemu->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3431
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Read the list of CPU flags QEMU understands for the host architecture.
#
# The flags are read from the whitespace separated file
# "recognized-CPUID-flags-<arch>" in $understood_cpu_flag_dir.
# Returns an array reference of flag names, dies if the file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    my $content = file_get_contents($filepath);

    # strip surrounding whitespace first, so that splitting cannot yield an
    # empty leading element
    $content =~ s/^\s+//;
    $content =~ s/\s+$//;

    return [ split(/\s+/, $content) ];
}
3447
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns a true value if the VM uses SeaBIOS (explicitly or because no bios
# is configured) and its display type is 'serial<N>' or 'none'. Always returns
# nothing for 'virt*' machines.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    # there is no smm flag that could be disabled
    return if $machine =~ m/^virt/;

    my $uses_seabios = !defined($conf->{bios}) || $conf->{bios} eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3458
# Build the two '-drive' option strings needed to boot a VM with OVMF.
#
# $conf          - VM config ('efidisk0' is read)
# $storecfg      - storage configuration, used to resolve the efidisk volume
# $vmid          - VM ID, used for the name of the temporary vars image
# $arch          - guest architecture, selects the OVMF images
# $q35           - passed to get_ovmf_files() as its third (SMM) argument
# $version_guard - code reference, called as $version_guard->(4, 1, 2) to
#                  decide whether an explicit 'size=' may be added
#
# Returns the list (code drive string, vars drive string). Without an efidisk
# a warning is logged and the vars image is copied to /tmp instead.
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
    my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$efidisk) {
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $tmp_path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $tmp_path, -s $ovmf_vars);
        $var_drive_str .= ",format=raw,file=$tmp_path";
        $var_drive_str .= ",size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);

        return ($code_drive_str, $var_drive_str);
    }

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($efidisk->{file}, 1);
    my $path = $efidisk->{file};
    my $format = $efidisk->{format};

    if ($storeid) {
        $path = PVE::Storage::path($storecfg, $efidisk->{file});
        if (!defined($format)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }
    } elsif (!defined($format)) {
        die "efidisk format must be specified\n";
    }

    # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
    if ($path =~ m/^rbd:/) {
        $var_drive_str .= ',cache=writeback';
        # avoid write-around, we *need* to cache writes too
        $path .= ':rbd_cache_policy=writeback';
    }
    $var_drive_str .= ",format=$format,file=$path";

    if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
        $var_drive_str .= ",size=" . (-s $ovmf_vars);
    }
    if (drive_is_read_only($conf, $efidisk)) {
        $var_drive_str .= ',readonly=on';
    }

    return ($code_drive_str, $var_drive_str);
}
3498
3499 sub config_to_command {
3500 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3501 $live_restore_backing) = @_;
3502
3503 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3504 my $devices = [];
3505 my $bridges = {};
3506 my $ostype = $conf->{ostype};
3507 my $winversion = windows_version($ostype);
3508 my $kvm = $conf->{kvm};
3509 my $nodename = nodename();
3510
3511 my $arch = get_vm_arch($conf);
3512 my $kvm_binary = get_command_for_arch($arch);
3513 my $kvmver = kvm_user_version($kvm_binary);
3514
3515 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3516 $kvmver //= "undefined";
3517 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3518 }
3519
3520 my $add_pve_version = min_version($kvmver, 4, 1);
3521
3522 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3523 my $machine_version = extract_version($machine_type, $kvmver);
3524 $kvm //= 1 if is_native_arch($arch);
3525
3526 $machine_version =~ m/(\d+)\.(\d+)/;
3527 my ($machine_major, $machine_minor) = ($1, $2);
3528
3529 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3530 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3531 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3532 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3533 ." please upgrade node '$nodename'\n"
3534 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3535 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3536 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3537 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3538 ." node '$nodename'\n";
3539 }
3540
3541 # if a specific +pve version is required for a feature, use $version_guard
3542 # instead of min_version to allow machines to be run with the minimum
3543 # required version
3544 my $required_pve_version = 0;
3545 my $version_guard = sub {
3546 my ($major, $minor, $pve) = @_;
3547 return 0 if !min_version($machine_version, $major, $minor, $pve);
3548 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3549 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3550 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3551 return 1;
3552 };
3553
3554 if ($kvm && !defined kvm_version()) {
3555 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3556 ." or enable in BIOS.\n";
3557 }
3558
3559 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3560 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3561 my $use_old_bios_files = undef;
3562 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3563
3564 my $cmd = [];
3565 if ($conf->{affinity}) {
3566 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3567 }
3568
3569 push @$cmd, $kvm_binary;
3570
3571 push @$cmd, '-id', $vmid;
3572
3573 my $vmname = $conf->{name} || "vm$vmid";
3574
3575 push @$cmd, '-name', "$vmname,debug-threads=on";
3576
3577 push @$cmd, '-no-shutdown';
3578
3579 my $use_virtio = 0;
3580
3581 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3582 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3583 push @$cmd, '-mon', "chardev=qmp,mode=control";
3584
3585 if (min_version($machine_version, 2, 12)) {
3586 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3587 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3588 }
3589
3590 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3591
3592 push @$cmd, '-daemonize';
3593
3594 if ($conf->{smbios1}) {
3595 my $smbios_conf = parse_smbios1($conf->{smbios1});
3596 if ($smbios_conf->{base64}) {
3597 # Do not pass base64 flag to qemu
3598 delete $smbios_conf->{base64};
3599 my $smbios_string = "";
3600 foreach my $key (keys %$smbios_conf) {
3601 my $value;
3602 if ($key eq "uuid") {
3603 $value = $smbios_conf->{uuid}
3604 } else {
3605 $value = decode_base64($smbios_conf->{$key});
3606 }
3607 # qemu accepts any binary data, only commas need escaping by double comma
3608 $value =~ s/,/,,/g;
3609 $smbios_string .= "," . $key . "=" . $value if $value;
3610 }
3611 push @$cmd, '-smbios', "type=1" . $smbios_string;
3612 } else {
3613 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3614 }
3615 }
3616
3617 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3618 die "OVMF (UEFI) BIOS is not supported on 32-bit CPU types\n"
3619 if !$forcecpu && get_cpu_bitness($conf->{cpu}, $arch) == 32;
3620
3621 my ($code_drive_str, $var_drive_str) =
3622 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3623 push $cmd->@*, '-drive', $code_drive_str;
3624 push $cmd->@*, '-drive', $var_drive_str;
3625 }
3626
3627 if ($q35) { # tell QEMU to load q35 config early
3628 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3629 if (min_version($machine_version, 4, 0)) {
3630 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3631 } else {
3632 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3633 }
3634 }
3635
3636 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3637 push @$cmd, $fixups->@*;
3638 }
3639
3640 if ($conf->{vmgenid}) {
3641 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3642 }
3643
3644 # add usb controllers
3645 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3646 $conf, $bridges, $arch, $machine_type, $machine_version);
3647 push @$devices, @usbcontrollers if @usbcontrollers;
3648 my $vga = parse_vga($conf->{vga});
3649
3650 my $qxlnum = vga_conf_has_spice($conf->{vga});
3651 $vga->{type} = 'qxl' if $qxlnum;
3652
3653 if (!$vga->{type}) {
3654 if ($arch eq 'aarch64') {
3655 $vga->{type} = 'virtio';
3656 } elsif (min_version($machine_version, 2, 9)) {
3657 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3658 } else {
3659 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3660 }
3661 }
3662
3663 # enable absolute mouse coordinates (needed by vnc)
3664 my $tablet = $conf->{tablet};
3665 if (!defined($tablet)) {
3666 $tablet = $defaults->{tablet};
3667 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3668 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3669 }
3670
3671 if ($tablet) {
3672 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3673 my $kbd = print_keyboarddevice_full($conf, $arch);
3674 push @$devices, '-device', $kbd if defined($kbd);
3675 }
3676
3677 my $bootorder = device_bootorder($conf);
3678
3679 # host pci device passthrough
3680 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3681 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
3682
3683 # usb devices
3684 my $usb_dev_features = {};
3685 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3686
3687 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3688 $conf, $usb_dev_features, $bootorder, $machine_version);
3689 push @$devices, @usbdevices if @usbdevices;
3690
3691 # serial devices
3692 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3693 my $path = $conf->{"serial$i"} or next;
3694 if ($path eq 'socket') {
3695 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3696 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3697 # On aarch64, serial0 is the UART device. QEMU only allows
3698 # connecting UART devices via the '-serial' command line, as
3699 # the device has a fixed slot on the hardware...
3700 if ($arch eq 'aarch64' && $i == 0) {
3701 push @$devices, '-serial', "chardev:serial$i";
3702 } else {
3703 push @$devices, '-device', "isa-serial,chardev=serial$i";
3704 }
3705 } else {
3706 die "no such serial device\n" if ! -c $path;
3707 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
3708 push @$devices, '-device', "isa-serial,chardev=serial$i";
3709 }
3710 }
3711
3712 # parallel devices
3713 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3714 if (my $path = $conf->{"parallel$i"}) {
3715 die "no such parallel device\n" if ! -c $path;
3716 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
3717 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3718 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3719 }
3720 }
3721
3722 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3723 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3724 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3725 push @$devices, @$audio_devs;
3726 }
3727
3728 # Add a TPM only if the VM is not a template,
3729 # to support backing up template VMs even if the TPM disk is write-protected.
3730 add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf));
3731
3732 my $sockets = 1;
3733 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3734 $sockets = $conf->{sockets} if $conf->{sockets};
3735
3736 my $cores = $conf->{cores} || 1;
3737
3738 my $maxcpus = $sockets * $cores;
3739
3740 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3741
3742 my $allowed_vcpus = $cpuinfo->{cpus};
3743
3744 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3745
3746 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3747 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3748 for (my $i = 2; $i <= $vcpus; $i++) {
3749 my $cpustr = print_cpu_device($conf, $arch, $i);
3750 push @$cmd, '-device', $cpustr;
3751 }
3752
3753 } else {
3754
3755 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3756 }
3757 push @$cmd, '-nodefaults';
3758
3759 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3760
3761 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3762
3763 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3764
3765 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3766 push @$devices, '-device', print_vga_device(
3767 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3768
3769 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3770
3771 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3772 push @$cmd, '-vnc', "unix:$socket,password=on";
3773 } else {
3774 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3775 push @$cmd, '-nographic';
3776 }
3777
3778 # time drift fix
3779 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3780 my $useLocaltime = $conf->{localtime};
3781
3782 if ($winversion >= 5) { # windows
3783 $useLocaltime = 1 if !defined($conf->{localtime});
3784
3785 # use time drift fix when acpi is enabled
3786 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3787 $tdf = 1 if !defined($conf->{tdf});
3788 }
3789 }
3790
3791 if ($winversion >= 6) {
3792 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3793 push @$machineFlags, 'hpet=off';
3794 }
3795
3796 push @$rtcFlags, 'driftfix=slew' if $tdf;
3797
3798 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3799 push @$rtcFlags, "base=$conf->{startdate}";
3800 } elsif ($useLocaltime) {
3801 push @$rtcFlags, 'base=localtime';
3802 }
3803
3804 if ($forcecpu) {
3805 push @$cmd, '-cpu', $forcecpu;
3806 } else {
3807 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3808 }
3809
3810 PVE::QemuServer::Memory::config(
3811 $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
3812
3813 push @$cmd, '-S' if $conf->{freeze};
3814
3815 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3816
3817 my $guest_agent = parse_guest_agent($conf);
3818
3819 if ($guest_agent->{enabled}) {
3820 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3821 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3822
3823 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3824 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3825 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3826 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3827 } elsif ($guest_agent->{type} eq 'isa') {
3828 push @$devices, '-device', "isa-serial,chardev=qga0";
3829 }
3830 }
3831
3832 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3833 if ($rng && $version_guard->(4, 1, 2)) {
3834 check_rng_source($rng->{source});
3835
3836 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3837 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3838 my $limiter_str = "";
3839 if ($max_bytes) {
3840 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3841 }
3842
3843 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3844 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3845 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3846 }
3847
3848 my $spice_port;
3849
3850 assert_clipboard_config($vga);
3851 my $is_spice = $qxlnum || $vga->{type} =~ /^virtio/;
3852
3853 if ($is_spice || ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc')) {
3854 if ($qxlnum > 1) {
3855 if ($winversion){
3856 for (my $i = 1; $i < $qxlnum; $i++){
3857 push @$devices, '-device', print_vga_device(
3858 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3859 }
3860 } else {
3861 # assume other OS works like Linux
3862 my ($ram, $vram) = ("134217728", "67108864");
3863 if ($vga->{memory}) {
3864 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3865 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3866 }
3867 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3868 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3869 }
3870 }
3871
3872 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3873
3874 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3875 if ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc') {
3876 push @$devices, '-chardev', 'qemu-vdagent,id=vdagent,name=vdagent,clipboard=on';
3877 } else {
3878 push @$devices, '-chardev', 'spicevmc,id=vdagent,name=vdagent';
3879 }
3880 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3881
3882 if ($is_spice) {
3883 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3884 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3885 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3886
3887 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3888 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3889
3890 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3891 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3892 if ($spice_enhancement->{foldersharing}) {
3893 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3894 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3895 }
3896
3897 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3898 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3899 if $spice_enhancement->{videostreaming};
3900 push @$devices, '-spice', "$spice_opts";
3901 }
3902 }
3903
3904 # enable balloon by default, unless explicitly disabled
3905 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3906 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3907 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3908 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3909 push @$devices, '-device', $ballooncmd;
3910 }
3911
3912 if ($conf->{watchdog}) {
3913 my $wdopts = parse_watchdog($conf->{watchdog});
3914 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3915 my $watchdog = $wdopts->{model} || 'i6300esb';
3916 push @$devices, '-device', "$watchdog$pciaddr";
3917 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3918 }
3919
3920 my $vollist = [];
3921 my $scsicontroller = {};
3922 my $ahcicontroller = {};
3923 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3924
3925 # Add iscsi initiator name if available
3926 if (my $initiator = get_initiator_name()) {
3927 push @$devices, '-iscsi', "initiator-name=$initiator";
3928 }
3929
3930 PVE::QemuConfig->foreach_volume($conf, sub {
3931 my ($ds, $drive) = @_;
3932
3933 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3934 check_volume_storage_type($storecfg, $drive->{file});
3935 push @$vollist, $drive->{file};
3936 }
3937
3938 # ignore efidisk here, already added in bios/fw handling code above
3939 return if $drive->{interface} eq 'efidisk';
3940 # similar for TPM
3941 return if $drive->{interface} eq 'tpmstate';
3942
3943 $use_virtio = 1 if $ds =~ m/^virtio/;
3944
3945 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3946
3947 if ($drive->{interface} eq 'virtio'){
3948 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3949 }
3950
3951 if ($drive->{interface} eq 'scsi') {
3952
3953 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3954
3955 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3956 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3957
3958 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3959 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3960
3961 my $iothread = '';
3962 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3963 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3964 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3965 } elsif ($drive->{iothread}) {
3966 log_warn(
3967 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
3968 );
3969 }
3970
3971 my $queues = '';
3972 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
3973 $queues = ",num_queues=$drive->{queues}";
3974 }
3975
3976 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
3977 if !$scsicontroller->{$controller};
3978 $scsicontroller->{$controller}=1;
3979 }
3980
3981 if ($drive->{interface} eq 'sata') {
3982 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
3983 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
3984 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
3985 if !$ahcicontroller->{$controller};
3986 $ahcicontroller->{$controller}=1;
3987 }
3988
3989 my $live_restore = $live_restore_backing->{$ds};
3990 my $live_blockdev_name = undef;
3991 if ($live_restore) {
3992 $live_blockdev_name = $live_restore->{name};
3993 push @$devices, '-blockdev', $live_restore->{blockdev};
3994 }
3995
3996 my $drive_cmd = print_drive_commandline_full(
3997 $storecfg, $vmid, $drive, $live_blockdev_name, min_version($kvmver, 6, 0));
3998
3999 # extra protection for templates, but SATA and IDE don't support it..
4000 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4001
4002 push @$devices, '-drive',$drive_cmd;
4003 push @$devices, '-device', print_drivedevice_full(
4004 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4005 });
4006
4007 for (my $i = 0; $i < $MAX_NETS; $i++) {
4008 my $netname = "net$i";
4009
4010 next if !$conf->{$netname};
4011 my $d = parse_net($conf->{$netname});
4012 next if !$d;
4013 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4014
4015 $use_virtio = 1 if $d->{model} eq 'virtio';
4016
4017 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4018
4019 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4020 push @$devices, '-netdev', $netdevfull;
4021
4022 my $netdevicefull = print_netdevice_full(
4023 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4024
4025 push @$devices, '-device', $netdevicefull;
4026 }
4027
4028 if ($conf->{ivshmem}) {
4029 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4030
4031 my $bus;
4032 if ($q35) {
4033 $bus = print_pcie_addr("ivshmem");
4034 } else {
4035 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4036 }
4037
4038 my $ivshmem_name = $ivshmem->{name} // $vmid;
4039 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4040
4041 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4042 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4043 .",size=$ivshmem->{size}M";
4044 }
4045
4046 # pci.4 is nested in pci.1
4047 $bridges->{1} = 1 if $bridges->{4};
4048
4049 if (!$q35) { # add pci bridges
4050 if (min_version($machine_version, 2, 3)) {
4051 $bridges->{1} = 1;
4052 $bridges->{2} = 1;
4053 }
4054 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4055 }
4056
4057 for my $k (sort {$b cmp $a} keys %$bridges) {
4058 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4059
4060 my $k_name = $k;
4061 if ($k == 2 && $legacy_igd) {
4062 $k_name = "$k-igd";
4063 }
4064 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4065 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4066
4067 if ($q35) { # add after -readconfig pve-q35.cfg
4068 splice @$devices, 2, 0, '-device', $devstr;
4069 } else {
4070 unshift @$devices, '-device', $devstr if $k > 0;
4071 }
4072 }
4073
4074 if (!$kvm) {
4075 push @$machineFlags, 'accel=tcg';
4076 }
4077
4078 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
4079
4080 my $machine_type_min = $machine_type;
4081 if ($add_pve_version) {
4082 $machine_type_min =~ s/\+pve\d+$//;
4083 $machine_type_min .= "+pve$required_pve_version";
4084 }
4085 push @$machineFlags, "type=${machine_type_min}";
4086
4087 push @$cmd, @$devices;
4088 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4089 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4090 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4091
4092 if (my $vmstate = $conf->{vmstate}) {
4093 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4094 push @$vollist, $vmstate;
4095 push @$cmd, '-loadstate', $statepath;
4096 print "activating and using '$vmstate' as vmstate\n";
4097 }
4098
4099 if (PVE::QemuConfig->is_template($conf)) {
4100 # needed to workaround base volumes being read-only
4101 push @$cmd, '-snapshot';
4102 }
4103
4104 # add custom args
4105 if ($conf->{args}) {
4106 my $aa = PVE::Tools::split_args($conf->{args});
4107 push @$cmd, @$aa;
4108 }
4109
4110 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
4111 }
4112
# Sanity-check the host-side entropy source of a VirtIO RNG device.
#
# $source - path of the source file on the host (for example '/dev/hwrng').
#
# Dies if the source file does not exist, or if '/dev/hwrng' is requested while the kernel's
# 'rng_current' attribute reads 'none'. Returns nothing meaningful otherwise.
sub check_rng_source {
    my ($source) = @_;

    # mostly relevant for /dev/hwrng, but doesn't hurt to check others too
    die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n"
        if ! -e $source;

    return if $source ne '/dev/hwrng';

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';

    # NOTE(review): file_read_firstline presumably yields undef when the attribute cannot be
    # read; fall back to an empty string so the comparison below never operates on undef.
    my $current = file_read_firstline($rng_current) // '';

    if ($current eq 'none') {
        # Needs to abort, otherwise QEMU crashes on first rng access. Note that rng_current cannot
        # be changed to 'none' manually, so once the VM is past this point, it's no longer an issue.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
    }

    return;
}
4129
# Ask the running VM (via the 'query-spice' monitor command) which port SPICE listens on.
# The TLS port is preferred over the plain port; dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $info = mon_cmd($vmid, 'query-spice');

    my $port = $info->{'tls-port'} || $info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4137
# Collect the IDs of the devices currently present in a running VM.
#
# Queries the monitor for PCI devices (descending into PCI bridges), block devices, the HID
# tablet and USB peripherals.
#
# Returns a hash reference keyed by device ID. Values are true; for a PCI bridge the value is
# 1 plus the number of devices directly behind it.
sub vm_devices_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    my $devices = {};
    foreach my $pcibus (@$res) {
        push @$devices_to_check, @{$pcibus->{devices}};
    }

    # walk the PCI topology level by level, following bridges
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            my $id = $d->{'qdev_id'};
            $devices->{$id} = 1 if $id;

            my $bridge = $d->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            # only count children for bridges that actually have an ID, otherwise an undefined
            # value would be used as hash key (warning plus a bogus '' entry)
            $devices->{$id} += scalar(@{$bridge->{devices}}) if $id;
            push @$to_check, @{$bridge->{devices}};
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    foreach my $block (@$resblock) {
        if($block->{device} =~ m/^drive-(\S+)/){
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    foreach my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # for usb devices there is no query-usb
    # but we can iterate over the entries in
    # qom-list path=/machine/peripheral
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    foreach my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb(?:redirdev)?\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4187
# Hot-plug a single device into a running VM.
#
# $storecfg     - storage configuration, passed on to the drive helpers
# $conf         - VM configuration hash
# $vmid         - ID of the running VM
# $deviceid     - ID of the device to plug ('tablet', 'keyboard', 'usbN', 'usbredirdevN',
#                 'virtioN', 'virtioscsiN', 'scsihwN', 'scsiN', 'netN' or 'pci.N')
# $device       - parsed device description (drive or net hash); not used by every device class
# $arch         - guest architecture
# $machine_type - machine type, used for PCI address calculation
#
# Returns 1 if the device is already present or was plugged successfully. Dies on failure or for
# device IDs that cannot be hot-plugged. For drives and NICs the backend (drive/netdev) that was
# added first is removed again when adding or verifying the frontend device fails.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $devices_list = vm_devices_list($vmid);
    return 1 if defined($devices_list->{$deviceid});

    # add PCI bridge if we need it for the device
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1));
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);

        qemu_driveadd($storecfg, $vmid, $device);
        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        # device_add itself is covered too, so a failing add does not leave the drive behind
        eval {
            qemu_deviceadd($vmid, $devicefull);
            qemu_deviceaddverify($vmid, $deviceid);
        };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue settings only apply to per-disk virtio-scsi controllers
        my $is_virtioscsi = $deviceid =~ m/^virtioscsi(\d+)$/;

        if ($is_virtioscsi && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        if ($is_virtioscsi && $device->{queues}) {
            $devicefull .= ",num_queues=$device->{queues}";
        }

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg,$conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full($storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            eval { qemu_drivedel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the NIC device string is built from the machine type reported for the running VM,
        # not from the $machine_type parameter
        my $pxe_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($pxe_machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $pxe_machine_type) = qemu_use_old_bios_files($pxe_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid, $conf, $device, $deviceid, undef, $use_old_bios_files, $arch, $pxe_machine_type, $machine_version);

        # device_add itself is covered too, so a failing add does not leave the netdev behind
        eval {
            qemu_deviceadd($vmid, $netdevicefull);
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4284
4285 # fixme: this should raise exceptions on error!
# Hot-unplug a single device from a running VM.
#
# Returns 1 when the device is not present (nothing to do) or was removed. Dies when asked to
# remove a boot disk, for device IDs that cannot be unplugged, or when a helper fails.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    if (grep { $_ eq $deviceid } @$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n";
    }

    # most device classes share this sequence: request removal, then wait until it is gone
    my $remove_and_verify = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        # removal of these is requested without verification
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usb(?:redirdev)?\d+$/) {
        $remove_and_verify->();
    } elsif ($deviceid =~ m/^virtio\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(?:virtioscsi|scsihw)\d+$/) {
        $remove_and_verify->();
    } elsif ($deviceid =~ m/^scsi\d+$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        $remove_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single each disk has its own controller and iothread
        if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
            qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive);
        }
    } elsif ($deviceid =~ m/^net\d+$/) {
        $remove_and_verify->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4333
# Add a 'spicevmc' character device of type 'usbredir' with the given ID to the running VM.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => { type => "usbredir" },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4347
# Create the 'iothread-<deviceid>' object for a device that has iothread enabled, unless the
# running VM already has an object with that ID.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4356
# Remove the 'iothread-<deviceid>' object of a device that has iothread enabled, if the running
# VM currently has an object with that ID.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4365
# Add the block backend of a drive to a running VM through the HMP 'drive_add' command.
# Returns 1 on success, dies with the monitor's reply otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # the last argument of print_drive_commandline_full is enabled for QEMU >= 6.0
    my $running_version = get_running_qemu_version($vmid);
    my $use_io_uring = min_version($running_version, 6, 0);

    my $drive_spec = print_drive_commandline_full($storecfg, $vmid, $device, undef, $use_io_uring);
    # double every backslash, as the string is embedded in a quoted HMP argument
    $drive_spec =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_spec\"");

    # If the command succeeds qemu prints: "OK"
    die "adding drive failed: $reply\n" if $reply !~ m/OK/s;

    return 1;
}
4380
# Remove the block backend 'drive-<deviceid>' from a running VM through HMP 'drive_del'.
# Returns 1 on success (or if the drive is already gone), dies with the monitor's reply otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # an empty reply means success
    # NB: device not found errors mean the drive was auto-deleted and we ignore the error
    if ($reply eq "" || $reply =~ m/Device \'.*?\' not found/s) {
        return 1;
    }

    die "deleting drive $deviceid failed : $reply\n";
}
4394
# Poll the device list of the running VM (up to six times, one second apart) until the given
# device shows up. Returns 1 once it is present, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4406
4407
# Poll the device list of the running VM (up to six times, one second apart) until the given
# device has disappeared. Returns 1 once it is gone, dies if it is still present afterwards.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # device_del is asynchronous and its empty return value is not a reliable indicator, so the
    # removal has to be confirmed by looking at the device list

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4422
# Make sure the SCSI controller a drive belongs to exists in the running VM, hot-plugging it
# when it is missing. Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # only the controller number and the ID prefix are needed here
    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $scsihwid = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    vm_deviceplug($storecfg, $conf, $vmid, $scsihwid, $device, $arch, $machine_type)
        if !defined($present->{$scsihwid});

    return 1;
}
4437
# Remove the SCSI controller that served the drive configured as $opt, if appropriate.
#
# With 'virtio-scsi-single' the drive's own 'virtioscsi<index>' controller is unplugged
# unconditionally. Otherwise 'scsihw<controller>' is only unplugged when no plugged SCSI drive
# with an index below ($maxdev - 1) * ($controller + 1) is found. Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single_controller) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);

    my $present = vm_devices_list($vmid);
    for my $name (keys %{$present}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        # keep the controller as soon as one matching SCSI drive is found
        return 1
            if $drive->{interface} eq 'scsi' && $drive->{index} < (($maxdev-1)*($controller+1));
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4466
# Hot-plug the PCI bridge a device needs, if there is one and it is not present yet.
#
# $device is the ID of the device about to be plugged (not a parsed device hash); $arch and
# $machine_type are forwarded to the PCI address helpers. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    # The returned address is not needed here; the call is made for $bridges, which
    # print_pci_addr is expected to fill with the bridge IDs required by the device.
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # pick the last key in hash iteration order (undef if no bridge is required)
    my $bridgeid = (keys %$bridges)[-1];
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug takes ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type).
        # A bridge has no parsed device description, so pass undef explicitly; leaving the slot
        # out would shift $arch and $machine_type into the wrong parameters.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4490
# Set the link state of a network device in the running VM via the 'set_link' monitor command.
# A true $up brings the link up, anything else takes it down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4497
# Add the network backend for a NIC to a running VM via the 'netdev_add' monitor command.
# Always returns 1; errors from the monitor propagate as exceptions.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    # the backend description is a 'key=value,key=value' string, turn it into a hash
    my $netdev_spec = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev_spec);

    # 'vhost' and 'queues' are converted from strings to a JSON boolean and a number
    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    $options{queues} = $options{queues} + 0
        if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4515
# Remove the network backend with the given ID from a running VM ('netdev_del').
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my %args = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", %args);
}
4521
# Replace a USB device of a running VM: unplug the current one, make sure an xhci controller
# exists, then plug the new device. Does nothing when no $device is given.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # remove the old one first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # the xhci controller is added on demand
    my $present = vm_devices_list($vmid);
    unless ($present->{xhci}) {
        my $xhci_pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        my $xhci_device = PVE::QemuServer::USB::print_qemu_xhci_controller($xhci_pciaddr);
        qemu_deviceadd($vmid, $xhci_device);
    }

    # add the new one
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4541
# Bring the number of vCPUs of a running VM to $vcpus (or to the configured maximum when $vcpus
# is not set), one CPU at a time.
#
# For machine version 2.7 or higher CPUs are added/removed as devices, and $conf->{vcpus} is
# written back after every single successful step. Older machine versions can only add CPUs
# (via 'cpu-add'); unplugging dies for them.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'sockets' wins over the old style 'smp' option, which is no longer used
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n"
        if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' once per second until the VM reports $expected CPUs and return that
    # list; raise a parameter exception once the retry counter exceeds $max_retry.
    my $wait_for_cpu_count = sub {
        my ($expected, $max_retry, $errmsg) = @_;

        my $retry = 0;
        while (1) {
            my $running = mon_cmd($vmid, "query-cpus-fast");
            return $running if scalar(@{$running}) == $expected;
            raise_param_exc({ vcpus => $errmsg }) if $retry > $max_retry;
            $retry++;
            sleep 1;
        }
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # remove the highest numbered CPU first
        for my $i (reverse(($vcpus + 1) .. $currentvcpus)) {
            qemu_devicedel($vmid, "cpu$i");
            my $running = $wait_for_cpu_count->($i - 1, 5, "error unplugging cpu$i");

            # update conf after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_now = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$running_now}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        my $arch = get_vm_arch($conf);

        for my $i (($currentvcpus + 1) .. $vcpus) {
            qemu_deviceadd($vmid, print_cpu_device($conf, $arch, $i));
            my $running = $wait_for_cpu_count->($i, 10, "error hotplugging cpu$i");

            # update conf after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        # legacy path: CPUs are added by index and the config is not touched
        for my $i ($currentvcpus .. ($vcpus - 1)) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4617
# Apply I/O throttling limits to a block device of a running VM ('block_set_io_throttle').
# All limits are passed positionally after $vmid and $deviceid and are sent as integers.
# Does nothing when the VM is not running.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid,
        $bps, $bps_rd, $bps_wr, $iops, $iops_rd, $iops_wr,
        $bps_max, $bps_rd_max, $bps_wr_max, $iops_max, $iops_rd_max, $iops_wr_max,
        $bps_max_length, $bps_rd_max_length, $bps_wr_max_length,
        $iops_max_length, $iops_rd_max_length, $iops_wr_max_length) = @_;

    return if !check_running($vmid) ;

    # monitor argument name paired with the value received for it, in call order
    my @limits = (
        [ bps => $bps ],
        [ bps_rd => $bps_rd ],
        [ bps_wr => $bps_wr ],
        [ iops => $iops ],
        [ iops_rd => $iops_rd ],
        [ iops_wr => $iops_wr ],
        [ bps_max => $bps_max ],
        [ bps_rd_max => $bps_rd_max ],
        [ bps_wr_max => $bps_wr_max ],
        [ iops_max => $iops_max ],
        [ iops_rd_max => $iops_rd_max ],
        [ iops_wr_max => $iops_wr_max ],
        [ bps_max_length => $bps_max_length ],
        [ bps_rd_max_length => $bps_rd_max_length ],
        [ bps_wr_max_length => $bps_wr_max_length ],
        [ iops_max_length => $iops_max_length ],
        [ iops_rd_max_length => $iops_rd_max_length ],
        [ iops_wr_max_length => $iops_wr_max_length ],
    );

    mon_cmd(
        $vmid,
        "block_set_io_throttle",
        device => $deviceid,
        map { ($_->[0], int($_->[1])) } @limits,
    );
}
4649
# Resize a volume on the storage layer and, if the VM is running, tell QEMU about the new size
# of the block device. The size reported to QEMU is rounded up to a multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    # round up to the next multiple of 1024 (no change if already aligned)
    my $remainder = $size % 1024;
    $size += 1024 - $remainder if $remainder;

    mon_cmd(
        $vmid,
        "block_resize",
        device => $deviceid,
        size => int($size),
        timeout => 60,
    );
}
4670
# Create snapshot $snap of a volume. When the VM is running and do_snapshots_with_qemu() agrees,
# the snapshot is taken by QEMU as an internal snapshot; in every other case the storage layer
# does it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $snapshot_via_qemu =
        check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    if (!$snapshot_via_qemu) {
        PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
    } else {
        mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }
}
4682
# Delete snapshot $snap of a volume.
#
# If the VM is running, its configuration is searched for a drive using $volid. When such a drive
# exists and do_snapshots_with_qemu() agrees, QEMU deletes its internal snapshot; otherwise the
# storage layer deletes it and is told (last argument) whether the volume is attached to the
# running VM.
sub qemu_volume_snapshot_delete {
    my ($vmid, $storecfg, $volid, $snap) = @_;

    my $attached_deviceid;

    if (check_running($vmid)) {
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            return if $drive->{file} ne $volid;
            $attached_deviceid = "drive-$ds";
        });
    }

    my $delete_via_qemu =
        $attached_deviceid && do_snapshots_with_qemu($storecfg, $volid, $attached_deviceid);

    if (!$delete_via_qemu) {
        my $is_attached = $attached_deviceid ? 1 : undef;
        PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $is_attached);
    } else {
        mon_cmd(
            $vmid,
            'blockdev-snapshot-delete-internal-sync',
            device => $attached_deviceid,
            name => $snap,
        );
    }
}
4709
# Set every migration capability the running QEMU reports to the state wanted here; capabilities
# not listed below are switched off.
#
# $savevm selects which dirty-bitmap support flag of 'query-proxmox-support' is consulted
# ('pbs-dirty-bitmap-savevm' when true, 'pbs-dirty-bitmap-migration' otherwise).
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # a failing support query is tolerated and treated as "no dirty bitmap support"
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = $savevm ? 'pbs-dirty-bitmap-savevm' : 'pbs-dirty-bitmap-migration';
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my %wanted_state = (
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    );

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $wanted_state{$name} ? JSON::true : JSON::false,
        };
    } @$supported;

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4740
# Call $func once for every volume referenced by a VM configuration, including unused volumes,
# the vmstate volume, pending changes and all snapshots.
#
# $func is invoked as $func->($volid, $attributes, @param). $attributes is a hash with the flags
# cdrom, replicate, shared, is_unused, is_attached, is_vmstate and is_tpmstate, and - where
# applicable - size, drivename, referenced_in_pending and referenced_in_snapshot->{<snapname>}.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    # merges what a single config entry says about a volume into $volhash
    my $test_volid = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        # a volume only counts as cdrom if every reference to it is a cdrom drive
        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        # the remaining flags start at 0 and are only ever raised
        $info->{$_} //= 0 for qw(replicate shared is_unused is_attached is_vmstate is_tpmstate);

        # replication is on unless the drive explicitly disables it
        $info->{replicate} = 1 if $drive->{replicate} // 1;
        $info->{shared} = 1 if $drive->{shared};
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        # attached means: referenced by the current (non-snapshot, non-pending) config and not
        # seen as an unusedN entry so far
        $info->{is_attached} = 1
            if !$info->{is_unused} && !defined($snapname) && !$pending;

        $info->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname);
        $info->{referenced_in_pending} = 1 if $pending;

        # the first non-zero size seen wins
        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} = 1 if $key eq 'vmstate';
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current configuration
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid);

    # pending changes
    if (defined($conf->{pending}) && %{$conf->{pending}}) {
        PVE::QemuConfig->foreach_volume_full($conf->{pending}, $include_opts, $test_volid, undef, 1);
    }

    # snapshots
    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $test_volid, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4805
# Options that are applied to the config directly instead of being hot-plugged; all cloud-init
# options are added to the static list below.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    ),
};

$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4824
4825 # hotplug changes in [PENDING]
4826 # $selection hash can be used to only apply specified options, for
4827 # example: { cores => 1 } (only apply changed 'cores')
4828 # $errors ref is used to return error messages
4829 sub vmconfig_hotplug_pending {
4830 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4831
4832 my $defaults = load_defaults();
4833 my $arch = get_vm_arch($conf);
4834 my $machine_type = get_vm_machine($conf, undef, $arch);
4835
4836 # commit values which do not have any impact on running VM first
4837 # Note: those option cannot raise errors, we we do not care about
4838 # $selection and always apply them.
4839
4840 my $add_error = sub {
4841 my ($opt, $msg) = @_;
4842 $errors->{$opt} = "hotplug problem - $msg";
4843 };
4844
4845 my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
4846
4847 my $cloudinit_record_changed = sub {
4848 my ($conf, $opt, $old, $new) = @_;
4849 return if !$cloudinit_pending_properties->{$opt};
4850
4851 my $ci = ($conf->{cloudinit} //= {});
4852
4853 my $recorded = $ci->{$opt};
4854 my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');
4855
4856 if (defined($new)) {
4857 if (defined($old)) {
4858 # an existing value is being modified
4859 if (defined($recorded)) {
4860 # the value was already not in sync
4861 if ($new eq $recorded) {
4862 # a value is being reverted to the cloud-init state:
4863 delete $ci->{$opt};
4864 delete $added{$opt};
4865 } else {
4866 # the value was changed multiple times, do nothing
4867 }
4868 } elsif ($added{$opt}) {
4869 # the value had been marked as added and is being changed, do nothing
4870 } else {
4871 # the value is new, record it:
4872 $ci->{$opt} = $old;
4873 }
4874 } else {
4875 # a new value is being added
4876 if (defined($recorded)) {
4877 # it was already not in sync
4878 if ($new eq $recorded) {
4879 # a value is being reverted to the cloud-init state:
4880 delete $ci->{$opt};
4881 delete $added{$opt};
4882 } else {
4883 # the value had temporarily been removed, do nothing
4884 }
4885 } elsif ($added{$opt}) {
4886 # the value had been marked as added already, do nothing
4887 } else {
4888 # the value is new, add it
4889 $added{$opt} = 1;
4890 }
4891 }
4892 } elsif (!defined($old)) {
4893 # a non-existent value is being removed? ignore...
4894 } else {
4895 # a value is being deleted
4896 if (defined($recorded)) {
4897 # a value was already recorded, just keep it
4898 } elsif ($added{$opt}) {
4899 # the value was marked as added, remove it
4900 delete $added{$opt};
4901 } else {
4902 # a previously unrecorded value is being removed, record the old value:
4903 $ci->{$opt} = $old;
4904 }
4905 }
4906
4907 my $added = join(',', sort keys %added);
4908 $ci->{added} = $added if length($added);
4909 };
4910
4911 my $changes = 0;
4912 foreach my $opt (keys %{$conf->{pending}}) { # add/change
4913 if ($fast_plug_option->{$opt}) {
4914 my $new = delete $conf->{pending}->{$opt};
4915 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
4916 $conf->{$opt} = $new;
4917 $changes = 1;
4918 }
4919 }
4920
4921 if ($changes) {
4922 PVE::QemuConfig->write_config($vmid, $conf);
4923 }
4924
4925 my $ostype = $conf->{ostype};
4926 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
4927 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
4928 my $usb_hotplug = $hotplug_features->{usb}
4929 && min_version($version, 7, 1)
4930 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
4931
4932 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
4933 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
4934
4935 foreach my $opt (sort keys %$pending_delete_hash) {
4936 next if $selection && !$selection->{$opt};
4937 my $force = $pending_delete_hash->{$opt}->{force};
4938 eval {
4939 if ($opt eq 'hotplug') {
4940 die "skip\n" if ($conf->{hotplug} =~ /(cpu|memory)/);
4941 } elsif ($opt eq 'tablet') {
4942 die "skip\n" if !$hotplug_features->{usb};
4943 if ($defaults->{tablet}) {
4944 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
4945 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
4946 if $arch eq 'aarch64';
4947 } else {
4948 vm_deviceunplug($vmid, $conf, 'tablet');
4949 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
4950 }
4951 } elsif ($opt =~ m/^usb(\d+)$/) {
4952 my $index = $1;
4953 die "skip\n" if !$usb_hotplug;
4954 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
4955 vm_deviceunplug($vmid, $conf, $opt);
4956 } elsif ($opt eq 'vcpus') {
4957 die "skip\n" if !$hotplug_features->{cpu};
4958 qemu_cpu_hotplug($vmid, $conf, undef);
4959 } elsif ($opt eq 'balloon') {
4960 # enable balloon device is not hotpluggable
4961 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
4962 # here we reset the ballooning value to memory
4963 my $balloon = get_current_memory($conf->{memory});
4964 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
4965 } elsif ($fast_plug_option->{$opt}) {
4966 # do nothing
4967 } elsif ($opt =~ m/^net(\d+)$/) {
4968 die "skip\n" if !$hotplug_features->{network};
4969 vm_deviceunplug($vmid, $conf, $opt);
4970 if($have_sdn) {
4971 my $net = PVE::QemuServer::parse_net($conf->{$opt});
4972 PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name});
4973 }
4974 } elsif (is_valid_drivename($opt)) {
4975 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
4976 vm_deviceunplug($vmid, $conf, $opt);
4977 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
4978 } elsif ($opt =~ m/^memory$/) {
4979 die "skip\n" if !$hotplug_features->{memory};
4980 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf);
4981 } elsif ($opt eq 'cpuunits') {
4982 $cgroup->change_cpu_shares(undef);
4983 } elsif ($opt eq 'cpulimit') {
4984 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
4985 } else {
4986 die "skip\n";
4987 }
4988 };
4989 if (my $err = $@) {
4990 &$add_error($opt, $err) if $err ne "skip\n";
4991 } else {
4992 my $old = delete $conf->{$opt};
4993 $cloudinit_record_changed->($conf, $opt, $old, undef);
4994 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
4995 }
4996 }
4997
4998 my $cloudinit_opt;
4999 foreach my $opt (keys %{$conf->{pending}}) {
5000 next if $selection && !$selection->{$opt};
5001 my $value = $conf->{pending}->{$opt};
5002 eval {
5003 if ($opt eq 'hotplug') {
5004 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
5005 die "skip\n" if ($value =~ /cpu/) || ($value !~ /cpu/ && $conf->{hotplug} =~ /cpu/);
5006 } elsif ($opt eq 'tablet') {
5007 die "skip\n" if !$hotplug_features->{usb};
5008 if ($value == 1) {
5009 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5010 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5011 if $arch eq 'aarch64';
5012 } elsif ($value == 0) {
5013 vm_deviceunplug($vmid, $conf, 'tablet');
5014 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5015 }
5016 } elsif ($opt =~ m/^usb(\d+)$/) {
5017 my $index = $1;
5018 die "skip\n" if !$usb_hotplug;
5019 my $d = eval { parse_property_string('pve-qm-usb', $value) };
5020 my $id = $opt;
5021 if ($d->{host} =~ m/^spice$/i) {
5022 $id = "usbredirdev$index";
5023 }
5024 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
5025 } elsif ($opt eq 'vcpus') {
5026 die "skip\n" if !$hotplug_features->{cpu};
5027 qemu_cpu_hotplug($vmid, $conf, $value);
5028 } elsif ($opt eq 'balloon') {
5029 # enabling/disabling the ballooning device is not hotpluggable
5030 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5031 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5032 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5033
5034 # allow manual ballooning if shares is set to zero
5035 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5036 my $memory = get_current_memory($conf->{memory});
5037 my $balloon = $conf->{pending}->{balloon} || $memory;
5038 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5039 }
5040 } elsif ($opt =~ m/^net(\d+)$/) {
5041 # some changes can be done without hotplug
5042 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5043 $vmid, $opt, $value, $arch, $machine_type);
5044 } elsif (is_valid_drivename($opt)) {
5045 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5046 # some changes can be done without hotplug
5047 my $drive = parse_drive($opt, $value);
5048 if (drive_is_cloudinit($drive)) {
5049 $cloudinit_opt = [$opt, $drive];
5050 # apply all the other changes first, then generate the cloudinit disk
5051 die "skip\n";
5052 }
5053 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5054 $vmid, $opt, $value, $arch, $machine_type);
5055 } elsif ($opt =~ m/^memory$/) { #dimms
5056 die "skip\n" if !$hotplug_features->{memory};
5057 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $value);
5058 } elsif ($opt eq 'cpuunits') {
5059 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5060 $cgroup->change_cpu_shares($new_cpuunits);
5061 } elsif ($opt eq 'cpulimit') {
5062 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5063 $cgroup->change_cpu_quota($cpulimit, 100000);
5064 } elsif ($opt eq 'agent') {
5065 vmconfig_update_agent($conf, $opt, $value);
5066 } else {
5067 die "skip\n"; # skip non-hot-pluggable options
5068 }
5069 };
5070 if (my $err = $@) {
5071 &$add_error($opt, $err) if $err ne "skip\n";
5072 } else {
5073 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
5074 $conf->{$opt} = $value;
5075 delete $conf->{pending}->{$opt};
5076 }
5077 }
5078
5079 if (defined($cloudinit_opt)) {
5080 my ($opt, $drive) = @$cloudinit_opt;
5081 my $value = $conf->{pending}->{$opt};
5082 eval {
5083 my $temp = {%$conf, $opt => $value};
5084 PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
5085 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5086 $vmid, $opt, $value, $arch, $machine_type);
5087 };
5088 if (my $err = $@) {
5089 &$add_error($opt, $err) if $err ne "skip\n";
5090 } else {
5091 $conf->{$opt} = $value;
5092 delete $conf->{pending}->{$opt};
5093 }
5094 }
5095
5096 # unplug xhci controller if no usb device is left
5097 if ($usb_hotplug) {
5098 my $has_usb = 0;
5099 for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
5100 next if !defined($conf->{"usb$i"});
5101 $has_usb = 1;
5102 last;
5103 }
5104 if (!$has_usb) {
5105 vm_deviceunplug($vmid, $conf, 'xhci');
5106 }
5107 }
5108
5109 PVE::QemuConfig->write_config($vmid, $conf);
5110
5111 if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
5112 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
5113 }
5114 }
5115
# Free the volume behind a drive that is being removed from the VM configuration.
#
# Only acts when the removal is forced or $key is an 'unused' entry, and the drive is not a
# CD-ROM according to drive_is_cdrom($drive, 1); in every other case nothing is returned.
#
# Returns 1 if the volume was freed, or if the VM does not own the volume at all.
# Dies if the storage permission check fails or the volume is still in use.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    # only forced removals and 'unused' entries may give up their volume
    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5138
# Handle the removal of drive option $opt from a VM configuration.
#
# Without $force the parsed drive is handed to vmconfig_register_unused_drive. With $force
# the current user needs 'VM.Config.Disk' on the VM and the drive is passed on to
# try_deallocate_drive.
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # plain detach: nothing is freed
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5154
5155
5156
# Apply all pending changes of a VM configuration to the configuration itself (cold plug).
#
# Parameters:
#   $errors          - optional hash ref; receives one message per option that failed
#   $skip_cloud_init - if true, the cloud-init data is never regenerated here
#
# Does nothing when there are no pending changes. Failures of single options are recorded
# and warned about, they do not abort the remaining changes.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    # first pass: options queued for deletion
    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$pending_delete_hash) {
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            die "internal error" if $opt =~ m/^unused/;
            return if !defined($conf->{$opt}); # nothing configured - leave the eval

            if (is_valid_drivename($opt)) {
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif ($have_sdn && $opt =~ m/^net\d+$/) {
                my $net = PVE::QemuServer::parse_net($conf->{$opt});
                # a failure to release the IPs is only reported, it does not block the deletion
                eval { PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}) };
                warn if $@;
            }
        };
        my $err = $@;
        if ($err) {
            $add_apply_error->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0: never generate, undef: no reason to generate so far, 1: a cloud-init drive was applied
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    # second pass: added and changed options
    for my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                # the drive currently configured under this key is about to be replaced
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}))
            } elsif (defined($conf->{pending}->{$opt}) && $opt =~ m/^net\d+$/) {
                return if !$have_sdn; # return from eval if SDN is not available

                my $new_net = PVE::QemuServer::parse_net($conf->{pending}->{$opt});
                if ($conf->{$opt}) {
                    my $old_net = PVE::QemuServer::parse_net($conf->{$opt});

                    my $old_is_registered = defined($old_net->{bridge}) && defined($old_net->{macaddr});
                    if ($old_is_registered && (
                        safe_string_ne($old_net->{bridge}, $new_net->{bridge}) ||
                        safe_string_ne($old_net->{macaddr}, $new_net->{macaddr})
                    )) {
                        PVE::Network::SDN::Vnets::del_ips_from_mac($old_net->{bridge}, $old_net->{macaddr}, $conf->{name});
                    }
                }
                #fixme: reuse ip if mac change && same bridge
                PVE::Network::SDN::Vnets::add_next_free_cidr($new_net->{bridge}, $conf->{name}, $new_net->{macaddr}, $vmid, undef, 1);
            }
        };
        my $err = $@;
        if ($err) {
            $add_apply_error->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $drive = parse_drive($opt, $conf->{pending}->{$opt});
            if (drive_is_cloudinit($drive)) {
                $generate_cloudinit //= 1;
            }
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
    if ($generate_cloudinit) {
        if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
            # After successful generation and if there were changes to be applied, update the
            # config to drop the {cloudinit} entry.
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    }
}
5245
# Apply a new value for network option $opt ('netN') to a running VM.
#
# If the device already exists and only bridge, tag, trunks, firewall, rate or link state
# differ, the tap interface is reconfigured in place and 1 is returned. Any other difference
# needs $hotplug: the old device is unplugged and the new one plugged, in which case the
# result of vm_deviceplug is returned.
#
# Dies with "skip\n" when the change needs hotplug but $hotplug is not set.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        # changes of these properties, or a bridge/nat mode change, need a new device
        my $needs_replug =
            safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !($newnet->{bridge} && $oldnet->{bridge});

        if (!$needs_replug) {
            die "internal error" if $opt !~ m/net(\d+)/;
            my $iface = "tap${vmid}i$1";

            my $bridge_changed = safe_string_ne($oldnet->{bridge}, $newnet->{bridge});
            my $bridge_or_tag_changed =
                $bridge_changed || safe_num_ne($oldnet->{tag}, $newnet->{tag});
            my $replug_tap = $bridge_or_tag_changed
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall});

            if ($replug_tap) {
                PVE::Network::tap_unplug($iface);

                #set link_down in guest if bridge or vlan change to notify guest (dhcp renew for example)
                qemu_set_link_status($vmid, $opt, 0) if $bridge_or_tag_changed;

                if ($bridge_changed && $have_sdn) {
                    PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name});
                    PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
                }

                my $tap_plug = $have_sdn
                    ? \&PVE::Network::SDN::Zones::tap_plug
                    : \&PVE::Network::tap_plug;
                $tap_plug->($iface, $newnet->{bridge}, $newnet->{tag}, $newnet->{firewall}, $newnet->{trunks}, $newnet->{rate});

                #set link_up in guest if bridge or vlan change to notify guest (dhcp renew for example)
                qemu_set_link_status($vmid, $opt, 1) if $bridge_or_tag_changed;

            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            if (safe_string_ne($oldnet->{link_down}, $newnet->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$newnet->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);

        if ($have_sdn) {
            PVE::Network::SDN::Vnets::del_ips_from_mac($oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name});
        }
    }

    # from here on a (new) device has to be plugged
    die "skip\n" if !$hotplug;

    if ($have_sdn) {
        PVE::Network::SDN::Vnets::add_next_free_cidr($newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
        PVE::Network::SDN::Vnets::add_dhcp_mapping($newnet->{bridge}, $newnet->{macaddr}, $vmid, $conf->{name});
    }
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5332
# Decide whether a changed guest agent option can be applied to a running VM.
#
# Returns nothing if the old and new agent settings differ at most in sub-options listed as
# hot-pluggable (currently only 'fstrim_cloned_disks'). Dies with "skip\n" if no agent is
# configured so far or if any other sub-option was added, changed or removed.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my $hotplug_options = { fstrim_cloned_disks => 1 };

    my $old_agent = parse_guest_agent($conf);
    my $agent = parse_guest_agent({$opt => $value});

    # first direction catches added/changed options, second one catches removed options
    for my $direction ([$agent, $old_agent], [$old_agent, $agent]) {
        my ($from, $to) = @$direction;
        my @cold_options = grep { !defined($hotplug_options->{$_}) } keys %$from;
        for my $option (@cold_options) {
            die "skip\n" if safe_string_ne($from->{$option}, $to->{$option});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5355
# Apply a new value for drive option $opt to a running VM.
#
# For an already configured drive:
#  - CD-ROM: the medium is ejected and, unless the new file is 'none', replaced; returns 1
#  - same volume: only the I/O throttle settings are updated online; returns 1
#  - other volume: the old disk is unplugged and registered as unused, then the new one is
#    plugged like a fresh disk
# For a new drive the volume is activated (unless it is a /dev/ path) and plugged; the
# result of vm_deviceplug is returned.
#
# Dies with "skip\n" if the change cannot be done online, and with an error if the media
# type would change.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);
    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $oldmedia = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $oldmedia;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        my $volume_replaced = $drive->{file} ne $old_drive->{file};

        if (!$volume_replaced) {
            # update existing disk

            # skip non hotpluggable value
            for my $key (qw(aio discard iothread queues product cache ssd vendor ro)) {
                die "skip\n" if safe_string_ne($drive->{$key}, $old_drive->{$key});
            }

            # throttle settings, in the order qemu_block_set_io_throttle expects them
            my @bandwidth = qw(mbps mbps_rd mbps_wr);
            my @ops = qw(iops iops_rd iops_wr);
            my @bandwidth_burst = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @ops_burst = qw(iops_max iops_rd_max iops_wr_max);
            my @bandwidth_burst_length = qw(bps_max_length bps_rd_max_length bps_wr_max_length);
            my @ops_burst_length = qw(iops_max_length iops_rd_max_length iops_wr_max_length);

            my $changed_setting = first { safe_num_ne($drive->{$_}, $old_drive->{$_}) } (
                @bandwidth, @ops, @bandwidth_burst, @ops_burst,
                @bandwidth_burst_length, @ops_burst_length,
            );

            # apply throttle
            if (defined($changed_setting)) {
                my $in_bytes = sub { return ($drive->{$_[0]} || 0)*1024*1024 };
                my $or_zero = sub { return $drive->{$_[0]} || 0 };
                my $or_one = sub { return $drive->{$_[0]} || 1 };

                qemu_block_set_io_throttle(
                    $vmid,"drive-$opt",
                    (map { $in_bytes->($_) } @bandwidth),
                    (map { $or_zero->($_) } @ops),
                    (map { $in_bytes->($_) } @bandwidth_burst),
                    (map { $or_zero->($_) } @ops_burst),
                    (map { $or_one->($_) } @bandwidth_burst_length),
                    (map { $or_one->($_) } @ops_burst_length),
                );
            }

            return 1;
        }

        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    if (!$hotplug || $opt =~ m/(ide|sata)(\d+)/) {
        die "skip\n";
    }

    # hotplug new disks
    if ($drive->{file} !~ m|^/dev/.+|) {
        PVE::Storage::activate_volumes($storecfg, [$drive->{file}]);
    }
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5467
# Regenerate the cloud-init data of a VM and, if the VM is running, swap the medium of its
# cloud-init drive for the regenerated one.
#
# Does nothing if the configuration contains no cloud-init drive.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    # if several cloud-init drives are visited, the last one wins
    my ($cloudinit_ds, $cloudinit_drive);
    my $remember_cloudinit = sub {
        my ($ds, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($cloudinit_ds, $cloudinit_drive) = ($ds, $drive);
    };
    PVE::QemuConfig->foreach_volume($conf, $remember_cloudinit);

    return if !$cloudinit_drive;

    # persist the config only if applying the cloud-init config reported a change
    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $cloudinit_drive->{file});
        if ($path) {
            mon_cmd($vmid, "eject", force => JSON::true, id => "$cloudinit_ds");
            mon_cmd($vmid, "blockdev-change-medium", id => "$cloudinit_ds", filename => "$path");
        }
    }
}
5498
# called in locked context by incoming migration
#
# Collect the volumes of $conf that live on non-shared storages. CD-ROM drives, 'tpmstate0'
# and drives without a volume are left out.
#
# Returns a hash ref: drive key => [$volid, $storeid, $drive, $use_existing, $format], where
# $use_existing is 1 for volumes listed in $replicated_volumes and 0 otherwise.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect_local_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file} or return;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        my $format = qemu_img_format($scfg, $volname);

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $local_volumes->{$ds} = [$volid, $storeid, $drive, $use_existing, $format];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_local_volume);

    return $local_volumes;
}
5527
# called in locked context by incoming migration
#
# Prepare the target volumes for the NBD export of an incoming migration.
#
# Parameters:
#   $source_volumes - hash ref: drive key => [$volid, $storeid, $drive, $use_existing, $format]
#   $storagemap     - parsed storage map, used to map the source storage ID
#
# Entries flagged with $use_existing are re-used as they are and marked 'replicated'. For
# all others a new volume of the drive's size is allocated on the mapped storage.
#
# Returns a hash ref: drive key => { drivestr, volid [, replicated] }.
# The drive hashes inside $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);

        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. default format of storage
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        $format = $defFormat if !$format || !grep { $format eq $_ } $validFormats->@*;

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # Describe the new volume on a copy. Assigning $drive itself would only copy the
        # reference and silently rewrite 'file' and 'format' in the caller's data.
        my $newdrive = { %$drive, format => $format, file => $newvolid };

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5563
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Start a VM while holding its config lock and return the result of vm_start_nolock. If
# the VM is running and holds a 'backup' lock it is resumed instead. Sets
# $params->{resume}. Dies if the VM is a template (unless skiptemplate is set) or is
# already running.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $migratedfrom = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migratedfrom);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        if (!$params->{skiplock} && !$has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            $migrate_opts->{nbd} = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);

            # start with the drive strings of the prepared NBD target volumes
            for my $opt (keys %{$migrate_opts->{nbd}}) {
                $conf->{$opt} = $migrate_opts->{nbd}->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5607
5608
5609 # params:
5610 # statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5611 # skiplock => 0/1, skip checking for config lock
5612 # skiptemplate => 0/1, skip checking whether VM is template
5613 # forcemachine => to force QEMU machine (rollback/migration)
5614 # forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5615 # timeout => in seconds
5616 # paused => start VM in paused state (backup)
5617 # resume => resume from hibernation
5618 # live-restore-backing => {
5619 # sata0 => {
5620 # name => blockdev-name,
5621 # blockdev => "arg to the -blockdev command instantiating device named 'name'",
5622 # },
5623 # virtio2 => ...
5624 # }
5625 # migrate_opts:
5626 # nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5627 # migratedfrom => source node
5628 # spice_ticket => used for spice migration, passed via tunnel/stdin
5629 # network => CIDR of migration network
5630 # type => secure/insecure - tunnel over encrypted connection or plain-text
5631 # nbd_proto_version => int, 0 for TCP, 1 for UNIX
5632 # replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5633 # offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5634 # contained in config
5635 sub vm_start_nolock {
5636 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5637
5638 my $statefile = $params->{statefile};
5639 my $resume = $params->{resume};
5640
5641 my $migratedfrom = $migrate_opts->{migratedfrom};
5642 my $migration_type = $migrate_opts->{type};
5643
5644 my $res = {};
5645
5646 # clean up leftover reboot request files
5647 eval { clear_reboot_request($vmid); };
5648 warn $@ if $@;
5649
5650 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5651 vmconfig_apply_pending($vmid, $conf, $storecfg);
5652 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5653 }
5654
5655 # don't regenerate the ISO if the VM is started as part of a live migration
5656 # this way we can reuse the old ISO with the correct config
5657 if (!$migratedfrom) {
5658 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5659 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5660 # $conf->{cloudinit}, so we could just not do this?
5661 # But we do it above, so for now let's be consistent.
5662 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5663 }
5664 }
5665
5666 # override offline migrated volumes, conf is out of date still
5667 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5668 for my $key (sort keys $offline_volumes->%*) {
5669 my $parsed = parse_drive($key, $conf->{$key});
5670 $parsed->{file} = $offline_volumes->{$key};
5671 $conf->{$key} = print_drive($parsed);
5672 }
5673 }
5674
5675 my $defaults = load_defaults();
5676
5677 # set environment variable useful inside network script
5678 # for remote migration the config is available on the target node!
5679 if (!$migrate_opts->{remote_node}) {
5680 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5681 }
5682
5683 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5684
5685 my $forcemachine = $params->{forcemachine};
5686 my $forcecpu = $params->{forcecpu};
5687 if ($resume) {
5688 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5689 $forcemachine = $conf->{runningmachine};
5690 $forcecpu = $conf->{runningcpu};
5691 print "Resuming suspended VM\n";
5692 }
5693
5694 my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
5695 $conf, $defaults, $forcemachine, $forcecpu, $params->{'live-restore-backing'});
5696
5697 my $migration_ip;
5698 my $get_migration_ip = sub {
5699 my ($nodename) = @_;
5700
5701 return $migration_ip if defined($migration_ip);
5702
5703 my $cidr = $migrate_opts->{network};
5704
5705 if (!defined($cidr)) {
5706 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5707 $cidr = $dc_conf->{migration}->{network};
5708 }
5709
5710 if (defined($cidr)) {
5711 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5712
5713 die "could not get IP: no address configured on local " .
5714 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5715
5716 die "could not get IP: multiple addresses configured on local " .
5717 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5718
5719 $migration_ip = @$ips[0];
5720 }
5721
5722 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5723 if !defined($migration_ip);
5724
5725 return $migration_ip;
5726 };
5727
5728 if ($statefile) {
5729 if ($statefile eq 'tcp') {
5730 my $migrate = $res->{migrate} = { proto => 'tcp' };
5731 $migrate->{addr} = "localhost";
5732 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5733 my $nodename = nodename();
5734
5735 if (!defined($migration_type)) {
5736 if (defined($datacenterconf->{migration}->{type})) {
5737 $migration_type = $datacenterconf->{migration}->{type};
5738 } else {
5739 $migration_type = 'secure';
5740 }
5741 }
5742
5743 if ($migration_type eq 'insecure') {
5744 $migrate->{addr} = $get_migration_ip->($nodename);
5745 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5746 }
5747
5748 # see #4501: port reservation should be done close to usage - tell QEMU where to listen
5749 # via QMP later
5750 push @$cmd, '-incoming', 'defer';
5751 push @$cmd, '-S';
5752
5753 } elsif ($statefile eq 'unix') {
5754 # should be default for secure migrations, as an ssh TCP forward
5755 # tunnel is not deterministically ready and regularly fails
5756 # to set up in time, so use UNIX socket forwards
5757 my $migrate = $res->{migrate} = { proto => 'unix' };
5758 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5759 unlink $migrate->{addr};
5760
5761 $migrate->{uri} = "unix:$migrate->{addr}";
5762 push @$cmd, '-incoming', $migrate->{uri};
5763 push @$cmd, '-S';
5764
5765 } elsif (-e $statefile) {
5766 push @$cmd, '-loadstate', $statefile;
5767 } else {
5768 my $statepath = PVE::Storage::path($storecfg, $statefile);
5769 push @$vollist, $statefile;
5770 push @$cmd, '-loadstate', $statepath;
5771 }
5772 } elsif ($params->{paused}) {
5773 push @$cmd, '-S';
5774 }
5775
5776 my $memory = get_current_memory($conf->{memory});
5777 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $memory, $resume);
5778
5779 my $pci_reserve_list = [];
5780 for my $device (values $pci_devices->%*) {
5781 next if $device->{mdev}; # we don't reserve for mdev devices
5782 push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
5783 }
5784
5785 # reserve all PCI IDs before actually doing anything with them
5786 PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
5787
5788 eval {
5789 my $uuid;
5790 for my $id (sort keys %$pci_devices) {
5791 my $d = $pci_devices->{$id};
5792 my ($index) = ($id =~ m/^hostpci(\d+)$/);
5793
5794 my $chosen_mdev;
5795 for my $dev ($d->{ids}->@*) {
5796 my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
5797 if ($d->{mdev}) {
5798 warn $@ if $@;
5799 $chosen_mdev = $info;
5800 last if $chosen_mdev; # if successful, we're done
5801 } else {
5802 die $@ if $@;
5803 }
5804 }
5805
5806 next if !$d->{mdev};
5807 die "could not create mediated device\n" if !defined($chosen_mdev);
5808
5809 # nvidia grid needs the uuid of the mdev as qemu parameter
5810 if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
5811 if (defined($conf->{smbios1})) {
5812 my $smbios_conf = parse_smbios1($conf->{smbios1});
5813 $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
5814 }
5815 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
5816 }
5817 }
5818 push @$cmd, '-uuid', $uuid if defined($uuid);
5819 };
5820 if (my $err = $@) {
5821 eval { cleanup_pci_devices($vmid, $conf) };
5822 warn $@ if $@;
5823 die $err;
5824 }
5825
5826 PVE::Storage::activate_volumes($storecfg, $vollist);
5827
5828
5829 my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
5830 eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
5831 eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
5832 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5833 # timeout should be more than enough here...
5834 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5835
5836 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5837
5838 my %run_params = (
5839 timeout => $statefile ? undef : $start_timeout,
5840 umask => 0077,
5841 noerr => 1,
5842 );
5843
5844 # when migrating, prefix QEMU output so other side can pick up any
5845 # errors that might occur and show the user
5846 if ($migratedfrom) {
5847 $run_params{quiet} = 1;
5848 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5849 }
5850
5851 my %systemd_properties = (
5852 Slice => 'qemu.slice',
5853 KillMode => 'process',
5854 SendSIGKILL => 0,
5855 TimeoutStopUSec => ULONG_MAX, # infinity
5856 );
5857
5858 if (PVE::CGroup::cgroup_mode() == 2) {
5859 $systemd_properties{CPUWeight} = $cpuunits;
5860 } else {
5861 $systemd_properties{CPUShares} = $cpuunits;
5862 }
5863
5864 if (my $cpulimit = $conf->{cpulimit}) {
5865 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5866 }
5867 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5868
5869 my $run_qemu = sub {
5870 PVE::Tools::run_fork sub {
5871 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5872
5873 my $tpmpid;
5874 if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
5875 # start the TPM emulator so QEMU can connect on start
5876 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5877 }
5878
5879 my $exitcode = run_command($cmd, %run_params);
5880 if ($exitcode) {
5881 if ($tpmpid) {
5882 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5883 kill 'TERM', $tpmpid;
5884 }
5885 die "QEMU exited with code $exitcode\n";
5886 }
5887 };
5888 };
5889
5890 if ($conf->{hugepages}) {
5891
5892 my $code = sub {
5893 my $hotplug_features =
5894 parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5895 my $hugepages_topology =
5896 PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
5897
5898 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5899
5900 PVE::QemuServer::Memory::hugepages_mount();
5901 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5902
5903 eval { $run_qemu->() };
5904 if (my $err = $@) {
5905 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5906 if !$conf->{keephugepages};
5907 die $err;
5908 }
5909
5910 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5911 if !$conf->{keephugepages};
5912 };
5913 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5914
5915 } else {
5916 eval { $run_qemu->() };
5917 }
5918
5919 if (my $err = $@) {
5920 # deactivate volumes if start fails
5921 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5922 warn $@ if $@;
5923 eval { cleanup_pci_devices($vmid, $conf) };
5924 warn $@ if $@;
5925
5926 die "start failed: $err";
5927 }
5928
5929 # re-reserve all PCI IDs now that we can know the actual VM PID
5930 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5931 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
5932 warn $@ if $@;
5933
5934 if (defined(my $migrate = $res->{migrate})) {
5935 if ($migrate->{proto} eq 'tcp') {
5936 my $nodename = nodename();
5937 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5938 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5939 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5940 mon_cmd($vmid, "migrate-incoming", uri => $migrate->{uri});
5941 }
5942 print "migration listens on $migrate->{uri}\n";
5943 } elsif ($statefile) {
5944 eval { mon_cmd($vmid, "cont"); };
5945 warn $@ if $@;
5946 }
5947
5948 #start nbd server for storage migration
5949 if (my $nbd = $migrate_opts->{nbd}) {
5950 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
5951
5952 my $migrate_storage_uri;
5953 # nbd_protocol_version > 0 for unix socket support
5954 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
5955 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
5956 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
5957 $migrate_storage_uri = "nbd:unix:$socket_path";
5958 $res->{migrate}->{unix_sockets} = [$socket_path];
5959 } else {
5960 my $nodename = nodename();
5961 my $localip = $get_migration_ip->($nodename);
5962 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5963 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
5964
5965 mon_cmd($vmid, "nbd-server-start", addr => {
5966 type => 'inet',
5967 data => {
5968 host => "${localip}",
5969 port => "${storage_migrate_port}",
5970 },
5971 });
5972 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
5973 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
5974 }
5975
5976 my $block_info = mon_cmd($vmid, "query-block");
5977 $block_info = { map { $_->{device} => $_ } $block_info->@* };
5978
5979 foreach my $opt (sort keys %$nbd) {
5980 my $drivestr = $nbd->{$opt}->{drivestr};
5981 my $volid = $nbd->{$opt}->{volid};
5982
5983 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
5984
5985 mon_cmd(
5986 $vmid,
5987 "block-export-add",
5988 id => "drive-$opt",
5989 'node-name' => $block_node,
5990 writable => JSON::true,
5991 type => "nbd",
5992 name => "drive-$opt", # NBD export name
5993 );
5994
5995 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
5996 print "storage migration listens on $nbd_uri volume:$drivestr\n";
5997 print "re-using replicated volume: $opt - $volid\n"
5998 if $nbd->{$opt}->{replicated};
5999
6000 $res->{drives}->{$opt} = $nbd->{$opt};
6001 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
6002 }
6003 }
6004
6005 if ($migratedfrom) {
6006 eval {
6007 set_migration_caps($vmid);
6008 };
6009 warn $@ if $@;
6010
6011 if ($spice_port) {
6012 print "spice listens on port $spice_port\n";
6013 $res->{spice_port} = $spice_port;
6014 if ($migrate_opts->{spice_ticket}) {
6015 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6016 $migrate_opts->{spice_ticket});
6017 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6018 }
6019 }
6020
6021 } else {
6022 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6023 if !$statefile && $conf->{balloon};
6024
6025 foreach my $opt (keys %$conf) {
6026 next if $opt !~ m/^net\d+$/;
6027 my $nicconf = parse_net($conf->{$opt});
6028 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6029 }
6030 add_nets_bridge_fdb($conf, $vmid);
6031 }
6032
6033 if (!defined($conf->{balloon}) || $conf->{balloon}) {
6034 eval {
6035 mon_cmd(
6036 $vmid,
6037 'qom-set',
6038 path => "machine/peripheral/balloon0",
6039 property => "guest-stats-polling-interval",
6040 value => 2
6041 );
6042 };
6043 log_warn("could not set polling interval for ballooning - $@") if $@;
6044 }
6045
6046 if ($resume) {
6047 print "Resumed VM, removing state\n";
6048 if (my $vmstate = $conf->{vmstate}) {
6049 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6050 PVE::Storage::vdisk_free($storecfg, $vmstate);
6051 }
6052 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6053 PVE::QemuConfig->write_config($vmid, $conf);
6054 }
6055
6056 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6057
6058 my ($current_machine, $is_deprecated) =
6059 PVE::QemuServer::Machine::get_current_qemu_machine($vmid);
6060 if ($is_deprecated) {
6061 log_warn(
6062 "current machine version '$current_machine' is deprecated - see the documentation and ".
6063 "change to a newer one",
6064 );
6065 }
6066
6067 return $res;
6068 }
6069
# Build the QEMU command line of a VM and return it as a single string.
#
# $storecfg: storage configuration, handed through to config_to_command()
# $vmid:     ID of the VM whose configuration gets loaded
# $snapname: optional snapshot name - if set, the command line is generated
#            from that snapshot's configuration instead of the current one
#
# Dies if $snapname is set but the configuration has no such snapshot.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snapshot = $conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # a snapshot may carry its own machine and CPU overrides
        ($forcemachine, $forcecpu) = $snapshot->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snapshot->{digest} = $conf->{digest};

        $conf = $snapshot;
    }

    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, load_defaults(), $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6095
# Send a 'system_reset' monitor command to the VM while holding its config lock.
#
# $vmid:     ID of the VM to reset
# $skiplock: if true, the lock check on the loaded configuration is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6108
# Collect the volume IDs referenced by a VM configuration.
#
# $conf: VM configuration, walked with foreach_volid()
#
# Returns an array reference with every volume ID that is not an absolute
# path and for which PVE::Storage::parse_volume_id() yields a storage ID.
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    foreach_volid($conf, sub {
        my ($volid, $attr) = @_;

        # entries given as absolute paths are skipped
        return if $volid =~ m|^/|;

        # second argument '1': presumably a "no error" flag - the result is only tested for truth
        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    });

    return \@volids;
}
6126
# Clean up mediated devices of all 'hostpciN' entries and drop the PCI
# reservation of the VM.
#
# $vmid: ID of the VM
# $conf: VM configuration; every key matching hostpci<N> is inspected
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys %$conf) {
        my ($index) = $opt =~ m/^hostpci(\d+)$/ or next;

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});

        # only mediated devices need explicit cleanup here
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $sysfs_dir = "/sys/bus/mdev/devices/$uuid";

        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
        # out when we do it first. so wait for up to 10 seconds and then try it manually
        if ($device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/ && -e $sysfs_dir) {
            my $waited = 0;
            while (-e $sysfs_dir && $waited < 10) {
                sleep 1;
                $waited++;
            }
            print "waited $waited seconds for mediated device driver finishing clean up\n";
        }

        next if ! -e $sysfs_dir;

        print "actively clean up mediated device with UUID $uuid\n";
        PVE::SysFSTools::file_write("$sysfs_dir/remove", "1");
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6159
# Release the resources of a stopped VM. Any error raised on the way is
# downgraded to a warning.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the VM
# $conf:                  VM configuration
# $keepActive:            if true, volumes are neither deactivated nor unmapped
# $apply_pending_changes: if true, vmconfig_apply_pending() is called at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        if (!$keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            if (my $tpmdrive = $conf->{tpmstate0}) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if (my $ivshmem_conf = $conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $ivshmem_conf);
            my $shm_name = $ivshmem->{name} // $vmid;

            # Just delete it for now: VMs which already have it open are not
            # affected, but new VMs will get a separate one. If this becomes an
            # issue we either add some sort of ref-counting or just add a
            # "don't delete on stop" flag to the ivshmem format.
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6197
# call only in locked context
#
# Stop or shut down a running VM and clean up after it.
#
# $storecfg:   storage configuration, used for the cleanup
# $vmid:       ID of the VM
# $skiplock:   skip the config lock check
# $nocheck:    do not load the configuration (then no hookscript is executed
#              and no cleanup is done); also passed to check_running()
# $timeout:    seconds to wait for the VM to go away (defaults to 60, or to
#              the 'down' value of the startup order on shutdown)
# $shutdown:   request a guest shutdown instead of sending 'quit'
# $force:      on failure or timeout send SIGTERM (and SIGKILL after another
#              10 seconds) instead of dying
# $keepActive: passed on to vm_stop_cleanup()
#
# Returns early without doing anything if the VM is not running.
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck) or return;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # poll once per second until the VM is gone or $limit polls have been
    # made; returns the number of polls that still saw the VM running
    my $wait_for_exit = sub {
        my ($limit) = @_;
        my $waited = 0;
        while (($waited < $limit) && check_running($vmid, $nocheck)) {
            $waited++;
            sleep 1;
        }
        return $waited;
    };

    # without a loaded config there is nothing we could clean up
    my $cleanup = sub {
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if ($wait_for_exit->($timeout) < $timeout) {
            # the VM went away in time
            $cleanup->();
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($wait_for_exit->(10) >= 10) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    $cleanup->();
}
6279
# Stop a VM.
#
# Note: use $nocheck to skip the tests whether the VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active.
#
# If $migratedfrom is set, the process is sent SIGTERM directly and the
# cleanup is done with the configuration loaded for that node, without taking
# the config lock. Otherwise _do_vm_stop() runs under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop is forced by default, a shutdown is not
    $force = 1 unless defined($force) || $shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6300
# Reboot a running VM: register a reboot request and shut the VM down, all
# under the config lock. Does nothing if the VM is not running.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, handed to _do_vm_stop()
#
# If anything fails, the reboot request is cleared again and the error is
# re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                _do_vm_stop(PVE::Storage::config(), $vmid, undef, undef, $timeout, 1);
            }
        };
        if (my $err = $@) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6323
# Suspend a VM, either by just pausing it or by saving its state to a volume
# and quitting QEMU afterwards.
#
# note: if using the statestorage parameter, the caller has to check privileges
#
# $vmid:         ID of the VM
# $skiplock:     skip the config lock check
# $includestate: if true, save the VM state to disk; otherwise only send 'stop'
# $statestorage: optional storage for the state volume; if unset it is chosen
#                by find_vmstate_storage() and, for non-CLI callers, checked
#                for 'Datastore.AllocateSpace'
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # shared between the two locked sections and the state saving in between
    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);

            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    });

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll once per second until the state is saved or saving failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$state->{status}'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            die "lock changed unexpectedly\n"
                unless PVE::QemuConfig->has_lock($conf, 'suspending');

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6418
# Resume a paused or suspended VM under the config lock.
#
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
#
# $vmid:     ID of the VM
# $skiplock: skip the config lock check
# $nocheck:  see above; also disables the config lock check
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        # pick the monitor command matching the reported run state
        my $resume_cmd = 'cont';
        my $reset = 0;
        if (my $status = $res->{status}) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6469
# Send a key event to the VM via the human monitor, under the config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted but not used by this function
# $key:      key specification, interpolated into the 'sendkey' monitor command
#
# Dies with the monitor's output if that output is not empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # NOTE(review): the loaded config is not used afterwards - presumably
        # this only asserts that the configuration can be loaded; confirm
        PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6482
# Check that a user may use the bridges referenced by all 'netN' entries of
# a configuration.
#
# $rpcenv:   RPC environment, handed to check_vnet_access()
# $authuser: user to check; 'root@pam' always passes without any check
# $conf:     VM configuration
#
# Returns 1; a failed check is expected to be raised by
# PVE::GuestHelpers::check_vnet_access().
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = sort grep { m/^net\d+$/ } keys %$conf;

    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        my ($bridge, $tag, $trunks) = $net->@{qw(bridge tag trunks)};
        PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
    }

    return 1;
}
6496
# Check that a user may use the USB and PCI devices referenced by the
# 'usbN' and 'hostpciN' entries of a configuration.
#
# $rpcenv: RPC environment, used for the 'Mapping.Use' permission checks
# $user:   user to check
# $conf:   VM configuration
#
# Dies if a non-root user references a real (non-mapped) device - USB 'spice'
# hosts are exempt - or if an entry has neither 'host' nor 'mapping' set.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    for my $opt (keys %$conf) {
        if ($opt =~ m/^usb\d+$/) {
            my $usb = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});

            if (my $host = $usb->{host}) {
                my $is_spice = $host =~ m/^spice$/i;
                die "only root can set '$opt' config for real devices\n"
                    if !$is_spice && $user ne 'root@pam';
                next;
            }

            die "either 'host' or 'mapping' must be set.\n" if !$usb->{mapping};

            $rpcenv->check_full($user, "/mapping/usb/$usb->{mapping}", ['Mapping.Use']);
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $pci = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});

            if ($pci->{host}) {
                die "only root can set '$opt' config for non-mapped devices\n" if $user ne 'root@pam';
                next;
            }

            die "either 'host' or 'mapping' must be set.\n" if !$pci->{mapping};

            $rpcenv->check_full($user, "/mapping/pci/$pci->{mapping}", ['Mapping.Use']);
        }
    }
}
6523
# Run all permission checks needed before restoring a configuration: first
# the bridge access check, then the device mapping check.
#
# $rpcenv: RPC environment
# $user:   user performing the restore
# $conf:   configuration to be restored
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);
    return check_mapping_access(@check_args);
}
# vzdump restore implementation
6531
# Return the name of the first member of a tar archive.
#
# $archive: path to the archive; must be a plain file
#
# Runs 'tar tf' on the archive, reads only the first line of its output and
# terminates the tar process afterwards.
#
# Dies if the file does not exist, if tar cannot be started, or if tar
# produces no output.
sub tar_archive_read_firstfile {
    my $archive = shift;

    die "ERROR: file '$archive' does not exist\n" if ! -f $archive;

    # try to detect archive type first
    #
    # The low-precedence 'or' is essential: with '||' the die would be bound to
    # $archive and a failed open would go unnoticed. The 'kill' below would
    # then be called with an undefined PID, which is treated as 0 and signals
    # the whole process group.
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    my $firstfile = <$fh>;

    # we only need the first line, so do not wait for tar to list the rest
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contaions no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6549
# Remove the temporary volumes recorded in a restore stat file.
#
# $storecfg: storage configuration, used to free storage-managed volumes
# $statfile: path of the stat file; each line is expected to end in
#            'vzdump:<name>:<volid>'
#
# Volumes given as absolute paths are unlinked, all others are freed through
# PVE::Storage::vdisk_free(). The outcome of every volume is reported on
# STDERR; a failure never aborts the cleanup of the remaining volumes. If
# the stat file cannot be opened, nothing is cleaned up.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # 'or' instead of '||': with '||' the die was bound to $volid,
                        # so a failed unlink was reported as successful removal
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6575
# Restore a VM from a backup archive, dispatching on the archive format.
#
# $archive: path of the archive, or '-'
# $vmid:    ID of the VM to restore
# $user:    user performing the restore
# $opts:    restore options; $opts->{format} overrides the detected format
#
# Returns whatever the selected restore function returns.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    # a literal '-' is handed straight to the VMA restore, without inspecting the archive
    if ($archive eq '-') {
        return restore_vma_archive($archive, $vmid, $user, $opts);
    }

    my $info = PVE::Storage::archive_info($archive);

    # an explicitly requested format wins over the detected one
    my $format = $opts->{format} // $info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $info->{compression});
}
6593
# Helper to remove disks that will not be used after restore.
#
# $storecfg:     storage configuration
# $vmid:         ID of the VM being restored; only volumes owned by it are touched
# $oldconf:      configuration of the VM before the restore
# $virtdev_hash: devices contained in the backup, keyed by drive name
#
# Errors while freeing volumes or deleting snapshots are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    # free a volume, downgrading any error to a warning
    my $free_volume = sub {
        my ($volid) = @_;
        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    };

    my %kept_disks;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if ($virtdev_hash->{$ds}) {
            $free_volume->($volid);
        } else {
            $kept_disks{$volid} = 1;
        }
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        $free_volume->($vmstate) if $vmstate;

        for my $volid (keys %kept_disks) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6639
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the given storage
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;
        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace'])
            if $user ne 'root@pam';
    };

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # an explicitly requested storage wins over the one from the hint;
            # 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            # of the regular drive lines only cloud-init drives are of interest here
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined ($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
6707
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash gets its 'volid' set, and its 'format' replaced
# by the storage's default format if the requested one is not supported.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024); # round up to a multiple of 1024
        my $storeid = $d->{storeid};

        # looked up once per device and reused below (the original fetched it
        # a second time into a shadowing variable)
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloud-init disks get a fixed name, all others are left undefined
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6751
# Translate one line of a backed-up configuration into the text that goes
# into the restored configuration.
#
# $cookie: state shared between calls; $cookie->{netcount} is the index of
#          the next 'netN' entry generated from old 'vlanN' lines
# $map:    { $virtdev => $volid } of the newly allocated volumes
# $line:   the configuration line, including its trailing newline
# $unique: if true, MAC addresses and the smbios1 UUID are regenerated
#
# Returns the replacement text, which may be empty or span several lines.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # lines that are never carried over into the restored configuration
    for my $skip_re (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $skip_re;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);

        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};

        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment only
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives without a newly allocated volume are kept as they are
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};

        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;

        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';

        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($prefix, $smbios_str) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios_str);
        $smbios1->{uuid} = $uuid_str;

        return $prefix . print_smbios1($smbios1) . "\n";
    }

    # everything else is taken over unchanged
    return $line;
}
6820
# Deactivate every volume recorded in $virtdev_hash (entries without a 'volid'
# are skipped). Failures are printed to STDERR instead of being propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my $vollist = [
        grep { $_ } map { $_->{volid} } values %$virtdev_hash
    ];

    eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
    my $err = $@;
    print STDERR $err if $err;
};
6832
# Free every volume recorded in $virtdev_hash (entries without a 'volid' are
# skipped). Each volume is handled independently; a failure is reported on
# STDERR and does not stop the cleanup of the remaining volumes.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values %$virtdev_hash;

    for my $volid (@volids) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        if ($@) {
            print STDERR "unable to cleanup '$volid' - $@";
        }
    }
};
6845
# Parse the raw config taken from a backup and apply the caller's overrides.
#
# Parameters:
#   $filename        - config file name handed to parse_vm_config()
#   $backup_conf_raw - raw config text
#   $override_conf   - hash ref; each of its keys replaces the same key of the
#                      parsed config
#
# Returns the parsed config with the overrides applied.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $backup_conf = parse_vm_config($filename, $backup_conf_raw);

    $backup_conf->{$_} = $override_conf->{$_} for keys %$override_conf;

    return $backup_conf;
}
6856
# List the 'images' volumes known to the storage layer (optionally limited to
# $vmid) and index them by volume ID.
#
# Returns a hash ref { volid => item }, where every item has been extended with
# a 'path' entry. Items lacking a volid or a size are left out.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $items (values %$info) {
        for my $item (@$items) {
            my $volid = $item->{volid};
            next if !$volid || !$item->{size};

            $item->{path} = PVE::Storage::path($cfg, $volid);
            $volid_hash->{$volid} = $item;
        }
    }

    return $volid_hash;
}
6873
# Bring the disk entries of $conf in line with the volumes listed in
# $volid_hash (as produced by scan_volids()):
#   1. refresh the size of every configured, non-CD-ROM volume
#   2. drop 'unusedX' entries whose volume is already referenced
#   3. add volumes not referenced anywhere as new unused entries
#
# $conf is modified in place. Returns 1 if anything changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Volumes are tracked both by volume ID and by real path: multiple storages
    # may be defined with the same path (alias), so two different volume IDs can
    # point to the same file.
    my $referenced = {};
    my $referencedpath = {};

    # step 1: mark configured volumes as in-use and update their size info
    my $refresh_used = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $volume = $volid_hash->{$volid};
        my $path = $volume ? $volume->{path} : undef;

        $referenced->{$volid} = 1;
        $referencedpath->{$path} = 1 if $path;

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        if (defined($updated)) {
            $changes = 1;
            $conf->{$opt} = print_drive($updated);
            print "$prefix ($opt): $msg\n";
        }
    };
    PVE::QemuConfig->foreach_volume($conf, $refresh_used);

    # step 2: remove 'unusedX' entry if volume is used
    my $prune_unused = sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        my $in_use = $referenced->{$volid} || ($path && $referencedpath->{$path});
        if ($in_use) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        # remember it either way, so a later duplicate unused entry gets removed
        $referenced->{$volid} = 1;
        $referencedpath->{$path} = 1 if $path;
    };
    PVE::QemuConfig->foreach_unused_volume($conf, $prune_unused);

    # step 3: register everything that is still unreferenced
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/ || $referenced->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $referencedpath->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $referencedpath->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6947
# Rescan storage volumes and update the disk entries of one VM, or of all VMs
# returned by config_list() when $vmid is undefined.
#
# Parameters:
#   $vmid   - VM to update; undef means all VMs
#   $nolock - if true, the config is updated without taking the config lock
#   $dryrun - if true, changes are computed (and printed) but not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only consider volumes owned by this VM
        my $vm_volids = {
            map { ($_ => $volid_hash->{$_}) }
            grep {
                my $owner = $volid_hash->{$_}->{vmid};
                $owner && $owner == $vmid;
            } keys %$volid_hash
        };

        my $changes = update_disk_config($vmid, $conf, $vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    my @targets;
    if (defined($vmid)) {
        @targets = ($vmid);
    } else {
        my $vmlist = config_list();
        @targets = keys %$vmlist;
    }

    for my $target (@targets) {
        if ($nolock) {
            $updatefn->($target);
        } else {
            PVE::QemuConfig->lock_config($target, $updatefn, $target);
        }
    }
}
6991
# Restore a VM from a Proxmox Backup Server snapshot.
#
# Parameters:
#   $archive - volume ID of the backup; must parse as vtype 'backup' with
#              format 'pbs-vm'
#   $vmid    - ID of the VM to restore into
#   $user    - user the restore is performed for (used for the backup hint
#              parsing and the final permission check)
#   $options - hash ref; the keys read directly here are 'live', 'unique',
#              'override_conf' and 'pool' (it is also passed on to the backup
#              hint parser)
#
# Dies on any failure. Volumes allocated before a failure in the download
# phase are destroyed again. With $options->{live}, only the EFI disk and TPM
# state are restored up front; the remaining drives are streamed by
# pbs_live_restore() while the VM is running.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    # the namespace argument is the same for every pbs-restore invocation
    my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are literal: only '<devname>.img.fidx' archives are drives
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed-up config: rewrite it for the new VM
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes stay active for the VM start
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7208
# Start a VM whose disks are backed by a Proxmox Backup Server snapshot and
# stream the data into the target disks while the guest is running.
#
# Parameters:
#   $vmid           - ID of the VM to start
#   $conf           - VM config; must contain an entry for every restored disk
#   $storecfg       - storage configuration
#   $restored_disks - hash ref keyed by backup device name ('drive-<confname>')
#   $opts           - hash ref with 'repo', 'snapshot', 'keyfile' and
#                     optionally 'namespace'
#
# Dies with "live-restore failed" after stopping the VM if starting or
# streaming fails; the underlying error is emitted as a warning.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $live_restore_backing = {};
    for my $ds (keys %$restored_disks) {
        # never rely on a stale $1 if the device name has an unexpected shape
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected restored disk name '$ds'\n";

        my $pbs_conf = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_conf->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
        $pbs_conf->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";

        my $pbs_name = "drive-${confname}-pbs";
        $live_restore_backing->{$confname} = {
            name => $pbs_name,
            blockdev => print_pbs_blockdev($pbs_conf, $pbs_name),
        };
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'live-restore-backing' => $live_restore_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7286
# Inspired by the PBS live-restore: start the VM with its disks backed by
# already existing image files and stream their content into the target disks
# while the guest is running. Theoretically this can also be used to
# quick-start a full-clone VM if all source disks are available as files.
#
# $mapping provides the source image per config entry, for example:
#   { scsi0 => { format => <qcow2|raw|vmdk>, path => "/path/to/file" }, sata1 => ... }
#
# This is used when doing a `create` call with the `--live-import` parameter,
# where the disks get an `import-from=` property. The non-live part is
# therefore already handled in the `$create_disks()` call happening in the
# `create` api call.
#
# On failure the VM is stopped and "live-restore failed" is raised; on success
# the 'import' lock is removed from the VM config.
sub live_import_from_files {
    my ($mapping, $vmid, $conf, $restore_options) = @_;

    my $live_restore_backing = {};
    for my $dev (keys %$mapping) {
        # EFI disks and TPM state cannot be imported live
        if (!is_valid_drivename($dev) || $dev =~ /^(?:efidisk|tpmstate)/) {
            die "disk not support for live-restoring: '$dev'\n";
        }

        if (!exists($conf->{$dev})) {
            die "mapping contains disk '$dev' which does not exist in the config\n";
        }

        my $info = $mapping->{$dev};
        my ($format, $path) = @{$info}{qw(format path)};
        die "missing path for '$dev' mapping\n" if !$path;
        die "missing format for '$dev' mapping\n" if !$format;

        my $format_ok = grep { $format eq $_ } qw(raw qcow2 vmdk);
        die "invalid format '$format' for '$dev' mapping\n" if !$format_ok;

        # read-only node on top of the source file, used as backing image
        $live_restore_backing->{$dev} = {
            name => "drive-$dev-restore",
            blockdev => "driver=$format,node-name=drive-$dev-restore"
                . ",read-only=on"
                . ",file.driver=file,file.filename=$path"
        };
    }

    my $storecfg = PVE::Storage::config();
    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        my $start_params = {
            paused => 1,
            'live-restore-backing' => $live_restore_backing,
        };
        vm_start_nolock($storecfg, $vmid, $conf, $start_params, {});

        # prevent shutdowns from qmeventd when the VM powers off from the inside
        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from the source file to the target
        # disk for every volume; one block-stream job is started per disk
        my @devices = sort keys %$live_restore_backing;

        my $jobs = {};
        for my $ds (@devices) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "drive-$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices\n";

        for my $ds (@devices) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "drive-$ds-restore");
        }

        close($qmeventd_fd);
    };

    if (my $err = $@) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }

    PVE::QemuConfig->remove_lock($vmid, "import");
}
7373
# Restore a VM from a VMA backup archive.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore into
#   $user    - user the restore is performed for (used for the backup hint
#              parsing and the final permission check)
#   $opts    - hash ref; the keys read directly here are 'bwlimit', 'unique',
#              'override_conf' and 'pool' (it is also passed on to the backup
#              hint parser)
#   $comp    - compression of the archive, if any; selects the decompressor
#
# The archive is piped through an optional rate limiter and decompressor into
# `vma extract`. Once vma has printed the archive header, the target volumes
# are allocated and the device map is sent to vma through a FIFO.
#
# Dies on any failure; volumes allocated up to that point are destroyed again.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline; everything after it reads from STDIN
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        # build our own command list instead of appending to the returned array
        my $cmd = [ $info->{decompressor}->@*, $readfrom ];
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    if (!POSIX::mkfifo($mapfifo, 0600)) {
        my $mkfifo_err = $!;
        # a FIFO left over at this path is still usable; anything else would
        # make the open() below create a regular file that vma cannot use
        die "unable to create fifo '$mapfifo' - $mkfifo_err\n" if !-p $mapfifo;
    }
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # Allocate the target volumes, write the device map to the FIFO and build
    # the new raw config. Runs once the archive header has been received.
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # determine the write rate limit once per target storage
        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        # second pass over the backed-up config: rewrite it for the new VM
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    my $oldtimeout;

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation
                alarm($oldtimeout || 0);
                $oldtimeout = undef;
                $print_devmap->();
                print $fifofh "done\n";
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    # re-arm a timer that was pending before we replaced it (if not done yet)
    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7586
# Restore a VM from a legacy tar-based vzdump archive by piping it through
# `tar` with qmextract as the --to-command.
#
# Parameters:
#   $archive - path of the archive, or '-' to read it from STDIN
#   $vmid    - ID of the VM to restore into
#   $user    - user the restore is performed for (exported as VZDUMP_USER)
#   $opts    - hash ref; keys read here: 'override_conf' (must be empty),
#              'storage', 'pool', 'prealloc', 'info', 'unique'
#
# Dies on failure. The temporary extraction directory is removed on both the
# success and the error path.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    # hand the extraction context to qmextract via the environment
    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE: this only leaves the eval block, not the function
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        # the stat file lives inside $tmpdir, so clean up volumes first ...
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # ... and only then remove the temporary directory (was leaked before)
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7699
# Call $func once for every storage ID referenced by a non-CD-ROM volume of
# $conf. Storage IDs are de-duplicated and visited in sorted order; volumes
# whose ID cannot be parsed are ignored.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %seen;

    my $collect = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $seen{$sid} = $sid if $sid;
    };
    PVE::QemuConfig->foreach_volume($conf, $collect);

    for my $storeid (sort keys %seen) {
        $func->($storeid);
    }
}
7719
# storage types for which this module answers "snapshot with QEMU"
# (unless the storage has 'krbd' set, see below)
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether snapshots of $volid are to be done with QEMU.
#
# Returns 1 if so, and nothing (undef / empty list) otherwise. Never for the
# TPM state; otherwise yes for the storage types listed above when 'krbd' is
# not set, and for qcow2/qed images. Dies if the volume's storage is unknown.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    # scalar context on purpose, exactly as this has always been called
    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    if (!defined($scfg)) {
        die "could not find storage '$storage_name'\n";
    }

    my $qemu_handled_storage = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $qemu_handled_storage;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7742
# Check whether the guest agent answers a 'guest-ping' (3 second timeout).
#
# Returns 1 if it does, 0 otherwise. Unless $nowarn is set, a warning with the
# underlying error is emitted in the failure case.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    return 1 if !$@;

    warn "QEMU Guest Agent is not running - $@" if !$nowarn;
    return 0;
}
7753
# Convert the volumes of a VM into base volumes.
#
# Parameters:
#   $vmid - ID of the VM
#   $conf - VM config; updated in place and written after each converted volume
#   $disk - optional drive key; if set, only that drive is converted
#
# CD-ROM drives and volumes without the 'template' storage feature are skipped.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_to_base = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || ($disk && $ds ne $disk);

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        # point the drive at the new base volume and persist right away
        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_to_base);
}
7774
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the equivalent
# option string (file.driver=iscsi,...,driver=raw).
#
# Dies if $path does not have that form.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
        "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
}
7791
# Copy a disk image with 'qemu-img convert' into an already allocated volume.
#
# $src_volid: volume ID, or a path to an existing file / block device
# $dst_volid: volume ID of the target (must be a valid volume ID)
# $size: size in bytes, only used to render the progress output
# $snapname: optional snapshot of the source to copy from
# $is_zero_initialized: if set, a non-iSCSI target is addressed via "zeroinit:"
# $bwlimit: optional rate limit, handed to qemu-img as "<value>K"
#
# Dies with "copy failed: ..." if qemu-img fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        # source is a storage volume
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # source is a plain file or block device, guess the format from the extension
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);
    push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    # iSCSI paths are passed as image option strings instead of plain paths
    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn the "(NN.NN/100%)" progress lines of qemu-img into a readable status line
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }
    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7874
# Determine the image format of $volname on the storage described by $scfg.
#
# For storages with a 'path' setting and for storages of type 'esxi' the
# format is taken from the file extension (if it is one of the known QEMU
# formats); in every other case "raw" is returned.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    # FIXME: this entire function is kind of weird given that `parse_volname`
    # also already gives us a format?
    my $file_based = $scfg->{path} || $scfg->{type} eq 'esxi';
    return "raw" if !$file_based;

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7888
# Start a QMP 'drive-mirror' job for "drive-$drive" of VM $vmid and then hand
# over to qemu_drive_mirror_monitor().
#
# $dst_volid is either an "nbd:..." target or a volume ID. The job is recorded
# in $jobs (a new hash is used if none is passed). If $src_bitmap is defined,
# the mirror runs in 'incremental' mode with that bitmap; $bwlimit (KiB/s) is
# converted to bytes for the 'speed' option.
# If starting the job fails, all jobs in $jobs are cancelled and the sub dies.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = "zeroinit:$dst_path";
        $qemu_target = $dst_path if !$is_zero_initialized;
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    };
    $opts->{format} = $format if $format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7940
# Poll the block jobs listed in $jobs (keyed by QEMU device name) once per
# second, print their progress and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to look at (default "mirror"). Finished jobs are
# removed from $jobs. If $vmiddst differs from $vmid, the guest is frozen (via
# the guest agent if $qga is set and the agent responds) or suspended while
# the jobs are cancelled, so the disks are not switched over.
#
# On any error the remaining jobs are cancelled and the sub dies with
# "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # number of failed completion attempts, used as timeout counter
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                # a job we completed ourselves (or an 'auto' job) is done once QEMU no
                # longer lists it; any other vanished job must have been cancelled
                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only print the status until the job was seen as ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # named differently from $op (the job type) to avoid shadowing it
                        my $qmp_cmd;
                        if ($completion eq 'complete') {
                            $qmp_cmd = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $qmp_cmd = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $qmp_cmd, device => $job_id) };
                        if ($@ =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best-effort cleanup; report a failing cleanup, but die with the original error
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "block job ($op) error: $err";
    }
}
8082
# Cancel all block jobs listed in $jobs (keyed by QEMU device name) and wait
# until QEMU no longer reports them.
#
# Errors of the individual 'block-job-cancel' commands are ignored. Afterwards
# 'query-block-jobs' is polled once per second; every job that vanished is
# removed from $jobs. Returns once $jobs is empty.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my $still_running = {};
        for my $stat (@$stats) {
            $still_running->{$stat->{device}} = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running->{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if !%$jobs;

        sleep 1;
    }
}
8113
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Only relevant when drive-mirror is used and the storage changes. Dies if the
# source drive uses io_uring (explicitly or by storage default) while the
# target storage does not allow io_uring as default.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    return if !$use_drive_mirror;

    # Don't complain when not changing storage.
    # Assume if it works for the source, it'll work for the target too.
    return if $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    # an explicit aio setting on the drive wins over the storage default
    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    return if !$src_uses_io_uring;
    return if storage_allows_io_uring_default($dst_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n";
}
8140
# Clone a single drive and return the drive hash describing the new disk.
#
# $source: hash with the keys vmid, running, drivename, drive and snapname
# $dest: hash with the keys vmid, drivename, efisize, storage and format
# $full: create a full copy if true, otherwise a linked clone (vdisk_clone)
# $newvollist: array ref; every newly allocated volume ID is pushed onto it
# $jobs, $completion, $qga, $bwlimit: handed on to the mirror/convert helpers
#
# A full clone of a running VM's drive (without snapshot) uses drive-mirror,
# an EFI disk is copied with 'qemu-img dd', everything else with
# qemu_img_convert(). Cloud-init drives are only allocated, not copied.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_drivename eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_drivename eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            #
            # An omitted $completion is treated as 'complete', matching the default of
            # qemu_drive_mirror_monitor(); this also avoids comparing undef with 'eq'.
            my $has_pending_jobs = $jobs && scalar(keys %$jobs) > 0;
            if (($completion // 'complete') eq 'complete' && $has_pending_jobs) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
            goto no_data_clone;
        }

        my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
        if ($use_drive_mirror) {
            qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                $completion, $qga, $bwlimit);
        } else {
            if ($dst_drivename eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

no_data_clone:
    # size lookup is best effort, the size property is simply omitted on failure
    my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
8260
# Ask the running QEMU instance of VM $vmid for its version ('query-version')
# and return it as "<major>.<minor>".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $major = $res->{qemu}->{major};
    my $minor = $res->{qemu}->{minor};

    return "$major.$minor";
}
8266
# Decide whether the old (non-EFI) PXE bios files must be used for a machine type.
#
# Returns nothing if $machine_type is empty, otherwise the list
# ($use_old_bios_files, $machine_type). A trailing ".pxe" forces the old files
# and is stripped from the returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_machine_type = $1;
        return (1, $base_machine_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm versions < 2.4 use non-efi pxe files and have problems when we
    # load new efi bios files on migration. This hack is required to allow live
    # migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
8288
# Return the size in bytes (as reported by -s) of the OVMF vars file that
# get_ovmf_files() selects for this VM config.
#
# $efidisk is the parsed EFI disk; if it is not passed, it is parsed from
# $conf->{efidisk0} when that option is set.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my (undef, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $smm);

    return -s $ovmf_vars;
}
8298
# Set the 'size' property of the efidisk0 entry in $conf to the value returned
# by get_efivars_size(). Does nothing if the VM has no efidisk0.
sub update_efidisk_size {
    my ($conf) = @_;

    my $efidisk_prop = $conf->{efidisk0};
    return if !defined($efidisk_prop);

    my $disk = PVE::QemuServer::parse_drive('efidisk0', $efidisk_prop);
    $disk->{size} = get_efivars_size($conf);
    $conf->{efidisk0} = print_drive($disk);

    return;
}
8310
# Set the 'size' property of the tpmstate0 entry in $conf to
# PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE.
#
# Does nothing if the VM has no tpmstate0 (same guard as update_efidisk_size);
# without it, parse_drive would be handed undef and a tpmstate0 entry would be
# assigned to a config that has none.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
8318
# Allocate a new EFI disk on storage $storeid for VM $vmid and fill it with
# the OVMF vars file selected by get_ovmf_files($arch, $efidisk, $smm).
#
# Returns the list ($volid, $size/1024), where $size is what
# volume_size_info() reports for the new volume.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my (undef, $vars_file) = get_ovmf_files($arch, $efidisk, $smm);

    my $vars_bytes = -s $vars_file;
    my $vars_kb = PVE::Tools::convert_size($vars_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $vars_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    # copy the vars template into the freshly allocated volume
    qemu_img_convert($vars_file, $volid, $vars_bytes, undef, 0);

    my $volume_size = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $volume_size/1024);
}
8334
# Query the IO threads of VM $vmid ('query-iothreads') and return a hash ref
# mapping each iothread 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$res) {
        my $id = $entry->{id};
        $thread_id_of{$id} = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
8347
# Compute SCSI controller information for a drive.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
#   $maxdev - devices per controller: 7 if no scsihw is set or it starts with
#             "lsi", 1 for 'virtio-scsi-single', 256 otherwise
#   $controller - controller number, int($drive->{index} / $maxdev)
#   $controller_prefix - "virtioscsi" for 'virtio-scsi-single', else "scsihw"
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } elsif ($is_single) {
        $maxdev = 1;
    } else {
        $maxdev = 256;
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8368
# Pick the image format for a disk that is to be created on storage $storeid.
#
# If $format is not given, the format is derived from $src_volname via
# qemu_img_format(); without a source volume name the storage's default format
# is returned directly. A format the storage does not list as valid is
# replaced by the storage default.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;
    my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $defFormat if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    for my $valid (@$validFormats) {
        return $format if $valid eq $format;
    }

    return $defFormat;
}
8388
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage for the VM state. Order of preference:
#   1. the 'vmstatestorage' option from $conf, if set
#   2. a shared storage used by one of the VM's disks
#   3. a non-shared storage used by one of the VM's disks
#   4. the storage named 'local'
# Within 2. and 3. a storage with a 'path' setting (file based) is preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    my ($shared, $local);

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);

        # prefer file based storage
        if ($scfg->{shared}) {
            $shared = $sid if !$shared || $scfg->{path};
        } else {
            $local = $sid if !$local || $scfg->{path};
        }
    });

    return $shared // $local // 'local';
}
8412
# Generate a new UUID with the UUID module and return it in its string form.
sub generate_uuid {
    my $raw_uuid;
    UUID::generate($raw_uuid);

    my $uuid_text;
    UUID::unparse($raw_uuid, $uuid_text);

    return $uuid_text;
}
8419
# Return a "uuid=<new uuid>" string built from a freshly generated UUID.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();
    return "uuid=$uuid";
}
8423
# Send 'nbd-server-stop' to VM $vmid (25 second timeout).
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop', timeout => 25);
}
8429
# Create the (empty) reboot trigger file /run/qemu-server/<vmid>.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8436
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
#
# Returns the result of unlink(), i.e. a true value if a request file was
# removed. A missing file (ENOENT) is not an error; any other failure dies.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8448
# Build a boot order hash (device name => boot index) from the legacy boot
# string, where each character selects a device class ('c', 'd' and 'n' are
# evaluated here).
#
# The n-th character of the string gets the start index n*100; devices of that
# class are numbered upwards from there. For class 'c' only the drive named in
# $conf->{bootdisk} gets an entry. If $bootcfg has no 'legacy' value, the
# default from $boot_fmt is used.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $boot = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my @classes = split(//, $boot);
    my $next_index = {};
    for my $pos (0 .. $#classes) {
        $next_index->{$classes[$pos]} = ($pos + 1) * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            if ($next_index->{d}) {
                $bootorder->{$ds} = $next_index->{d};
                $next_index->{d} += 1;
            }
        } elsif ($next_index->{c}) {
            my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $bootorder->{$ds} = $next_index->{c} if $is_bootdisk;
            $next_index->{c} += 1;
        }
    });

    if ($next_index->{n}) {
        for my $net_id (0 .. $MAX_NETS - 1) {
            my $netname = "net$net_id";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index->{n};
            $next_index->{n} += 1;
        }
    }

    return $bootorder;
}
8488
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array ref with (if present, in this order) the first hard disk,
# the first CD-ROM and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices = ();

    # harddisk first (0), then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $disk if $disk;
    }

    # network: only the first configured NIC
    for my $net_id (0 .. $MAX_NETS - 1) {
        my $netname = "net$net_id";
        next if !$conf->{$netname};
        push @devices, $netname;
        last;
    }

    return \@devices;
}
8516
# Return the boot order of a VM config as hash ref (device name => boot index).
#
# Without a 'boot' option, or with a 'boot' option using the legacy format,
# the result comes from bootorder_from_legacy(). With 'order=' the listed
# devices are numbered consecutively; an empty hash is returned if neither
# applies.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        return bootorder_from_legacy($conf, $boot);
    }

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        $bootorder->{$_} = $index++ for PVE::Tools::split_list($boot->{order});
    }

    return $bootorder;
}
8536
# Connect to the qmeventd socket and send the vzdump handshake for VM $vmid.
#
# The connect is retried (with a 25ms pause) as long as it fails with EINTR or
# EAGAIN; any other error, or more than 4 failed attempts, is fatal.
# Returns the socket handle, which the caller has to close later.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";
    my $fh;
    my $count = 0;

    while (1) {
        $count++;
        $fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1);
        last if $fh;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $count retries\n" if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8564
8565 # bash completion helper
8566
# Bash completion helper: list the volume IDs of backup archives whose format
# is "vma.<compressor>".
#
# If the current value $cvalue already starts with "<storage>:", only that
# storage is listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
8590
# Shared implementation of the VMID completion helpers.
#
# $running undefined: return all VMIDs known to vmstatus()
# $running true: only non-template VMs with status 'running'
# $running false (but defined): only non-template VMs that are not running
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $res = [];

    for my $id (keys %$idlist) {
        if (defined($running)) {
            my $d = $idlist->{$id};
            next if $d->{template};

            my $is_running = $d->{status} eq 'running';
            next if $running && !$is_running;
            next if !$running && $is_running;
        }
        push @$res, $id;
    }

    return $res;
};
8610
# Completion helper: all VMIDs, regardless of state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8614
# Completion helper: VMIDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8618
# Completion helper: VMIDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8622
# Completion helper: IDs of all storages that pass storage_check_enabled()
# and have the 'images' content type configured.
sub complete_storage {

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8637
# Completion helper: IDs of all storages that pass storage_check_enabled()
# for the target node and have the 'images' content type configured.
#
# The target node is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    # plain element access instead of a one-element array slice
    my $targetnode = $all_args->[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my $res = [];
    foreach my $sid (keys %$ids) {
        next if !PVE::Storage::storage_check_enabled($cfg, $sid, $targetnode, 1);
        next if !$ids->{$sid}->{content}->{images};
        push @$res, $sid;
    }

    return $res;
}
8655
# Check whether VM $vmid is paused according to QMP 'query-status'.
#
# The states "paused" and "prelaunch" count as paused, "suspended" only if
# $include_suspended is set. If the config does not exist on this node or the
# query fails, a warning is printed and a false value is returned.
sub vm_is_paused {
    my ($vmid, $include_suspended) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    warn "$@\n" if $@;

    return $qmpstatus if !$qmpstatus;

    my $status = $qmpstatus->{status};
    return $status eq "paused"
        || $status eq "prelaunch"
        || ($include_suspended && $status eq "suspended");
}
8669
# Assert that the storage of volume $vol is configured for the volume's
# content type (first value returned by parse_volname).
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8682
# Add the MAC address of every netN device of the VM to the forwarding
# database of its bridge (tap interface "tap<vmid>i<N>").
#
# NICs without a MAC or without a bridge are skipped with a warning (for a
# missing MAC only if the 'learning' flag of the bridge port reads as false).
# With SDN available the SDN helper is used, otherwise the plain one, and only
# for Linux bridges.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";
        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1) or next;

        my $mac = $net->{macaddr};
        if (!$mac) {
            my $learning = file_read_firstline("/sys/class/net/$iface/brport/learning");
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !$learning;
            next;
        }

        my $bridge = $net->{bridge};
        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge);
            next;
        }

        # avoid fdb management with OVS for now
        PVE::Network::add_bridge_fdb($iface, $mac) if -d "/sys/class/net/$bridge/bridge";
    }
}
8711
# Remove the MAC address of every netN device of the VM from the forwarding
# database of its bridge (tap interface "tap<vmid>i<N>").
#
# NICs that cannot be parsed, have no MAC or are not attached to a bridge are
# skipped. The bridge check mirrors add_nets_bridge_fdb(), which never adds an
# entry for such NICs, and avoids interpolating an undefined bridge name into
# the sysfs path below.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge);
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac);
        }
    }
}
8730
# For every netN device in $conf, call
# PVE::Network::SDN::Vnets::add_next_free_cidr() with the NIC's bridge and MAC,
# the VM name and the VMID. Errors are only warned about.
# Does nothing if SDN is not available.
sub create_ifaces_ipams_ips {
    my ($conf, $vmid) = @_;

    return if !$have_sdn;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;

        my $net = PVE::QemuServer::parse_net($conf->{$opt});
        eval { PVE::Network::SDN::Vnets::add_next_free_cidr($net->{bridge}, $conf->{name}, $net->{macaddr}, $vmid, undef, 1) };
        warn $@ if $@;
    }
}
8745
# For every netN device in $conf, call
# PVE::Network::SDN::Vnets::del_ips_from_mac() with the NIC's bridge and MAC
# and the VM name. Errors are only warned about.
# Does nothing if SDN is not available.
sub delete_ifaces_ipams_ips {
    my ($conf, $vmid) = @_;

    return if !$have_sdn;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;

        my $net = PVE::QemuServer::parse_net($conf->{$opt});
        eval { PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name}) };
        warn $@ if $@;
    }
}
8759
8760 1;