]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
fix #4501: TCP migration: start vm: move port reservation and usage closer together
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use List::Util qw(first);
23 use MIME::Base64;
24 use POSIX;
25 use Storable qw(dclone);
26 use Time::HiRes qw(gettimeofday usleep);
27 use URI::Escape;
28 use UUID;
29
30 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31 use PVE::CGroup;
32 use PVE::CpuSet;
33 use PVE::DataCenterConfig;
34 use PVE::Exception qw(raise raise_param_exc);
35 use PVE::Format qw(render_duration render_bytes);
36 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37 use PVE::Mapping::PCI;
38 use PVE::Mapping::USB;
39 use PVE::INotify;
40 use PVE::JSONSchema qw(get_standard_option parse_property_string);
41 use PVE::ProcFSTools;
42 use PVE::PBSClient;
43 use PVE::RESTEnvironment qw(log_warn);
44 use PVE::RPCEnvironment;
45 use PVE::Storage;
46 use PVE::SysFSTools;
47 use PVE::Systemd;
48 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
49
50 use PVE::QMPClient;
51 use PVE::QemuConfig;
52 use PVE::QemuServer::Helpers qw(config_aware_timeout min_version windows_version);
53 use PVE::QemuServer::Cloudinit;
54 use PVE::QemuServer::CGroup;
55 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
56 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
57 use PVE::QemuServer::Machine;
58 use PVE::QemuServer::Memory qw(get_current_memory);
59 use PVE::QemuServer::Monitor qw(mon_cmd);
60 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
61 use PVE::QemuServer::QMPHelpers qw(qemu_deviceadd qemu_devicedel qemu_objectadd qemu_objectdel);
62 use PVE::QemuServer::USB;
63
# SDN support is optional: $have_sdn becomes 1 only if both SDN modules load.
# When either require dies, the eval yields undef and $have_sdn stays undefined.
my $have_sdn = eval {
    require PVE::Network::SDN::Zones;
    require PVE::Network::SDN::Vnets;
    1;
};
70
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Firmware image table: architecture name => firmware flavour => [ CODE image, VARS image ].
my $OVMF = do {
    # Expand bare image file names into "$EDK2_FW_BASE/<name>" paths.
    my $images = sub { return [ map { "$EDK2_FW_BASE/$_" } @_ ] };

    +{
        x86_64 => {
            '4m-no-smm' => $images->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.fd'),
            '4m-no-smm-ms' => $images->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.ms.fd'),
            '4m' => $images->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
            '4m-ms' => $images->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
            # FIXME: These are legacy 2MB-sized images that modern OVMF doesn't support building
            # anymore. How can we deprecate this sanely without breaking existing instances, or
            # using older backups and snapshots?
            default => $images->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
        },
        aarch64 => {
            default => $images->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
        },
    };
};
105
# Host CPU information, read once when this module is loaded.
my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();

# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.

# Hook the VM config parser/writer into the cluster file system for '/qemu-server/'.
cfs_register_file('/qemu-server/', \&parse_vm_config, \&write_vm_config);
118
# Standard option: location some commands save state to / restore state from.
PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
    type => 'string',
    maxLength => 128,
    optional => 1,
    description => "Some command save/restore state from this location.",
});

# Standard option: QEMU machine type string, validated against the pattern below.
PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
    type => 'string',
    pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
    maxLength => 40,
    optional => 1,
    description => "Specifies the QEMU machine type.",
});
133
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;

# Return the local node name; PVE::INotify is only asked while the cache is still undefined.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
140
# Property-string schema for a virtual watchdog device; registered below as the
# 'pve-qm-watchdog' format.
my $watchdog_fmt = {
    model => {
        description => "Watchdog type to emulate.",
        type => 'string',
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    action => {
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
        type => 'string',
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
158
# Property-string schema describing the QEMU Guest Agent settings.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        default => 0,
        default_key => 1,
        description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        default => 0,
        optional => 1,
        description => "Run fstrim after moving a disk or migrating the VM.",
    },
    'freeze-fs-on-backup' => {
        type => 'boolean',
        default => 1,
        optional => 1,
        description => "Freeze/thaw guest filesystems on backup for consistency.",
    },
    type => {
        type => 'string',
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
        description => "Select the agent type",
    },
};
186
# Property-string schema describing the emulated display (VGA) hardware.
my $vga_fmt = {
    type => {
        type => 'string',
        enum => [
            'cirrus', 'qxl', 'qxl2', 'qxl3', 'qxl4', 'none',
            'serial0', 'serial1', 'serial2', 'serial3',
            'std', 'virtio', 'virtio-gl', 'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
        description => "Select the VGA type.",
    },
    memory => {
        type => 'integer',
        minimum => 4,
        maximum => 512,
        optional => 1,
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
    },
    clipboard => {
        type => 'string',
        enum => ['vnc'],
        optional => 1,
        description => 'Enable a specific clipboard. If not set, depending on the display type the'
            .' SPICE one will be added. Migration with VNC clipboard is not yet supported!',
    },
};
211
# Property-string schema for the inter-VM shared memory (ivshmem) option.
my $ivshmem_fmt = {
    size => {
        description => "The size of the file in MB.",
        type => 'integer',
        minimum => 1,
    },
    name => {
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        type => 'string',
        pattern => '[a-zA-Z0-9\-]+',
        format_description => 'string',
        optional => 1,
    },
};
226
# Property-string schema for an emulated audio device and its backend driver.
my $audio_fmt = {
    device => {
        description => "Configure an audio device.",
        type => 'string',
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    driver => {
        description => "Driver backend for the audio device.",
        type => 'string',
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
    },
};
241
# Property-string schema for optional SPICE features.
my $spice_enhancements_fmt = {
    foldersharing => {
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
        type => 'boolean',
        default => '0',
        optional => 1,
    },
    videostreaming => {
        description => "Enable video streaming. Uses compression for detected video streams.",
        type => 'string',
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
257
# Property-string schema for the VirtIO random number generator device.
my $rng_fmt = {
    source => {
        description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
            ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
            ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
            ." still seeded from real entropy, and the bytes provided will most likely be mixed"
            ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
            ." a hardware RNG from the host.",
        type => 'string',
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        default_key => 1,
    },
    max_bytes => {
        description => "Maximum bytes of entropy allowed to get injected into the guest every"
            ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
            ." `0` to disable limiting (potentially dangerous!).",
        type => 'integer',
        optional => 1,

        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
    },
    period => {
        description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
            ." the guest to retrieve another 'max_bytes' of entropy.",
        type => 'integer',
        default => 1000,
        optional => 1,
    },
};
290
# Property-string schema for guest meta-information (creation time and QEMU version).
my $meta_info_fmt = {
    ctime => {
        description => "The guest creation timestamp as UNIX epoch time",
        type => 'integer',
        minimum => 0,
        optional => 1,
    },
    'creation-qemu' => {
        description => "The QEMU (machine) version from the time this VM was created.",
        type => 'string',
        pattern => '\d+(\.\d+)+',
        optional => 1,
    },
};
305
# Schema of the VM configuration options: option name => JSON schema property definition.
# Entries are also registered as 'pve-qm-<name>' standard options further down in this file,
# and per-index options (numaN, netN, ipconfigN, ...) are added to this hash programmatically.
#
# Only user-facing description texts were corrected here (typos, a missing space); all keys,
# types, limits and defaults are unchanged.
my $confdesc = {
    onboot => {
        optional => 1,
        type => 'boolean',
        description => "Specifies whether a VM will be started during system bootup.",
        default => 0,
    },
    autostart => {
        optional => 1,
        type => 'boolean',
        description => "Automatic restart after crash (currently ignored).",
        default => 0,
    },
    hotplug => {
        optional => 1,
        type => 'string', format => 'pve-hotplug-features',
        description => "Selectively enable hotplug features. This is a comma separated list of"
            ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
            ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
            ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
            ." windows > 7.",
        default => 'network,disk,usb',
    },
    reboot => {
        optional => 1,
        type => 'boolean',
        description => "Allow reboot. If set to '0' the VM exits on reboot.",
        default => 1,
    },
    lock => {
        optional => 1,
        type => 'string',
        description => "Lock/unlock the VM.",
        enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
    },
    cpulimit => {
        optional => 1,
        type => 'number',
        description => "Limit of CPU usage.",
        verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
            ." total of '2' CPU time. Value '0' indicates no CPU limit.",
        minimum => 0,
        maximum => 128,
        default => 0,
    },
    cpuunits => {
        optional => 1,
        type => 'integer',
        description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
        verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
            ." The larger the number is, the more CPU time this VM gets. Number is relative to"
            ." weights of all the other running VMs.",
        minimum => 1,
        maximum => 262144,
        default => 'cgroup v1: 1024, cgroup v2: 100',
    },
    memory => {
        optional => 1,
        type => 'string',
        description => "Memory properties.",
        format => $PVE::QemuServer::Memory::memory_fmt
    },
    balloon => {
        optional => 1,
        type => 'integer',
        description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
        minimum => 0,
    },
    shares => {
        optional => 1,
        type => 'integer',
        description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
            ." more memory this VM gets. Number is relative to weights of all other running VMs."
            ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
        minimum => 0,
        maximum => 50000,
        default => 1000,
    },
    keyboard => {
        optional => 1,
        type => 'string',
        description => "Keyboard layout for VNC server. This option is generally not required and"
            ." is often better handled from within the guest OS.",
        enum => PVE::Tools::kvmkeymaplist(),
        default => undef,
    },
    name => {
        optional => 1,
        type => 'string', format => 'dns-name',
        description => "Set a name for the VM. Only used on the configuration web interface.",
    },
    scsihw => {
        optional => 1,
        type => 'string',
        description => "SCSI controller model",
        enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
        default => 'lsi',
    },
    description => {
        optional => 1,
        type => 'string',
        description => "Description for the VM. Shown in the web-interface VM's summary."
            ." This is saved as comment inside the configuration file.",
        maxLength => 1024 * 8,
    },
    ostype => {
        optional => 1,
        type => 'string',
        enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
        description => "Specify guest operating system.",
        verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
    },
    boot => {
        optional => 1,
        type => 'string', format => 'pve-qm-boot',
        description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
            ." key or 'legacy=' is deprecated.",
    },
    bootdisk => {
        optional => 1,
        type => 'string', format => 'pve-qm-bootdisk',
        description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
        pattern => '(ide|sata|scsi|virtio)\d+',
    },
    smp => {
        optional => 1,
        type => 'integer',
        description => "The number of CPUs. Please use option -sockets instead.",
        minimum => 1,
        default => 1,
    },
    sockets => {
        optional => 1,
        type => 'integer',
        description => "The number of CPU sockets.",
        minimum => 1,
        default => 1,
    },
    cores => {
        optional => 1,
        type => 'integer',
        description => "The number of cores per socket.",
        minimum => 1,
        default => 1,
    },
    numa => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable NUMA.",
        default => 0,
    },
    hugepages => {
        optional => 1,
        type => 'string',
        description => "Enable/disable hugepages memory.",
        enum => [qw(any 2 1024)],
    },
    keephugepages => {
        optional => 1,
        type => 'boolean',
        default => 0,
        description => "Use together with hugepages. If enabled, hugepages will not be deleted"
            ." after VM shutdown and can be used for subsequent starts.",
    },
    vcpus => {
        optional => 1,
        type => 'integer',
        description => "Number of hotplugged vcpus.",
        minimum => 1,
        default => 0,
    },
    acpi => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable ACPI.",
        default => 1,
    },
    agent => {
        optional => 1,
        description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
        type => 'string',
        format => $agent_fmt,
    },
    kvm => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable KVM hardware virtualization.",
        default => 1,
    },
    tdf => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable time drift fix.",
        default => 0,
    },
    localtime => {
        optional => 1,
        type => 'boolean',
        description => "Set the real time clock (RTC) to local time. This is enabled by default if"
            ." the `ostype` indicates a Microsoft Windows OS.",
    },
    freeze => {
        optional => 1,
        type => 'boolean',
        description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
    },
    vga => {
        optional => 1,
        type => 'string', format => $vga_fmt,
        description => "Configure the VGA hardware.",
        verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
            ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
            ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
            ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
            ." display server. For win* OS you can select how many independent displays you want,"
            ." Linux guests can add displays themselves.\nYou can also run without any graphic card,"
            ." using a serial device as terminal.",
    },
    watchdog => {
        optional => 1,
        type => 'string', format => 'pve-qm-watchdog',
        description => "Create a virtual hardware watchdog device.",
        verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
            ." action), the watchdog must be periodically polled by an agent inside the guest or"
            ." else the watchdog will reset the guest (or execute the respective action specified)",
    },
    startdate => {
        optional => 1,
        type => 'string',
        typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
        # note the leading space of the second part: without it the text read "are:'now'"
        description => "Set the initial date of the real time clock. Valid format for date are:"
            ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
        pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
        default => 'now',
    },
    startup => get_standard_option('pve-startup-order'),
    template => {
        optional => 1,
        type => 'boolean',
        description => "Enable/disable Template.",
        default => 0,
    },
    args => {
        optional => 1,
        type => 'string',
        description => "Arbitrary arguments passed to kvm.",
        verbose_description => <<EODESCR,
Arbitrary arguments passed to kvm, for example:

args: -no-reboot -smbios 'type=0,vendor=FOO'

NOTE: this option is for experts only.
EODESCR
    },
    tablet => {
        optional => 1,
        type => 'boolean',
        default => 1,
        description => "Enable/disable the USB tablet device.",
        verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
            ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
            ." normal VNC clients. If you're running lots of console-only guests on one host, you"
            ." may consider disabling this to save some context switches. This is turned off by"
            ." default if you use spice (`qm set <vmid> --vga qxl`).",
    },
    migrate_speed => {
        optional => 1,
        type => 'integer',
        description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
        minimum => 0,
        default => 0,
    },
    migrate_downtime => {
        optional => 1,
        type => 'number',
        description => "Set maximum tolerated downtime (in seconds) for migrations.",
        minimum => 0,
        default => 0.1,
    },
    cdrom => {
        optional => 1,
        type => 'string', format => 'pve-qm-ide',
        typetext => '<volume>',
        description => "This is an alias for option -ide2",
    },
    cpu => {
        optional => 1,
        description => "Emulated CPU type.",
        type => 'string',
        format => 'pve-vm-cpu-conf',
    },
    parent => get_standard_option('pve-snapshot-name', {
        optional => 1,
        description => "Parent snapshot name. This is used internally, and should not be modified.",
    }),
    snaptime => {
        optional => 1,
        description => "Timestamp for snapshots.",
        type => 'integer',
        minimum => 0,
    },
    vmstate => {
        optional => 1,
        type => 'string', format => 'pve-volume-id',
        description => "Reference to a volume which stores the VM state. This is used internally"
            ." for snapshots.",
    },
    vmstatestorage => get_standard_option('pve-storage-id', {
        description => "Default storage for VM state volumes/files.",
        optional => 1,
    }),
    runningmachine => get_standard_option('pve-qemu-machine', {
        description => "Specifies the QEMU machine type of the running vm. This is used internally"
            ." for snapshots.",
    }),
    runningcpu => {
        description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
            ." internally for snapshots.",
        optional => 1,
        type => 'string',
        pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
        format_description => 'QEMU -cpu parameter'
    },
    machine => get_standard_option('pve-qemu-machine'),
    arch => {
        description => "Virtual processor architecture. Defaults to the host.",
        optional => 1,
        type => 'string',
        enum => [qw(x86_64 aarch64)],
    },
    smbios1 => {
        description => "Specify SMBIOS type 1 fields.",
        type => 'string', format => 'pve-qm-smbios1',
        maxLength => 512,
        optional => 1,
    },
    protection => {
        optional => 1,
        type => 'boolean',
        description => "Sets the protection flag of the VM. This will disable the remove VM and"
            ." remove disk operations.",
        default => 0,
    },
    bios => {
        optional => 1,
        type => 'string',
        enum => [ qw(seabios ovmf) ],
        description => "Select BIOS implementation.",
        default => 'seabios',
    },
    vmgenid => {
        type => 'string',
        pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
        format_description => 'UUID',
        description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
            ." to disable explicitly.",
        verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
            ." value identifier to the guest OS. This allows to notify the guest operating system"
            ." when the virtual machine is executed with a different configuration (e.g. snapshot"
            ." execution or creation from a template). The guest operating system notices the"
            ." change, and is then able to react as appropriate by marking its copies of"
            ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
            ."Note that auto-creation only works when done through API/CLI create or update methods"
            .", but not when manually editing the config file.",
        default => "1 (autogenerated)",
        optional => 1,
    },
    hookscript => {
        type => 'string',
        format => 'pve-volume-id',
        optional => 1,
        description => "Script that will be executed during various steps in the vms lifetime.",
    },
    ivshmem => {
        type => 'string',
        format => $ivshmem_fmt,
        description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
            ." the host.",
        optional => 1,
    },
    audio0 => {
        type => 'string',
        format => $audio_fmt,
        description => "Configure an audio device, useful in combination with QXL/Spice.",
        optional => 1
    },
    spice_enhancements => {
        type => 'string',
        format => $spice_enhancements_fmt,
        description => "Configure additional enhancements for SPICE.",
        optional => 1
    },
    tags => {
        type => 'string', format => 'pve-tag-list',
        description => 'Tags of the VM. This is only meta information.',
        optional => 1,
    },
    rng0 => {
        type => 'string',
        format => $rng_fmt,
        description => "Configure a VirtIO-based Random Number Generator.",
        optional => 1,
    },
    meta => {
        type => 'string',
        format => $meta_info_fmt,
        description => "Some (read-only) meta-information about this guest.",
        optional => 1,
    },
    affinity => {
        type => 'string', format => 'pve-cpuset',
        description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
        optional => 1,
    },
};
739
# Property-string schema for the 'cicustom' option: volumes holding custom cloud-init data
# files, one optional entry per cloud-init data kind.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both parts must be single-quoted and joined with '.'. The previous mismatched quotes
        # produced one literal with an embedded '"', a line break and '." ' in the description.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
# Make the cicustom property-string schema available under the format name 'pve-qm-cicustom'.
PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
772
773 # any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
# Schema of the cloud-init related VM options (option name => JSON schema property).
# Only the 'citype' description changed: its parenthesis around `ostype` was never closed.
my $confdesc_cloudinit = {
    citype => {
        optional => 1,
        type => 'string',
        description => 'Specifies the cloud-init configuration format. The default depends on the'
            .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
            .' and `configdrive2` for windows.',
        enum => ['configdrive2', 'nocloud', 'opennebula'],
    },
    ciuser => {
        optional => 1,
        type => 'string',
        description => "cloud-init: User name to change ssh keys and password for instead of the"
            ." image's configured default user.",
    },
    cipassword => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Password to assign the user. Using this is generally not'
            .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
            .' support hashed passwords.',
    },
    ciupgrade => {
        optional => 1,
        type => 'boolean',
        description => 'cloud-init: do an automatic package upgrade after the first boot.',
        default => 1,
    },
    cicustom => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Specify custom files to replace the automatically generated'
            .' ones at start.',
        format => 'pve-qm-cicustom',
    },
    searchdomain => {
        optional => 1,
        type => 'string',
        description => 'cloud-init: Sets DNS search domains for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    nameserver => {
        optional => 1,
        type => 'string', format => 'address-list',
        description => 'cloud-init: Sets DNS server IP address for a container. Create will'
            .' automatically use the setting from the host if neither searchdomain nor nameserver'
            .' are set.',
    },
    sshkeys => {
        optional => 1,
        type => 'string',
        format => 'urlencoded',
        description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
    },
};
830
831 # what about other qemu settings ?
832 #cpu => 'string',
833 #machine => 'string',
834 #fda => 'file',
835 #fdb => 'file',
836 #mtdblock => 'file',
837 #sd => 'file',
838 #pflash => 'file',
839 #snapshot => 'bool',
840 #bootp => 'file',
841 ##tftp => 'dir',
842 ##smb => 'dir',
843 #kernel => 'file',
844 #append => 'string',
845 #initrd => 'file',
846 ##soundhw => 'string',
847
# Expose every option defined in $confdesc so far as a 'pve-qm-<name>' standard option.
for my $opt_name (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt_name", $confdesc->{$opt_name});
}
851
# Upper bounds for the per-index device options.
my $MAX_NETS = 32;
my $MAX_SERIAL_PORTS = 4;
my $MAX_PARALLEL_PORTS = 3;

# Add one 'numaN' option per possible NUMA node, all sharing the same schema.
# NOTE(review): assumes $PVE::QemuServer::Memory::MAX_NUMA is an integer - confirm.
foreach my $numa_idx (0 .. $PVE::QemuServer::Memory::MAX_NUMA - 1) {
    $confdesc->{"numa$numa_idx"} = $PVE::QemuServer::Memory::numadesc;
}
859
# Names of the selectable network card models.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];

# The same names as one space-separated string, in sorted order.
my @sorted_nic_models = sort @$nic_model_list;
my $nic_model_list_txt = join(' ', @sorted_nic_models);
877
# Long description text for the 'bridge' property of $net_fmt below.
my $net_fmt_bridge_descr = <<__EOD__;
Bridge to attach the network device to. The Proxmox VE standard bridge
is called 'vmbr0'.

If you do not specify a bridge, we create a kvm user (NATed) network
device, which provides DHCP and DNS services. The following addresses
are used:

10.0.2.2 Gateway
10.0.2.3 DNS Server
10.0.2.4 SMB Server

The DHCP server assign addresses to the guest starting from 10.0.2.15.
__EOD__

# Property-string schema for a network device.
# Only the 'macaddr' description changed ("withing" -> "within").
my $net_fmt = {
    macaddr => get_standard_option('mac-addr', {
        description => "MAC address. That address must be unique within your network. This is"
            ." automatically generated if not specified.",
    }),
    model => {
        type => 'string',
        description => "Network Card Model. The 'virtio' model provides the best performance with"
            ." very low CPU overhead. If your guest does not support this driver, it is usually"
            ." best to use 'e1000'.",
        enum => $nic_model_list,
        default_key => 1,
    },
    # one alias entry per NIC model name: keyAlias 'model', alias 'macaddr'
    (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
    bridge => get_standard_option('pve-bridge-id', {
        description => $net_fmt_bridge_descr,
        optional => 1,
    }),
    queues => {
        type => 'integer',
        minimum => 0, maximum => 64,
        description => 'Number of packet queues to be used on the device.',
        optional => 1,
    },
    rate => {
        type => 'number',
        minimum => 0,
        description => "Rate limit in mbps (megabytes per second) as floating point number.",
        optional => 1,
    },
    tag => {
        type => 'integer',
        minimum => 1, maximum => 4094,
        description => 'VLAN tag to apply to packets on this interface.',
        optional => 1,
    },
    trunks => {
        type => 'string',
        pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
        description => 'VLAN trunks to pass through this interface.',
        format_description => 'vlanid[;vlanid...]',
        optional => 1,
    },
    firewall => {
        type => 'boolean',
        description => 'Whether this interface should be protected by the firewall.',
        optional => 1,
    },
    link_down => {
        type => 'boolean',
        description => 'Whether this interface should be disconnected (like pulling the plug).',
        optional => 1,
    },
    mtu => {
        type => 'integer',
        minimum => 1, maximum => 65520,
        description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
        optional => 1,
    },
};
953
# Schema of a single 'netN' option: a property string in $net_fmt format.
my $netdesc = {
    description => "Specify network devices.",
    type => 'string',
    format => $net_fmt,
    optional => 1,
};

PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
961
# Property-string schema for cloud-init IP configuration; each gateway property requires
# the address property of the same IP version. Registered below as 'pve-qm-ipconfig'.
my $ipconfig_fmt = {
    ip => {
        description => 'IPv4 address in CIDR format.',
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        default => 'dhcp',
        optional => 1,
    },
    gw => {
        description => 'Default gateway for IPv4 traffic.',
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        requires => 'ip',
        optional => 1,
    },
    ip6 => {
        description => 'IPv6 address in CIDR format.',
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        default => 'dhcp',
        optional => 1,
    },
    gw6 => {
        description => 'Default gateway for IPv6 traffic.',
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        requires => 'ip6',
        optional => 1,
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);
# Schema descriptor used for every cloud-init 'ipconfig<N>' configuration key:
# an optional property string in the 'pve-qm-ipconfig' format.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# Register the ipconfig descriptor itself. This used to pass $netdesc, which
# published the network device schema under the 'pve-qm-ipconfig' name.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1015
# Add one 'net<N>' key to the VM config schema and one 'ipconfig<N>' key to the
# cloud-init schema for each of the $MAX_NETS possible interfaces.
for my $i (0 .. $MAX_NETS - 1) {
    $confdesc->{"net$i"} = $netdesc;
    $confdesc_cloudinit->{"ipconfig$i"} = $ipconfigdesc;
}

# Copy every cloud-init option into the main VM config schema.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;

PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);
# Format verifier for 'pve-cpuset'.
#
# Passes $set_text to PVE::CpuSet::parse_cpuset() and returns the short-string
# form of a PVE::CpuSet built from the parsed members. If parsing throws,
# returns nothing when $noerr is set and dies with a generic message otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };
    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($members);
    return $cpuset->short_string();
}
1038
1039 PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier for 'pve-volume-id-or-qm-path'.
#
# The placeholders 'none' and 'cdrom' are accepted unchanged; any other value
# is delegated to verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $placeholder (qw(none cdrom)) {
        return $volid if $volid eq $placeholder;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1047
1048 PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
# Format verifier for 'pve-volume-id-or-absolute-path'.
#
# Values starting with '/' are returned as they are. Everything else is checked
# against the 'pve-volume-id' format and the checked value is returned. If that
# check throws, returns nothing when $noerr is set and re-throws otherwise.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    return $volid if $volid =~ m|^/|;

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }

    return $checked;
}
1061
# Schema descriptor used for every 'serial<N>' configuration key: either a
# device path below /dev/ or the literal 'socket'.
my $serialdesc = {
    type => 'string',
    pattern => '(/dev/.+|socket)',
    optional => 1,
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1078
# Schema descriptor used for every 'parallel<N>' configuration key: a host
# parallel port (/dev/parport<N>) or USB printer (/dev/usb/lp<N>) device path.
my $paralleldesc = {
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    optional => 1,
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1093
# Add the indexed device keys ('parallel<N>', 'serial<N>', 'hostpci<N>',
# 'usb<N>') and all drive keys to the VM config schema.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

$confdesc->{"hostpci$_"} = $PVE::QemuServer::PCI::hostpcidesc
    for 0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1;

{
    # the drive module already provides one descriptor per drive key
    my $drivedesc_hash = $PVE::QemuServer::Drive::drivedesc_hash;
    $confdesc->{$_} = $drivedesc_hash->{$_} for keys %$drivedesc_hash;
}

$confdesc->{"usb$_"} = $PVE::QemuServer::USB::usbdesc
    for 0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1;
1113
# Property-string schema for the 'boot' option. 'legacy' is the default key
# (a bare value is parsed as legacy) and is deprecated in favour of 'order'.
my $boot_fmt = {
    legacy => {
        type => 'string',
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        default_key => 1,
        optional => 1,

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        optional => 1,
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1148
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);

# Format verifier for a single boot device name.
#
# Accepts any valid drive name except those starting with 'efidisk' or
# 'tpmstate', and any 'net<N>', 'usb<N>' or 'hostpci<N>' key that exists in
# the config schema. Returns the name on success; otherwise returns nothing
# when $noerr is set and dies when it is not.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_special = ($dev =~ m/^efidisk/) || ($dev =~ m/^tpmstate/);
    if (PVE::QemuServer::Drive::is_valid_drivename($dev) && !$is_special) {
        return $dev;
    }

    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1170
# Serialize a list of boot devices into a 'boot' property string of the form
# 'order=dev1;dev2;...'. An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (scalar(@$devs) == 0) {
        return "";
    }

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1177
# Cached result of the KVM_GET_API_VERSION ioctl; a false value means "not queried yet".
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. A true result is cached for
# later calls. Returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1191
# Per-binary caches: the parsed QEMU version string and the mtime of the binary
# at the time the version was read.
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version printed by `$binary --version`, or 'unknown' if no output
# line matches the expected pattern. Without an argument, the binary returned
# by get_command_for_arch() for the host architecture is used. The cached
# value is reused as long as the mtime of the binary has not changed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default
    my $bin_stat = stat($binary);

    my $cached_version = $kvm_user_version->{$binary};
    my $cached_mtime = $kvm_mtime->{$binary} // -1;
    if ($cached_version && $cached_mtime == $bin_stat->mtime) {
        return $cached_version;
    }

    # start from 'unknown' so that a failed or unparsable run still yields a value
    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $bin_stat->mtime;

    my $parse_line = sub {
        my ($line) = @_;
        return if $line !~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/;
        $kvm_user_version->{$binary} = $2;
    };

    eval { run_command([$binary, '--version'], outfunc => $parse_line); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Wrapper around PVE::QemuServer::Machine::extract_version() that falls back to
# kvm_user_version() when no version is passed in.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1226
# True if /dev/vhost-net exists as a character device.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1230
# True if $key is a known key of the VM config schema ($confdesc).
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};
    return defined($desc);
}
1235
# Cached result of get_cdrom_path(); undef until the first lookup.
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1 and /dev/cdrom2 that is a symlink.
# If none is, a warning is logged and the empty string is returned. The result
# (including the empty string) is cached for later calls.
sub get_cdrom_path {
    if (!defined($cdrom_path)) {
        my @candidates = map { "/dev/cdrom$_" } ('', '1', '2');
        $cdrom_path = first { -l $_ } @candidates;

        if (!defined($cdrom_path)) {
            log_warn("no physical CD-ROM available, ignoring");
            $cdrom_path = '';
        }
    }

    return $cdrom_path;
}
1250
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'        -> result of get_cdrom_path()
#   'none'         -> empty string
#   absolute path  -> returned unchanged
#   anything else  -> treated as volume ID and resolved via PVE::Storage::path()
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1264
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', paths below /dev/ and values that already look like
# 'storage:volume' are returned unchanged. Any other value containing a '/'
# yields nothing. A plain file name is mapped to 'local:iso/<file>' when $media
# is 'cdrom' and to 'local:<vmid>/<file>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    my $needs_no_conversion = $file eq 'none'
        || $file eq 'cdrom'
        || $file =~ m|^/dev/.+|
        || $file =~ m/^([^:]+):(.+)$/;
    return $file if $needs_no_conversion;

    return if $file =~ m|/|;

    my $is_cdrom = $media && $media eq 'cdrom';
    my $volid = $is_cdrom ? "local:iso/$file" : "local:$vmid/$file";

    return $volid;
}
1283
# Check that the storage content type $vtype fits the drive's $media setting
# ('disk' expects 'images', 'cdrom' expects 'iso'). Nothing is checked when
# $media is not set. A mismatch raises a parameter exception for option $opt;
# any other media value is an internal error.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my %expected_vtype = (
        disk => 'images',
        cdrom => 'iso',
    );
    my $etype = $expected_vtype{$media} // die "internal error";

    return if ($vtype eq $etype);

    raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
}
1302
# Normalize $drive->{file} and $drive->{media} in place.
#
# A file value that is neither 'cdrom'/'none', a /dev/ path, a 'storage:volume'
# string nor purely numeric is treated as a filesystem path and converted to a
# volume ID via PVE::Storage::path_to_volume_id(); failure to do so raises a
# parameter exception for $opt. An 'iso' volume without explicit media becomes
# a CD-ROM, and the media type is verified against the volume type.
# Finally, 'cdrom'/'none' without explicit media is marked as CD-ROM too.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};

    # try to convert filesystem paths to volume IDs
    my $is_fs_path = ($file !~ m/^(cdrom|none)$/)
        && ($file !~ m|^/dev/.+|)
        && ($file !~ m/^([^:]+):(.+)$/)
        && ($file !~ m/^\d+$/);

    if ($is_fs_path) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $file);
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$file' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    if (!$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/) {
        $drive->{media} = 'cdrom';
    }
}
1322
# Parse the 'hotplug' option into a hash of enabled features.
#
# '0' yields an empty hash and '1' is replaced by the schema default before
# parsing. The remaining value is split into a list whose entries must each be
# one of network, disk, cpu, memory, usb or cloudinit; anything else dies.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    $data = $confdesc->{hotplug}->{default} if $data eq '1';

    my %enabled;
    for my $feature (PVE::Tools::split_list($data)) {
        die "invalid hotplug feature '$feature'\n"
            if $feature !~ m/^(network|disk|cpu|memory|usb|cloudinit)$/;
        $enabled{$1} = 1;
    }

    return \%enabled;
}
1341
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);

# Format verifier for 'pve-hotplug-features'.
#
# Returns $value if parse_hotplug_features() accepts it. On a parse error it
# returns nothing when $noerr is set and re-throws the parser's error otherwise.
#
# parse_hotplug_features() reports invalid input by dying and always returns a
# (true) hash reference otherwise, so the error has to be caught here for
# $noerr to have any effect.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value;
}
1352
# Die if the VNC clipboard is enabled together with a VGA type that does not
# start with one of std, cirrus, vmware, virtio or qxl. Configurations without
# the VNC clipboard or without an explicit type are accepted.
sub assert_clipboard_config {
    my ($vga) = @_;

    my $clipboard_regex = qr/^(std|cirrus|vmware|virtio|qxl)/;

    my $clipboard = $vga->{'clipboard'};
    return if !$clipboard || $clipboard ne 'vnc';

    my $type = $vga->{type};
    return if !$type || $type =~ $clipboard_regex;

    die "vga type $vga->{type} is not compatible with VNC clipboard\n";
}
1367
# Query a device through the SCSI generic (sg) ioctl interface.
#
# $fh must be an open file handle of the device. First the sg driver version is
# checked, then a 6-byte command with opcode 0x12 and a 36 byte response buffer
# is submitted via SG_IO.
#
# Returns a hash reference with the keys type, removable (0 or 1), vendor,
# product and revision, decoded from the response buffer.
# On any failure the sub dies, unless $noerr is set, in which case it returns
# nothing.
sub scsi_inquiry {
    my ($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # only the leading fields are filled in, the rest of the header stays zero;
    # the 'P' entries pass pointers to $buf, $cmd and $sensebuf to the kernel
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    # fields 17 and 18 of the header are host_status and driver_status
    my @res = unpack($sg_io_hdr_t, $packet);
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    # keep only the top bit of the second byte and the low five bits of the first
    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1417
# Open $path read-write and run scsi_inquiry() on it without raising errors.
#
# Returns the hash reference produced by scsi_inquiry(), or nothing if the path
# cannot be opened or the inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # Pass the mode as a separate argument: with the mode glued onto the path
    # IO::File falls back to two-argument open(), which interprets whitespace
    # and special characters in the file name.
    my $fh = IO::File->new($path, '+<') or return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1427
# Build the QEMU device string for the USB tablet. The tablet is attached to
# the 'ehci' bus on q35 machines and on aarch64, and to 'uhci' otherwise.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1443
# Build the QEMU device string for a USB keyboard. Only aarch64 gets one; for
# every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1451
# Drive identifier used in device and drive IDs: interface name directly
# followed by the index, e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;

    return join('', $drive->{interface}, $drive->{index});
}
1456
# Build the value of the QEMU '-device' argument for a drive.
#
# The device model depends on $drive->{interface}:
#   virtio    - virtio-blk-pci on the address returned by print_pci_addr()
#   scsi      - scsi-hd/cd/block/generic on the controller reported by scsihw_infos()
#   ide, sata - ide-hd/cd on an IDE bus or AHCI port derived from the drive index
# 'usb' and unknown interfaces die. Boot index and serial number are appended
# for all interfaces when set.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';
    my $drive_id = get_drive_id($drive);
    my $interface = $drive->{interface};

    if ($interface eq 'virtio') {
        my $pciaddr = print_pci_addr("$drive_id", $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        if ($drive->{iothread}) {
            $device .= ",iothread=iothread-$drive_id";
        }
    } elsif ($interface eq 'scsi') {
        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;

        my $devicetype = 'hd';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            my $path = '';
            if ($drive->{file} =~ m|^/|) {
                # absolute path: ask the device itself what it is
                $path = $drive->{file};
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// && !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        $device = "scsi-$devicetype,bus=$controller_prefix$controller.0";
        my $scsihw = $conf->{scsihw};
        if (!$scsihw || $scsihw =~ m/^lsi/ || $scsihw eq 'pvscsi') {
            $device .= ",scsi-id=$unit";
        } else {
            $device .= ",channel=0,scsi-id=0,lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        if ($drive->{wwn}) {
            $device .= ",wwn=$drive->{wwn}";
        }
    } elsif ($interface eq 'ide' || $interface eq 'sata') {
        my $maxdev = ($interface eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
        # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
        # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
        # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
        # were before. Move odd ones up by 2 where they don't clash.
        if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $interface eq 'ide') {
            $controller += 2 * ($unit % 2);
            $unit = 0;
        }

        my $is_cdrom_media = $drive->{media} && $drive->{media} eq 'cdrom';
        my $devicetype = $is_cdrom_media ? "cd" : "hd";

        my $bus = ($interface eq 'ide') ? "ide.$controller,unit=$unit" : "ahci$controller.$unit";
        $device = "ide-$devicetype,bus=$bus,drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        if ($drive->{wwn}) {
            $device .= ",wwn=$drive->{wwn}";
        }
    } elsif ($interface eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    if ($drive->{bootindex}) {
        $device .= ",bootindex=$drive->{bootindex}";
    }

    if (my $serial = $drive->{serial}) {
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1563
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
#
# Returns the value of the first 'InitiatorName = ...' line, undef if the file
# contains no such line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1577
# Decide whether io_uring may be used as default AIO mode for a volume on the
# storage described by $scfg. Returns 1 if so (also when there is no storage
# config at all) and nothing for the storage types listed below.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    return 1 if !$scfg;

    my $type = $scfg->{type};

    # io_uring with cache mode writeback or writethrough on krbd will hang...
    return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

    # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
    # sometimes, just plain disable...
    return if $type eq 'lvm';

    # io_uring causes problems when used with CIFS since kernel 5.15
    # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
    return if $type eq 'cifs';

    return 1;
}
1594
# Tell whether a drive is accessed with a direct (O_DIRECT style) cache mode.
#
# With an explicit cache setting the result is true for 'off', 'none' and
# 'directsync'. Without one it is 1 for every non-CD-ROM drive, except for
# volumes on a btrfs storage that does not have 'nocow' set, and 0 otherwise.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        my $is_direct = $cache =~ /^(?:off|none|directsync)$/;
        return $is_direct;
    }

    return 0 if drive_is_cdrom($drive);

    my $is_cow_btrfs = $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};
    return $is_cow_btrfs ? 0 : 1;
}
1608
# Build the value of the QEMU '-drive' argument for a drive.
#
# Resolves the backing path and image format, then appends (in this order) the
# pass-through drive options, snapshot/readonly flags, throttling limits,
# format, default cache and AIO modes, detect-zeroes and, if $pbs_name is
# given, the options that put the drive on top of a Proxmox Backup Server
# snapshot node. $io_uring allows io_uring as default AIO mode where the
# storage supports it.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    my $path;
    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } elsif ($storeid) {
        $path = PVE::Storage::path($storecfg, $volid);
        $format //= qemu_img_format($scfg, $volname);
    } else {
        $path = $volid;
        $format //= "raw";
    }

    my $is_rbd = $path =~ m/^rbd:/;

    # every entry is one 'key=value' pair, joined with commas at the very end
    my @opts;

    for my $o (qw(heads secs cyls trans media cache rerror werror aio discard)) {
        push @opts, "$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        push @opts, "snapshot=" . ($drive->{snapshot} ? 'on' : 'off');
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        push @opts, "readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    for my $direction (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$direction;

        # [ drive config key, QEMU throttling key, value is given in megabytes ]
        my @limits = (
            ["mbps$dir", "bps$qmpname", 1],
            ["mbps${dir}_max", "bps$qmpname-max", 1],
            ["bps${dir}_max_length", "bps$qmpname-max-length", 0],
            ["iops${dir}", "iops$qmpname", 0],
            ["iops${dir}_max", "iops$qmpname-max", 0],
            ["iops${dir}_max_length", "iops$qmpname-max-length", 0],
        );

        for my $limit (@limits) {
            my ($conf_key, $qemu_key, $in_megabytes) = @$limit;
            my $v = $drive->{$conf_key} or next;
            $v = int($v*1024*1024) if $in_megabytes;
            push @opts, "throttling.$qemu_key=$v";
        }
    }

    if ($pbs_name) {
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        push @opts, "format=alloc-track", "file.driver=$format";
    } elsif ($format) {
        push @opts, "format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    push @opts, "cache=none" if !$drive->{cache} && $cache_direct;

    if (!$drive->{aio}) {
        my $aio;
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $aio = 'io_uring';
        } elsif ($cache_direct) {
            # aio native works only with O_DIRECT
            $aio = 'native';
        } else {
            $aio = 'threads';
        }
        push @opts, "aio=$aio";
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        push @opts, "$dz_param=$detectzeroes" if $detectzeroes;
    }

    my $file_param = "file";
    if ($pbs_name) {
        push @opts, "backing=$pbs_name", "auto-remove=on";

        # non-rbd drivers require the underlying file to be a seperate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    my $pathinfo = $path ? "$file_param=$path," : '';

    my $opts = join('', map { ",$_" } @opts);

    return "${pathinfo}if=none,id=drive-${drive_id}${opts}";
}
1734
# Build the value of the QEMU '-blockdev' argument for a read-only Proxmox
# Backup Server node named $pbs_name. Repository, snapshot and archive are
# always emitted; namespace and keyfile only when set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my @parts = (
        "driver=pbs",
        "node-name=$pbs_name",
        "read-only=on",
        "repository=$pbs_conf->{repository}",
    );
    push @parts, "namespace=$pbs_conf->{namespace}" if $pbs_conf->{namespace};
    push @parts, "snapshot=$pbs_conf->{snapshot}", "archive=$pbs_conf->{archive}";
    push @parts, "keyfile=$pbs_conf->{keyfile}" if $pbs_conf->{keyfile};

    return join(',', @parts);
}
1745
# Build the value of the QEMU '-device' argument for a network card.
#
# The 'virtio' model is mapped to 'virtio-net-pci'; every other model name is
# used as device name directly. Multiqueue, queue sizes and host MTU are only
# emitted for VirtIO. With $use_old_bios_files a matching PXE ROM file is
# selected for the known device types.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    if ($net->{queues} && $net->{queues} > 1 && $is_virtio) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if min_version($machine_version, 7, 1);
    }

    if (min_version($machine_version, 7, 1) && $is_virtio) {
        $tmpstr .= ",rx_queue_size=1024,tx_queue_size=256";
    }

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if ($is_virtio && $net->{bridge}) {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            if ($mtu == 1) {
                # '1' means: use the MTU of the bridge
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        } else {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        }
    }

    if ($use_old_bios_files) {
        my %pxe_rom_for = (
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        );
        my $romfile = $pxe_rom_for{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1808
# Build the value of the QEMU '-netdev' argument for the network device $netid.
#
# $netid must have the form 'net<N>' with N below $MAX_NETS. With a bridge
# configured, a tap device named 'tap<vmid>i<N>' is set up through the
# pve-bridge scripts (the hotplug variant when $hotplug is set); vhost is
# enabled for VirtIO NICs on native architectures when /dev/vhost-net is
# available. Without a bridge, user mode networking is used. Dies on a
# malformed or out-of-range $netid and on a too long interface name.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # Reject IDs that are not 'net<N>' right away. Previously the index stayed
    # an empty string in that case, passed the range check below and resulted
    # in an interface name without index.
    if ($netid !~ m/^net(\d+)$/) {
        die "got strange net id '$netid'\n";
    }
    my $i = int($1);

    die "got strange net id '$i'\n" if $i >= $MAX_NETS;

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1846
# Maps the configured VGA type to the QEMU device name. Types without an entry
# are only usable as QXL displays (see $qxlnum in print_vga_device).
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the value of the QEMU '-device' argument for a display adapter.
#
# $vga is the parsed 'vga' option, $id the index of an additional display
# (undef for the first one) and $qxlnum is true for QXL displays. Dies if no
# device type can be determined, or if 'virtio-gl' is requested while the
# EGL/GL libraries or a DRM render node are missing.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # The alternation has to be grouped as a whole. The previous pattern
        # '^(?:l\d\d)|(?:other)$' anchored each branch separately and thus also
        # matched values merely starting with 'l<NN>' or ending in 'other'.
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the standard VGA device on Windows guests that do not use OVMF
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1922
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a 'net<N>' property string into a hash reference. A parse error is
# emitted as warning and nothing is returned. If the string contains no MAC
# address, a random one with the prefix from datacenter.cfg is generated,
# unless $disable_mac_autogen is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $res = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    my $needs_mac = !defined($res->{macaddr}) && !$disable_mac_autogen;
    if ($needs_mac) {
        my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
        $res->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});
    }

    return $res;
}
1938
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an 'ipconfig<N>' property string into a hash reference. Parse errors
# and inconsistent gateway settings are emitted as warnings and nothing is
# returned. If neither an IPv4 nor an IPv6 address is given, the result is
# DHCP for both address families.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # the first rule that is violated gets reported
    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }

    if (defined($problem)) {
        warn $problem;
        return;
    }

    if (!$res->{ip} && !$res->{ip6}) {
        return { ip => 'dhcp', ip6 => 'dhcp' };
    }

    return $res;
}
1973
# Serialize a parsed network device hash back into a 'net<N>' property string.
sub print_net {
    my ($net) = @_;

    my $property_string = PVE::JSONSchema::print_property_string($net, $net_fmt);
    return $property_string;
}
1979
# Rewrite every 'net<N>' entry of $settings in place by parsing and printing
# it again; parse_net() fills in a MAC address where none is given. Entries
# that fail to parse are left unchanged, other keys are not touched.
sub add_random_macs {
    my ($settings) = @_;

    my @net_keys = grep { m/^net(\d+)$/ } keys %$settings;

    for my $opt (@net_keys) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1990
# Return 1 if the owner reported by PVE::Storage::path() for $volid equals
# $vmid. Absolute paths, volumes without owner and volumes for which the
# storage lookup fails yield nothing.
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    # errors from the storage layer simply mean "not the owner"
    my (undef, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);
    return;
}
2004
# Handle a drive that is no longer referenced by the VM configuration.
#
# A cloud-init drive is freed on the storage (a failure only produces a
# warning) and the 'cloudinit' entry is removed from $conf. Any other drive
# that is not a CD-ROM is added to $conf as unused volume, provided the VM
# owns the volume.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        warn $@ if $@;
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid)
            if vm_is_volid_owner($storecfg, $vmid, $volid);
    }
}
2019
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string format of the 'smbios1' option. The uuid and the base64
# flag are spelled out; all free-text fields share one schema and only differ
# in their description, so they are generated below.
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};

for my $field (
    [version => "Set SMBIOS1 version."],
    [serial => "Set SMBIOS1 serial number."],
    [manufacturer => "Set SMBIOS1 manufacturer."],
    [product => "Set SMBIOS1 product ID."],
    [sku => "Set SMBIOS1 SKU string."],
    [family => "Set SMBIOS1 family string."],
) {
    my ($name, $description) = @$field;

    # each field gets its own hash so the entries stay independent
    $smbios1_fmt->{$name} = {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => $description,
        optional => 1,
    };
}
2077
# Parse an 'smbios1' property string. A parse error is reported as a warning
# and results in an undefined return value.
sub parse_smbios1 {
    my ($data) = @_;

    my $smbios1 = eval { parse_property_string($smbios1_fmt, $data) };
    if (my $err = $@) {
        warn $err;
    }

    return $smbios1;
}
2085
# Serialize a parsed 'smbios1' hash back into its property string form.
sub print_smbios1 {
    my ($smbios1) = @_;

    return PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
}
2090
# make the smbios1 property-string format available under the name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1', $smbios1_fmt);
2092
# Parse a 'watchdog' property string.
#
# Returns nothing for an empty/false value. A parse error is reported as a
# warning and results in an undefined return value.
sub parse_watchdog {
    my ($value) = @_;

    $value or return;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $watchdog;
}
2102
# Parse the 'agent' option of a VM config.
#
# Always returns a hash reference: the parsed properties if the agent is
# enabled, an empty hash if the option is unset, failed to parse (a warning
# is emitted in that case) or the agent is disabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $raw = $conf->{agent};
    return {} if !defined($raw);

    my $agent = eval { parse_property_string($agent_fmt, $raw) };
    if (my $err = $@) {
        warn $err;
    }

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2115
# Look up a single property $key of the guest agent configuration.
#
# Returns undef when no 'agent' option is configured at all, otherwise the
# value parse_guest_agent() reports for $key (which may be undef as well).
sub get_qga_key {
    my ($conf, $key) = @_;

    if (defined($conf->{agent})) {
        my $agent = parse_guest_agent($conf);
        return $agent->{$key};
    }

    return undef;
}
2123
# Parse a 'vga' property string.
#
# Returns an empty hash for an empty/false value. A parse error is reported
# as a warning and results in an undefined return value.
sub parse_vga {
    my ($value) = @_;

    $value or return {};

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $vga;
}
2132
# Parse an 'rng' property string.
#
# Returns nothing for an empty/false value. A parse error is reported as a
# warning and results in an undefined return value.
sub parse_rng {
    my ($value) = @_;

    if (!$value) {
        return;
    }

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    warn $@ if $@;

    return $rng;
}
2142
# Parse the 'meta' property string of a VM config.
#
# Returns nothing for an empty/false value. A parse error is reported as a
# warning and results in an undefined return value.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if (my $err = $@) {
        warn $err;
    }

    return $meta;
}
2152
# Build a fresh 'meta' property string recording the QEMU version reported by
# kvm_user_version() and the current time (epoch seconds, as string).
# Takes no arguments: for now do not allow to override any value.
sub new_meta_info_string {
    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "". int(time()),
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2164
# Compute extra QEMU command line arguments needed for VMs that always run
# with the latest machine version but were created before a machine version
# transition that affected the guest visible HW such that, e.g., an OS config
# change would be required (we do not want to pin the machine version for
# non-windows OS types).
#
# Returns an array reference of arguments, or nothing if no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    # only non-versioned machine types are affected
    my $machine = $conf->{machine};
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;

    # only VMs created before 6.1 (or with unknown creation version)
    my $creation_qemu = $meta->{'creation-qemu'};
    return if defined($creation_qemu) && min_version($creation_qemu, 6, 1);

    # handle snapshot-rollback/migrations
    return if $forced_vers && !min_version($forced_vers, 6, 1);

    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    return if !$is_q35;
    return if !$conf->{ostype} || $conf->{ostype} ne 'l26';

    # this changed to default-on in Q 6.1 for q35 machines, it will mess with PCI slot view
    # and thus with the predictable interface naming of systemd
    return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
}
2189
# add JSON properties for create and set function
#
# Copies every option description from $confdesc into $prop, except for a
# few internal-only options. With $with_disk_alloc set, drive options use the
# description variant from drivedesc_hash_with_alloc instead.
# Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    # options that are never exposed through the create/set API
    my %skip = map { ($_ => 1) } qw(
        parent
        snaptime
        vmstate
        runningmachine
        runningcpu
        meta
    );

    for my $opt (grep { !$skip{$_} } keys %$confdesc) {
        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2215
# Properties that we can read from an OVF file
#
# Returns a hash reference with one optional string property per valid drive
# name plus the optional 'cores', 'memory' and 'name' properties.
sub json_ovf_properties {
    my %prop = map {
        my $device = $_;
        ($device => {
            type => 'string',
            format => 'pve-volume-id-or-absolute-path',
            description => "Disk image that gets imported to $device",
            optional => 1,
        });
    } PVE::QemuServer::Drive::valid_drive_names();

    $prop{cores} = {
        type => 'integer',
        description => "The number of CPU cores.",
        optional => 1,
    };
    $prop{memory} = {
        type => 'integer',
        description => "Amount of RAM for the VM in MB.",
        optional => 1,
    };
    $prop{name} = {
        type => 'string',
        description => "Name of the VM.",
        optional => 1,
    };

    return \%prop;
}
2247
# return copy of $confdesc_cloudinit to generate documentation
# (a deep copy, so callers may modify the result freely)
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);

    return $copy;
}
2253
# Returns a lookup hash (option name => 1) containing every key of
# $confdesc_cloudinit, the 'name' option and all 'netN' options for
# N in 0 .. $MAX_NETS-1.
sub cloudinit_pending_properties {
    my %props = (name => 1);

    $props{$_} = 1 for keys %$confdesc_cloudinit;

    for my $idx (0 .. $MAX_NETS - 1) {
        $props{"net$idx"} = 1;
    }

    return \%props;
}
2262
# Validate $value against the type declared for option $key in $confdesc and
# return the normalized value:
#  - boolean: 1 or 0 (accepts 1/0 and on/off, yes/no, true/false)
#  - integer: the numeric value (unsigned decimal digits only)
#  - number:  the value as given (unsigned decimal, optional fraction)
#  - string:  the value as given if the option declares a format (after
#             running the format check), else with one pair of enclosing
#             double quotes stripped
# Dies for unknown options, undefined values, values containing a line break
# and values that do not match their type.
sub check_type {
    my ($key, $value) = @_;

    my $desc = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$desc;

    my $type = $desc->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if $value =~ m/^(?:1|on|yes|true)$/i;
        return 0 if $value =~ m/^(?:0|off|no|false)$/i;
        die "type check ('boolean') failed - got '$value'\n";
    } elsif ($type eq 'integer') {
        if ($value =~ m/^(\d+)$/) {
            return int($1);
        }
        die "type check ('integer') failed - got '$value'\n";
    } elsif ($type eq 'number') {
        if ($value =~ m/^(\d+)(\.\d+)?$/) {
            return $value;
        }
        die "type check ('number') failed - got '$value'\n";
    } elsif ($type eq 'string') {
        my $fmt = $desc->{format};
        if ($fmt) {
            PVE::JSONSchema::check_format($fmt, $value);
        } else {
            $value =~ s/^\"(.*)\"$/$1/;
        }
        return $value;
    }

    die "internal error"
}
2299
# Destroy the VM $vmid: free all volumes owned by it and remove (or replace)
# its configuration.
#
# $skiplock           - skip the config lock check
# $replacement_conf   - if defined, written as the new config instead of
#                       destroying the config file
# $purge_unreferenced - also free every image the storage layer lists for
#                       $vmid, even if the config does not reference it
#
# Dies if the VM is a template whose base volumes are still used by linked
# clones. Failures to free individual volumes only produce warnings.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    # a 'suspended' lock does not block destruction
    PVE::QemuConfig->check_lock($conf)
        unless $skiplock || PVE::QemuConfig->has_lock($conf, 'suspended');

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    # volumes already handed to vdisk_free, so each one is only tried once
    my %freed;
    my $free_if_owned = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/| || $freed{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !($path && $owner && ($owner == $vmid));

        $freed{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $volume_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $volume_opts, $free_if_owned);

    # saved VM state volumes of snapshots
    for my $snapshot (values %{$conf->{snapshots}}) {
        my $vmstate = $snapshot->{vmstate};
        next if !defined($vmstate);

        my $state_drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        $free_if_owned->('vmstate', $state_drive) if defined($state_drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $volume_opts, $free_if_owned);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            if (my $err = $@) {
                warn $err;
            }
        });
    }

    eval { delete_ifaces_ipams_ips($conf, $vmid)};
    warn $@ if $@;

    if (defined($replacement_conf)) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2375
# Parse the raw content ($raw) of a VM configuration file.
#
# $filename - must end in '/qemu-server/<vmid>.conf', the VMID is taken from it
# $raw      - file content; nothing is returned if it is undefined
# $strict   - if true, parse problems are fatal (die), otherwise they only
#             produce a warning and the offending line is skipped
#
# Returns a hash reference holding the options of the main section plus:
#   digest    - SHA1 hex digest of $raw
#   snapshots - { <snapname> => { options... } }
#   pending   - options of the [PENDING] section
#   cloudinit - options of the [special:cloudinit] section (stored as-is,
#               without type checking)
# Comment lines ('#...') and 'description:' lines are collected into the
# 'description' of the section they appear in.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    # $conf always points to the section currently being filled
    my $conf = $res;
    my $descr;
    # flush the description collected so far into the current section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # section headers: finish the previous section and switch $conf
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            # any other section name is a snapshot
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at line start, so that other options whose value merely
            # ends in 'snapstate: ...' are not mistaken for a snapstate entry
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without type check
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # legacy alias: 'cdrom' is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # normalize the drive's file name to a volume ID
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2495
# Serialize the VM configuration hash $conf into the raw config file format.
#
# $conf is normalized in place before serialization:
#  - a top-level 'snapstate' is dropped
#  - the legacy 'cdrom' option is renamed to 'ide2' (dies if both are set)
#  - the legacy 'smp' option is dropped in favor of 'sockets'
#  - every value of the main, pending and snapshot sections is run through
#    check_type() and replaced by the normalized result
#  - 'unusedN' entries whose volume is referenced by a drive of the main or
#    pending section are removed
#
# Returns the raw file content: main section, then [PENDING] (if not empty),
# [special:cloudinit] (if not empty and the config has a cloud-init drive)
# and one section per snapshot, sorted by name.
# Dies on values failing the type check, on a 'delete' property outside of
# [PENDING] and on a snapshot named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    # volumes referenced by drives outside of snapshots
    my $used_volids = {};

    # type check and normalize all options of one section in place
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # render one section: description as '#' comment lines, then the
    # options sorted by key
    my $generate_raw_config = sub {
        my ($conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty pending description is still written out as a bare
                # '#' line; outside of [PENDING] it is simply omitted
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})){
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)){
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2602
# Collect the static default values from our JSON schema configuration.
# Returns a hash reference (option name => default) containing only the
# options that declare a defined default.
sub load_defaults {
    my %defaults =
        map { ($_ => $confdesc->{$_}->{default}) }
        grep { defined($confdesc->{$_}->{default}) }
        keys %$confdesc;

    return \%defaults;
}
2616
# List the QEMU guests located on the local node according to the cluster
# wide VM list. Returns { <vmid> => { exists => 1 }, ... }, which is empty if
# no VM list is available.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();

    my $ids = $vmlist ? $vmlist->{ids} : undef;
    return {} if !$ids;

    my $nodename = nodename();

    my %local_vms;
    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};

        my $on_this_node = $entry->{node} && $entry->{node} eq $nodename;
        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';

        $local_vms{$vmid} = { exists => 1 } if $on_this_node && $is_qemu;
    }

    return \%local_vms;
}
2632
# test if VM uses local resources (to prevent migration)
#
# $conf  - VM configuration
# $noerr - if false, die when any local resource is found
#
# In list context returns three values:
#   1. array ref with the config keys of all local resources
#   2. array ref with the keys of USB/PCI devices using a resource mapping
#   3. hash ref { <node> => [ keys whose mapping has no entry for <node> ] }
# In scalar context only the first array ref is returned.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record $key for every node lacking an entry for the mapping $id
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # 'host' is unset for entries that only use 'mapping', so guard
            # against matching on an undefined value
            next if defined($entry->{host}) && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2691
# check if used storages are available on all nodes (used by migrate)
#
# For every volume of $conf that lives on a storage, verify that the storage
# is enabled on the local node as well as on $node and that it is configured
# for the volume's content type. Dies on the first violation.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file} or return;

        # with the noerr flag set, non-storage volumes yield no storage ID
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        if (!$scfg->{content}->{$vtype}) {
            die "$volid: content type '$vtype' is not available on storage '$sid'\n";
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2715
# list nodes where all VM images are available (used by has_feature API)
#
# Starts with all cluster nodes and narrows the set down per volume:
#  - disabled storage: no node is left
#  - storage restricted to certain nodes: only those remain
#  - non-shared storage: only the local node remains
# Returns a hash reference { <node> => 1, ... }.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { ($_ => 1) } @$nodelist };
    my $nodename = nodename();

    my $restrict_by_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete @{$nodehash}{ grep { !$avail->{$_} } keys %$nodehash };
        } elsif (!$scfg->{shared}) {
            delete @{$nodehash}{ grep { $_ ne $nodename } keys %$nodehash };
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_by_volume);

    return $nodehash
}
2749
# Determine, per cluster node, which storages used by the volumes of $conf
# are not available there (storage disabled, or restricted to other nodes).
#
# Returns { <node> => { unavailable_storages => [ sorted storage IDs ] } };
# nodes without any unavailable storage map to an empty hash.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { ($_ => {}) } @$nodelist };

    my $collect_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected;
        if ($scfg->{disable}) {
            @affected = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected = grep { !$avail->{$_} } keys %$nodehash;
        }

        # first collected as a set, converted to a sorted list below
        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected;
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_unavailable);

    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2788
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns what vm_running_locally() reports for $vmid. Unless $nocheck is set,
# first asserts that the config exists on $node.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't
    #   processed it yet
    unless ($nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2802
# Returns the list of local VMs as produced by config_list(), with the 'pid'
# filled in for every VM that has a '<vmid>.pid' file in the runtime
# directory and is reported as running by check_running().
sub vzlist {
    my $vms = config_list();

    my $dir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vms if !$dir;

    while (defined(my $entry = $dir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;

        # ignore PID files of guests that are not configured on this node
        next if !defined($vms->{$vmid});

        my $pid = check_running($vmid);
        $vms->{$vmid}->{pid} = $pid if $pid;
    }

    return $vms;
}
2820
# JSON schema describing the per-VM status properties; only 'vmid' and
# 'status' are mandatory, everything else is optional.
our $vmstatus_return_properties = {
    # identification
    vmid => get_standard_option('pve-vmid'),
    name => {
        description => "VM name.",
        type => 'string',
        optional => 1,
    },
    tags => {
        description => "The current configured tags, if any",
        type => 'string',
        optional => 1,
    },
    lock => {
        description => "The current config lock, if any.",
        type => 'string',
        optional => 1,
    },

    # process state
    status => {
        description => "QEMU process status.",
        type => 'string',
        enum => ['stopped', 'running'],
    },
    qmpstatus => {
        description => "VM run state from the 'query-status' QMP monitor command.",
        type => 'string',
        optional => 1,
    },
    pid => {
        description => "PID of running qemu process.",
        type => 'integer',
        optional => 1,
    },
    uptime => {
        description => "Uptime.",
        type => 'integer',
        optional => 1,
        renderer => 'duration',
    },
    'running-machine' => {
        description => "The currently running machine type (if running).",
        type => 'string',
        optional => 1,
    },
    'running-qemu' => {
        description => "The currently running QEMU version (if running).",
        type => 'string',
        optional => 1,
    },

    # resources
    cpus => {
        description => "Maximum usable CPUs.",
        type => 'number',
        optional => 1,
    },
    maxmem => {
        description => "Maximum memory in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
    maxdisk => {
        description => "Root disk size in bytes.",
        type => 'integer',
        optional => 1,
        renderer => 'bytes',
    },
};
2887
# Per-PID sample of the previous vmstatus() call ({ time, used, cpu }), kept
# across calls so that CPU usage can be computed as a delta between samples.
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# $opt_vmid - if set, only report this VM
# $full     - additionally query the running VMs via QMP (status, block
#             statistics, machine type, QEMU version, balloon info)
#
# Returns a hash reference { <vmid> => { status properties } }.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;

    # first pass: static information taken from the configuration
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);

        my $conf = PVE::QemuConfig->load_config($vmid);

        my $d = { vmid => int($vmid) };
        $d->{pid} = int($list->{$vmid}->{pid}) if $list->{$vmid}->{pid};

        # fixme: better status?
        $d->{status} = $list->{$vmid}->{pid} ? 'running' : 'stopped';

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        if (defined($size)) {
            $d->{disk} = 0; # no info available
            $d->{maxdisk} = $size;
        } else {
            $d->{disk} = 0;
            $d->{maxdisk} = 0;
        }

        # sockets * cores, capped at the host CPU count; an explicit 'vcpus'
        # setting overrides the result
        $d->{cpus} = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $d->{cpus} = $cpucount if $d->{cpus} > $cpucount;
        $d->{cpus} = $conf->{vcpus} if $conf->{vcpus};

        $d->{name} = $conf->{name} || "VM $vmid";
        # scaled by 1024*1024 to get bytes
        $d->{maxmem} = get_current_memory($conf->{memory})*(1024*1024);

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon}*(1024*1024);
            $d->{shares} = defined($conf->{shares}) ? $conf->{shares}
                : $defaults->{shares};
        }

        # runtime counters start at zero and are filled in below if available
        $d->{uptime} = 0;
        $d->{cpu} = 0;
        $d->{mem} = 0;

        $d->{netout} = 0;
        $d->{netin} = 0;

        $d->{diskread} = 0;
        $d->{diskwrite} = 0;

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network counters: sum up all 'tap<vmid>i...' devices of a VM
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    foreach my $dev (keys %$netdev) {
        next if $dev !~ m/^tap([1-9]\d*)i/;
        my $vmid = $1;
        my $d = $res->{$vmid};
        next if !$d;

        # note the swap: the device's 'receive' counter feeds 'netout' and
        # 'transmit' feeds 'netin' (presumably because the tap counters are
        # from the host's point of view - confirm)
        $d->{netout} += $netdev->{$dev}->{receive};
        $d->{netin} += $netdev->{$dev}->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($netdev->{$dev}->{receive});
            $d->{nics}->{$dev}->{netin} = int($netdev->{$dev}->{transmit});
        }

    }

    my $ctime = gettimeofday;

    # second pass: process statistics of the running VMs
    foreach my $vmid (keys %$list) {

        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime})/$cpuinfo->{user_hz});

        # estimate: scale maxmem by the rss/vsize ratio of the process
        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss}/$pstat->{vsize})*$d->{maxmem});
        }

        # first time we see this PID: just record a sample, cpu stays 0
        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        # only compute a new value if the interval since the last sample is
        # large enough, else report the previously computed usage
        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime/$dtime)* $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    # 'query-balloon' response: override memory values with what QEMU reports
    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    # 'query-blockstats' response: per-device stats plus read/write totals
    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];
        my $totalrdbytes = 0;
        my $totalwrbytes = 0;

        for my $blockstat (@$data) {
            $totalrdbytes = $totalrdbytes + $blockstat->{stats}->{rd_bytes};
            $totalwrbytes = $totalwrbytes + $blockstat->{stats}->{wr_bytes};

            # strip the 'drive-' part so the key is the plain drive name
            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $blockstat->{stats};
        }
        $res->{$vmid}->{diskread} = $totalrdbytes;
        $res->{$vmid}->{diskwrite} = $totalwrbytes;
    };

    # 'query-machines' response: determine the currently running machine type
    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    # 'query-version' response: "<major>.<minor>.<micro>" or 'unknown'
    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};
        my $version = 'unknown';

        if (my $v = $data->{qemu}) {
            $version = $v->{major} . "." . $v->{minor} . "." . $v->{micro};
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    # 'query-status' response: queue the follow-up queries for this VM and
    # record the reported run state
    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = 'unknown';
        if (!defined($status = $resp->{'return'}->{status})) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $resp->{'return'}->{status};
    };

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; # not running
        $qmpclient->queue_cmd($vmid, $statuscb, 'query-status');
    }

    $qmpclient->queue_execute(undef, 2);

    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        next if !$res->{$vmid}->{pid}; #not running

        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status where QMP did not provide a run state
    foreach my $vmid (keys %$list) {
        next if $opt_vmid && ($vmid ne $opt_vmid);
        $res->{$vmid}->{qmpstatus} = $res->{$vmid}->{status} if !$res->{$vmid}->{qmpstatus};
    }

    return $res;
}
3127
# Tells whether the VM config defines any serial port.
#
# Checks the 'serialN' options for N in 0 .. $MAX_SERIAL_PORTS - 1 and
# returns 1 as soon as one of them holds a true value, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $port (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$port"};
    }

    return 0;
}
3139
# Looks up the 'audio$id' option ($id defaults to 0) in the VM config.
#
# Returns nothing (empty list / undef) if the option is not defined.
# Otherwise the property string is parsed with $audio_fmt and a hash
# reference with the following keys is returned:
#   dev        - the configured 'device' property
#   dev_id     - "audiodev$id"
#   backend    - the configured 'driver' property, 'spice' if unset
#   backend_id - "<backend>-backend<id>"
sub conf_has_audio {
    my ($conf, $id) = @_;

    my $index = $id // 0;

    my $raw = $conf->{"audio$index"};
    return if !defined($raw);

    my $props = parse_property_string($audio_fmt, $raw);
    my $backend = $props->{driver} // 'spice';

    my $info = {
        dev => $props->{device},
        dev_id => "audiodev$index",
        backend => $backend,
        backend_id => "${backend}-backend${index}",
    };

    return $info;
}
3157
# Builds the QEMU command line arguments for one audio device.
#
# $audio is a hash as returned by conf_has_audio(), $audiopciaddr is a
# string appended verbatim to the sound card's device definition.
# For machine versions >= 4.2 the devices are linked to the backend via
# an 'audiodev=' property.
#
# Returns an array reference of arguments; dies on an unsupported device.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $id = $audio->{dev_id};

    my $audiodev = min_version($machine_version, 4, 2)
        ? ",audiodev=$audio->{backend_id}"
        : "";

    my @args;
    if ($audio->{dev} eq 'AC97') {
        @args = ('-device', "AC97,id=${id}${audiopciaddr}$audiodev");
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself, plus two codecs attached to its bus
        @args = (
            '-device', "$audio->{dev},id=${id}${audiopciaddr}",
            '-device', "hda-micro,id=${id}-codec0,bus=${id}.0,cad=0$audiodev",
            '-device', "hda-duplex,id=${id}-codec1,bus=${id}.0,cad=1$audiodev",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    push @args, '-audiodev', "$audio->{backend},id=$audio->{backend_id}";

    return \@args;
}
3183
# Returns a hash reference with the per-VM swtpm runtime file locations:
#   socket - control socket path
#   pid    - pid file path
sub get_tpm_paths {
    my ($vmid) = @_;

    my $socket = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $socket,
        pid => "$socket.pid",
    };
}
3191
# Appends the QEMU arguments for an emulated TPM to @$devices.
#
# Does nothing unless the VM config has a true 'tpmstate0' entry. The
# chardev is pointed at the socket path from get_tpm_paths($vmid).
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    return if !$conf->{tpmstate0};

    my $paths = get_tpm_paths($vmid);
    my $socket = $paths->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3203
# Starts the swtpm daemon backing the VM's emulated TPM.
#
# Parameters:
#   $storecfg  - storage configuration, used to map the state volume
#   $vmid      - VM ID, determines socket, pid file and log file names
#   $tpmdrive  - the 'tpmstate0' drive string; nothing is done if it is false
#   $migration - if true, swtpm_setup is skipped (state comes from remote)
#
# Returns the PID found in swtpm's pid file so the caller can kill the daemon
# on error, or undef if the pid file contained no number. Returns nothing if
# no TPM drive was passed. Dies if the pid file does not show up in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $state;
    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);

    # an unset version is treated like "not v2.0", without an 'uninitialized' warning
    my $is_tpm2 = ($tpm->{version} // '') eq 'v2.0';

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        # the output line is passed as argument; do not rely on $1 leaking out
        # of whatever regex the caller of this callback happened to run last
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # Return the untainted PID of the swtpm daemon so it can be killed on error.
    # Capture in list context: on a failed match this yields undef instead of a
    # stale $1 from an unrelated earlier match (which could be some other PID).
    my $first_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $first_line =~ m/(\d+)/;
    return $pid;
}
3278
# Checks whether a 'vga' config string selects a QXL display.
#
# The string is parsed with parse_vga(). Returns 0 if the resulting type is
# not 'qxl', 'qxl2', 'qxl3' or 'qxl4'; otherwise returns the numeric suffix,
# or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $vgaconf = parse_vga($vga);
    my $type = $vgaconf->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3288
# True if $arch equals the architecture reported by get_host_arch().
sub is_native($) {
    my $arch = shift;

    my $host_arch = get_host_arch();

    return $host_arch eq $arch;
}
3293
# Returns the 'arch' value from the VM config, falling back to the result
# of get_host_arch() when the config does not define one.
sub get_vm_arch {
    my ($conf) = @_;

    my $arch = $conf->{arch};

    return defined($arch) ? $arch : get_host_arch();
}
3298
# Machine type to use per architecture when none is selected explicitly.
my $default_machines = {
    'aarch64' => 'virt',
    'x86_64' => 'pc',
};
3303
# Returns the "major.minor" prefix of a QEMU version string.
#
# $kvmversion is optional; if undefined, kvm_user_version() is queried.
# Returns undef if the version does not start with "<digits>.<digits>".
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion = kvm_user_version() if !defined($kvmversion);

    # Capture in list context instead of returning $1: after a failed match,
    # $1 would still hold the capture of some unrelated earlier match.
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3310
# Turns an unversioned machine type into a version-pinned one.
#
# $base_version is used as the version to pin to if it is defined and
# PVE::QemuServer::Machine::can_run_pve_machine_version() accepts it for
# $kvmversion; otherwise the result of get_installed_machine_version() is used.
#
# Mapping: unset/false or 'pc' -> "pc-i440fx-<ver>", 'q35' -> "pc-q35-<ver>",
# 'virt' -> "virt-<ver>". Any other value is returned unchanged after a warning.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);

    my $pin_version = $base_usable
        ? $base_version
        : get_installed_machine_version($kvmversion);

    return "pc-i440fx-$pin_version" if !$machine || $machine eq 'pc';
    return "pc-q35-$pin_version" if $machine eq 'q35';
    return "virt-$pin_version" if $machine eq 'virt';

    warn "unknown machine type '$machine', not touching that!\n";

    return $machine;
}
3332
# Determines the machine type string for a VM.
#
# $forcemachine takes precedence over the 'machine' config option.
#
# If the result is unset or one of the unversioned names 'pc', 'q35' or
# 'virt':
#   - $kvmversion defaults to kvm_user_version()
#   - guests for which windows_version() is true get pinned, based on 5.1
#   - a still unset machine falls back to the default for $arch
#     ($arch itself defaults to 'x86_64')
#   - with $add_pve_version, "+pve<N>" is appended, N coming from
#     PVE::QemuServer::Machine::get_pve_version()
#
# With $add_pve_version, any machine still lacking a "+pve<N>" suffix gets
# "+pve0", inserted in front of a trailing ".pxe" if there is one.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_unversioned = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_unversioned) {
        $kvmversion //= kvm_user_version();

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        if (windows_version($conf->{ostype})) {
            $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion);
        }

        $arch //= 'x86_64';
        $machine ||= $default_machines->{$arch};

        if ($add_pve_version) {
            $machine .= "+pve" . PVE::QemuServer::Machine::get_pve_version($kvmversion);
        }
    }

    # nothing left to do if no pve version is wanted or one is already present
    return $machine if !$add_pve_version || $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    my $pxe_suffix = '';
    if ($machine =~ m/^(.*?)\.pxe$/) {
        $machine = $1;
        $pxe_suffix = '.pxe';
    }

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    return $machine . '+pve0' . $pxe_suffix;
}
3367
# Selects the OVMF firmware image pair for a VM.
#
# Images are looked up in $OVMF by architecture and image type. The type is
# 'default', except on x86_64 with an EFI disk whose 'efitype' is '4m': then
# it is '4m' ($smm true) or '4m-no-smm', with '-ms' appended if the EFI disk
# has 'pre-enrolled-keys' set.
#
# Returns the list ($ovmf_code, $ovmf_vars). Dies if the architecture is
# unknown or one of the two files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch};
    die "no OVMF images known for architecture '$arch'\n" if !$types;

    my $type = 'default';
    if ($arch eq 'x86_64') {
        my $efitype = $efidisk->{efitype};
        if (defined($efitype) && $efitype eq '4m') {
            $type = $smm ? "4m" : "4m-no-smm";
            $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
        }
        # TODO: log_warn about use of legacy images for x86_64 with Proxmox VE 9
        # (applies to the case where the 4m images are not selected above)
    }

    my ($ovmf_code, $ovmf_vars) = @{ $types->{$type} };

    -f $ovmf_code or die "EFI base image '$ovmf_code' not found\n";
    -f $ovmf_vars or die "EFI vars image '$ovmf_vars' not found\n";

    return ($ovmf_code, $ovmf_vars);
}
3390
# QEMU system emulator binary per guest architecture, used for guests that
# do not match the host architecture.
my $Arch2Qemu = {
    'aarch64' => '/usr/bin/qemu-system-aarch64',
    'x86_64' => '/usr/bin/qemu-system-x86_64',
};

# Returns the path of the binary used to run a guest of architecture $arch:
# '/usr/bin/kvm' if is_native($arch) is true, the matching $Arch2Qemu entry
# otherwise. Dies if there is no entry for a non-native architecture.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $cmd = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$cmd;

    return $cmd;
}
3403
# Queries the CPU flags QEMU reports as supported on this host.
#
# To get flags usable on a QEMU command line ('-cpu' element), array_intersect
# the result of this function with that of query_understood_cpu_flags(),
# because:
#
# a) query_understood_ returns flags the host cannot use and
# b) query_supported_ (rather the QMP call) doesn't actually return CPU
#    flags, but CPU settings - with most of them being flags. Those settings
#    (and some flags, curiously) cannot be specified as a "-cpu" argument.
#
# This function starts up to 2 temporary VMs and is therefore rather
# expensive. The same value is available much cheaper from pmxcfs via
# PVE::Cluster::get_node_kv('cpuflags-$accel'), $accel being 'kvm' or 'tcg'.
#
# pvestatd calls this function on startup and whenever the QEMU/KVM version
# changes, automatically populating pmxcfs.
#
# $arch defaults to the host architecture; dies for 'aarch64'.
#
# Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
# since kvm and tcg machines support different flags. The 'kvm' key is only
# set if kvm_version() is defined; a failed query only warns.
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch //= get_host_arch();

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n" if $arch eq "aarch64";

    my $default_machine = $default_machines->{$arch};
    my $flags = {};

    my $kvm_supported = defined(kvm_version());
    my $qemu_cmd = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);

    # Start a temporary (frozen) VM with vmid -1 to allow sending a QMP command.
    # Takes a boolean selecting KVM (true) or TCG (false) and returns a sorted
    # array reference of flag names.
    my $query_one_accel = sub {
        my ($kvm) = @_;

        my @qemu_args = (
            $qemu_cmd,
            '-machine', $default_machine,
            '-display', 'none',
            '-chardev', "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off",
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_args, '-accel', 'tcg' if !$kvm;

        my $rc = run_command(\@qemu_args, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $rc if $rc;

        my %enabled;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' }
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # Normalize to underscores, to match /proc/cpuinfo
                (my $flag = $name) =~ s/\.|-/_/g;
                $enabled{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $flags->{tcg} = eval { $query_one_accel->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($kvm_supported) {
            $flags->{kvm} = eval { $query_one_accel->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $flags;
}
3505
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the list of CPU flags understood by QEMU for the host architecture.
#
# The file "recognized-CPUID-flags-<arch>" in $understood_cpu_flag_dir is
# read and split on whitespace. Returns an array reference of flag names;
# dies if the file does not exist.
sub query_understood_cpu_flags {
    my $arch = get_host_arch();
    my $filepath = "$understood_cpu_flag_dir/recognized-CPUID-flags-$arch";

    if (! -e $filepath) {
        die "Cannot query understood QEMU CPU flags for architecture: $arch (file not found)\n";
    }

    # strip surrounding whitespace first, so split yields no empty leading field
    my $trimmed = file_get_contents($filepath) =~ s/^\s+|\s+$//gr;

    return [ split(/\s+/, $trimmed) ];
}
3521
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns nothing for machine types starting with 'virt'. Otherwise the result
# is true if the BIOS is SeaBIOS (unset or 'seabios') and the VGA type is a
# serial port ('serialN') or 'none'.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    return if $machine =~ m/^virt/; # there is no smm flag that could be disabled

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3532
# Builds the two '-drive' strings for the OVMF pflash devices.
#
# Parameters: VM config, storage config, VM ID, guest architecture, a flag
# that is passed on to get_ovmf_files() as its third argument, and
# $version_guard, a code reference called as $version_guard->(4, 1, 2) to
# decide whether an explicit 'size=' may be added.
#
# Without an 'efidisk0' entry in the config, a warning is logged and the
# vars image is copied to "/tmp/<vmid>-ovmf.fd" and used instead.
#
# Returns the list (code drive string, vars drive string).
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $d = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $d, $q35);

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";
    my $var_drive_str = "if=pflash,unit=1,id=drive-efidisk0";

    if (!$d) {
        log_warn("no efidisk configured! Using temporary efivars disk.");
        my $tmp_path = "/tmp/$vmid-ovmf.fd";
        PVE::Tools::file_copy($ovmf_vars, $tmp_path, -s $ovmf_vars);
        $var_drive_str .= ",format=raw,file=$tmp_path";
        $var_drive_str .= ",size=" . (-s $ovmf_vars) if $version_guard->(4, 1, 2);

        return ($code_drive_str, $var_drive_str);
    }

    my $path = $d->{file};
    my $format = $d->{format};

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($d->{file}, 1);
    if ($storeid) {
        $path = PVE::Storage::path($storecfg, $d->{file});
        if (!defined($format)) {
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $format = qemu_img_format($scfg, $volname);
        }
    } elsif (!defined($format)) {
        # a plain path gives no storage to derive the format from
        die "efidisk format must be specified\n";
    }

    # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
    if ($path =~ m/^rbd:/) {
        $var_drive_str .= ',cache=writeback';
        $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
    }
    $var_drive_str .= ",format=$format,file=$path";

    if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
        $var_drive_str .= ",size=" . (-s $ovmf_vars);
    }
    if (drive_is_read_only($conf, $d)) {
        $var_drive_str .= ',readonly=on';
    }

    return ($code_drive_str, $var_drive_str);
}
3572
3573 sub config_to_command {
3574 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3575 $pbs_backing) = @_;
3576
3577 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3578 my $devices = [];
3579 my $bridges = {};
3580 my $ostype = $conf->{ostype};
3581 my $winversion = windows_version($ostype);
3582 my $kvm = $conf->{kvm};
3583 my $nodename = nodename();
3584
3585 my $arch = get_vm_arch($conf);
3586 my $kvm_binary = get_command_for_arch($arch);
3587 my $kvmver = kvm_user_version($kvm_binary);
3588
3589 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3590 $kvmver //= "undefined";
3591 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3592 }
3593
3594 my $add_pve_version = min_version($kvmver, 4, 1);
3595
3596 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3597 my $machine_version = extract_version($machine_type, $kvmver);
3598 $kvm //= 1 if is_native($arch);
3599
3600 $machine_version =~ m/(\d+)\.(\d+)/;
3601 my ($machine_major, $machine_minor) = ($1, $2);
3602
3603 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3604 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3605 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3606 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3607 ." please upgrade node '$nodename'\n"
3608 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3609 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3610 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3611 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3612 ." node '$nodename'\n";
3613 }
3614
3615 # if a specific +pve version is required for a feature, use $version_guard
3616 # instead of min_version to allow machines to be run with the minimum
3617 # required version
3618 my $required_pve_version = 0;
3619 my $version_guard = sub {
3620 my ($major, $minor, $pve) = @_;
3621 return 0 if !min_version($machine_version, $major, $minor, $pve);
3622 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3623 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3624 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3625 return 1;
3626 };
3627
3628 if ($kvm && !defined kvm_version()) {
3629 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3630 ." or enable in BIOS.\n";
3631 }
3632
3633 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3634 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3635 my $use_old_bios_files = undef;
3636 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3637
3638 my $cmd = [];
3639 if ($conf->{affinity}) {
3640 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3641 }
3642
3643 push @$cmd, $kvm_binary;
3644
3645 push @$cmd, '-id', $vmid;
3646
3647 my $vmname = $conf->{name} || "vm$vmid";
3648
3649 push @$cmd, '-name', "$vmname,debug-threads=on";
3650
3651 push @$cmd, '-no-shutdown';
3652
3653 my $use_virtio = 0;
3654
3655 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3656 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3657 push @$cmd, '-mon', "chardev=qmp,mode=control";
3658
3659 if (min_version($machine_version, 2, 12)) {
3660 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3661 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3662 }
3663
3664 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3665
3666 push @$cmd, '-daemonize';
3667
3668 if ($conf->{smbios1}) {
3669 my $smbios_conf = parse_smbios1($conf->{smbios1});
3670 if ($smbios_conf->{base64}) {
3671 # Do not pass base64 flag to qemu
3672 delete $smbios_conf->{base64};
3673 my $smbios_string = "";
3674 foreach my $key (keys %$smbios_conf) {
3675 my $value;
3676 if ($key eq "uuid") {
3677 $value = $smbios_conf->{uuid}
3678 } else {
3679 $value = decode_base64($smbios_conf->{$key});
3680 }
3681 # qemu accepts any binary data, only commas need escaping by double comma
3682 $value =~ s/,/,,/g;
3683 $smbios_string .= "," . $key . "=" . $value if $value;
3684 }
3685 push @$cmd, '-smbios', "type=1" . $smbios_string;
3686 } else {
3687 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3688 }
3689 }
3690
3691 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3692 my ($code_drive_str, $var_drive_str) =
3693 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3694 push $cmd->@*, '-drive', $code_drive_str;
3695 push $cmd->@*, '-drive', $var_drive_str;
3696 }
3697
3698 if ($q35) { # tell QEMU to load q35 config early
3699 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3700 if (min_version($machine_version, 4, 0)) {
3701 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3702 } else {
3703 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3704 }
3705 }
3706
3707 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3708 push @$cmd, $fixups->@*;
3709 }
3710
3711 if ($conf->{vmgenid}) {
3712 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3713 }
3714
3715 # add usb controllers
3716 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3717 $conf, $bridges, $arch, $machine_type, $machine_version);
3718 push @$devices, @usbcontrollers if @usbcontrollers;
3719 my $vga = parse_vga($conf->{vga});
3720
3721 my $qxlnum = vga_conf_has_spice($conf->{vga});
3722 $vga->{type} = 'qxl' if $qxlnum;
3723
3724 if (!$vga->{type}) {
3725 if ($arch eq 'aarch64') {
3726 $vga->{type} = 'virtio';
3727 } elsif (min_version($machine_version, 2, 9)) {
3728 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3729 } else {
3730 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3731 }
3732 }
3733
3734 # enable absolute mouse coordinates (needed by vnc)
3735 my $tablet = $conf->{tablet};
3736 if (!defined($tablet)) {
3737 $tablet = $defaults->{tablet};
3738 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3739 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3740 }
3741
3742 if ($tablet) {
3743 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3744 my $kbd = print_keyboarddevice_full($conf, $arch);
3745 push @$devices, '-device', $kbd if defined($kbd);
3746 }
3747
3748 my $bootorder = device_bootorder($conf);
3749
3750 # host pci device passthrough
3751 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3752 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
3753
3754 # usb devices
3755 my $usb_dev_features = {};
3756 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3757
3758 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3759 $conf, $usb_dev_features, $bootorder, $machine_version);
3760 push @$devices, @usbdevices if @usbdevices;
3761
3762 # serial devices
3763 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3764 my $path = $conf->{"serial$i"} or next;
3765 if ($path eq 'socket') {
3766 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3767 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3768 # On aarch64, serial0 is the UART device. QEMU only allows
3769 # connecting UART devices via the '-serial' command line, as
3770 # the device has a fixed slot on the hardware...
3771 if ($arch eq 'aarch64' && $i == 0) {
3772 push @$devices, '-serial', "chardev:serial$i";
3773 } else {
3774 push @$devices, '-device', "isa-serial,chardev=serial$i";
3775 }
3776 } else {
3777 die "no such serial device\n" if ! -c $path;
3778 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
3779 push @$devices, '-device', "isa-serial,chardev=serial$i";
3780 }
3781 }
3782
3783 # parallel devices
3784 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3785 if (my $path = $conf->{"parallel$i"}) {
3786 die "no such parallel device\n" if ! -c $path;
3787 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
3788 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3789 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3790 }
3791 }
3792
3793 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3794 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3795 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3796 push @$devices, @$audio_devs;
3797 }
3798
3799 # Add a TPM only if the VM is not a template,
3800 # to support backing up template VMs even if the TPM disk is write-protected.
3801 add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf));
3802
3803 my $sockets = 1;
3804 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3805 $sockets = $conf->{sockets} if $conf->{sockets};
3806
3807 my $cores = $conf->{cores} || 1;
3808
3809 my $maxcpus = $sockets * $cores;
3810
3811 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3812
3813 my $allowed_vcpus = $cpuinfo->{cpus};
3814
3815 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3816
3817 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3818 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3819 for (my $i = 2; $i <= $vcpus; $i++) {
3820 my $cpustr = print_cpu_device($conf,$i);
3821 push @$cmd, '-device', $cpustr;
3822 }
3823
3824 } else {
3825
3826 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3827 }
3828 push @$cmd, '-nodefaults';
3829
3830 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3831
3832 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3833
3834 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3835
3836 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3837 push @$devices, '-device', print_vga_device(
3838 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3839
3840 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3841
3842 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3843 push @$cmd, '-vnc', "unix:$socket,password=on";
3844 } else {
3845 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3846 push @$cmd, '-nographic';
3847 }
3848
3849 # time drift fix
3850 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3851 my $useLocaltime = $conf->{localtime};
3852
3853 if ($winversion >= 5) { # windows
3854 $useLocaltime = 1 if !defined($conf->{localtime});
3855
3856 # use time drift fix when acpi is enabled
3857 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3858 $tdf = 1 if !defined($conf->{tdf});
3859 }
3860 }
3861
3862 if ($winversion >= 6) {
3863 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3864 push @$machineFlags, 'hpet=off';
3865 }
3866
3867 push @$rtcFlags, 'driftfix=slew' if $tdf;
3868
3869 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3870 push @$rtcFlags, "base=$conf->{startdate}";
3871 } elsif ($useLocaltime) {
3872 push @$rtcFlags, 'base=localtime';
3873 }
3874
3875 if ($forcecpu) {
3876 push @$cmd, '-cpu', $forcecpu;
3877 } else {
3878 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3879 }
3880
3881 PVE::QemuServer::Memory::config(
3882 $conf, $vmid, $sockets, $cores, $hotplug_features->{memory}, $cmd);
3883
3884 push @$cmd, '-S' if $conf->{freeze};
3885
3886 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3887
3888 my $guest_agent = parse_guest_agent($conf);
3889
3890 if ($guest_agent->{enabled}) {
3891 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3892 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3893
3894 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3895 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3896 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3897 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3898 } elsif ($guest_agent->{type} eq 'isa') {
3899 push @$devices, '-device', "isa-serial,chardev=qga0";
3900 }
3901 }
3902
3903 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3904 if ($rng && $version_guard->(4, 1, 2)) {
3905 check_rng_source($rng->{source});
3906
3907 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3908 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3909 my $limiter_str = "";
3910 if ($max_bytes) {
3911 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3912 }
3913
3914 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3915 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3916 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3917 }
3918
3919 my $spice_port;
3920
3921 assert_clipboard_config($vga);
3922 my $is_spice = $qxlnum || $vga->{type} =~ /^virtio/;
3923
3924 if ($is_spice || ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc')) {
3925 if ($qxlnum > 1) {
3926 if ($winversion){
3927 for (my $i = 1; $i < $qxlnum; $i++){
3928 push @$devices, '-device', print_vga_device(
3929 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3930 }
3931 } else {
3932 # assume other OS works like Linux
3933 my ($ram, $vram) = ("134217728", "67108864");
3934 if ($vga->{memory}) {
3935 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3936 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3937 }
3938 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3939 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3940 }
3941 }
3942
3943 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3944
3945 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3946 if ($vga->{'clipboard'} && $vga->{'clipboard'} eq 'vnc') {
3947 push @$devices, '-chardev', 'qemu-vdagent,id=vdagent,name=vdagent,clipboard=on';
3948 } else {
3949 push @$devices, '-chardev', 'spicevmc,id=vdagent,name=vdagent';
3950 }
3951 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3952
3953 if ($is_spice) {
3954 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3955 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3956 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3957
3958 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3959 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3960
3961 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3962 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3963 if ($spice_enhancement->{foldersharing}) {
3964 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3965 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3966 }
3967
3968 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3969 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3970 if $spice_enhancement->{videostreaming};
3971 push @$devices, '-spice', "$spice_opts";
3972 }
3973 }
3974
3975 # enable balloon by default, unless explicitly disabled
3976 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3977 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3978 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3979 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3980 push @$devices, '-device', $ballooncmd;
3981 }
3982
3983 if ($conf->{watchdog}) {
3984 my $wdopts = parse_watchdog($conf->{watchdog});
3985 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3986 my $watchdog = $wdopts->{model} || 'i6300esb';
3987 push @$devices, '-device', "$watchdog$pciaddr";
3988 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3989 }
3990
3991 my $vollist = [];
3992 my $scsicontroller = {};
3993 my $ahcicontroller = {};
3994 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3995
3996 # Add iscsi initiator name if available
3997 if (my $initiator = get_initiator_name()) {
3998 push @$devices, '-iscsi', "initiator-name=$initiator";
3999 }
4000
4001 PVE::QemuConfig->foreach_volume($conf, sub {
4002 my ($ds, $drive) = @_;
4003
4004 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
4005 check_volume_storage_type($storecfg, $drive->{file});
4006 push @$vollist, $drive->{file};
4007 }
4008
4009 # ignore efidisk here, already added in bios/fw handling code above
4010 return if $drive->{interface} eq 'efidisk';
4011 # similar for TPM
4012 return if $drive->{interface} eq 'tpmstate';
4013
4014 $use_virtio = 1 if $ds =~ m/^virtio/;
4015
4016 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
4017
4018 if ($drive->{interface} eq 'virtio'){
4019 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
4020 }
4021
4022 if ($drive->{interface} eq 'scsi') {
4023
4024 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
4025
4026 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
4027 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
4028
4029 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
4030 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
4031
4032 my $iothread = '';
4033 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
4034 $iothread .= ",iothread=iothread-$controller_prefix$controller";
4035 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
4036 } elsif ($drive->{iothread}) {
4037 log_warn(
4038 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4039 );
4040 }
4041
4042 my $queues = '';
4043 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4044 $queues = ",num_queues=$drive->{queues}";
4045 }
4046
4047 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4048 if !$scsicontroller->{$controller};
4049 $scsicontroller->{$controller}=1;
4050 }
4051
4052 if ($drive->{interface} eq 'sata') {
4053 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
4054 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4055 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4056 if !$ahcicontroller->{$controller};
4057 $ahcicontroller->{$controller}=1;
4058 }
4059
4060 my $pbs_conf = $pbs_backing->{$ds};
4061 my $pbs_name = undef;
4062 if ($pbs_conf) {
4063 $pbs_name = "drive-$ds-pbs";
4064 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4065 }
4066
4067 my $drive_cmd = print_drive_commandline_full(
4068 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
4069
4070 # extra protection for templates, but SATA and IDE don't support it..
4071 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4072
4073 push @$devices, '-drive',$drive_cmd;
4074 push @$devices, '-device', print_drivedevice_full(
4075 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4076 });
4077
4078 for (my $i = 0; $i < $MAX_NETS; $i++) {
4079 my $netname = "net$i";
4080
4081 next if !$conf->{$netname};
4082 my $d = parse_net($conf->{$netname});
4083 next if !$d;
4084 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4085
4086 $use_virtio = 1 if $d->{model} eq 'virtio';
4087
4088 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4089
4090 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4091 push @$devices, '-netdev', $netdevfull;
4092
4093 my $netdevicefull = print_netdevice_full(
4094 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4095
4096 push @$devices, '-device', $netdevicefull;
4097 }
4098
4099 if ($conf->{ivshmem}) {
4100 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4101
4102 my $bus;
4103 if ($q35) {
4104 $bus = print_pcie_addr("ivshmem");
4105 } else {
4106 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4107 }
4108
4109 my $ivshmem_name = $ivshmem->{name} // $vmid;
4110 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4111
4112 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4113 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4114 .",size=$ivshmem->{size}M";
4115 }
4116
4117 # pci.4 is nested in pci.1
4118 $bridges->{1} = 1 if $bridges->{4};
4119
4120 if (!$q35) { # add pci bridges
4121 if (min_version($machine_version, 2, 3)) {
4122 $bridges->{1} = 1;
4123 $bridges->{2} = 1;
4124 }
4125 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4126 }
4127
4128 for my $k (sort {$b cmp $a} keys %$bridges) {
4129 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4130
4131 my $k_name = $k;
4132 if ($k == 2 && $legacy_igd) {
4133 $k_name = "$k-igd";
4134 }
4135 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4136 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4137
4138 if ($q35) { # add after -readconfig pve-q35.cfg
4139 splice @$devices, 2, 0, '-device', $devstr;
4140 } else {
4141 unshift @$devices, '-device', $devstr if $k > 0;
4142 }
4143 }
4144
4145 if (!$kvm) {
4146 push @$machineFlags, 'accel=tcg';
4147 }
4148
4149 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
4150
4151 my $machine_type_min = $machine_type;
4152 if ($add_pve_version) {
4153 $machine_type_min =~ s/\+pve\d+$//;
4154 $machine_type_min .= "+pve$required_pve_version";
4155 }
4156 push @$machineFlags, "type=${machine_type_min}";
4157
4158 push @$cmd, @$devices;
4159 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4160 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4161 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4162
4163 if (my $vmstate = $conf->{vmstate}) {
4164 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4165 push @$vollist, $vmstate;
4166 push @$cmd, '-loadstate', $statepath;
4167 print "activating and using '$vmstate' as vmstate\n";
4168 }
4169
4170 if (PVE::QemuConfig->is_template($conf)) {
4171 # needed to workaround base volumes being read-only
4172 push @$cmd, '-snapshot';
4173 }
4174
4175 # add custom args
4176 if ($conf->{args}) {
4177 my $aa = PVE::Tools::split_args($conf->{args});
4178 push @$cmd, @$aa;
4179 }
4180
4181 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
4182 }
4183
# Sanity-check the source file configured for a VirtIO RNG device.
#
# Dies if $source does not exist, or if $source is '/dev/hwrng' while the first line of the
# kernel's 'rng_current' sysfs file reads 'none'.
sub check_rng_source {
    my ($source) = @_;

    # the existence check matters most for /dev/hwrng, but is harmless for any other source
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $is_hwrng = $source eq '/dev/hwrng';

    if ($is_hwrng && file_read_firstline($rng_current) eq 'none') {
        # We have to abort here, as QEMU would otherwise crash on the first rng access. Since
        # rng_current cannot be set to 'none' by hand, this is only a concern at this point.
        my $msg = "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but";
        $msg .= " '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is";
        $msg .= " attached to the host.\n";
        die $msg;
    }
}
4200
# Ask the running VM (via 'query-spice') for its SPICE port.
#
# Prefers the reported 'tls-port' and falls back to 'port'; dies if neither is set.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4208
# Collect the ids of the devices currently present in the running VM $vmid.
#
# Returns a hash reference keyed by device id, filled from four monitor queries:
#  - 'query-pci':   every device with a 'qdev_id', descending into PCI bridges; for a bridge
#                   the number of devices directly behind it is added to its entry
#  - 'query-block': drives named 'drive-<id>' are recorded as '<id>'
#  - 'query-mice':  'tablet' is set if a 'QEMU HID Tablet' is listed
#  - 'qom-list' on /machine/peripheral: entries named usbN or usbredirdevN
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    # start with the devices sitting directly on each PCI bus ...
    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my $current_level = [];
    for my $bus (@$pci_buses) {
        push @$current_level, @{$bus->{devices}};
    }

    # ... and descend level by level through nested bridges
    while (@$current_level) {
        my $next_level = [];
        for my $dev (@$current_level) {
            my $id = $dev->{'qdev_id'};
            $devices->{$id} = 1 if $id;

            my $bridge = $dev->{'pci_bridge'};
            next if !$bridge || !$bridge->{devices};

            my $children = $bridge->{devices};
            $devices->{$id} += scalar(@$children);
            push @$next_level, @$children;
        }
        $current_level = $next_level;
    }

    my $blocks = mon_cmd($vmid, 'query-block');
    for my $block (@$blocks) {
        my ($drive_id) = $block->{device} =~ m/^drive-(\S+)/;
        $devices->{$drive_id} = 1 if defined($drive_id);
    }

    my $mice = mon_cmd($vmid, 'query-mice');
    $devices->{tablet} = 1 if first { $_->{name} eq 'QEMU HID Tablet' } @$mice;

    # there is no 'query-usb', so USB devices are found by listing the QOM entries
    # below /machine/peripheral instead
    my $peripherals = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $entry (@$peripherals) {
        my $name = $entry->{name};
        next if $name !~ m/^usb(?:redirdev)?\d+$/;
        $devices->{$name} = 1;
    }

    return $devices;
}
4258
# Hot-plug the device identified by $deviceid into the running VM $vmid.
#
# Does nothing if vm_devices_list() already reports the device. Otherwise the handling is
# selected by the device id: tablet, keyboard, usbredirdevN, usbN, virtioN,
# virtioscsiN/scsihwN, scsiN, netN and (on non-q35 machines only) pci.N. Any other id dies.
#
# Returns 1 on success or when the device is already present, and nothing if
# qemu_netdevadd() reports failure for a network device.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $plugged = vm_devices_list($vmid);
    return 1 if defined($plugged->{$deviceid});

    # the device may live behind a PCI bridge which has to be plugged first
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # error path for drives: try to remove the drive added earlier, then re-raise the error
    my $drop_drive_and_die = sub {
        my ($err) = @_;
        eval { qemu_drivedel($vmid, $deviceid); };
        warn $@ if $@;
        die $err;
    };

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $id = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$id");
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_spice_usbdevice($id, "xhci", $id + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd(
            $vmid,
            PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1),
        );
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        qemu_deviceadd($vmid, $devicefull);

        # only the verification is guarded here, a failing device_add propagates directly
        eval { qemu_deviceaddverify($vmid, $deviceid); };
        if (my $err = $@) {
            $drop_drive_and_die->($err);
        }
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $scsihw_type = $scsihw eq 'virtio-scsi-single' ? "virtio-scsi-pci" : $scsihw;

        my $devicefull = "$scsihw_type,id=$deviceid$pciaddr";

        # iothread and queue count are only applied to virtioscsiN controllers
        my $is_virtioscsi = $deviceid =~ m/^virtioscsi(\d+)$/;

        if ($is_virtioscsi && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $devicefull .= ",iothread=iothread-$deviceid";
        }

        $devicefull .= ",num_queues=$device->{queues}" if $is_virtioscsi && $device->{queues};

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $devicefull = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        eval { qemu_deviceadd($vmid, $devicefull); };
        if (my $err = $@) {
            $drop_drive_and_die->($err);
        }
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # for the NIC device string the machine type is looked up again via qemu_machine_pxe()
        # instead of using the $machine_type parameter
        my $net_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $machine_version = PVE::QemuServer::Machine::extract_version($net_machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($net_machine_type);

        my $netdevicefull = print_netdevice_full(
            $vmid,
            $conf,
            $device,
            $deviceid,
            undef,
            $use_old_bios_files,
            $arch,
            $net_machine_type,
            $machine_version,
        );
        qemu_deviceadd($vmid, $netdevicefull);

        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # remove the netdev backend again before re-raising
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$is_q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridgeid = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);
        my $devicefull = "pci-bridge,id=pci.$bridgeid,chassis_nr=$bridgeid$pciaddr";

        qemu_deviceadd($vmid, $devicefull);
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4355
# fixme: this should raise exceptions on error!
#
# Hot-unplug the device identified by $deviceid from the running VM $vmid.
#
# Returns 1 right away if vm_devices_list() does not report the device. Dies if the device is
# one of the boot disks of $conf, or if the device id is of a kind that is not handled here.
# Returns 1 after a successful unplug.
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $plugged = vm_devices_list($vmid);
    return 1 if !defined($plugged->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    my $is_bootdisk = grep { $_ eq $deviceid } @$bootdisks;
    die "can't unplug bootdisk '$deviceid'\n" if $is_bootdisk;

    # issue device_del and wait until the device is really gone
    my $del_and_verify = sub {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    };

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        # these are removed without verification
        qemu_devicedel($vmid, $deviceid);
    } elsif ($deviceid =~ m/^usbredirdev\d+$/) {
        $del_and_verify->();
    } elsif ($deviceid =~ m/^usb\d+$/) {
        $del_and_verify->();
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $device = parse_drive($deviceid, $conf->{$deviceid});

        $del_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $device);
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        $del_and_verify->();
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $device = parse_drive($deviceid, $conf->{$deviceid});

        $del_and_verify->();
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread is named after the per-disk controller
        my $single = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$device->{index}", $device) if $single;
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        $del_and_verify->();
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4404
# Add a 'spicevmc' character device of type 'usbredir' with the id $id to the running VM,
# using the 'chardev-add' monitor command.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => { type => "usbredir" },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4418
# Create the 'iothread-$deviceid' object in the running VM if $device has 'iothread' set and
# vm_iothreads_list() does not list such an object yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4427
# Remove the 'iothread-$deviceid' object from the running VM if $device has 'iothread' set
# and vm_iothreads_list() lists such an object.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4436
# Add the drive described by $device to the running VM using the HMP 'drive_add' command.
#
# Returns 1 if the monitor output contains "OK", dies with the monitor output otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    my $kvmver = get_running_qemu_version($vmid);
    my $io_uring = min_version($kvmver, 6, 0);

    my $drive = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # double every backslash, as the drive string is embedded in a quoted HMP argument
    my $escaped = $drive =~ s/\\/\\\\/gr;

    my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, qq{drive_add auto "$escaped"});

    # on success qemu prints: "OK"
    die "adding drive failed: $ret\n" if $ret !~ m/OK/s;

    return 1;
}
4451
# Remove the drive 'drive-$deviceid' from the running VM using the HMP 'drive_del' command.
#
# Returns 1 if the command printed nothing (besides leading whitespace) or reported that the
# device was not found; dies with the monitor output otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $ret = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $ret =~ s/^\s+//;

    # NB: a "not found" error means the drive was auto-deleted already, so it is ignored
    my $ok = $ret eq "" || $ret =~ m/Device \'.*?\' not found/s;

    die "deleting drive $deviceid failed : $ret\n" if !$ok;

    return 1;
}
4465
# Wait for a hot-plugged device to show up in vm_devices_list().
#
# Checks up to six times, sleeping one second after each unsuccessful check. Returns 1 once
# the device is listed, dies if it never appears.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        return 1 if defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4477
4478
# Wait for a hot-unplugged device to disappear from vm_devices_list().
#
# The check is required because device_del is asynchronous and its empty return value is
# not a reliable indication of success. Checks up to six times, sleeping one second after
# each unsuccessful check. Returns 1 once the device is gone, dies if it stays present.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $plugged = vm_devices_list($vmid);
        return 1 if !defined($plugged->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4493
# Ensure that the SCSI controller responsible for the drive $device exists in the running VM,
# hot-plugging it via vm_deviceplug() if vm_devices_list() does not report it.
# Always returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my (undef, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$controller_prefix$controller";

    my $plugged = vm_devices_list($vmid);

    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type)
        if !defined($plugged->{$controller_id});

    return 1;
}
4508
# Unplug the SCSI controller belonging to the drive $opt.
#
# With 'virtio-scsi-single' the per-disk controller 'virtioscsi<index>' is unplugged
# directly. Otherwise the controller 'scsihw<controller>' is only unplugged if no plugged SCSI
# drive has an index below ($maxdev - 1) * ($controller + 1). Always returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    my $single = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
    if ($single) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    # keep the controller while a plugged SCSI drive below the index limit remains
    my $plugged = vm_devices_list($vmid);
    for my $name (keys %$plugged) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4537
# Make sure the PCI bridge needed by the device id $device is present in the running VM
# $vmid, hot-plugging it if vm_devices_list() does not report it.
#
# print_pci_addr() is called with a fresh $bridges hash and is expected to record the id of
# the bridge the device address lives on in it. Nothing is plugged when no bridge id, or a
# bridge id below 1, is found. Always returns 1.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my $bridges = {};

    # only called for its effect on $bridges, the returned address string is not needed
    print_pci_addr($device, $bridges, $arch, $machine_type);

    # takes the last key in hash iteration order; presumably there is at most one entry for
    # a single device - TODO confirm against print_pci_addr()
    my $bridgeid;
    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() takes ($storecfg, $conf, $vmid, $deviceid, $device, $arch,
        # $machine_type). A bridge has no device config, so undef is passed in the $device
        # slot; omitting it would shift $arch and $machine_type by one position and the bridge
        # address would be computed with the wrong arch and no machine type.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, undef, $arch, $machine_type);
    }

    return 1;
}
4561
# Set the link state of the network device $device in the running VM using the 'set_link'
# monitor command; a true $up brings the link up, a false one takes it down.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4568
# Add the network backend for $deviceid to the running VM using the 'netdev_add' monitor
# command. Always returns 1 (errors from mon_cmd() propagate as exceptions).
#
# The option string produced by print_netdev_full() is split on '=' and ',' into key/value
# pairs; 'vhost' is converted to a JSON boolean and 'queues' to a number before sending.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %options = split(/[=,]/, $netdev);

    $options{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($options{vhost}))
        if defined($options{vhost});

    $options{queues} += 0 if defined($options{queues});

    mon_cmd($vmid, "netdev_add", %options);

    return 1;
}
4586
# Remove the network backend with the id $deviceid from the running VM using the
# 'netdev_del' monitor command.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    return mon_cmd($vmid, "netdev_del", id => $deviceid);
}
4592
# Replace the USB device $deviceid in the running VM with the one described by $device.
#
# Does nothing if $device is false. Otherwise the old device is unplugged, an xhci controller
# is added if vm_devices_list() does not report one, and the new device is plugged.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # the previous device has to go first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # add the xhci controller in case it is not there yet
    my $plugged = vm_devices_list($vmid);
    if (!$plugged->{xhci}) {
        my $pciaddr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($pciaddr));
    }

    # now plug the new device
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4612
# Change the number of vCPUs of the running VM $vmid to $vcpus.
#
# The maximum is sockets * cores from $conf, and a false $vcpus means "use the maximum".
# Dies if more vCPUs than the maximum are requested, if the number of vCPUs running in the
# guest does not match the configuration before plugging, or if unplugging is requested on a
# machine version below 2.7. After every single successful plug/unplug the new count is
# stored in $conf->{vcpus} and the config is written. On machine versions below 2.7, vCPUs
# are added with the 'cpu-add' command and the config is not touched.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    # 'smp' is the old style socket setting, which is no longer used
    my $sockets = $conf->{sockets} || $conf->{smp} || 1;
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus ||= $maxcpus;

    die "you can't add more vcpus than maxcpus\n" if $vcpus > $maxcpus;

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    # Poll 'query-cpus-fast' until the guest reports $expected vCPUs, sleeping one second
    # between polls. Raises a parameter exception with $errmsg when the count still does not
    # match and the retry counter has exceeded $max_retries. Returns the last query result.
    my $wait_for_vcpu_count = sub {
        my ($expected, $max_retries, $errmsg) = @_;

        my $running = mon_cmd($vmid, "query-cpus-fast");
        for (my $retry = 0; scalar(@{$running}) != $expected; $retry++) {
            raise_param_exc({ vcpus => $errmsg }) if $retry > $max_retries;
            sleep 1;
            $running = mon_cmd($vmid, "query-cpus-fast");
        }

        return $running;
    };

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        for (my $i = $currentvcpus; $i > $vcpus; $i--) {
            qemu_devicedel($vmid, "cpu$i");
            my $running = $wait_for_vcpu_count->($i - 1, 5, "error unplugging cpu$i");

            # update the config after each successful cpu unplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $currentrunningvcpus = mon_cmd($vmid, "query-cpus-fast");
    die "vcpus in running vm does not match its configuration\n"
        if scalar(@{$currentrunningvcpus}) != $currentvcpus;

    if (PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        for (my $i = $currentvcpus + 1; $i <= $vcpus; $i++) {
            my $cpustr = print_cpu_device($conf, $i);
            qemu_deviceadd($vmid, $cpustr);
            my $running = $wait_for_vcpu_count->($i, 10, "error hotplugging cpu$i");

            # update the config after each successful cpu hotplug
            $conf->{vcpus} = scalar(@{$running});
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    } else {
        for (my $i = $currentvcpus; $i < $vcpus; $i++) {
            mon_cmd($vmid, "cpu-add", id => int($i));
        }
    }
}
4687
# Apply I/O throttling limits to the block device $deviceid of VM $vmid using the
# 'block_set_io_throttle' monitor command. Does nothing if the VM is not running.
#
# After $vmid and $deviceid, the limits are taken positionally in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every limit is converted with int() before it is sent.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # monitor argument names, in the same order as the positional parameters
    my @limit_names = qw(
        bps bps_rd bps_wr
        iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max
        iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @throttle_args = map { ($limit_names[$_], int($limits[$_])) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @throttle_args);
}
4719
# Resize the volume $volid to $size through the storage layer and, if the VM is running,
# tell QEMU the new size of block device $deviceid using the 'block_resize' command.
#
# The size passed to QEMU is rounded up to the next multiple of 1024.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $running);

    return if !$running;

    my $aligned_size = $size + (1024 - $size % 1024) % 1024;

    mon_cmd(
        $vmid,
        "block_resize",
        device => $deviceid,
        size => int($aligned_size),
        timeout => 60,
    );
}
4740
# Create the snapshot $snap of the volume $volid.
#
# If the VM is running and do_snapshots_with_qemu() approves, QEMU takes the snapshot with
# 'blockdev-snapshot-internal-sync'; otherwise the storage layer takes it.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $use_qemu = check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd($vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap)
        if $use_qemu;

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
4752
# Delete the snapshot $snap of the volume $volid.
#
# The volume only counts as "in use" if the VM is running and one of the volumes in its
# current configuration refers to $volid. In that case, and if do_snapshots_with_qemu()
# approves, QEMU deletes the snapshot with 'blockdev-snapshot-delete-internal-sync'.
# Otherwise the storage layer deletes it and is told whether the volume is in use.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $in_use = check_running($vmid);

    if ($in_use) {
        # a running VM only matters if it actually uses the volume
        $in_use = undef;

        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $in_use = 1 if $drive->{file} eq $volid;
        });
    }

    my $use_qemu = $in_use && do_snapshots_with_qemu($storecfg, $volid, $deviceid);

    return mon_cmd(
        $vmid, 'blockdev-snapshot-delete-internal-sync', device => $deviceid, name => $snap)
        if $use_qemu;

    return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $in_use);
}
4774
# Set the migration capabilities of the running VM $vmid.
#
# Every capability reported by 'query-migrate-capabilities' is sent back with an explicit
# state: 'auto-converge' and 'xbzrle' are switched on, 'dirty-bitmaps' is switched on if
# 'query-proxmox-support' reports the matching PBS dirty-bitmap feature (the savevm variant
# if $savevm is set, the migration variant otherwise), and all others are switched off.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # errors are ignored on purpose, a failed query simply means "no dirty bitmap support"
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop;
    if ($savevm) {
        $bitmap_prop = 'pbs-dirty-bitmap-savevm';
    } else {
        $bitmap_prop = 'pbs-dirty-bitmap-migration';
    }
    my $dirty_bitmaps = $qemu_support->{$bitmap_prop} ? 1 : 0;

    my %wanted_state = (
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $dirty_bitmaps,
    );

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    my @capabilities = map {
        my $name = $_->{capability};
        +{
            capability => $name,
            state => $wanted_state{$name} ? JSON::true : JSON::false,
        };
    } @$supported;

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => \@capabilities);
}
4805
# Call $func->($volid, $attributes, @param) once for every distinct volume referenced by
# $conf, including its pending changes and its snapshots.
#
# $attributes is a hash collected over all references to the volume:
#   cdrom                  - 1 unless at least one reference is not a cdrom drive
#   replicate              - 1 if at least one reference has replication enabled (the default)
#   shared                 - 1 if at least one reference is marked shared
#   is_unused              - 1 if referenced by an 'unusedN' key
#   is_attached            - 1 if referenced by the current config while not marked unused
#   referenced_in_snapshot - hash of the snapshot names referencing the volume
#   referenced_in_pending  - 1 if referenced by the pending section
#   size                   - first true size seen
#   is_vmstate             - 1 if referenced by the 'vmstate' key
#   is_tpmstate            - 1 if referenced by the 'tpmstate0' key
#   drivename              - last valid drive name referencing the volume
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $test_volid = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $info = ($volhash->{$volid} //= {});

        $info->{cdrom} //= 1;
        $info->{cdrom} = 0 if !drive_is_cdrom($drive);

        my $replicate = $drive->{replicate} // 1;
        $info->{replicate} //= 0;
        $info->{replicate} = 1 if $replicate;

        $info->{shared} //= 0;
        $info->{shared} = 1 if $drive->{shared};

        $info->{is_unused} //= 0;
        $info->{is_unused} = 1 if $key =~ /^unused\d+$/;

        $info->{is_attached} //= 0;
        $info->{is_attached} = 1
            if !$info->{is_unused} && !defined($snapname) && !$pending;

        $info->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname);

        $info->{referenced_in_pending} = 1 if $pending;

        my $size = $drive->{size};
        $info->{size} //= $size if $size;

        $info->{is_vmstate} //= 0;
        $info->{is_vmstate} = 1 if $key eq 'vmstate';

        $info->{is_tpmstate} //= 0;
        $info->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $info->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current configuration
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $test_volid);

    # pending changes, if there are any
    if (defined($conf->{pending}) && %{$conf->{pending}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{pending}, $include_opts, $test_volid, undef, 1);
    }

    # all snapshots
    for my $snapname (keys %{$conf->{snapshots}}) {
        my $snap = $conf->{snapshots}->{$snapname};
        PVE::QemuConfig->foreach_volume_full($snap, $include_opts, $test_volid, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4870
# Options that are flagged for "fast plug" handling: the fixed list below plus every option
# described in $confdesc_cloudinit.
my $fast_plug_option = {
    map { $_ => 1 } (
        qw(
            description
            hookscript
            lock
            migrate_downtime
            migrate_speed
            name
            onboot
            protection
            shares
            startup
            tags
            vmstatestorage
        ),
        keys %$confdesc_cloudinit,
    ),
};
4889
4890 # hotplug changes in [PENDING]
4891 # $selection hash can be used to only apply specified options, for
4892 # example: { cores => 1 } (only apply changed 'cores')
4893 # $errors ref is used to return error messages
4894 sub vmconfig_hotplug_pending {
4895 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4896
4897 my $defaults = load_defaults();
4898 my $arch = get_vm_arch($conf);
4899 my $machine_type = get_vm_machine($conf, undef, $arch);
4900
4901 # commit values which do not have any impact on running VM first
4902 # Note: those option cannot raise errors, we we do not care about
4903 # $selection and always apply them.
4904
4905 my $add_error = sub {
4906 my ($opt, $msg) = @_;
4907 $errors->{$opt} = "hotplug problem - $msg";
4908 };
4909
4910 my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
4911
4912 my $cloudinit_record_changed = sub {
4913 my ($conf, $opt, $old, $new) = @_;
4914 return if !$cloudinit_pending_properties->{$opt};
4915
4916 my $ci = ($conf->{cloudinit} //= {});
4917
4918 my $recorded = $ci->{$opt};
4919 my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');
4920
4921 if (defined($new)) {
4922 if (defined($old)) {
4923 # an existing value is being modified
4924 if (defined($recorded)) {
4925 # the value was already not in sync
4926 if ($new eq $recorded) {
4927 # a value is being reverted to the cloud-init state:
4928 delete $ci->{$opt};
4929 delete $added{$opt};
4930 } else {
4931 # the value was changed multiple times, do nothing
4932 }
4933 } elsif ($added{$opt}) {
4934 # the value had been marked as added and is being changed, do nothing
4935 } else {
4936 # the value is new, record it:
4937 $ci->{$opt} = $old;
4938 }
4939 } else {
4940 # a new value is being added
4941 if (defined($recorded)) {
4942 # it was already not in sync
4943 if ($new eq $recorded) {
4944 # a value is being reverted to the cloud-init state:
4945 delete $ci->{$opt};
4946 delete $added{$opt};
4947 } else {
4948 # the value had temporarily been removed, do nothing
4949 }
4950 } elsif ($added{$opt}) {
4951 # the value had been marked as added already, do nothing
4952 } else {
4953 # the value is new, add it
4954 $added{$opt} = 1;
4955 }
4956 }
4957 } elsif (!defined($old)) {
4958 # a non-existent value is being removed? ignore...
4959 } else {
4960 # a value is being deleted
4961 if (defined($recorded)) {
4962 # a value was already recorded, just keep it
4963 } elsif ($added{$opt}) {
4964 # the value was marked as added, remove it
4965 delete $added{$opt};
4966 } else {
4967 # a previously unrecorded value is being removed, record the old value:
4968 $ci->{$opt} = $old;
4969 }
4970 }
4971
4972 my $added = join(',', sort keys %added);
4973 $ci->{added} = $added if length($added);
4974 };
4975
4976 my $changes = 0;
4977 foreach my $opt (keys %{$conf->{pending}}) { # add/change
4978 if ($fast_plug_option->{$opt}) {
4979 my $new = delete $conf->{pending}->{$opt};
4980 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
4981 $conf->{$opt} = $new;
4982 $changes = 1;
4983 }
4984 }
4985
4986 if ($changes) {
4987 PVE::QemuConfig->write_config($vmid, $conf);
4988 }
4989
4990 my $ostype = $conf->{ostype};
4991 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
4992 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
4993 my $usb_hotplug = $hotplug_features->{usb}
4994 && min_version($version, 7, 1)
4995 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
4996
4997 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
4998 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
4999
5000 foreach my $opt (sort keys %$pending_delete_hash) {
5001 next if $selection && !$selection->{$opt};
5002 my $force = $pending_delete_hash->{$opt}->{force};
5003 eval {
5004 if ($opt eq 'hotplug') {
5005 die "skip\n" if ($conf->{hotplug} =~ /(cpu|memory)/);
5006 } elsif ($opt eq 'tablet') {
5007 die "skip\n" if !$hotplug_features->{usb};
5008 if ($defaults->{tablet}) {
5009 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5010 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5011 if $arch eq 'aarch64';
5012 } else {
5013 vm_deviceunplug($vmid, $conf, 'tablet');
5014 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5015 }
5016 } elsif ($opt =~ m/^usb(\d+)$/) {
5017 my $index = $1;
5018 die "skip\n" if !$usb_hotplug;
5019 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
5020 vm_deviceunplug($vmid, $conf, $opt);
5021 } elsif ($opt eq 'vcpus') {
5022 die "skip\n" if !$hotplug_features->{cpu};
5023 qemu_cpu_hotplug($vmid, $conf, undef);
5024 } elsif ($opt eq 'balloon') {
5025 # enable balloon device is not hotpluggable
5026 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
5027 # here we reset the ballooning value to memory
5028 my $balloon = get_current_memory($conf->{memory});
5029 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5030 } elsif ($fast_plug_option->{$opt}) {
5031 # do nothing
5032 } elsif ($opt =~ m/^net(\d+)$/) {
5033 die "skip\n" if !$hotplug_features->{network};
5034 vm_deviceunplug($vmid, $conf, $opt);
5035 if($have_sdn) {
5036 my $net = PVE::QemuServer::parse_net($conf->{$opt});
5037 PVE::Network::SDN::Vnets::del_ips_from_mac($net->{bridge}, $net->{macaddr}, $conf->{name});
5038 }
5039 } elsif (is_valid_drivename($opt)) {
5040 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
5041 vm_deviceunplug($vmid, $conf, $opt);
5042 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
5043 } elsif ($opt =~ m/^memory$/) {
5044 die "skip\n" if !$hotplug_features->{memory};
5045 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf);
5046 } elsif ($opt eq 'cpuunits') {
5047 $cgroup->change_cpu_shares(undef);
5048 } elsif ($opt eq 'cpulimit') {
5049 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
5050 } else {
5051 die "skip\n";
5052 }
5053 };
5054 if (my $err = $@) {
5055 &$add_error($opt, $err) if $err ne "skip\n";
5056 } else {
5057 my $old = delete $conf->{$opt};
5058 $cloudinit_record_changed->($conf, $opt, $old, undef);
5059 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
5060 }
5061 }
5062
5063 my $cloudinit_opt;
5064 foreach my $opt (keys %{$conf->{pending}}) {
5065 next if $selection && !$selection->{$opt};
5066 my $value = $conf->{pending}->{$opt};
5067 eval {
5068 if ($opt eq 'hotplug') {
5069 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
5070 die "skip\n" if ($value =~ /cpu/) || ($value !~ /cpu/ && $conf->{hotplug} =~ /cpu/);
5071 } elsif ($opt eq 'tablet') {
5072 die "skip\n" if !$hotplug_features->{usb};
5073 if ($value == 1) {
5074 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5075 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5076 if $arch eq 'aarch64';
5077 } elsif ($value == 0) {
5078 vm_deviceunplug($vmid, $conf, 'tablet');
5079 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5080 }
5081 } elsif ($opt =~ m/^usb(\d+)$/) {
5082 my $index = $1;
5083 die "skip\n" if !$usb_hotplug;
5084 my $d = eval { parse_property_string('pve-qm-usb', $value) };
5085 my $id = $opt;
5086 if ($d->{host} =~ m/^spice$/i) {
5087 $id = "usbredirdev$index";
5088 }
5089 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
5090 } elsif ($opt eq 'vcpus') {
5091 die "skip\n" if !$hotplug_features->{cpu};
5092 qemu_cpu_hotplug($vmid, $conf, $value);
5093 } elsif ($opt eq 'balloon') {
5094 # enabling/disabling the ballooning device is not hotpluggable
5095 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5096 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5097 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5098
5099 # allow manual ballooning if shares is set to zero
5100 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5101 my $memory = get_current_memory($conf->{memory});
5102 my $balloon = $conf->{pending}->{balloon} || $memory;
5103 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5104 }
5105 } elsif ($opt =~ m/^net(\d+)$/) {
5106 # some changes can be done without hotplug
5107 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5108 $vmid, $opt, $value, $arch, $machine_type);
5109 } elsif (is_valid_drivename($opt)) {
5110 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5111 # some changes can be done without hotplug
5112 my $drive = parse_drive($opt, $value);
5113 if (drive_is_cloudinit($drive)) {
5114 $cloudinit_opt = [$opt, $drive];
5115 # apply all the other changes first, then generate the cloudinit disk
5116 die "skip\n";
5117 }
5118 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5119 $vmid, $opt, $value, $arch, $machine_type);
5120 } elsif ($opt =~ m/^memory$/) { #dimms
5121 die "skip\n" if !$hotplug_features->{memory};
5122 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $value);
5123 } elsif ($opt eq 'cpuunits') {
5124 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5125 $cgroup->change_cpu_shares($new_cpuunits);
5126 } elsif ($opt eq 'cpulimit') {
5127 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5128 $cgroup->change_cpu_quota($cpulimit, 100000);
5129 } elsif ($opt eq 'agent') {
5130 vmconfig_update_agent($conf, $opt, $value);
5131 } else {
5132 die "skip\n"; # skip non-hot-pluggable options
5133 }
5134 };
5135 if (my $err = $@) {
5136 &$add_error($opt, $err) if $err ne "skip\n";
5137 } else {
5138 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
5139 $conf->{$opt} = $value;
5140 delete $conf->{pending}->{$opt};
5141 }
5142 }
5143
5144 if (defined($cloudinit_opt)) {
5145 my ($opt, $drive) = @$cloudinit_opt;
5146 my $value = $conf->{pending}->{$opt};
5147 eval {
5148 my $temp = {%$conf, $opt => $value};
5149 PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
5150 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5151 $vmid, $opt, $value, $arch, $machine_type);
5152 };
5153 if (my $err = $@) {
5154 &$add_error($opt, $err) if $err ne "skip\n";
5155 } else {
5156 $conf->{$opt} = $value;
5157 delete $conf->{pending}->{$opt};
5158 }
5159 }
5160
5161 # unplug xhci controller if no usb device is left
5162 if ($usb_hotplug) {
5163 my $has_usb = 0;
5164 for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
5165 next if !defined($conf->{"usb$i"});
5166 $has_usb = 1;
5167 last;
5168 }
5169 if (!$has_usb) {
5170 vm_deviceunplug($vmid, $conf, 'xhci');
5171 }
5172 }
5173
5174 PVE::QemuConfig->write_config($vmid, $conf);
5175
5176 if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
5177 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
5178 }
5179 }
5180
# Free the volume referenced by $drive if that is requested and permitted.
#
# Nothing is done unless $force is set or $key names an 'unused...' entry, and nothing is done
# if drive_is_cdrom($drive, 1) reports the drive as a CD-ROM.
#
# Returns 1 if the drive entry can be dropped from the config (the volume was freed, or it is
# not owned by this VM), and nothing otherwise.
# Dies if the volume is still in use; $rpcenv->check() is expected to raise on a missing
# 'Datastore.AllocateSpace' privilege.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    my $removal_requested = $force || $key =~ /^unused/;
    return if !$removal_requested;
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk, it is only removed from the config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    my $in_use = PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid);
    die "unable to delete '$volid' - volume is still in use (snapshot?)\n" if $in_use;

    PVE::Storage::vdisk_free($storecfg, $volid);
    return 1;
}
5203
# Handle the removal of drive option $opt from $conf.
#
# Without $force the drive is handed to vmconfig_register_unused_drive(). With $force the
# 'VM.Config.Disk' permission of the current RPC user is checked and the volume is passed to
# try_deallocate_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $authuser = $rpcenv->get_user();

    # plain detach: keep the volume around as an unused entry
    return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive) if !$force;

    $rpcenv->check_vm_perm($authuser, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $authuser, $force);
}
5219
5220
5221
# Apply all pending changes to the VM configuration ("cold plug").
#
# Parameters:
#   $vmid            - ID of the VM whose config is updated
#   $conf            - VM config hash, modified in place (entries move out of {pending})
#   $storecfg        - storage configuration, used when detaching or freeing drives
#   $errors          - optional hash ref; receives one message per option that failed
#   $skip_cloud_init - if true, the cloud-init config is not regenerated afterwards
#
# Pending deletions are handled first, then pending additions/changes. An option that fails is
# reported via $errors and warn() and is left pending. The config is written once at the end,
# and a second time if applying the cloud-init config reports success.
# Returns nothing when there are no pending changes.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $add_apply_error = sub {
        my ($opt, $msg) = @_;
        my $err_msg = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $err_msg;
        warn $err_msg;
    };

    # cold plug

    my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    foreach my $opt (sort keys %$pending_delete_hash) {
        my $force = $pending_delete_hash->{$opt}->{force};
        eval {
            if ($opt =~ m/^unused/) {
                die "internal error";
            } elsif (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
            } elsif (defined($conf->{$opt}) && $opt =~ m/^net\d+$/) {
                if ($have_sdn) {
                    my $net = PVE::QemuServer::parse_net($conf->{$opt});
                    # best effort: a failure here must not block removing the option
                    eval {
                        PVE::Network::SDN::Vnets::del_ips_from_mac(
                            $net->{bridge}, $net->{macaddr}, $conf->{name});
                    };
                    warn if $@;
                }
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
        } else {
            PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
            delete $conf->{$opt};
        }
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # 0 disables generation for good, undef means "not decided yet" (see '//=' below)
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    foreach my $opt (keys %{$conf->{pending}}) { # add/change
        next if $opt eq 'delete'; # just to be sure
        eval {
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                # the drive currently configured under $opt is being replaced
                vmconfig_register_unused_drive(
                    $storecfg, $vmid, $conf, parse_drive($opt, $conf->{$opt}));
            } elsif (defined($conf->{pending}->{$opt}) && $opt =~ m/^net\d+$/) {
                if ($have_sdn) {
                    my $new_net = PVE::QemuServer::parse_net($conf->{pending}->{$opt});
                    if ($conf->{$opt}) {
                        my $old_net = PVE::QemuServer::parse_net($conf->{$opt});

                        # safe_string_ne() rather than 'ne': the bridge is optional, and
                        # comparing undef with 'ne' triggers uninitialized-value warnings
                        if (
                            safe_string_ne($old_net->{bridge}, $new_net->{bridge})
                            || safe_string_ne($old_net->{macaddr}, $new_net->{macaddr})
                        ) {
                            PVE::Network::SDN::Vnets::del_ips_from_mac(
                                $old_net->{bridge}, $old_net->{macaddr}, $conf->{name});
                        }
                    }
                    #fixme: reuse ip if mac change && same bridge
                    PVE::Network::SDN::Vnets::add_next_free_cidr(
                        $new_net->{bridge}, $conf->{name}, $new_net->{macaddr}, $vmid, undef, 1);
                }
            }
        };
        if (my $err = $@) {
            $add_apply_error->($opt, $err);
        } else {

            if (is_valid_drivename($opt)) {
                my $drive = parse_drive($opt, $conf->{pending}->{$opt});
                $generate_cloudinit //= 1 if drive_is_cloudinit($drive);
            }

            $conf->{$opt} = delete $conf->{pending}->{$opt};
        }
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);
    if ($generate_cloudinit) {
        if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
            # After successful generation and if there were changes to be applied, update the
            # config to drop the {cloudinit} entry.
            PVE::QemuConfig->write_config($vmid, $conf);
        }
    }
}
5308
# Apply the network device option $opt with new value $value to a running VM.
#
# For an existing device, changes to bridge, VLAN tag, trunks, firewall, rate and link state
# are applied online and 1 is returned. A change of model, MAC address, queues, MTU, or a
# switch between bridged and non-bridged mode, makes the device get unplugged and plugged
# again. A device that is not yet configured is simply plugged.
# Dies with "skip\n" whenever plugging/unplugging would be needed but $hotplug is not set.
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $newnet = parse_net($value);

    if ($conf->{$opt}) {
        my $oldnet = parse_net($conf->{$opt});

        my $needs_replug = safe_string_ne($oldnet->{model}, $newnet->{model})
            || safe_string_ne($oldnet->{macaddr}, $newnet->{macaddr})
            || safe_num_ne($oldnet->{queues}, $newnet->{queues})
            || safe_num_ne($oldnet->{mtu}, $newnet->{mtu})
            || !($newnet->{bridge} && $oldnet->{bridge}); # bridge/nat mode change

        if (!$needs_replug) {
            my ($net_index) = $opt =~ m/net(\d+)/;
            die "internal error" if !defined($net_index);
            my $iface = "tap${vmid}i${net_index}";

            my $bridge_changed = safe_string_ne($oldnet->{bridge}, $newnet->{bridge});
            # a bridge or vlan change gets signalled to the guest by toggling the link
            # (dhcp renew for example)
            my $notify_guest = $bridge_changed || safe_num_ne($oldnet->{tag}, $newnet->{tag});

            if (
                $notify_guest
                || safe_string_ne($oldnet->{trunks}, $newnet->{trunks})
                || safe_num_ne($oldnet->{firewall}, $newnet->{firewall})
            ) {
                PVE::Network::tap_unplug($iface);

                qemu_set_link_status($vmid, $opt, 0) if $notify_guest;

                if ($bridge_changed && $have_sdn) {
                    PVE::Network::SDN::Vnets::del_ips_from_mac(
                        $oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name});
                    PVE::Network::SDN::Vnets::add_next_free_cidr(
                        $newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
                }

                my @plug_args = (
                    $iface,
                    $newnet->{bridge},
                    $newnet->{tag},
                    $newnet->{firewall},
                    $newnet->{trunks},
                    $newnet->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }

                qemu_set_link_status($vmid, $opt, 1) if $notify_guest;

            } elsif (safe_num_ne($oldnet->{rate}, $newnet->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $newnet->{rate});
            }

            qemu_set_link_status($vmid, $opt, !$newnet->{link_down})
                if safe_string_ne($oldnet->{link_down}, $newnet->{link_down});

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);

        PVE::Network::SDN::Vnets::del_ips_from_mac(
            $oldnet->{bridge}, $oldnet->{macaddr}, $conf->{name})
            if $have_sdn;
    }

    # from here on the (new or re-created) device has to be plugged
    die "skip\n" if !$hotplug;

    if ($have_sdn) {
        PVE::Network::SDN::Vnets::add_next_free_cidr(
            $newnet->{bridge}, $conf->{name}, $newnet->{macaddr}, $vmid, undef, 1);
        PVE::Network::SDN::Vnets::add_dhcp_mapping(
            $newnet->{bridge}, $newnet->{macaddr}, $vmid, $conf->{name});
    }
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $newnet, $arch, $machine_type);
}
5391
# Decide whether a pending change of the guest agent option can be taken over at runtime.
#
# Dies with "skip\n" if $conf->{$opt} is not set, or if any sub-option other than the
# hot-pluggable ones differs between the current and the new value. Returns nothing if there is
# no actual change (e.g., format string reordered) or only hot-pluggable sub-options changed.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my %is_hotpluggable = (fstrim_cloned_disks => 1);

    my $current = parse_guest_agent($conf);
    my $requested = parse_guest_agent({$opt => $value});

    # consider every sub-option present on either side: added, changed and removed ones
    my %to_compare = map { $_ => 1 } (keys %$requested, keys %$current);

    for my $option (keys %to_compare) {
        next if $is_hotpluggable{$option};
        die "skip\n" if safe_string_ne($requested->{$option}, $current->{$option});
    }

    return;
}
5414
# Apply the drive option $opt with new value $value to a running VM.
#
# For an already configured drive:
#   - changing the media type is refused,
#   - a CD-ROM gets its medium ejected and, if a new one is given, changed; returns 1,
#   - a disk that keeps its volume only gets its I/O throttle limits updated; returns 1
#     (dies with "skip\n" if an option that cannot be changed online differs),
#   - a disk whose volume changed is unplugged, registered as unused and then re-plugged.
# A drive that is not configured yet is hot-plugged.
# Dies with "skip\n" if plugging is required but $hotplug is not set or $opt is an ide/sata
# drive.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);

    if ($conf->{$opt} && (my $old_drive = parse_drive($opt, $conf->{$opt}))) {
        my $new_media = $drive->{media} || 'disk';
        my $old_media = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $new_media ne $old_media;

        if (drive_is_cdrom($old_drive)) {
            my $eject_only = $drive->{file} eq 'none';

            # the path of a new medium is looked up first, the eject follows
            my $path;
            $path = get_iso_path($storecfg, $vmid, $drive->{file}) if !$eject_only;

            # force eject if locked
            mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

            if ($eject_only) {
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } elsif ($path) {
                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path");
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            my @fixed_opts = qw(aio discard iothread queues cache ssd ro);
            my $fixed_changed =
                first { safe_string_ne($drive->{$_}, $old_drive->{$_}) } @fixed_opts;
            die "skip\n" if defined($fixed_changed);

            # throttle settings, in the argument order of qemu_block_set_io_throttle()
            my @bandwidth = qw(mbps mbps_rd mbps_wr);
            my @iops = qw(iops iops_rd iops_wr);
            my @bandwidth_max = map { "${_}_max" } @bandwidth;
            my @iops_max = map { "${_}_max" } @iops;
            my @burst_length = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $throttle_changed =
                first { safe_num_ne($drive->{$_}, $old_drive->{$_}) }
                (@bandwidth, @iops, @bandwidth_max, @iops_max, @burst_length);

            # apply throttle
            if (defined($throttle_changed)) {
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @bandwidth),
                    (map { $drive->{$_} || 0 } @iops),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @bandwidth_max),
                    (map { $drive->{$_} || 0 } @iops_max),
                    (map { $drive->{$_} || 1 } @burst_length),
                );
            }

            return 1;
        }

        # the volume changed
        die "skip\n" if !$hotplug;

        # unplug and register as unused
        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    PVE::Storage::activate_volumes($storecfg, [$drive->{file}]) if $drive->{file} !~ m|^/dev/.+|;
    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5524
# Regenerate the cloud-init config of a VM and, if the VM is running, swap the medium of its
# cloud-init drive.
#
# Does nothing if $conf contains no cloud-init drive. If several volumes qualify, the last one
# visited by foreach_volume() is used. The config is written back when
# apply_cloudinit_config() returns true.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($cloudinit_ds, $cloudinit_drive);

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if !PVE::QemuServer::drive_is_cloudinit($drive);
        ($cloudinit_ds, $cloudinit_drive) = ($ds, $drive);
    });

    return if !$cloudinit_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        if (my $path = PVE::Storage::path($storecfg, $cloudinit_drive->{file})) {
            # eject first, then change the medium to the volume path
            mon_cmd($vmid, "eject", force => JSON::true, id => "$cloudinit_ds");
            mon_cmd($vmid, "blockdev-change-medium", id => "$cloudinit_ds", filename => "$path");
        }
    }
}
5555
# called in locked context by incoming migration
#
# Collect the volumes of $conf that live on non-shared storages. CD-ROM drives, 'tpmstate0'
# and drives without a volume are left out.
#
# Returns a hash ref mapping the drive key to
#   [$volid, $storeid, $drive, $use_existing, $format]
# where $use_existing is 1 if $volid is listed in $replicated_volumes and 0 otherwise.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $local_volumes = {};

    my $collect_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive) || $ds eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        my $format = qemu_img_format($scfg, $volname);

        # replicated disks re-use existing state via bitmap
        my $use_existing = 0;
        $use_existing = 1 if $replicated_volumes->{$volid};

        $local_volumes->{$ds} = [$volid, $storeid, $drive, $use_existing, $format];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_volume);

    return $local_volumes;
}
5584
# called in locked context by incoming migration
#
# Prepare the target volumes for the drives in $source_volumes, a hash ref of
#   drive key => [$volid, $storeid, $drive, $use_existing, $format]
# as built by vm_migrate_get_nbd_disks().
#
# Entries with $use_existing set are taken over as they are and flagged as replicated. For all
# others the storage ID is mapped through $storagemap and a new volume is allocated; the
# requested format is used if the target storage lists it as valid, otherwise the storage's
# default format.
#
# Returns a hash ref of
#   drive key => { drivestr => ..., volid => ..., replicated => 1 (existing volumes only) }
# The drive hashes inside $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);

        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. default format of storage
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        $format = $defFormat if !$format || !grep { $format eq $_ } $validFormats->@*;

        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # shallow copy: assigning the reference would overwrite 'format' and 'file' in the
        # caller's drive description as well
        my $newdrive = { %$drive, format => $format, file => $newvolid };

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5620
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Runs under the VM's config lock: loads the config, refuses templates (unless skiptemplate is
# set) and VMs that are already running, and then hands over to vm_start_nolock(). If the
# config holds a 'backup' lock and the VM is running, vm_resume() is called instead. Sets
# $params->{resume} for VMs with a 'suspended' lock or a saved vmstate. With a storagemap,
# NBD target disks are allocated first and their drive strings put into the in-memory config.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid, $migrate_opts->{migratedfrom});

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $has_suspended_lock = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $has_backup_lock = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migrate_opts->{migratedfrom});

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $has_backup_lock && $running;

        unless ($params->{skiplock} || $has_suspended_lock) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $has_suspended_lock || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        my $storagemap = $migrate_opts->{storagemap};
        if ($storagemap) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);
            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the drives that were just prepared
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5664
5665
5666 # params:
5667 # statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5668 # skiplock => 0/1, skip checking for config lock
5669 # skiptemplate => 0/1, skip checking whether VM is template
5670 # forcemachine => to force QEMU machine (rollback/migration)
5671 # forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5672 # timeout => in seconds
5673 # paused => start VM in paused state (backup)
5674 # resume => resume from hibernation
5675 # pbs-backing => {
5676 # sata0 => {
5677 # repository
5678 # snapshot
5679 # keyfile
5680 # archive
5681 # },
5682 # virtio2 => ...
5683 # }
5684 # migrate_opts:
5685 # nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5686 # migratedfrom => source node
5687 # spice_ticket => used for spice migration, passed via tunnel/stdin
5688 # network => CIDR of migration network
5689 # type => secure/insecure - tunnel over encrypted connection or plain-text
5690 # nbd_proto_version => int, 0 for TCP, 1 for UNIX
5691 # replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5692 # offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5693 # contained in config
5694 sub vm_start_nolock {
5695 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5696
5697 my $statefile = $params->{statefile};
5698 my $resume = $params->{resume};
5699
5700 my $migratedfrom = $migrate_opts->{migratedfrom};
5701 my $migration_type = $migrate_opts->{type};
5702
5703 my $res = {};
5704
5705 # clean up leftover reboot request files
5706 eval { clear_reboot_request($vmid); };
5707 warn $@ if $@;
5708
5709 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5710 vmconfig_apply_pending($vmid, $conf, $storecfg);
5711 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5712 }
5713
5714 # don't regenerate the ISO if the VM is started as part of a live migration
5715 # this way we can reuse the old ISO with the correct config
5716 if (!$migratedfrom) {
5717 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5718 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5719 # $conf->{cloudinit}, so we could just not do this?
5720 # But we do it above, so for now let's be consistent.
5721 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5722 }
5723 }
5724
5725 # override offline migrated volumes, conf is out of date still
5726 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5727 for my $key (sort keys $offline_volumes->%*) {
5728 my $parsed = parse_drive($key, $conf->{$key});
5729 $parsed->{file} = $offline_volumes->{$key};
5730 $conf->{$key} = print_drive($parsed);
5731 }
5732 }
5733
5734 my $defaults = load_defaults();
5735
5736 # set environment variable useful inside network script
5737 # for remote migration the config is available on the target node!
5738 if (!$migrate_opts->{remote_node}) {
5739 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5740 }
5741
5742 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5743
5744 my $forcemachine = $params->{forcemachine};
5745 my $forcecpu = $params->{forcecpu};
5746 if ($resume) {
5747 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5748 $forcemachine = $conf->{runningmachine};
5749 $forcecpu = $conf->{runningcpu};
5750 print "Resuming suspended VM\n";
5751 }
5752
5753 my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
5754 $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
5755
5756 my $migration_ip;
5757 my $get_migration_ip = sub {
5758 my ($nodename) = @_;
5759
5760 return $migration_ip if defined($migration_ip);
5761
5762 my $cidr = $migrate_opts->{network};
5763
5764 if (!defined($cidr)) {
5765 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5766 $cidr = $dc_conf->{migration}->{network};
5767 }
5768
5769 if (defined($cidr)) {
5770 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5771
5772 die "could not get IP: no address configured on local " .
5773 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5774
5775 die "could not get IP: multiple addresses configured on local " .
5776 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5777
5778 $migration_ip = @$ips[0];
5779 }
5780
5781 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5782 if !defined($migration_ip);
5783
5784 return $migration_ip;
5785 };
5786
5787 if ($statefile) {
5788 if ($statefile eq 'tcp') {
5789 my $migrate = $res->{migrate} = { proto => 'tcp' };
5790 $migrate->{addr} = "localhost";
5791 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5792 my $nodename = nodename();
5793
5794 if (!defined($migration_type)) {
5795 if (defined($datacenterconf->{migration}->{type})) {
5796 $migration_type = $datacenterconf->{migration}->{type};
5797 } else {
5798 $migration_type = 'secure';
5799 }
5800 }
5801
5802 if ($migration_type eq 'insecure') {
5803 $migrate->{addr} = $get_migration_ip->($nodename);
5804 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5805 }
5806
5807 # see #4501: port reservation should be done close to usage - tell QEMU where to listen
5808 # via QMP later
5809 push @$cmd, '-incoming', 'defer';
5810 push @$cmd, '-S';
5811
5812 } elsif ($statefile eq 'unix') {
5813 # should be the default for secure migrations, as an SSH TCP forward
5814 # tunnel is not reliably ready at a deterministic time and regularly fails
5815 # to set up in time, so use UNIX socket forwards
5816 my $migrate = $res->{migrate} = { proto => 'unix' };
5817 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5818 unlink $migrate->{addr};
5819
5820 $migrate->{uri} = "unix:$migrate->{addr}";
5821 push @$cmd, '-incoming', $migrate->{uri};
5822 push @$cmd, '-S';
5823
5824 } elsif (-e $statefile) {
5825 push @$cmd, '-loadstate', $statefile;
5826 } else {
5827 my $statepath = PVE::Storage::path($storecfg, $statefile);
5828 push @$vollist, $statefile;
5829 push @$cmd, '-loadstate', $statepath;
5830 }
5831 } elsif ($params->{paused}) {
5832 push @$cmd, '-S';
5833 }
5834
5835 my $memory = get_current_memory($conf->{memory});
5836 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $memory, $resume);
5837
5838 my $pci_reserve_list = [];
5839 for my $device (values $pci_devices->%*) {
5840 next if $device->{mdev}; # we don't reserve for mdev devices
5841 push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
5842 }
5843
5844 # reserve all PCI IDs before actually doing anything with them
5845 PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
5846
5847 eval {
5848 my $uuid;
5849 for my $id (sort keys %$pci_devices) {
5850 my $d = $pci_devices->{$id};
5851 my ($index) = ($id =~ m/^hostpci(\d+)$/);
5852
5853 my $chosen_mdev;
5854 for my $dev ($d->{ids}->@*) {
5855 my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
5856 if ($d->{mdev}) {
5857 warn $@ if $@;
5858 $chosen_mdev = $info;
5859 last if $chosen_mdev; # if successful, we're done
5860 } else {
5861 die $@ if $@;
5862 }
5863 }
5864
5865 next if !$d->{mdev};
5866 die "could not create mediated device\n" if !defined($chosen_mdev);
5867
5868 # nvidia grid needs the uuid of the mdev as qemu parameter
5869 if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
5870 if (defined($conf->{smbios1})) {
5871 my $smbios_conf = parse_smbios1($conf->{smbios1});
5872 $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
5873 }
5874 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
5875 }
5876 }
5877 push @$cmd, '-uuid', $uuid if defined($uuid);
5878 };
5879 if (my $err = $@) {
5880 eval { cleanup_pci_devices($vmid, $conf) };
5881 warn $@ if $@;
5882 die $err;
5883 }
5884
5885 PVE::Storage::activate_volumes($storecfg, $vollist);
5886
5887
5888 my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
5889 eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
5890 eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
5891 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5892 # timeout should be more than enough here...
5893 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5894
5895 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5896
5897 my %run_params = (
5898 timeout => $statefile ? undef : $start_timeout,
5899 umask => 0077,
5900 noerr => 1,
5901 );
5902
5903 # when migrating, prefix QEMU output so other side can pick up any
5904 # errors that might occur and show the user
5905 if ($migratedfrom) {
5906 $run_params{quiet} = 1;
5907 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5908 }
5909
5910 my %systemd_properties = (
5911 Slice => 'qemu.slice',
5912 KillMode => 'process',
5913 SendSIGKILL => 0,
5914 TimeoutStopUSec => ULONG_MAX, # infinity
5915 );
5916
5917 if (PVE::CGroup::cgroup_mode() == 2) {
5918 $systemd_properties{CPUWeight} = $cpuunits;
5919 } else {
5920 $systemd_properties{CPUShares} = $cpuunits;
5921 }
5922
5923 if (my $cpulimit = $conf->{cpulimit}) {
5924 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5925 }
5926 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5927
5928 my $run_qemu = sub {
5929 PVE::Tools::run_fork sub {
5930 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5931
5932 my $tpmpid;
5933 if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
5934 # start the TPM emulator so QEMU can connect on start
5935 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5936 }
5937
5938 my $exitcode = run_command($cmd, %run_params);
5939 if ($exitcode) {
5940 if ($tpmpid) {
5941 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5942 kill 'TERM', $tpmpid;
5943 }
5944 die "QEMU exited with code $exitcode\n";
5945 }
5946 };
5947 };
5948
5949 if ($conf->{hugepages}) {
5950
5951 my $code = sub {
5952 my $hotplug_features =
5953 parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5954 my $hugepages_topology =
5955 PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
5956
5957 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5958
5959 PVE::QemuServer::Memory::hugepages_mount();
5960 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5961
5962 eval { $run_qemu->() };
5963 if (my $err = $@) {
5964 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5965 if !$conf->{keephugepages};
5966 die $err;
5967 }
5968
5969 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5970 if !$conf->{keephugepages};
5971 };
5972 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5973
5974 } else {
5975 eval { $run_qemu->() };
5976 }
5977
5978 if (my $err = $@) {
5979 # deactivate volumes if start fails
5980 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5981 warn $@ if $@;
5982 eval { cleanup_pci_devices($vmid, $conf) };
5983 warn $@ if $@;
5984
5985 die "start failed: $err";
5986 }
5987
5988 # re-reserve all PCI IDs now that we can know the actual VM PID
5989 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5990 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
5991 warn $@ if $@;
5992
5993 if (defined(my $migrate = $res->{migrate})) {
5994 if ($migrate->{proto} eq 'tcp') {
5995 my $nodename = nodename();
5996 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5997 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5998 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5999 mon_cmd($vmid, "migrate-incoming", uri => $migrate->{uri});
6000 }
6001 print "migration listens on $migrate->{uri}\n";
6002 } elsif ($statefile) {
6003 eval { mon_cmd($vmid, "cont"); };
6004 warn $@ if $@;
6005 }
6006
6007 #start nbd server for storage migration
6008 if (my $nbd = $migrate_opts->{nbd}) {
6009 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
6010
6011 my $migrate_storage_uri;
6012 # nbd_protocol_version > 0 for unix socket support
6013 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
6014 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
6015 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
6016 $migrate_storage_uri = "nbd:unix:$socket_path";
6017 $res->{migrate}->{unix_sockets} = [$socket_path];
6018 } else {
6019 my $nodename = nodename();
6020 my $localip = $get_migration_ip->($nodename);
6021 my $pfamily = PVE::Tools::get_host_address_family($nodename);
6022 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
6023
6024 mon_cmd($vmid, "nbd-server-start", addr => {
6025 type => 'inet',
6026 data => {
6027 host => "${localip}",
6028 port => "${storage_migrate_port}",
6029 },
6030 });
6031 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
6032 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
6033 }
6034
6035 my $block_info = mon_cmd($vmid, "query-block");
6036 $block_info = { map { $_->{device} => $_ } $block_info->@* };
6037
6038 foreach my $opt (sort keys %$nbd) {
6039 my $drivestr = $nbd->{$opt}->{drivestr};
6040 my $volid = $nbd->{$opt}->{volid};
6041
6042 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
6043
6044 mon_cmd(
6045 $vmid,
6046 "block-export-add",
6047 id => "drive-$opt",
6048 'node-name' => $block_node,
6049 writable => JSON::true,
6050 type => "nbd",
6051 name => "drive-$opt", # NBD export name
6052 );
6053
6054 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
6055 print "storage migration listens on $nbd_uri volume:$drivestr\n";
6056 print "re-using replicated volume: $opt - $volid\n"
6057 if $nbd->{$opt}->{replicated};
6058
6059 $res->{drives}->{$opt} = $nbd->{$opt};
6060 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
6061 }
6062 }
6063
6064 if ($migratedfrom) {
6065 eval {
6066 set_migration_caps($vmid);
6067 };
6068 warn $@ if $@;
6069
6070 if ($spice_port) {
6071 print "spice listens on port $spice_port\n";
6072 $res->{spice_port} = $spice_port;
6073 if ($migrate_opts->{spice_ticket}) {
6074 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6075 $migrate_opts->{spice_ticket});
6076 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6077 }
6078 }
6079
6080 } else {
6081 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6082 if !$statefile && $conf->{balloon};
6083
6084 foreach my $opt (keys %$conf) {
6085 next if $opt !~ m/^net\d+$/;
6086 my $nicconf = parse_net($conf->{$opt});
6087 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6088 }
6089 add_nets_bridge_fdb($conf, $vmid);
6090 }
6091
6092 if (!defined($conf->{balloon}) || $conf->{balloon}) {
6093 eval {
6094 mon_cmd(
6095 $vmid,
6096 'qom-set',
6097 path => "machine/peripheral/balloon0",
6098 property => "guest-stats-polling-interval",
6099 value => 2
6100 );
6101 };
6102 log_warn("could not set polling interval for ballooning - $@") if $@;
6103 }
6104
6105 if ($resume) {
6106 print "Resumed VM, removing state\n";
6107 if (my $vmstate = $conf->{vmstate}) {
6108 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6109 PVE::Storage::vdisk_free($storecfg, $vmstate);
6110 }
6111 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6112 PVE::QemuConfig->write_config($vmid, $conf);
6113 }
6114
6115 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6116
6117 my ($current_machine, $is_deprecated) =
6118 PVE::QemuServer::Machine::get_current_qemu_machine($vmid);
6119 if ($is_deprecated) {
6120 log_warn(
6121 "current machine version '$current_machine' is deprecated - see the documentation and ".
6122 "change to a newer one",
6123 );
6124 }
6125
6126 return $res;
6127 }
6128
# Assemble the QEMU command line of a VM and return it as a single string.
#
# $storecfg: storage configuration, passed through to config_to_command()
# $vmid: ID of the VM whose configuration is loaded
# $snapname: optional - when set, the command line is built from that snapshot's configuration
#
# Dies if $snapname is set but the configuration contains no snapshot with that name.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname};
        defined($snap_conf) or die "snapshot '$snapname' does not exist\n";

        # the snapshot may carry machine or CPU overrides
        ($forcemachine, $forcecpu) = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        # from here on, only the snapshot's configuration matters
        $conf = $snap_conf;
    }

    my $defaults = load_defaults();
    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6154
# Send a 'system_reset' to the VM via the monitor while holding the VM config lock.
#
# $vmid: ID of the VM
# $skiplock: if true, the lock check on the VM configuration is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6167
# Collect the volume IDs referenced by a VM configuration.
#
# $conf: VM configuration, iterated with foreach_volid()
#
# Entries starting with '/' (absolute paths) and entries for which
# PVE::Storage::parse_volume_id() yields no storage ID are left out.
#
# Returns: array reference with the remaining volume IDs
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;

    my $collect = sub {
        my ($volid, $attr) = @_;

        return if $volid =~ m|^/|;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    };
    foreach_volid($conf, $collect);

    return \@volids;
}
6185
# Remove the mediated devices (mdev) belonging to the 'hostpciN' entries of a VM and drop
# the PCI reservation of the VM afterwards.
#
# $vmid: ID of the VM
# $conf: VM configuration; every 'hostpciN' entry is parsed with parse_hostpci()
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $key (keys %$conf) {
        next if $key !~ m/^hostpci(\d+)$/;
        my $hostpciindex = $1;

        my $d = parse_hostpci($conf->{$key});
        next if !$d->{mdev}; # only mediated devices need to be removed by hand

        my $uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $hostpciindex);

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires PCI ID and we
        # don't want to break ABI just for this two liner
        my $dev_sysfs_dir = "/sys/bus/mdev/devices/$uuid";

        # some nvidia vgpu driver versions want to clean the mdevs up themselves, and error
        # out when we do it first. So give the driver up to 10 seconds, but stop waiting as
        # soon as the device is gone - there is nothing to wait for if it never existed.
        my $vendor = $d->{ids}->[0]->[0]->{vendor} // '';
        if ($vendor =~ m/^(0x)?10de$/) {
            my $waited = 0;
            while ($waited < 10 && -e $dev_sysfs_dir) {
                sleep 1;
                $waited++;
            }
        }

        PVE::SysFSTools::file_write("$dev_sysfs_dir/remove", "1") if -e $dev_sysfs_dir;
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6210
# Cleanup after a VM has stopped. Errors are not propagated - they only produce a warning.
#
# $storecfg: storage configuration
# $vmid: ID of the VM
# $conf: VM configuration
# $keepActive: if true, volumes are not deactivated and the TPM state volume is not unmapped
# $apply_pending_changes: if true, vmconfig_apply_pending() is called as the last step
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            PVE::Storage::deactivate_volumes($storecfg, get_vm_volumes($conf));

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        unlink "/var/run/qemu-server/${vmid}.$_" for (qw(mon qmp pid vnc qga));

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # just delete it for now. VMs which already have it open are not affected, but
            # new VMs will get a separate one. If this becomes an issue we either add some
            # sort of ref-counting or just add a "don't delete on stop" flag to the ivshmem
            # format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6248
# call only in locked context
#
# Stop or shut down a running VM and clean up afterwards. Does nothing if the VM is not running.
#
# $storecfg: storage configuration, passed to vm_stop_cleanup()
# $vmid: ID of the VM
# $skiplock: if true, the lock check on the VM configuration is skipped
# $nocheck: if true, the VM configuration is not loaded at all (so no lock check, no
#     'pre-stop' hookscript and no cleanup, which all need the configuration)
# $timeout: seconds to wait for the VM to stop; defaults to the 'down' value of the startup
#     order (shutdown only, if set) or else to 60
# $shutdown: if true, ask the guest to power down instead of sending 'quit' to QEMU
# $force: if true, escalate to SIGTERM (and SIGKILL 10 seconds later) instead of dying
# $keepActive: passed to vm_stop_cleanup()
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $opts = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $opts->{down} if $opts->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # poll once per second until the VM is gone or $limit seconds have passed;
    # returns the number of seconds waited
    my $wait_until_stopped = sub {
        my ($limit) = @_;

        my $waited = 0;
        while (($waited < $limit) && check_running($vmid, $nocheck)) {
            $waited++;
            sleep 1;
        }
        return $waited;
    };

    eval {
        if (!$shutdown) {
            mon_cmd($vmid, "quit");
        } elsif (defined($conf) && get_qga_key($conf, 'enabled')) {
            mon_cmd($vmid, "guest-shutdown", timeout => $timeout);
        } else {
            mon_cmd($vmid, "system_powerdown");
        }
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout //= 60;

        if ($wait_until_stopped->($timeout) < $timeout) {
            # stopped in time - regular cleanup
            vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again
    $timeout = 10;

    if ($wait_until_stopped->($timeout) >= $timeout) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
}
6330
# Note: use $nocheck to skip tests if VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved)
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active
#
# With $migratedfrom set, the VM process gets a SIGTERM (if it is running) and
# vm_stop_cleanup() is called with the configuration loaded via $migratedfrom; no config lock
# is taken. Otherwise _do_vm_stop() runs under the VM config lock.
# $force defaults to 1 when it is undefined and $shutdown is not set.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    $force //= 1 if !$shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6351
# Reboot a running VM: register a reboot request and shut the VM down under the config lock.
#
# $vmid: ID of the VM
# $timeout: shutdown timeout, passed on to _do_vm_stop()
#
# If anything fails, the reboot request is cleared again and the error is re-thrown.
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            if (check_running($vmid)) {
                create_reboot_request($vmid);

                my $storecfg = PVE::Storage::config();
                _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
            }
        };
        my $err = $@;
        return if !$err;

        # avoid that the next normal shutdown will be confused for a reboot
        clear_reboot_request($vmid);
        die $err;
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6374
# note: if using the statestorage parameter, the caller has to check privileges
#
# Suspend a VM.
#
# $vmid: ID of the VM
# $skiplock: if true, the lock check on the VM configuration is skipped
# $includestate: if true, the VM state is saved to a state volume and QEMU is quit afterwards;
#     otherwise only a 'stop' is sent via the monitor
# $statestorage: storage for the state volume; if not set, find_vmstate_storage() selects one
#     and (unless running from the CLI) 'Datastore.AllocateSpace' on it is checked
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    # shared between the locked sections below
    my ($conf, $path, $storecfg, $vmstate);

    my $prepare_suspend = sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n"
            if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);

            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    };
    PVE::QemuConfig->lock_config($vmid, $prepare_suspend);

    if ($includestate) {
        # save vm state
        PVE::Storage::activate_volumes($storecfg, [$vmstate]);

        eval {
            set_migration_caps($vmid, 1);
            mon_cmd($vmid, "savevm-start", statefile => $path);

            # poll until the state has been written completely or saving failed
            while (1) {
                my $state = mon_cmd($vmid, "query-savevm");
                my $status = $state->{status};

                die "savevm not active\n" if !$status;

                if ($status eq 'active') {
                    sleep(1);
                    next;
                }
                if ($status eq 'completed') {
                    print "State saved, quitting\n";
                    last;
                }

                die "query-savevm failed with error '$state->{error}'\n"
                    if $status eq 'failed' && $state->{error};
                die "query-savevm returned status '$status'\n";
            }
        };
        my $err = $@;

        PVE::QemuConfig->lock_config($vmid, sub {
            $conf = PVE::QemuConfig->load_config($vmid);

            if ($err) {
                # cleanup, but leave suspending lock, to indicate something went wrong
                eval {
                    mon_cmd($vmid, "savevm-end");
                    PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                    PVE::Storage::vdisk_free($storecfg, $vmstate);
                    delete $conf->@{qw(vmstate runningmachine runningcpu)};
                    PVE::QemuConfig->write_config($vmid, $conf);
                };
                warn $@ if $@;
                die $err;
            }

            PVE::QemuConfig->has_lock($conf, 'suspending')
                or die "lock changed unexpectedly\n";

            mon_cmd($vmid, "quit");
            $conf->{lock} = 'suspended';
            PVE::QemuConfig->write_config($vmid, $conf);
        });
    }
}
6469
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
#
# Resume a VM under the config lock. Depending on the status reported by 'query-status':
#   running   - nothing to do
#   suspended - 'system_wakeup' is sent instead of 'cont'
#   shutdown  - a 'system_reset' is sent before resuming
#
# $vmid: ID of the VM
# $skiplock: if true, the lock check on the VM configuration is skipped
# $nocheck: see above; also disables the lock check
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $status = $res->{status} // '';
        return if $status eq 'running'; # job done, go home

        my $resume_cmd = $status eq 'suspended' ? 'system_wakeup' : 'cont';
        my $reset = $status eq 'shutdown' ? 1 : 0;

        if (!$nocheck && !$skiplock && !PVE::QemuConfig->has_lock($conf, 'backup')) {
            PVE::QemuConfig->check_lock($conf);
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6520
# Send a key to the VM via the human monitor while holding the VM config lock.
#
# $vmid: ID of the VM
# $skiplock: accepted, but not evaluated here
# $key: key specification, appended to the 'sendkey' monitor command
#
# Dies with the monitor's response if that response is not empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $sendkey_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $answer = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $answer unless $answer eq '';
    };

    PVE::QemuConfig->lock_config($vmid, $sendkey_locked);
}
6533
# Check that a user may use the bridge, tag and trunks of every 'netN' entry of a config.
#
# $rpcenv: RPC environment, passed to PVE::GuestHelpers::check_vnet_access()
# $authuser: user ID; 'root@pam' passes without any check
# $conf: VM configuration
#
# Returns 1; failed checks are expected to be raised by check_vnet_access().
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_keys = grep { $_ =~ m/^net\d+$/ } sort keys $conf->%*;

    for my $opt (@net_keys) {
        my $net = parse_net($conf->{$opt});
        my ($bridge, $tag, $trunks) = $net->@{'bridge', 'tag', 'trunks'};
        PVE::GuestHelpers::check_vnet_access($rpcenv, $authuser, $bridge, $tag, $trunks);
    }

    return 1;
}
6547
# Check that a user may use the USB and PCI devices referenced by a VM configuration.
#
# $rpcenv: RPC environment, used for the 'Mapping.Use' checks
# $user: user ID
# $conf: VM configuration; only 'usbN' and 'hostpciN' entries are looked at
#
# Devices given via 'host' are reserved for 'root@pam' (exception: USB 'spice' entries),
# devices given via 'mapping' need 'Mapping.Use' on the mapping. Dies if an entry has neither.
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $device = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
            my ($host, $mapping) = $device->@{qw(host mapping)};

            if ($host) {
                next if $host =~ m/^spice$/i || $user eq 'root@pam';
                die "only root can set '$opt' config for real devices\n";
            }

            die "either 'host' or 'mapping' must be set.\n" if !$mapping;
            $rpcenv->check_full($user, "/mapping/usb/$mapping", ['Mapping.Use']);
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $device = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
            my ($host, $mapping) = $device->@{qw(host mapping)};

            if ($host) {
                next if $user eq 'root@pam';
                die "only root can set '$opt' config for non-mapped devices\n";
            }

            die "either 'host' or 'mapping' must be set.\n" if !$mapping;
            $rpcenv->check_full($user, "/mapping/pci/$mapping", ['Mapping.Use']);
        }
    }
}
6574
# Run the permission checks needed for restoring a configuration: bridge access first, then
# access to mapped/passed-through devices.
#
# $rpcenv: RPC environment
# $user: user ID
# $conf: VM configuration to be restored
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);
    return check_mapping_access(@check_args);
}
# vzdump restore implementation
6582
# Return the name of the first entry of a tar archive, as listed by 'tar tf'.
#
# $archive: path of the archive
#
# Dies if $archive is not a regular file, if 'tar' cannot be started or if the listing is empty.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    die "ERROR: file '$archive' does not exist\n" unless -f $archive;

    # try to detect archive type first
    my $pid = open(my $fh, '-|', 'tar', 'tf', $archive)
        or die "unable to open file '$archive'\n";

    # only the first line is of interest, so tar gets terminated right after reading it
    my $firstfile = <$fh>;
    kill 15, $pid;
    close $fh;

    die "ERROR: archive contaions no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6600
# Remove the temporary volumes recorded in the stat file of a tar restore.
#
# $storecfg: storage configuration, used to free volumes that are given as volume ID
# $statfile: path of the stat file; each line is expected to end in 'vzdump:<name>:<volume>'
#
# Volumes starting with '/' are unlinked directly, all others are freed with
# PVE::Storage::vdisk_free(). Progress and failures are printed to STDERR; a failure for one
# volume does not stop the cleanup of the remaining ones. Nothing besides the start message
# happens if the stat file cannot be opened.
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parentheses and low-precedence 'or' are required here: with
                        # 'unlink $volid || die ...' the '||' binds to $volid, so a failed
                        # unlink would go unnoticed and be reported as success
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6626
# Restore a backup archive by dispatching to the restore function for its format.
#
# $archive: path of the archive; '-' is handed straight to restore_vma_archive()
# $vmid: ID of the VM to restore
# $user: user performing the restore
# $opts: restore options; $opts->{format} overrides the format detected from the archive
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    my @restore_args = ($archive, $vmid, $user, $opts);

    return restore_vma_archive(@restore_args) if $archive eq '-';

    my $info = PVE::Storage::archive_info($archive);
    my $format = defined($opts->{format}) ? $opts->{format} : $info->{format};
    my $comp = $info->{compression};

    # try to detect archive format
    return restore_tar_archive(@restore_args) if $format eq 'tar';

    return restore_vma_archive(@restore_args, $comp);
}
6644
# Helper to remove disks that will not be used after restore
#
# $storecfg: Storage configuration
# $vmid: ID of the VM being restored
# $oldconf: configuration of the VM before the restore
# $virtdev_hash: devices that get restored, keyed by drive name
#
# Volumes owned by $vmid whose drive gets restored are freed. Volumes of all other drives are
# kept, but lose their snapshots, and the vmstate volumes of all snapshots are freed. Errors
# from the storage layer only produce warnings.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_disks;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        # Note: only delete disk we want to restore
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_disks{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    my $snapshots = $oldconf->{snapshots};
    for my $snapname (keys $snapshots->%*) {
        my $vmstate = $snapshots->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys %kept_disks) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6690
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the given storage
    my $check_storage = sub {
        my ($storeid, $scfg) = @_;

        die "Content type 'images' is not available on storage '$storeid'\n"
            if !$scfg->{content}->{images};

        return if $user eq 'root@pam';
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
    };

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # an explicitly requested storage wins, 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $check_storage->($storeid, $scfg); # permission and content type check

            $virtdev_hash->{$virtdev} = $info;
            next;
        }

        next if $line !~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/;

        my $virtdev = $1;
        my $drive = parse_drive($virtdev, $2);
        next if !drive_is_cloudinit($drive);

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        $storeid = $options->{storage} if defined ($options->{storage});
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

        $check_storage->($storeid, $scfg); # permission and content type check

        $virtdev_hash->{$virtdev} = {
            format => $format,
            storeid => $storeid,
            size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
            is_cloudinit => 1,
        };
    }

    return $virtdev_hash;
};
6758
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes get allocated for
#
# For each device, the requested format is replaced by the storage's default format if the
# storage does not support it, and the ID of the new volume is stored in the device's 'volid'.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    for my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024); # round up to full KiB
        my $storeid = $d->{storeid};
        # looked up once and reused below for the cloudinit volume name
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        # only cloudinit volumes get a fixed name
        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6802
# Translate one line of a backed-up VM configuration into the text for the restored one.
#
# $cookie: state shared between calls; 'netcount' is the index for converted network devices
# $map: { $virtdev => $volid } of the volumes allocated for the restore
# $line: the configuration line, including its trailing newline
# $unique: if true, new MAC addresses and a new SMBIOS UUID are generated
#
# Returns: the text to write - an empty string if the line gets dropped
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    # lines that never end up in the restored configuration
    my @dropped = (
        qr/^\#qmdump\#/,
        qr/^\#vzdump\#/,
        qr/^lock:/,
        qr/^unused\d+:/,
        qr/^parent:/,
    );
    for my $pattern (@dropped) {
        return '' if $line =~ $pattern;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($ind, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$ind",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);

        my $net = parse_net($netstr);
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};

        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);
        my $di = parse_drive($virtdev, $value);

        # drives excluded from backup are kept as a comment only
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($prefix, $value) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($value);
        $smbios1->{uuid} = $uuid_str;
        return $prefix . print_smbios1($smbios1) . "\n";
    }

    return $line;
}
6871
# Deactivate every volume recorded in $virtdev_hash (entries without a 'volid'
# are skipped). Errors are printed to STDERR and not propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values %$virtdev_hash;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
6883
# Free every volume recorded in $virtdev_hash (entries without a 'volid' are
# skipped). A failure for one volume is reported on STDERR and does not stop
# the cleanup of the remaining ones.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    foreach my $entry (values %$virtdev_hash) {
        my $volid = $entry->{volid};
        next if !$volid;

        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        if (my $err = $@) {
            print STDERR "unable to cleanup '$volid' - $err";
        }
    }
};
6896
# Parse the raw config taken from a backup and apply the override settings on
# top of it.
#
# $filename: passed on to parse_vm_config()
# $backup_conf_raw: raw config text
# $override_conf: hash reference whose keys replace the parsed ones
#
# Returns the parsed config with the overrides applied.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    $merged->{$_} = $override_conf->{$_} for keys %$override_conf;

    return $merged;
}
6907
# List the 'images' volumes known to the storage layer for $vmid.
#
# Returns: { $volid => $item }, where each $item is the entry reported by
# PVE::Storage::vdisk_list() extended with its 'path'. Entries lacking a volid
# or a size are left out.
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my $volid_hash = {};
    for my $volumes (values %$info) {
        for my $item (@$volumes) {
            next unless $item->{volid} && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volid_hash->{$item->{volid}} = $item;
        }
    }

    return $volid_hash;
}
6924
# Bring the disk entries of $conf in line with the volumes in $volid_hash:
# refresh disk sizes, drop 'unusedX' entries whose volume is referenced
# elsewhere and add so far unreferenced volumes as unused.
#
# $vmid: ID of the VM, used for messages and to skip its state volumes
# $conf: VM config, modified in place
# $volid_hash: { $volid => $info } with at least 'size' and 'path' per entry
#
# Returns a true value if $conf was changed.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Volumes referenced by the config (used and unused disks). It is allowed
    # to define multiple storages with the same path (alias), so two different
    # volids can point to the same path - track both volid and real path.
    my $seen_volid = {};
    my $seen_path = {};

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file} or return;
        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid->{$volid} = 1;
        my $path = $volume ? $volume->{path} : undef;
        $seen_path->{$path} = 1 if $path;

        return if drive_is_cdrom($drive) || !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file} or return;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volid->{$volid} || ($path && $seen_path->{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        # also catches a second unused entry pointing to the same volume
        $seen_volid->{$volid} = 1;
        $seen_path->{$path} = 1 if $path;
    });

    # add volumes that nothing in the config refers to
    for my $volid (sort keys %$volid_hash) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid->{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path->{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path->{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6998
# Rescan the storages and update the disk entries of one or all VM configs.
#
# $vmid: VM to update; if undefined, every VM returned by config_list() is
#     processed
# $nolock: if true, the config is updated without taking the config lock
# $dryrun: if true, changes are reported but the config is not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $updatefn = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only hand over the volumes owned by this VM
        my $vm_volids = {};
        for my $volid (keys %$volid_hash) {
            my $info = $volid_hash->{$volid};
            next unless $info->{vmid} && $info->{vmid} == $vmid;
            $vm_volids->{$volid} = $info;
        }

        my $changes = update_disk_config($vmid, $conf, $vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    my @targets;
    if (defined($vmid)) {
        @targets = ($vmid);
    } else {
        my $vmlist = config_list();
        @targets = keys %$vmlist;
    }

    for my $target (@targets) {
        if ($nolock) {
            $updatefn->($target);
        } else {
            PVE::QemuConfig->lock_config($target, $updatefn, $target);
        }
    }
}
7042
# Restore a VM from a Proxmox Backup Server snapshot.
#
# $archive: volume ID of the backup; must parse to vtype 'backup' and format
#     'pbs-vm'
# $vmid: ID of the VM to restore to
# $user: passed on to the backup hint parser and the restore permission check
# $options: hash reference; 'live', 'unique', 'override_conf' and 'pool' are
#     read here, the whole hash is also handed to the backup hint parser
#
# The config and the disk images are fetched inside an eval with interrupts
# enabled; outside of it interrupts are ignored so that cleanup always runs.
# On error the volumes allocated so far are destroyed and the error is
# re-thrown. With 'live' set, only efidisk0 and tpmstate0 are restored up
# front and the remaining disks are streamed by pbs_live_restore() while the
# config keeps its 'create' lock.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exists
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are literal parts of the archive name
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        # the namespace is a property of the backup storage, so the arguments
        # are the same for every disk
        my @ns_arg = defined($namespace) ? ('--ns', $namespace) : ();

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed-up config, now that the volume map is known
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # a successful live-restore still needs the volumes active for the VM start
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7259
# Start a VM on top of a Proxmox Backup Server snapshot and stream the disk
# contents into the already allocated target volumes while it runs.
#
# $vmid: ID of the VM
# $conf: VM config, used to look up the target drive of every restored disk
# $storecfg: storage configuration
# $restored_disks: hash reference keyed by device name ('drive-<confname>')
# $opts: hash reference with 'repo', 'snapshot', 'keyfile' and, optionally,
#     'namespace'
#
# Dies with "live-restore failed" after stopping the VM if anything goes wrong
# from the VM start onwards.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale or undefined capture
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' - expected 'drive-<name>'\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        # the VM was started paused, let it run while the jobs stream
        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7330
# Restore a VM from a VMA archive.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid: ID of the VM to restore to
# $user: passed on to the backup hint parser and the restore permission check
# $opts: hash reference; 'bwlimit', 'unique', 'override_conf' and 'pool' are
#     read here, the whole hash is also handed to the backup hint parser
# $comp: compression of the archive, if any; selects the decompressor that is
#     put in front of 'vma extract'
#
# The archive is piped through an optional rate limiter and decompressor into
# 'vma extract'. Once its output shows that the config was received, the
# target volumes are allocated and the device mapping is written to a fifo
# that 'vma extract' reads. On error the allocated volumes are destroyed and
# the error is re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # appends one stage to the pipeline; every later stage reads from stdin
    my $add_pipe = sub {
        my ($stage) = @_;
        push @$commands, $stage;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($stage);
        $readfrom = '-';
    };

    my $input;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                $add_pipe->(['cstream', '-t', $readlimit*1024, '--', $readfrom]);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $decompressor = $info->{decompressor};
        push @$decompressor, $readfrom;
        $add_pipe->($decompressor);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    # run after the fork of the pipeline, see the run_command() call below
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocates the target volumes, writes the device mapping to the fifo and
    # collects the new config; called once the config got extracted
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        # determine the write limit once per target storage
        for my $storeid (map { $_->{storeid} } values %$virtdev_hash) {
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        for my $devname (keys %$devinfo) {
            next if $devinfo->{$devname}->{virtdev};
            die "found no device mapping information for device '$devname'\n";
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        for my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my ($storeid, $volid) = ($d->{storeid}, $d->{volid});

            my $map_opts = '';
            my $limit = $storage_limits{$storeid};
            $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:" if $limit;

            # no explicit zero writes needed if the volume starts out sparse
            my $write_zeros = PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid) ? 0 : 1;

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    my $oldtimeout;

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one

        my $parser = sub {
            my ($line) = @_;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
                return;
            }

            return if $line !~ m/^CTIME: /;

            # we correctly received the vma config, so we can disable
            # the timeout now for disk allocation
            alarm($oldtimeout || 0);
            $oldtimeout = undef;
            $print_devmap->();
            print $fifofh "done\n";
            close($fifofh);
            $fifofh = undef;
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    # still set if we never got as far as the CTIME line
    alarm($oldtimeout) if $oldtimeout;

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7543
# Restore a VM from a legacy tar archive by piping its members through
# qmextract.
#
# $archive: path of the archive, or '-' to read it from STDIN
# $vmid: ID of the VM to restore to
# $user: exported to qmextract as VZDUMP_USER
# $opts: hash reference; 'storage', 'pool', 'prealloc' and 'info' are passed on
#     to qmextract, 'unique' is used when rewriting the config. A non-empty
#     'override_conf' is rejected.
#
# On error the volumes recorded in the qmextract stat file are cleaned up
# (unless 'info' is set), the temporary directory is removed and the error is
# re-thrown.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    # picked up by qmextract
    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, the config is still
        # written and rescanned below when 'info' is set - confirm that this
        # is intended
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file is not needed anymore, do not leave the temporary
        # directory behind on failure
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7656
# Call $func->($storeid) once for every storage referenced by a non-CDROM
# volume of $conf, in sorted order of the storage IDs.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %seen;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # volumes that do not parse to a storage ID are skipped
        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $seen{$sid} = $sid if $sid;
    });

    for my $storeid (sort keys %seen) {
        $func->($storeid);
    }
}
7676
# storage types that are candidates for snapshots done by QEMU
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether snapshots of a volume are done by QEMU.
#
# Returns 1 if the storage type is listed in $qemu_snap_storage and 'krbd' is
# not set for it, or if the volume ID ends in '.qcow2' or '.qed'. Returns
# nothing otherwise, and always for device IDs containing 'tpmstate0'.
# Dies if the storage of $volid is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name};
    die "could not find storage '$storage_name'\n" if !defined($scfg);

    my $handled_by_storage_type = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $handled_by_storage_type;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7699
# Check whether the guest agent of a VM answers a 'guest-ping'.
#
# Returns 1 if the ping succeeded, 0 otherwise. A failure is reported with a
# warning unless $nowarn is set.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@;

    return 1 if !$err;

    warn "QEMU Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7710
# Convert the volumes of a VM into base volumes.
#
# $vmid: ID of the VM
# $conf: VM config; updated and written after every converted volume
# $disk: if set, only the volume with this config key is converted
#
# CDROM drives and volumes without the 'template' storage feature are skipped.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
7731
# Convert a path of the form 'iscsi://<portal>/<target>/<lun>' into the
# comma separated option string built below (iscsi file driver, raw format).
#
# Dies if $path does not have that form.
sub convert_iscsi_path {
    my ($path) = @_;

    my ($portal, $target, $lun) = $path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|
        or die "cannot convert iscsi path '$path', unkown format\n";

    my $initiator_name = get_initiator_name();

    return join(',',
        'file.driver=iscsi',
        'file.transport=tcp',
        "file.initiator-name=$initiator_name",
        "file.portal=$portal",
        "file.target=$target",
        "file.lun=$lun",
        'driver=raw',
    );
}
7748
# Copy a volume with 'qemu-img convert'.
#
# $src_volid: source volume ID, or the path of an existing file or block device
# $dst_volid: destination volume ID (must be a valid volume ID)
# $size: size of the source, only used to render the progress output
# $snapname: optional snapshot of the source to copy from
# $is_zero_initialized: if true, a non-iSCSI destination is written through
#     the 'zeroinit:' prefix
# $bwlimit: optional rate limit, passed to qemu-img as '<bwlimit>K'
#
# Dies with "copy failed: ..." if the command fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid form qemu-img convert\n" if !$dst_storeid;

    my ($cachemode, $src_path, $src_format);
    my $src_is_iscsi = 0;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # plain path: the format is only known if the extension tells us
        $src_path = $src_volid;
        ($src_format) = $src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my @cmd = ('/usr/bin/qemu-img', 'convert', '-p', '-n');
    if ($snapname && $src_format && $src_format eq "qcow2") {
        push @cmd, '-l', "snapshot.name=$snapname";
    }
    push @cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @cmd, '-T', $cachemode if defined($cachemode);
    push @cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    # iSCSI paths are handed over as image options instead of a plain path
    if ($src_is_iscsi) {
        push @cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @cmd, '-O', $dst_format;
    }

    my $use_zeroinit = !$dst_is_iscsi && $is_zero_initialized;
    push @cmd, $src_path, ($use_zeroinit ? "zeroinit:$dst_path" : $dst_path);

    # turns the "(NN/100%)" progress lines into human readable output
    my $parser = sub {
        my ($line) = @_;
        return if $line !~ m/\((\S+)\/100\%\)/;

        my $percent = $1;
        my $transferred = int($size * $percent / 100);
        my $total_h = render_bytes($size, 1);
        my $transferred_h = render_bytes($transferred, 1);

        print "transferred $transferred_h of $total_h ($percent%)\n";
    };

    eval { run_command(\@cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7831
# Determine the image format of a volume.
#
# Returns the extension of $volname if the storage config has a 'path' and the
# extension matches $PVE::QemuServer::Drive::QEMU_FORMAT_RE, "raw" otherwise.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/ ? $1 : "raw";
}
7841
# Start a QMP 'drive-mirror' job that copies "drive-$drive" of VM $vmid to $dst_volid and then
# hand over to qemu_drive_mirror_monitor() to wait for and finish the job.
#
# $dst_volid is either an 'nbd:' target (used verbatim) or a PVE volume ID that is resolved to
# a path through the storage layer; $is_zero_initialized prefixes that path with 'zeroinit:'.
# The started job is registered in $jobs (a fresh hash is used if none is passed).
# $bwlimit is multiplied by 1024 for the QMP 'speed' option and $src_bitmap switches the sync
# mode to 'incremental'. $vmiddst, $completion and $qga are only passed through.
#
# Dies with "mirroring error: ..." if the job cannot be started; all jobs in $jobs are
# cancelled before that.
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs ||= {};

    # register the job before touching the storage layer, like callers expect for cleanup
    my $device = "drive-$drive";
    $jobs->{$device} = {};

    my ($target, $target_format);
    if ($dst_volid !~ /^nbd:/) {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);

        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $target_format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);

        $target = $is_zero_initialized ? "zeroinit:$dst_path" : $dst_path;
    } else {
        ($target, $target_format) = ($dst_volid, "nbd");
    }

    my $opts = {
        timeout => 10,
        device => $device,
        mode => "existing",
        sync => "full",
        target => $target,
    };
    $opts->{format} = $target_format if $target_format;

    if (defined($src_bitmap)) {
        $opts->{sync} = 'incremental';
        $opts->{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $opts->{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %$opts); };
    my $start_err = $@;
    if ($start_err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $start_err\n";
    }

    qemu_drive_mirror_monitor ($vmid, $vmiddst, $jobs, $completion, $qga);
}
7893
# Poll the block jobs listed in $jobs (a hash keyed by job/device ID) on VM $vmid until they
# are ready resp. gone, print progress, and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the block job type to look at (default "mirror"). If $vmiddst is set and differs from
# $vmid, the jobs are cancelled instead of completed once they are ready, with the guest
# filesystem frozen (if $qga is set and the agent is running) or the VM suspended meanwhile.
#
# Finished jobs are removed from $jobs. On any error the remaining jobs are cancelled and the
# function dies with "block job ($op) error: ...".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # counts failed "cannot be completed" attempts, used as retry limit
        my $err_complete = 0;

        my $starttime = time ();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            # index the currently known jobs of the requested type by their device ID
            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that vanished without us completing it was cancelled by someone else
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $remaining = $total - $transferred;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only log progress until the job was seen ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job...\n";

                        # named differently from $op (the job type) to avoid shadowing it
                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }
                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $complete_err = $@;
                        if ($complete_err && $complete_err =~ m/cannot be completed/) {
                            # transient state, retry in the next iteration
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($complete_err) {
                            # any other failure must not be reported as success
                            die "$job_id: block job cannot be completed - $complete_err\n";
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        # best-effort cleanup, the original error is the one that gets reported
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
8035
# Request cancellation of every block job in $jobs (hash keyed by job/device ID) on VM $vmid
# and then poll 'query-block-jobs' once per second until none of them is listed anymore.
# Entries are removed from $jobs as soon as the corresponding job is gone. Errors of the
# individual 'block-job-cancel' commands are ignored, errors while polling are not.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        # jobs QEMU still knows about, indexed by device ID
        my %still_running;
        $still_running{ $_->{device} } = $_ for @$stats;

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
8066
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive uses io_uring (explicitly via its 'aio' setting or via the storage
# default) while the target storage does not allow io_uring by default. Does nothing if no
# drive-mirror is used or if source and target storage are the same.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    # Don't complain when not changing storage.
    # Assume if it works for the source, it'll work for the target too.
    return if !$use_drive_mirror || $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    # an explicit aio setting on the drive takes precedence over the storage default
    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    return if !$src_uses_io_uring;
    return if storage_allows_io_uring_default($dst_scfg, $cache_direct);

    die "target storage is known to cause issues with aio=io_uring (used by current drive)\n";
}
8093
# Clone a single drive, either as linked clone (!$full) or as full copy.
#
# $source is a hash with the keys 'vmid', 'running', 'drivename', 'drive' (parsed drive) and
# 'snapname'; $dest is a hash with the keys 'vmid', 'drivename', 'efisize', 'storage' and
# 'format'. Newly allocated volume IDs are pushed onto $newvollist so the caller can clean up.
#
# A full clone of a running VM's drive (without snapshot) uses drive-mirror; $jobs,
# $completion, $qga and $bwlimit are passed on to qemu_drive_mirror() for that. Otherwise the
# data is copied offline with qemu-img. Cloud-init drives are only allocated, not copied.
#
# Returns a copy of the source drive hash pointing to the new volume ('format' removed, 'size'
# set if it could be determined).
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    # Both values are optional for callers; normalize them once so the string comparisons
    # below do not trigger "uninitialized value" warnings. An undefined $completion is
    # deliberately NOT treated as 'complete' here, which keeps the previous behavior.
    my $dst_name = $dst_drivename // '';
    my $complete_pending_jobs = defined($completion) && $completion eq 'complete';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $name = undef;
        my $size = undef;
        if (drive_is_cloudinit($drive)) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_name eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_name eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if (drive_is_cloudinit($drive)) {
            # No data is copied for cloud-init drives.
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if ($complete_pending_jobs && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_name eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # best effort: the size is only recorded if the storage can report it
    my $size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $size if defined($size);

    return $disk;
}
8213
# Query the running QEMU instance of VM $vmid via QMP 'query-version' and return its version
# as "<major>.<minor>" string.
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, "query-version");
    my $qemu = $res->{qemu};

    return join('.', $qemu->{major}, $qemu->{minor});
}
8219
# Decide whether the old (non-EFI) PXE bios files have to be used for $machine_type.
#
# Returns nothing if no machine type is given. Otherwise returns the list
# ($use_old_bios_files, $machine_type), where a trailing '.pxe' marker is stripped from the
# returned machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix always selects the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $base_type = $1;
        return (1, $base_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $needs_old_files = !min_version($version, 2, 4);

    return ($needs_old_files, $machine_type);
}
8241
# Return the file size of the OVMF vars image that applies to the VM config $conf.
# $efidisk is an optional, already parsed EFI disk; if it is not defined, the 'efidisk0' entry
# of $conf is parsed instead (when present).
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk)) {
        $efidisk = $conf->{efidisk0} ? parse_drive('efidisk0', $conf->{efidisk0}) : undef;
    }

    my $smm = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # get_ovmf_files() returns a list, the vars image is its second element
    my $vars_path = (get_ovmf_files($arch, $efidisk, $smm))[1];

    return -s $vars_path;
}
8251
# Rewrite the 'efidisk0' entry of $conf in place so that its 'size' property matches the value
# reported by get_efivars_size(). Does nothing if the config has no EFI disk.
sub update_efidisk_size {
    my ($conf) = @_;

    if (defined($conf->{efidisk0})) {
        my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $conf->{efidisk0});
        $efidisk->{size} = get_efivars_size($conf);
        $conf->{efidisk0} = print_drive($efidisk);
    }

    return;
}
8263
# Rewrite the 'tpmstate0' entry of $conf in place so that its 'size' property is set to the
# fixed TPMSTATE_DISK_SIZE.
#
# Does nothing if the config has no TPM state, consistent with update_efidisk_size(). Without
# this guard, the drive hash would be autovivified from an undefined parse result and a bogus
# 'tpmstate0' entry would be written into the config.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE();
    $conf->{tpmstate0} = print_drive($disk);
}
8271
# Allocate a new EFI vars volume for VM $vmid on storage $storeid with format $fmt and fill it
# with the content of the matching OVMF vars image (selected by $arch, $efidisk and $smm).
#
# Returns the list ($volid, $size), where $size is the size reported by the storage for the
# new volume divided by 1024.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    # get_ovmf_files() returns a list, the vars image is its second element
    my $vars_template = (get_ovmf_files($arch, $efidisk, $smm))[1];

    my $template_bytes = -s $vars_template;
    my $template_kb = PVE::Tools::convert_size($template_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $fmt, undef, $template_kb);
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($vars_template, $volid, $template_bytes, undef, 0);

    my $allocated_bytes = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated_bytes/1024);
}
8287
# Query the IO threads of VM $vmid via QMP 'query-iothreads' and return a hash reference
# mapping each IO thread's 'id' to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $res = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    $thread_id_of{ $_->{id} } = $_->{"thread-id"} for @$res;

    return \%thread_id_of;
}
8300
# Compute SCSI controller placement for $drive based on the 'scsihw' setting of $conf.
#
# Returns the list ($maxdev, $controller, $controller_prefix):
# - $maxdev: devices per controller (7 if 'scsihw' is unset or starts with 'lsi', 1 for
#   'virtio-scsi-single', 256 otherwise)
# - $controller: index of the controller the drive belongs to (drive index / $maxdev)
# - $controller_prefix: "virtioscsi" for 'virtio-scsi-single', "scsihw" otherwise
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7;
    } else {
        $maxdev = $is_single ? 1 : 256;
    }

    my $controller = int($drive->{index} / $maxdev);

    return ($maxdev, $controller, $is_single ? "virtioscsi" : "scsihw");
}
8321
# Pick the disk format to use on the target storage $storeid.
#
# If $format is not given, the format derived from $src_volname is used as hint; without a
# source volume name the storage's default format is returned right away. A format that is
# not among the storage's valid formats is replaced by the default format.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        return $default_format if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $is_supported = grep { $_ eq $format } @$valid_formats;

    return $is_supported ? $format : $default_format;
}
8341
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Select the storage for VM state volumes, in this order of preference:
# 1. the 'vmstatestorage' configured in $conf
# 2. a shared storage where the VM has at least one disk
# 3. a non-shared storage where the VM has at least one disk
# 4. the storage named 'local'
# Within 2. and 3., storages with a 'path' set (file based) are preferred.
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    # best candidate seen so far, separately for shared and non-shared storages
    my %candidate;

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        # prefer file based storage
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
8365
# Generate a new UUID with the UUID module and return it in its unparsed (string) form.
sub generate_uuid {
    UUID::generate(my $raw_uuid);
    UUID::unparse($raw_uuid, my $text_uuid);

    return $text_uuid;
}
8372
# Return a freshly generated UUID as "uuid=<UUID>" string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();

    return "uuid=$uuid";
}
8376
# Send the QMP command 'nbd-server-stop' to VM $vmid (timeout option 25) and return its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop', timeout => 25);
}
8382
# Create the (empty) reboot trigger file /run/qemu-server/<vmid>.reboot.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_path = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_path) || die "failed to create reboot trigger file: $!\n";

    return close($fh);
}
8389
# Remove the reboot trigger file /run/qemu-server/<vmid>.reboot.
#
# Returns the result of unlink(), i.e. a true value if a pending request was removed and a
# false value if there was none. Dies if removal failed for any reason other than the file
# not existing.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed) {
        # a missing file simply means that no reboot was requested
        die "could not remove reboot request for $vmid: $!" if $! != POSIX::ENOENT;
    }

    return $removed;
}
8401
# Translate a legacy boot specification (a string of single-letter device classes, taken from
# $bootcfg->{legacy} or the schema default) into a hash mapping device names to boot indices.
#
# Every letter gets a base index of 100 times its (1-based) position. As used below, 'd'
# applies to cdrom drives, 'c' to the other volumes (only the one named in $conf->{bootdisk}
# is actually assigned an index) and 'n' to the configured network devices.
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    # next free boot index per device class letter
    my %next_index;
    my @letters = split(//, $legacy);
    for my $pos (0 .. $#letters) {
        $next_index{ $letters[$pos] } = ($pos + 1) * 100;
    }

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        if (drive_is_cdrom ($drive, 1)) {
            if ($next_index{d}) {
                $bootorder->{$ds} = $next_index{d}++;
            }
        } elsif ($next_index{c}) {
            # the index is consumed for every volume, but only the bootdisk gets an entry
            $bootorder->{$ds} = $next_index{c}
                if $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
            $next_index{c}++;
        }
    });

    if ($next_index{n}) {
        for my $netname (map { "net$_" } 0 .. $MAX_NETS - 1) {
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n}++;
        }
    }

    return $bootorder;
}
8441
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with (if present, in this order) the first hard disk, the first
# cdrom and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # hard disk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $disk = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $disk if $disk;
    }

    # network: only the first configured device
    my $first_net = first { $conf->{$_} } map { "net$_" } 0 .. $MAX_NETS - 1;
    push @devices, $first_net if defined($first_net);

    return \@devices;
}
8469
# Return a hash reference mapping device names to boot indices for the VM config $conf.
#
# Without a 'boot' property, or if it is in the legacy format, the result comes from
# bootorder_from_legacy(). With 'order=', the listed devices are numbered starting at 100.
# An empty hash is returned if neither applies.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    return bootorder_from_legacy($conf, $boot) if !defined($boot) || $boot->{legacy};

    my %index_of;
    if ($boot->{order}) {
        my $next = 100; # start at 100 to allow user to insert devices before us with -args
        $index_of{$_} = $next++ for PVE::Tools::split_list($boot->{order});
    }

    return \%index_of;
}
8489
# Connect to the qmeventd socket and send the vzdump handshake for VM $vmid.
#
# The connection is attempted up to five times with a 25ms pause in between, but only as long
# as the failure is EINTR or EAGAIN; any other error aborts immediately. Returns the socket
# handle, which the caller has to close once the inhibit is no longer required.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";

    my $attempts = 0;
    my $sock;
    until ($sock = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1)) {
        $attempts++;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $attempts retries\n"
            if $attempts > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print {$sock} to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $sock;
}
8517
8518 # bash completion helper
8519
# Completion helper: return an array reference with the volume IDs of all backups whose format
# is 'vma.<compressor>'. If the current value $cvalue already starts with '<storage>:', only
# that storage is queried.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undefined (= all storages) unless the value already names one
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my @volids;
    for my $items (values %$data) {
        for my $item (@$items) {
            next unless $item->{format} =~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next unless defined($item->{volid});
            push @volids, $item->{volid};
        }
    }

    return \@volids;
}
8543
# Completion helper: return an array reference with the IDs from vmstatus().
#
# If $running is undefined, all IDs are returned. Otherwise templates are excluded and only
# guests whose status is 'running' ($running true) resp. not 'running' ($running false) are
# returned.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $vms = vmstatus();

    my @ids;
    for my $id (keys %$vms) {
        if (defined($running)) {
            my $vm = $vms->{$id};
            next if $vm->{template};
            if ($running) {
                next if $vm->{status} ne 'running';
            } else {
                next if $vm->{status} eq 'running';
            }
        }
        push @ids, $id;
    }

    return \@ids;
};
8563
# Completion helper: all VM IDs, without any filtering.
sub complete_vmid {
    return $complete_vmid_full->();
}
8567
# Completion helper: IDs of non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8571
# Completion helper: IDs of non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8575
# Completion helper: return an array reference with the IDs of all storages that pass
# storage_check_enabled() and have the 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8590
# Completion helper: like complete_storage(), but storage_check_enabled() is evaluated for the
# target node, which is taken from the second element of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my ($targetnode) = @{$all_args}[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8608
# Check via QMP 'query-status' whether VM $vmid is paused.
#
# Returns a true value if the status is "paused" or "prelaunch", or - with $include_suspended
# set - "suspended". If the status cannot be queried (e.g. the config does not exist on this
# node), the error is only printed as warning and a false value is returned.
sub vm_is_paused {
    my ($vmid, $include_suspended) = @_;

    my $qmpstatus = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $qmpstatus if !$qmpstatus;

    my $state = $qmpstatus->{status};

    return $state eq "paused"
        || $state eq "prelaunch"
        || ($include_suspended && $state eq "suspended");
}
8622
# Assert that the storage of volume $vol has the volume's content type enabled.
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    # the volume type is the first element returned by parse_volname()
    my $vtype = (PVE::Storage::parse_volname($storecfg, $vol))[0];

    return 1 if $scfg->{content}->{$vtype};

    die "storage '$storeid' does not support content-type '$vtype'\n";
}
8635
# Add the static MAC address of every 'netN' device of VM $vmid to the forwarding database of
# its bridge. NICs without a MAC address or without a bridge are skipped with a warning (for
# a missing MAC only if the 'learning' attribute of the bridge port reads as false).
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next unless $opt =~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1);
        next if !$net;

        my ($mac, $bridge) = $net->@{qw(macaddr bridge)};

        if (!$mac) {
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !file_read_firstline("/sys/class/net/$iface/brport/learning");
        } elsif (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
        } elsif ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge);
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac);
        }
    }
}
8664
# Remove the static MAC address of every 'netN' device of VM $vmid from the forwarding
# database of its bridge. NICs that cannot be parsed, have no MAC address or are not attached
# to a bridge are skipped.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # Mirror add_nets_bridge_fdb(): without a bridge there is no FDB entry to remove, and
        # an undefined bridge must neither be interpolated into the sysfs path below nor be
        # passed on to the SDN code.
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge);
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac);
        }
    }
}
8683
# For every 'netN' device in $conf, call the SDN function add_next_free_cidr() with the NIC's
# bridge and MAC address, the VM name and $vmid. Failures are only printed as warnings.
# Does nothing if SDN support is not available.
sub create_ifaces_ipams_ips {
    my ($conf, $vmid) = @_;

    return if !$have_sdn;

    for my $opt (grep { m/^net(\d+)$/ } keys %$conf) {
        my $net = PVE::QemuServer::parse_net($conf->{$opt});

        eval {
            PVE::Network::SDN::Vnets::add_next_free_cidr(
                $net->{bridge}, $conf->{name}, $net->{macaddr}, $vmid, undef, 1);
        };
        if (my $err = $@) {
            warn $err;
        }
    }
}
8698
# For every 'netN' device in $conf, call the SDN function del_ips_from_mac() with the NIC's
# bridge and MAC address and the VM name. Failures are only printed as warnings.
# Does nothing if SDN support is not available.
sub delete_ifaces_ipams_ips {
    my ($conf, $vmid) = @_;

    return if !$have_sdn;

    for my $opt (grep { m/^net(\d+)$/ } keys %$conf) {
        my $net = PVE::QemuServer::parse_net($conf->{$opt});

        eval {
            PVE::Network::SDN::Vnets::del_ips_from_mac(
                $net->{bridge}, $net->{macaddr}, $conf->{name});
        };
        if (my $err = $@) {
            warn $err;
        }
    }
}
8712
8713 1;