]> git.proxmox.com Git - qemu-server.git/blob - PVE/QemuServer.pm
move NUMA-related code into memory module
[qemu-server.git] / PVE / QemuServer.pm
1 package PVE::QemuServer;
2
3 use strict;
4 use warnings;
5
6 use Cwd 'abs_path';
7 use Digest::SHA;
8 use Fcntl ':flock';
9 use Fcntl;
10 use File::Basename;
11 use File::Copy qw(copy);
12 use File::Path;
13 use File::stat;
14 use Getopt::Long;
15 use IO::Dir;
16 use IO::File;
17 use IO::Handle;
18 use IO::Select;
19 use IO::Socket::UNIX;
20 use IPC::Open3;
21 use JSON;
22 use List::Util qw(first);
23 use MIME::Base64;
24 use POSIX;
25 use Storable qw(dclone);
26 use Time::HiRes qw(gettimeofday usleep);
27 use URI::Escape;
28 use UUID;
29
30 use PVE::Cluster qw(cfs_register_file cfs_read_file cfs_write_file);
31 use PVE::CGroup;
32 use PVE::CpuSet;
33 use PVE::DataCenterConfig;
34 use PVE::Exception qw(raise raise_param_exc);
35 use PVE::Format qw(render_duration render_bytes);
36 use PVE::GuestHelpers qw(safe_string_ne safe_num_ne safe_boolean_ne);
37 use PVE::Mapping::PCI;
38 use PVE::Mapping::USB;
39 use PVE::INotify;
40 use PVE::JSONSchema qw(get_standard_option parse_property_string);
41 use PVE::ProcFSTools;
42 use PVE::PBSClient;
43 use PVE::RESTEnvironment qw(log_warn);
44 use PVE::RPCEnvironment;
45 use PVE::Storage;
46 use PVE::SysFSTools;
47 use PVE::Systemd;
48 use PVE::Tools qw(run_command file_read_firstline file_get_contents dir_glob_foreach get_host_arch $IPV6RE);
49
50 use PVE::QMPClient;
51 use PVE::QemuConfig;
52 use PVE::QemuServer::Helpers qw(config_aware_timeout min_version windows_version);
53 use PVE::QemuServer::Cloudinit;
54 use PVE::QemuServer::CGroup;
55 use PVE::QemuServer::CPUConfig qw(print_cpu_device get_cpu_options);
56 use PVE::QemuServer::Drive qw(is_valid_drivename drive_is_cloudinit drive_is_cdrom drive_is_read_only parse_drive print_drive);
57 use PVE::QemuServer::Machine;
58 use PVE::QemuServer::Memory;
59 use PVE::QemuServer::Monitor qw(mon_cmd);
60 use PVE::QemuServer::PCI qw(print_pci_addr print_pcie_addr print_pcie_root_port parse_hostpci);
61 use PVE::QemuServer::USB;
62
63 my $have_sdn;
64 eval {
65 require PVE::Network::SDN::Zones;
66 $have_sdn = 1;
67 };
68
# Directory holding the EDK2 firmware images referenced below.
my $EDK2_FW_BASE = '/usr/share/pve-edk2-firmware/';

# Turns two file names into a two-element array ref of paths below $EDK2_FW_BASE.
my $edk2_image_pair = sub {
    my ($code_file, $vars_file) = @_;
    return [ map { "$EDK2_FW_BASE/$_" } ($code_file, $vars_file) ];
};

# Firmware image pairs, keyed first by architecture and then by a variant key; each
# architecture has a 'default' entry.
my $OVMF = {
    x86_64 => {
        '4m-no-smm' => $edk2_image_pair->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.fd'),
        '4m-no-smm-ms' => $edk2_image_pair->('OVMF_CODE_4M.fd', 'OVMF_VARS_4M.ms.fd'),
        '4m' => $edk2_image_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.fd'),
        '4m-ms' => $edk2_image_pair->('OVMF_CODE_4M.secboot.fd', 'OVMF_VARS_4M.ms.fd'),
        default => $edk2_image_pair->('OVMF_CODE.fd', 'OVMF_VARS.fd'),
    },
    aarch64 => {
        default => $edk2_image_pair->('AAVMF_CODE.fd', 'AAVMF_VARS.fd'),
    },
};
100
101 my $cpuinfo = PVE::ProcFSTools::read_cpuinfo();
102
# Note about locking: we use flock on the config file to protect against concurrent actions.
# Additionally, we have a 'lock' setting in the config file. This can be set to 'migrate',
# 'backup', 'snapshot' or 'rollback'. Most actions are not allowed when such a lock is set.
# But you can ignore this kind of lock with the --skiplock flag.
107
108 cfs_register_file(
109 '/qemu-server/',
110 \&parse_vm_config,
111 \&write_vm_config
112 );
113
114 PVE::JSONSchema::register_standard_option('pve-qm-stateuri', {
115 description => "Some command save/restore state from this location.",
116 type => 'string',
117 maxLength => 128,
118 optional => 1,
119 });
120
121 PVE::JSONSchema::register_standard_option('pve-qemu-machine', {
122 description => "Specifies the QEMU machine type.",
123 type => 'string',
124 pattern => '(pc|pc(-i440fx)?-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|q35|pc-q35-\d+(\.\d+)+(\+pve\d+)?(\.pxe)?|virt(?:-\d+(\.\d+)+)?(\+pve\d+)?)',
125 maxLength => 40,
126 optional => 1,
127 });
128
# FIXME: remove in favor of just using the INotify one, it's cached there exactly the same way
my $nodename_cache;

# Returns the value of PVE::INotify::nodename(). The lookup is done on the first call only;
# later calls are answered from $nodename_cache.
sub nodename {
    return $nodename_cache if defined($nodename_cache);

    $nodename_cache = PVE::INotify::nodename();
    return $nodename_cache;
}
135
# Property-string schema for the watchdog option: 'model' is the default key, 'action' is
# an optional second property.
my $watchdog_fmt = {
    model => {
        type => 'string',
        description => "Watchdog type to emulate.",
        enum => ['i6300esb', 'ib700'],
        default => 'i6300esb',
        default_key => 1,
        optional => 1,
    },
    action => {
        type => 'string',
        description => "The action to perform if after activation the guest fails to poll the watchdog in time.",
        enum => ['reset', 'shutdown', 'poweroff', 'pause', 'debug', 'none'],
        optional => 1,
    },
};
152 PVE::JSONSchema::register_format('pve-qm-watchdog', $watchdog_fmt);
153
# Property-string schema for the 'agent' option. 'enabled' is the default key; the other
# properties are optional.
my $agent_fmt = {
    enabled => {
        type => 'boolean',
        description => "Enable/disable communication with a QEMU Guest Agent (QGA) running in the VM.",
        default_key => 1,
        default => 0,
    },
    fstrim_cloned_disks => {
        type => 'boolean',
        description => "Run fstrim after moving a disk or migrating the VM.",
        default => 0,
        optional => 1,
    },
    'freeze-fs-on-backup' => {
        type => 'boolean',
        description => "Freeze/thaw guest filesystems on backup for consistency.",
        default => 1,
        optional => 1,
    },
    type => {
        type => 'string',
        description => "Select the agent type",
        enum => ['virtio', 'isa'],
        default => 'virtio',
        optional => 1,
    },
};
181
# Property-string schema for the 'vga' option: display 'type' (default key) plus an
# optional 'memory' size.
my $vga_fmt = {
    type => {
        type => 'string',
        description => "Select the VGA type.",
        enum => [
            'cirrus', 'qxl', 'qxl2', 'qxl3', 'qxl4', 'none',
            'serial0', 'serial1', 'serial2', 'serial3',
            'std', 'virtio', 'virtio-gl', 'vmware',
        ],
        default => 'std',
        default_key => 1,
        optional => 1,
    },
    memory => {
        type => 'integer',
        description => "Sets the VGA memory (in MiB). Has no effect with serial display.",
        minimum => 4,
        maximum => 512,
        optional => 1,
    },
};
199
# Property-string schema for the 'ivshmem' option: a mandatory 'size' and an optional 'name'.
my $ivshmem_fmt = {
    size => {
        type => 'integer',
        description => "The size of the file in MB.",
        minimum => 1,
    },
    name => {
        type => 'string',
        format_description => 'string',
        description => "The name of the file. Will be prefixed with 'pve-shm-'. Default is the VMID. Will be deleted when the VM is stopped.",
        pattern => '[a-zA-Z0-9\-]+',
        optional => 1,
    },
};
214
# Property-string schema for the 'audio0' option: the emulated 'device' and an optional
# backend 'driver'.
my $audio_fmt = {
    device => {
        type => 'string',
        description => "Configure an audio device.",
        enum => ['ich9-intel-hda', 'intel-hda', 'AC97'],
    },
    driver => {
        type => 'string',
        description => "Driver backend for the audio device.",
        enum => [qw(spice none)],
        default => 'spice',
        optional => 1,
    },
};
229
# Property-string schema for the 'spice_enhancements' option; both properties are optional.
my $spice_enhancements_fmt = {
    foldersharing => {
        type => 'boolean',
        description => "Enable folder sharing via SPICE. Needs Spice-WebDAV daemon installed in the VM.",
        default => '0',
        optional => 1,
    },
    videostreaming => {
        type => 'string',
        description => "Enable video streaming. Uses compression for detected video streams.",
        enum => [qw(off all filter)],
        default => 'off',
        optional => 1,
    },
};
245
# Property-string schema for the 'rng0' option: the host entropy 'source' (default key) and
# the optional 'max_bytes' / 'period' rate-limit pair.
my $rng_fmt = {
    source => {
        type => 'string',
        default_key => 1,
        enum => [qw(/dev/urandom /dev/random /dev/hwrng)],
        description => "The file on the host to gather entropy from. In most cases '/dev/urandom'"
            ." should be preferred over '/dev/random' to avoid entropy-starvation issues on the"
            ." host. Using urandom does *not* decrease security in any meaningful way, as it's"
            ." still seeded from real entropy, and the bytes provided will most likely be mixed"
            ." with real entropy on the guest as well. '/dev/hwrng' can be used to pass through"
            ." a hardware RNG from the host.",
    },
    max_bytes => {
        type => 'integer',
        optional => 1,
        # default is 1 KiB/s, provides enough entropy to the guest to avoid boot-starvation issues
        # (e.g. systemd etc...) while allowing no chance of overwhelming the host, provided we're
        # reading from /dev/urandom
        default => 1024,
        description => "Maximum bytes of entropy allowed to get injected into the guest every"
            ." 'period' milliseconds. Prefer a lower value when using '/dev/random' as source. Use"
            ." `0` to disable limiting (potentially dangerous!).",
    },
    period => {
        type => 'integer',
        optional => 1,
        default => 1000,
        description => "Every 'period' milliseconds the entropy-injection quota is reset, allowing"
            ." the guest to retrieve another 'max_bytes' of entropy.",
    },
};
278
# Property-string schema for the 'meta' option; both properties are optional.
my $meta_info_fmt = {
    ctime => {
        type => 'integer',
        description => "The guest creation timestamp as UNIX epoch time",
        optional => 1,
        minimum => 0,
    },
    'creation-qemu' => {
        type => 'string',
        description => "The QEMU (machine) version from the time this VM was created.",
        optional => 1,
        pattern => '\d+(\.\d+)+',
    },
};
293
294 my $confdesc = {
295 onboot => {
296 optional => 1,
297 type => 'boolean',
298 description => "Specifies whether a VM will be started during system bootup.",
299 default => 0,
300 },
301 autostart => {
302 optional => 1,
303 type => 'boolean',
304 description => "Automatic restart after crash (currently ignored).",
305 default => 0,
306 },
307 hotplug => {
308 optional => 1,
309 type => 'string', format => 'pve-hotplug-features',
310 description => "Selectively enable hotplug features. This is a comma separated list of"
311 ." hotplug features: 'network', 'disk', 'cpu', 'memory', 'usb' and 'cloudinit'. Use '0' to disable"
312 ." hotplug completely. Using '1' as value is an alias for the default `network,disk,usb`."
313 ." USB hotplugging is possible for guests with machine version >= 7.1 and ostype l26 or"
314 ." windows > 7.",
315 default => 'network,disk,usb',
316 },
317 reboot => {
318 optional => 1,
319 type => 'boolean',
320 description => "Allow reboot. If set to '0' the VM exit on reboot.",
321 default => 1,
322 },
323 lock => {
324 optional => 1,
325 type => 'string',
326 description => "Lock/unlock the VM.",
327 enum => [qw(backup clone create migrate rollback snapshot snapshot-delete suspending suspended)],
328 },
329 cpulimit => {
330 optional => 1,
331 type => 'number',
332 description => "Limit of CPU usage.",
333 verbose_description => "Limit of CPU usage.\n\nNOTE: If the computer has 2 CPUs, it has"
334 ." total of '2' CPU time. Value '0' indicates no CPU limit.",
335 minimum => 0,
336 maximum => 128,
337 default => 0,
338 },
339 cpuunits => {
340 optional => 1,
341 type => 'integer',
342 description => "CPU weight for a VM, will be clamped to [1, 10000] in cgroup v2.",
343 verbose_description => "CPU weight for a VM. Argument is used in the kernel fair scheduler."
344 ." The larger the number is, the more CPU time this VM gets. Number is relative to"
345 ." weights of all the other running VMs.",
346 minimum => 1,
347 maximum => 262144,
348 default => 'cgroup v1: 1024, cgroup v2: 100',
349 },
350 memory => {
351 optional => 1,
352 type => 'integer',
353 description => "Amount of RAM for the VM in MiB. This is the maximum available memory when"
354 ." you use the balloon device.",
355 minimum => 16,
356 default => 512,
357 },
# Optional integer option; a value of zero is allowed (minimum => 0).
# The description previously misspelled "balloon" as "ballon".
balloon => {
    optional => 1,
    type => 'integer',
    description => "Amount of target RAM for the VM in MiB. Using zero disables the balloon driver.",
    minimum => 0,
},
364 shares => {
365 optional => 1,
366 type => 'integer',
367 description => "Amount of memory shares for auto-ballooning. The larger the number is, the"
368 ." more memory this VM gets. Number is relative to weights of all other running VMs."
369 ." Using zero disables auto-ballooning. Auto-ballooning is done by pvestatd.",
370 minimum => 0,
371 maximum => 50000,
372 default => 1000,
373 },
374 keyboard => {
375 optional => 1,
376 type => 'string',
377 description => "Keyboard layout for VNC server. This option is generally not required and"
378 ." is often better handled from within the guest OS.",
379 enum => PVE::Tools::kvmkeymaplist(),
380 default => undef,
381 },
382 name => {
383 optional => 1,
384 type => 'string', format => 'dns-name',
385 description => "Set a name for the VM. Only used on the configuration web interface.",
386 },
387 scsihw => {
388 optional => 1,
389 type => 'string',
390 description => "SCSI controller model",
391 enum => [qw(lsi lsi53c810 virtio-scsi-pci virtio-scsi-single megasas pvscsi)],
392 default => 'lsi',
393 },
394 description => {
395 optional => 1,
396 type => 'string',
397 description => "Description for the VM. Shown in the web-interface VM's summary."
398 ." This is saved as comment inside the configuration file.",
399 maxLength => 1024 * 8,
400 },
# Optional guest OS type. 'enum' lists the accepted identifiers and the verbose description
# explains each of them. The description previously misspelled "OpenIndiana".
ostype => {
    optional => 1,
    type => 'string',
    enum => [qw(other wxp w2k w2k3 w2k8 wvista win7 win8 win10 win11 l24 l26 solaris)],
    description => "Specify guest operating system.",
    verbose_description => <<EODESC,
Specify guest operating system. This is used to enable special
optimization/features for specific operating systems:

[horizontal]
other;; unspecified OS
wxp;; Microsoft Windows XP
w2k;; Microsoft Windows 2000
w2k3;; Microsoft Windows 2003
w2k8;; Microsoft Windows 2008
wvista;; Microsoft Windows Vista
win7;; Microsoft Windows 7
win8;; Microsoft Windows 8/2012/2012r2
win10;; Microsoft Windows 10/2016/2019
win11;; Microsoft Windows 11/2022
l24;; Linux 2.4 Kernel
l26;; Linux 2.6 - 6.X Kernel
solaris;; Solaris/OpenSolaris/OpenIndiana kernel
EODESC
},
426 boot => {
427 optional => 1,
428 type => 'string', format => 'pve-qm-boot',
429 description => "Specify guest boot order. Use the 'order=' sub-property as usage with no"
430 ." key or 'legacy=' is deprecated.",
431 },
432 bootdisk => {
433 optional => 1,
434 type => 'string', format => 'pve-qm-bootdisk',
435 description => "Enable booting from specified disk. Deprecated: Use 'boot: order=foo;bar' instead.",
436 pattern => '(ide|sata|scsi|virtio)\d+',
437 },
438 smp => {
439 optional => 1,
440 type => 'integer',
441 description => "The number of CPUs. Please use option -sockets instead.",
442 minimum => 1,
443 default => 1,
444 },
445 sockets => {
446 optional => 1,
447 type => 'integer',
448 description => "The number of CPU sockets.",
449 minimum => 1,
450 default => 1,
451 },
452 cores => {
453 optional => 1,
454 type => 'integer',
455 description => "The number of cores per socket.",
456 minimum => 1,
457 default => 1,
458 },
459 numa => {
460 optional => 1,
461 type => 'boolean',
462 description => "Enable/disable NUMA.",
463 default => 0,
464 },
465 hugepages => {
466 optional => 1,
467 type => 'string',
468 description => "Enable/disable hugepages memory.",
469 enum => [qw(any 2 1024)],
470 },
# Optional boolean, off by default. The description previously contained a doubled word
# ("will not not be deleted"), which inverted the meaning when read literally.
keephugepages => {
    optional => 1,
    type => 'boolean',
    default => 0,
    description => "Use together with hugepages. If enabled, hugepages will not be deleted"
        ." after VM shutdown and can be used for subsequent starts.",
},
478 vcpus => {
479 optional => 1,
480 type => 'integer',
481 description => "Number of hotplugged vcpus.",
482 minimum => 1,
483 default => 0,
484 },
485 acpi => {
486 optional => 1,
487 type => 'boolean',
488 description => "Enable/disable ACPI.",
489 default => 1,
490 },
491 agent => {
492 optional => 1,
493 description => "Enable/disable communication with the QEMU Guest Agent and its properties.",
494 type => 'string',
495 format => $agent_fmt,
496 },
497 kvm => {
498 optional => 1,
499 type => 'boolean',
500 description => "Enable/disable KVM hardware virtualization.",
501 default => 1,
502 },
503 tdf => {
504 optional => 1,
505 type => 'boolean',
506 description => "Enable/disable time drift fix.",
507 default => 0,
508 },
509 localtime => {
510 optional => 1,
511 type => 'boolean',
512 description => "Set the real time clock (RTC) to local time. This is enabled by default if"
513 ." the `ostype` indicates a Microsoft Windows OS.",
514 },
515 freeze => {
516 optional => 1,
517 type => 'boolean',
518 description => "Freeze CPU at startup (use 'c' monitor command to start execution).",
519 },
520 vga => {
521 optional => 1,
522 type => 'string', format => $vga_fmt,
523 description => "Configure the VGA hardware.",
524 verbose_description => "Configure the VGA Hardware. If you want to use high resolution"
525 ." modes (>= 1280x1024x16) you may need to increase the vga memory option. Since QEMU"
526 ." 2.9 the default VGA display type is 'std' for all OS types besides some Windows"
527 ." versions (XP and older) which use 'cirrus'. The 'qxl' option enables the SPICE"
528 ." display server. For win* OS you can select how many independent displays you want,"
529 ." Linux guests can add displays them self.\nYou can also run without any graphic card,"
530 ." using a serial device as terminal.",
531 },
532 watchdog => {
533 optional => 1,
534 type => 'string', format => 'pve-qm-watchdog',
535 description => "Create a virtual hardware watchdog device.",
536 verbose_description => "Create a virtual hardware watchdog device. Once enabled (by a guest"
537 ." action), the watchdog must be periodically polled by an agent inside the guest or"
538 ." else the watchdog will reset the guest (or execute the respective action specified)",
539 },
# Optional string option; 'pattern' accepts 'now', a date, or a date with time.
# The two description pieces were previously joined without a separating space,
# giving "...are:'now' or ...".
startdate => {
    optional => 1,
    type => 'string',
    typetext => "(now | YYYY-MM-DD | YYYY-MM-DDTHH:MM:SS)",
    description => "Set the initial date of the real time clock. Valid format for date are:"
        ." 'now' or '2006-06-17T16:01:21' or '2006-06-17'.",
    pattern => '(now|\d{4}-\d{1,2}-\d{1,2}(T\d{1,2}:\d{1,2}:\d{1,2})?)',
    default => 'now',
},
549 startup => get_standard_option('pve-startup-order'),
550 template => {
551 optional => 1,
552 type => 'boolean',
553 description => "Enable/disable Template.",
554 default => 0,
555 },
556 args => {
557 optional => 1,
558 type => 'string',
559 description => "Arbitrary arguments passed to kvm.",
560 verbose_description => <<EODESCR,
561 Arbitrary arguments passed to kvm, for example:
562
563 args: -no-reboot -smbios 'type=0,vendor=FOO'
564
565 NOTE: this option is for experts only.
566 EODESCR
567 },
568 tablet => {
569 optional => 1,
570 type => 'boolean',
571 default => 1,
572 description => "Enable/disable the USB tablet device.",
573 verbose_description => "Enable/disable the USB tablet device. This device is usually needed"
574 ." to allow absolute mouse positioning with VNC. Else the mouse runs out of sync with"
575 ." normal VNC clients. If you're running lots of console-only guests on one host, you"
576 ." may consider disabling this to save some context switches. This is turned off by"
577 ." default if you use spice (`qm set <vmid> --vga qxl`).",
578 },
579 migrate_speed => {
580 optional => 1,
581 type => 'integer',
582 description => "Set maximum speed (in MB/s) for migrations. Value 0 is no limit.",
583 minimum => 0,
584 default => 0,
585 },
586 migrate_downtime => {
587 optional => 1,
588 type => 'number',
589 description => "Set maximum tolerated downtime (in seconds) for migrations.",
590 minimum => 0,
591 default => 0.1,
592 },
593 cdrom => {
594 optional => 1,
595 type => 'string', format => 'pve-qm-ide',
596 typetext => '<volume>',
597 description => "This is an alias for option -ide2",
598 },
599 cpu => {
600 optional => 1,
601 description => "Emulated CPU type.",
602 type => 'string',
603 format => 'pve-vm-cpu-conf',
604 },
605 parent => get_standard_option('pve-snapshot-name', {
606 optional => 1,
607 description => "Parent snapshot name. This is used internally, and should not be modified.",
608 }),
609 snaptime => {
610 optional => 1,
611 description => "Timestamp for snapshots.",
612 type => 'integer',
613 minimum => 0,
614 },
615 vmstate => {
616 optional => 1,
617 type => 'string', format => 'pve-volume-id',
618 description => "Reference to a volume which stores the VM state. This is used internally"
619 ." for snapshots.",
620 },
621 vmstatestorage => get_standard_option('pve-storage-id', {
622 description => "Default storage for VM state volumes/files.",
623 optional => 1,
624 }),
625 runningmachine => get_standard_option('pve-qemu-machine', {
626 description => "Specifies the QEMU machine type of the running vm. This is used internally"
627 ." for snapshots.",
628 }),
629 runningcpu => {
630 description => "Specifies the QEMU '-cpu' parameter of the running vm. This is used"
631 ." internally for snapshots.",
632 optional => 1,
633 type => 'string',
634 pattern => $PVE::QemuServer::CPUConfig::qemu_cmdline_cpu_re,
635 format_description => 'QEMU -cpu parameter'
636 },
637 machine => get_standard_option('pve-qemu-machine'),
638 arch => {
639 description => "Virtual processor architecture. Defaults to the host.",
640 optional => 1,
641 type => 'string',
642 enum => [qw(x86_64 aarch64)],
643 },
644 smbios1 => {
645 description => "Specify SMBIOS type 1 fields.",
646 type => 'string', format => 'pve-qm-smbios1',
647 maxLength => 512,
648 optional => 1,
649 },
650 protection => {
651 optional => 1,
652 type => 'boolean',
653 description => "Sets the protection flag of the VM. This will disable the remove VM and"
654 ." remove disk operations.",
655 default => 0,
656 },
657 bios => {
658 optional => 1,
659 type => 'string',
660 enum => [ qw(seabios ovmf) ],
661 description => "Select BIOS implementation.",
662 default => 'seabios',
663 },
664 vmgenid => {
665 type => 'string',
666 pattern => '(?:[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}|[01])',
667 format_description => 'UUID',
668 description => "Set VM Generation ID. Use '1' to autogenerate on create or update, pass '0'"
669 ." to disable explicitly.",
670 verbose_description => "The VM generation ID (vmgenid) device exposes a 128-bit integer"
671 ." value identifier to the guest OS. This allows to notify the guest operating system"
672 ." when the virtual machine is executed with a different configuration (e.g. snapshot"
673 ." execution or creation from a template). The guest operating system notices the"
674 ." change, and is then able to react as appropriate by marking its copies of"
675 ." distributed databases as dirty, re-initializing its random number generator, etc.\n"
676 ."Note that auto-creation only works when done through API/CLI create or update methods"
677 .", but not when manually editing the config file.",
678 default => "1 (autogenerated)",
679 optional => 1,
680 },
681 hookscript => {
682 type => 'string',
683 format => 'pve-volume-id',
684 optional => 1,
685 description => "Script that will be executed during various steps in the vms lifetime.",
686 },
687 ivshmem => {
688 type => 'string',
689 format => $ivshmem_fmt,
690 description => "Inter-VM shared memory. Useful for direct communication between VMs, or to"
691 ." the host.",
692 optional => 1,
693 },
694 audio0 => {
695 type => 'string',
696 format => $audio_fmt,
697 description => "Configure a audio device, useful in combination with QXL/Spice.",
698 optional => 1
699 },
700 spice_enhancements => {
701 type => 'string',
702 format => $spice_enhancements_fmt,
703 description => "Configure additional enhancements for SPICE.",
704 optional => 1
705 },
706 tags => {
707 type => 'string', format => 'pve-tag-list',
708 description => 'Tags of the VM. This is only meta information.',
709 optional => 1,
710 },
711 rng0 => {
712 type => 'string',
713 format => $rng_fmt,
714 description => "Configure a VirtIO-based Random Number Generator.",
715 optional => 1,
716 },
717 meta => {
718 type => 'string',
719 format => $meta_info_fmt,
720 description => "Some (read-only) meta-information about this guest.",
721 optional => 1,
722 },
723 affinity => {
724 type => 'string', format => 'pve-cpuset',
725 description => "List of host cores used to execute guest processes, for example: 0,5,8-11",
726 optional => 1,
727 },
728 };
729
# Property-string schema for the 'pve-qm-cicustom' format. Each property is optional and
# uses the 'pve-volume-id' format.
my $cicustom_fmt = {
    meta => {
        type => 'string',
        optional => 1,
        # Both pieces must be separate single-quoted literals. The previous version mixed
        # quote styles, so the whole thing was one literal containing a '"', a line break
        # and '."' in the middle of the text.
        description => 'Specify a custom file containing all meta data passed to the VM via'
            .' cloud-init. This is provider specific meaning configdrive2 and nocloud differ.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    network => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all network data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    user => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all user data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
    vendor => {
        type => 'string',
        optional => 1,
        description => 'To pass a custom file containing all vendor data to the VM via cloud-init.',
        format => 'pve-volume-id',
        format_description => 'volume',
    },
};
761 PVE::JSONSchema::register_format('pve-qm-cicustom', $cicustom_fmt);
762
763 # any new option might need to be added to $cloudinitoptions in PVE::API2::Qemu
764 my $confdesc_cloudinit = {
# Optional cloud-init format selector; 'enum' lists the accepted formats.
# The description previously left the parenthesis after `ostype` unclosed.
citype => {
    optional => 1,
    type => 'string',
    description => 'Specifies the cloud-init configuration format. The default depends on the'
        .' configured operating system type (`ostype`). We use the `nocloud` format for Linux,'
        .' and `configdrive2` for windows.',
    enum => ['configdrive2', 'nocloud', 'opennebula'],
},
773 ciuser => {
774 optional => 1,
775 type => 'string',
776 description => "cloud-init: User name to change ssh keys and password for instead of the"
777 ." image's configured default user.",
778 },
779 cipassword => {
780 optional => 1,
781 type => 'string',
782 description => 'cloud-init: Password to assign the user. Using this is generally not'
783 .' recommended. Use ssh keys instead. Also note that older cloud-init versions do not'
784 .' support hashed passwords.',
785 },
786 ciupgrade => {
787 optional => 1,
788 type => 'boolean',
789 description => 'cloud-init: do an automatic package upgrade after the first boot.',
790 default => 1,
791 },
792 cicustom => {
793 optional => 1,
794 type => 'string',
795 description => 'cloud-init: Specify custom files to replace the automatically generated'
796 .' ones at start.',
797 format => 'pve-qm-cicustom',
798 },
799 searchdomain => {
800 optional => 1,
801 type => 'string',
802 description => 'cloud-init: Sets DNS search domains for a container. Create will'
803 .' automatically use the setting from the host if neither searchdomain nor nameserver'
804 .' are set.',
805 },
806 nameserver => {
807 optional => 1,
808 type => 'string', format => 'address-list',
809 description => 'cloud-init: Sets DNS server IP address for a container. Create will'
810 .' automatically use the setting from the host if neither searchdomain nor nameserver'
811 .' are set.',
812 },
813 sshkeys => {
814 optional => 1,
815 type => 'string',
816 format => 'urlencoded',
817 description => "cloud-init: Setup public SSH keys (one key per line, OpenSSH format).",
818 },
819 };
820
821 # what about other qemu settings ?
822 #cpu => 'string',
823 #machine => 'string',
824 #fda => 'file',
825 #fdb => 'file',
826 #mtdblock => 'file',
827 #sd => 'file',
828 #pflash => 'file',
829 #snapshot => 'bool',
830 #bootp => 'file',
831 ##tftp => 'dir',
832 ##smb => 'dir',
833 #kernel => 'file',
834 #append => 'string',
835 #initrd => 'file',
836 ##soundhw => 'string',
837
# Register each option currently present in $confdesc as a standard option named
# 'pve-qm-<option>'. Entries added to $confdesc further down are not covered by this loop.
for my $opt_name (keys %$confdesc) {
    PVE::JSONSchema::register_standard_option("pve-qm-$opt_name", $confdesc->{$opt_name});
}
841
842 my $MAX_NETS = 32;
843 my $MAX_SERIAL_PORTS = 4;
844 my $MAX_PARALLEL_PORTS = 3;
845
# Add a 'numa<N>' option to $confdesc for every index below
# $PVE::QemuServer::Memory::MAX_NUMA; all of them share the same description hash.
foreach my $numa_idx (0 .. $PVE::QemuServer::Memory::MAX_NUMA - 1) {
    $confdesc->{"numa$numa_idx"} = $PVE::QemuServer::Memory::numadesc;
}
849
# NIC model names accepted by the 'model' property of the network device schema.
my $nic_model_list = [qw(
    e1000
    e1000-82540em
    e1000-82544gc
    e1000-82545em
    e1000e
    i82551
    i82557b
    i82559er
    ne2k_isa
    ne2k_pci
    pcnet
    rtl8139
    virtio
    vmxnet3
)];

# The same names, string-sorted and joined by single spaces.
my $nic_model_list_txt = join(' ', sort { $a cmp $b } @{$nic_model_list});
867
868 my $net_fmt_bridge_descr = <<__EOD__;
869 Bridge to attach the network device to. The Proxmox VE standard bridge
870 is called 'vmbr0'.
871
872 If you do not specify a bridge, we create a kvm user (NATed) network
873 device, which provides DHCP and DNS services. The following addresses
874 are used:
875
876 10.0.2.2 Gateway
877 10.0.2.3 DNS Server
878 10.0.2.4 SMB Server
879
880 The DHCP server assign addresses to the guest starting from 10.0.2.15.
881 __EOD__
882
883 my $net_fmt = {
# Based on the 'mac-addr' standard option, with only the description overridden.
# The description previously read "withing your network".
macaddr => get_standard_option('mac-addr', {
    description => "MAC address. That address must be unique within your network. This is"
        ." automatically generated if not specified.",
}),
888 model => {
889 type => 'string',
890 description => "Network Card Model. The 'virtio' model provides the best performance with"
891 ." very low CPU overhead. If your guest does not support this driver, it is usually"
892 ." best to use 'e1000'.",
893 enum => $nic_model_list,
894 default_key => 1,
895 },
896 (map { $_ => { keyAlias => 'model', alias => 'macaddr' }} @$nic_model_list),
897 bridge => get_standard_option('pve-bridge-id', {
898 description => $net_fmt_bridge_descr,
899 optional => 1,
900 }),
901 queues => {
902 type => 'integer',
903 minimum => 0, maximum => 64,
904 description => 'Number of packet queues to be used on the device.',
905 optional => 1,
906 },
907 rate => {
908 type => 'number',
909 minimum => 0,
910 description => "Rate limit in mbps (megabytes per second) as floating point number.",
911 optional => 1,
912 },
913 tag => {
914 type => 'integer',
915 minimum => 1, maximum => 4094,
916 description => 'VLAN tag to apply to packets on this interface.',
917 optional => 1,
918 },
919 trunks => {
920 type => 'string',
921 pattern => qr/\d+(?:-\d+)?(?:;\d+(?:-\d+)?)*/,
922 description => 'VLAN trunks to pass through this interface.',
923 format_description => 'vlanid[;vlanid...]',
924 optional => 1,
925 },
926 firewall => {
927 type => 'boolean',
928 description => 'Whether this interface should be protected by the firewall.',
929 optional => 1,
930 },
931 link_down => {
932 type => 'boolean',
933 description => 'Whether this interface should be disconnected (like pulling the plug).',
934 optional => 1,
935 },
936 mtu => {
937 type => 'integer',
938 minimum => 1, maximum => 65520,
939 description => "Force MTU, for VirtIO only. Set to '1' to use the bridge MTU",
940 optional => 1,
941 },
942 };
943
944 my $netdesc = {
945 optional => 1,
946 type => 'string', format => $net_fmt,
947 description => "Specify network devices.",
948 };
949
950 PVE::JSONSchema::register_standard_option("pve-qm-net", $netdesc);
951
# Property-string format of the cloud-init ipconfigN options. Gateways are
# only accepted together with an address of the same family ('requires').
my $ipconfig_fmt = {
    ip => {
        type => 'string',
        format => 'pve-ipv4-config',
        format_description => 'IPv4Format/CIDR',
        description => 'IPv4 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw => {
        type => 'string',
        format => 'ipv4',
        format_description => 'GatewayIPv4',
        description => 'Default gateway for IPv4 traffic.',
        optional => 1,
        requires => 'ip',
    },
    ip6 => {
        type => 'string',
        format => 'pve-ipv6-config',
        format_description => 'IPv6Format/CIDR',
        description => 'IPv6 address in CIDR format.',
        optional => 1,
        default => 'dhcp',
    },
    gw6 => {
        type => 'string',
        format => 'ipv6',
        format_description => 'GatewayIPv6',
        description => 'Default gateway for IPv6 traffic.',
        optional => 1,
        requires => 'ip6',
    },
};
PVE::JSONSchema::register_format('pve-qm-ipconfig', $ipconfig_fmt);

# Schema entry used for every ipconfigN option.
my $ipconfigdesc = {
    optional => 1,
    type => 'string', format => 'pve-qm-ipconfig',
    description => <<'EODESCR',
cloud-init: Specify IP addresses and gateways for the corresponding interface.

IP addresses use CIDR notation, gateways are optional but need an IP of the same type specified.

The special string 'dhcp' can be used for IP addresses to use DHCP, in which case no explicit
gateway should be provided.
For IPv6 the special string 'auto' can be used to use stateless autoconfiguration. This requires
cloud-init 19.4 or newer.

If cloud-init is enabled and neither an IPv4 nor an IPv6 address is specified, it defaults to using
dhcp on IPv4.
EODESCR
};
# The standard option must carry the ipconfig description; registering the
# netN description under this name was a copy/paste mistake.
PVE::JSONSchema::register_standard_option("pve-qm-ipconfig", $ipconfigdesc);
1005
# Publish one netN entry and one cloud-init ipconfigN entry per NIC slot.
for my $idx (0 .. $MAX_NETS - 1) {
    $confdesc_cloudinit->{"ipconfig$idx"} = $ipconfigdesc;
    $confdesc->{"net$idx"} = $netdesc;
}

# Every cloud-init option is also a regular VM config option.
$confdesc->{$_} = $confdesc_cloudinit->{$_} for keys %$confdesc_cloudinit;
1014
PVE::JSONSchema::register_format('pve-cpuset', \&pve_verify_cpuset);
# Format verifier for 'pve-cpuset'.
#
# Parses $set_text with PVE::CpuSet::parse_cpuset and returns the short string
# form of the resulting set. On a parse error it returns nothing when $noerr
# is set and dies otherwise.
sub pve_verify_cpuset {
    my ($set_text, $noerr) = @_;

    # only the member list is needed, the element count is discarded
    my (undef, $members) = eval { PVE::CpuSet::parse_cpuset($set_text) };

    if ($@) {
        die "unable to parse cpuset option\n" if !$noerr;
        return;
    }

    my $cpuset = PVE::CpuSet->new($members);
    return $cpuset->short_string();
}
1028
PVE::JSONSchema::register_format('pve-volume-id-or-qm-path', \&verify_volume_id_or_qm_path);
# Format verifier: accepts the keywords 'none' and 'cdrom' as-is and hands
# everything else to verify_volume_id_or_absolute_path().
sub verify_volume_id_or_qm_path {
    my ($volid, $noerr) = @_;

    for my $keyword (qw(none cdrom)) {
        return $volid if $volid eq $keyword;
    }

    return verify_volume_id_or_absolute_path($volid, $noerr);
}
1037
PVE::JSONSchema::register_format('pve-volume-id-or-absolute-path', \&verify_volume_id_or_absolute_path);
# Format verifier: a value starting with '/' is returned unchanged, anything
# else must pass the 'pve-volume-id' format check. On failure it returns
# nothing when $noerr is set and re-throws the check error otherwise.
sub verify_volume_id_or_absolute_path {
    my ($volid, $noerr) = @_;

    if ($volid =~ m|^/|) {
        return $volid;
    }

    my $checked = eval { PVE::JSONSchema::check_format('pve-volume-id', $volid, '') };
    if (my $err = $@) {
        die $err if !$noerr;
        return;
    }
    return $checked;
}
1051
# Schema entry shared by all serialN options: either a host device node below
# /dev or the literal 'socket'.
my $serialdesc = {
    optional => 1,
    type => 'string',
    pattern => '(/dev/.+|socket)',
    description => "Create a serial device inside the VM (n is 0 to 3)",
    verbose_description => <<EODESCR,
Create a serial device inside the VM (n is 0 to 3), and pass through a
host serial device (i.e. /dev/ttyS0), or create a unix socket on the
host side (use 'qm terminal' to open a terminal connection).

NOTE: If you pass through a host serial device, it is no longer possible to migrate such machines -
use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};

# Schema entry shared by all parallelN options: a /dev/parportN or
# /dev/usb/lpN host device node.
my $paralleldesc= {
    optional => 1,
    type => 'string',
    pattern => '/dev/parport\d+|/dev/usb/lp\d+',
    description => "Map host parallel devices (n is 0 to 2).",
    verbose_description => <<EODESCR,
Map host parallel devices (n is 0 to 2).

NOTE: This option allows direct access to host hardware. So it is no longer possible to migrate such
machines - use with special care.

CAUTION: Experimental! User reported problems with this option.
EODESCR
};
1083
# One parallelN / serialN schema entry per available port.
$confdesc->{"parallel$_"} = $paralleldesc for 0 .. $MAX_PARALLEL_PORTS - 1;

$confdesc->{"serial$_"} = $serialdesc for 0 .. $MAX_SERIAL_PORTS - 1;

# One hostpciN entry per passthrough slot.
for my $slot (0 .. $PVE::QemuServer::PCI::MAX_HOSTPCI_DEVICES - 1) {
    $confdesc->{"hostpci$slot"} = $PVE::QemuServer::PCI::hostpcidesc;
}

# Take over all drive descriptions from the Drive module.
$confdesc->{$_} = $PVE::QemuServer::Drive::drivedesc_hash->{$_}
    for keys %{$PVE::QemuServer::Drive::drivedesc_hash};

# One usbN entry per USB slot.
for my $slot (0 .. $PVE::QemuServer::USB::MAX_USB_DEVICES - 1) {
    $confdesc->{"usb$slot"} = $PVE::QemuServer::USB::usbdesc;
}
1103
# Property-string format of the 'boot' option. 'legacy' is the default key, so
# a bare value such as 'cdn' is parsed as legacy=cdn; 'order' holds a
# ';'-separated device list checked with the 'pve-qm-bootdev-list' format.
my $boot_fmt = {
    legacy => {
        optional => 1,
        default_key => 1,
        type => 'string',
        description => "Boot on floppy (a), hard disk (c), CD-ROM (d), or network (n)."
            . " Deprecated, use 'order=' instead.",
        pattern => '[acdn]{1,4}',
        format_description => "[acdn]{1,4}",

        # note: this is also the fallback if boot: is not given at all
        default => 'cdn',
    },
    order => {
        optional => 1,
        type => 'string',
        format => 'pve-qm-bootdev-list',
        format_description => "device[;device...]",
        description => <<EODESC,
The guest will attempt to boot from devices in the order they appear here.

Disks, optical drives and passed-through storage USB devices will be directly
booted from, NICs will load PXE, and PCIe devices will either behave like disks
(e.g. NVMe) or load an option ROM (e.g. RAID controller, hardware NIC).

Note that only devices in this list will be marked as bootable and thus loaded
by the guest firmware (BIOS/UEFI). If you require multiple disks for booting
(e.g. software-raid), you need to specify all of them here.

Overrides the deprecated 'legacy=[acdn]*' value when given.
EODESC
    },
};
PVE::JSONSchema::register_format('pve-qm-boot', $boot_fmt);
1138
PVE::JSONSchema::register_format('pve-qm-bootdev', \&verify_bootdev);
# Format verifier for a single boot device name.
#
# Accepts valid drive names except efidisk*/tpmstate*, plus netN, usbN and
# hostpciN keys that exist in $confdesc. For anything else it returns nothing
# when $noerr is set and dies otherwise.
sub verify_bootdev {
    my ($dev, $noerr) = @_;

    my $is_special = $dev =~ m/^efidisk/ || $dev =~ m/^tpmstate/;
    if (!$is_special && PVE::QemuServer::Drive::is_valid_drivename($dev)) {
        return $dev;
    }

    # non-drive devices need a numeric suffix and a matching schema entry
    for my $base (qw(net usb hostpci)) {
        next if $dev !~ m/^$base\d+$/;
        next if !$confdesc->{$dev};
        return $dev;
    }

    die "invalid boot device '$dev'\n" if !$noerr;
    return;
}
1160
# Render a list of device names as a 'boot' property string (order=a;b;...).
# An empty list yields the empty string.
sub print_bootorder {
    my ($devs) = @_;

    if (!@$devs) {
        return "";
    }

    my $order = join(';', @$devs);
    return PVE::JSONSchema::print_property_string({ order => $order }, $boot_fmt);
}
1167
# cached result of the KVM_GET_API_VERSION ioctl (0 = not queried yet)
my $kvm_api_version = 0;

# Return the KVM API version reported by /dev/kvm. The value is cached once a
# true value was obtained; returns nothing if /dev/kvm cannot be opened.
sub kvm_version {
    if (!$kvm_api_version) {
        open(my $kvm_fh, '<', '/dev/kvm') or return;

        # 0xae00 => KVM_GET_API_VERSION
        $kvm_api_version = ioctl($kvm_fh, 0xae00, 0);
        close($kvm_fh);
    }

    return $kvm_api_version;
}
1181
# per-binary cache: detected version string and the binary's mtime at the
# time of detection
my $kvm_user_version = {};
my $kvm_mtime = {};

# Return the version of the QEMU binary as printed by '<binary> --version'.
#
# $binary defaults to the command for the host architecture. The result is
# cached per binary and refreshed when the binary's mtime changes. Returns
# 'unknown' if the version line cannot be parsed. Dies if the binary cannot
# be stat'ed.
sub kvm_user_version {
    my ($binary) = @_;

    $binary //= get_command_for_arch(get_host_arch()); # get the native arch by default

    # fail with a clear message instead of calling ->mtime on undef below
    my $st = stat($binary) or die "unable to stat QEMU binary '$binary' - $!\n";

    my $cachedmtime = $kvm_mtime->{$binary} // -1;
    return $kvm_user_version->{$binary} if $kvm_user_version->{$binary} &&
        $cachedmtime == $st->mtime;

    $kvm_user_version->{$binary} = 'unknown';
    $kvm_mtime->{$binary} = $st->mtime;

    my $code = sub {
        my $line = shift;
        # group 2 is major.minor[.micro]; an optional fourth component is not captured in it
        if ($line =~ m/^QEMU( PC)? emulator version (\d+\.\d+(\.\d+)?)(\.\d+)?[,\s]/) {
            $kvm_user_version->{$binary} = $2;
        }
    };

    eval { run_command([$binary, '--version'], outfunc => $code); };
    warn $@ if $@;

    return $kvm_user_version->{$binary};
}
# Thin wrapper around PVE::QemuServer::Machine::extract_version() that
# falls back to the installed QEMU version when $version is not defined.
my sub extract_version {
    my ($machine_type, $version) = @_;

    $version //= kvm_user_version();

    return PVE::QemuServer::Machine::extract_version($machine_type, $version);
}
1216
# True if the vhost-net character device exists on this host.
sub kernel_has_vhost_net {
    my $vhost_dev = '/dev/vhost-net';
    return -c $vhost_dev;
}
1220
# True if $key is a known VM config option, i.e. has an entry in $confdesc.
sub option_exists {
    my ($key) = @_;

    my $desc = $confdesc->{$key};
    return defined($desc);
}
1225
# cached result of get_cdrom_path(); '' means no device was found
my $cdrom_path;

# Return the first of /dev/cdrom, /dev/cdrom1, /dev/cdrom2 that is a symlink.
# If none is, a warning is logged and the empty string is returned. The result
# is cached for subsequent calls.
sub get_cdrom_path {
    if (!defined($cdrom_path)) {
        my @candidates = map { "/dev/cdrom$_" } ('', '1', '2');
        $cdrom_path = first { -l $_ } @candidates;

        if (!defined($cdrom_path)) {
            log_warn("no physical CD-ROM available, ignoring");
            $cdrom_path = '';
        }
    }

    return $cdrom_path;
}
1240
# Resolve the 'file' value of a CD-ROM drive to a path:
#   'cdrom'        -> the host's physical drive (see get_cdrom_path)
#   'none'         -> empty string
#   absolute path  -> returned as-is
#   anything else  -> treated as a volume ID and resolved via PVE::Storage
sub get_iso_path {
    my ($storecfg, $vmid, $cdrom) = @_;

    return get_cdrom_path() if $cdrom eq 'cdrom';
    return '' if $cdrom eq 'none';
    return $cdrom if $cdrom =~ m|^/|;

    return PVE::Storage::path($storecfg, $cdrom);
}
1254
# Try to convert old style file names to volume IDs.
#
# 'none', 'cdrom', /dev/... paths and anything that already looks like
# 'storage:volume' are returned unchanged. Other names containing a '/' are
# rejected (returns nothing). A plain file name is mapped to the 'local'
# storage: 'local:iso/<file>' for cdrom media, 'local:<vmid>/<file>' otherwise.
sub filename_to_volume_id {
    my ($vmid, $file, $media) = @_;

    return $file if $file eq 'none' || $file eq 'cdrom';
    return $file if $file =~ m|^/dev/.+| || $file =~ m/^([^:]+):(.+)$/;

    return if $file =~ m|/|;

    my $subdir = ($media && $media eq 'cdrom') ? 'iso' : $vmid;
    return "local:$subdir/$file";
}
1273
# Check that the content type of a volume ($vtype) matches the drive's media
# setting: 'disk' expects 'images', 'cdrom' expects 'iso'. Does nothing when
# $media is unset; raises a parameter exception for $opt on a mismatch.
sub verify_media_type {
    my ($opt, $vtype, $media) = @_;

    return if !$media;

    my $expected_for = {
        disk => 'images',
        cdrom => 'iso',
    };
    my $etype = $expected_for->{$media} // die "internal error";

    if ($vtype ne $etype) {
        raise_param_exc({ $opt => "unexpected media type ($vtype != $etype)" });
    }

    return;
}
1292
# Normalize $drive->{file} in place: a value that is not 'cdrom'/'none', not
# a /dev/ path, not already 'storage:volume' and not purely numeric is looked
# up with PVE::Storage::path_to_volume_id and replaced by the volume ID.
# 'media' is set to 'cdrom' for iso volumes and for 'cdrom'/'none' when it was
# not set before. Raises a parameter exception for $opt if the path belongs
# to no storage.
sub cleanup_drive_path {
    my ($opt, $storecfg, $drive) = @_;

    my $file = $drive->{file};
    my $needs_lookup = $file !~ m/^(cdrom|none)$/
        && $file !~ m|^/dev/.+|
        && $file !~ m/^([^:]+):(.+)$/
        && $file !~ m/^\d+$/;

    # try to convert filesystem paths to volume IDs
    if ($needs_lookup) {
        my ($vtype, $volid) = PVE::Storage::path_to_volume_id($storecfg, $drive->{file});
        if (!$vtype) {
            raise_param_exc({ $opt => "unable to associate path '$drive->{file}' to any storage"});
        }
        if (!$drive->{media} && $vtype eq 'iso') {
            $drive->{media} = 'cdrom';
        }
        verify_media_type($opt, $vtype, $drive->{media});
        $drive->{file} = $volid;
    }

    $drive->{media} = 'cdrom' if !$drive->{media} && $drive->{file} =~ m/^(cdrom|none)$/;
}
1312
# Parse the 'hotplug' option into a hash of enabled features.
#
# '0' disables everything (empty hash), '1' selects the schema default list.
# Otherwise $data is split with PVE::Tools::split_list; each element must be
# one of network, disk, cpu, memory, usb, cloudinit, else the function dies.
sub parse_hotplug_features {
    my ($data) = @_;

    return {} if $data eq '0';

    my $list = $data eq '1' ? $confdesc->{hotplug}->{default} : $data;

    my $enabled = {};
    for my $feature (PVE::Tools::split_list($list)) {
        my ($known) = $feature =~ m/^(network|disk|cpu|memory|usb|cloudinit)$/
            or die "invalid hotplug feature '$feature'\n";
        $enabled->{$known} = 1;
    }

    return $enabled;
}
1331
PVE::JSONSchema::register_format('pve-hotplug-features', \&pve_verify_hotplug_features);
# Format verifier for the 'hotplug' option.
#
# Returns $value if parse_hotplug_features() accepts it. The parser reports
# invalid input by dying, so the call is wrapped in eval: with $noerr set the
# verifier returns nothing, otherwise the parser's error is re-thrown
# unchanged.
sub pve_verify_hotplug_features {
    my ($value, $noerr) = @_;

    eval { parse_hotplug_features($value) };
    if (my $err = $@) {
        return if $noerr;
        die $err;
    }

    return $value;
}
1342
# Send a SCSI INQUIRY to the device behind $fh through the Linux SG_IO ioctl.
#
# Returns a hash with the keys type (lower 5 bits of the first response byte),
# removable (0/1), vendor, product and revision. On any failure the function
# dies, or returns nothing when $noerr is set.
sub scsi_inquiry {
    my($fh, $noerr) = @_;

    my $SG_IO = 0x2285;
    my $SG_GET_VERSION_NUM = 0x2282;

    my $versionbuf = "\x00" x 8;
    my $ret = ioctl($fh, $SG_GET_VERSION_NUM, $versionbuf);
    if (!$ret) {
        die "scsi ioctl SG_GET_VERSION_NUM failed - $!\n" if !$noerr;
        return;
    }
    my $version = unpack("I", $versionbuf);
    if ($version < 30000) {
        die "scsi generic interface too old\n" if !$noerr;
        return;
    }

    my $buf = "\x00" x 36;
    my $sensebuf = "\x00" x 8;
    # 6 byte CDB: opcode 0x12 (INQUIRY), allocation length 36
    my $cmd = pack("C x3 C x1", 0x12, 36);

    # see /usr/include/scsi/sg.h
    my $sg_io_hdr_t = "i i C C s I P P P I I i P C C C C S S i I I";

    # only the leading fields are filled in; 'P' packs pointers to the buffers
    my $packet = pack(
        $sg_io_hdr_t, ord('S'), -3, length($cmd), length($sensebuf), 0, length($buf), $buf, $cmd, $sensebuf, 6000
    );

    $ret = ioctl($fh, $SG_IO, $packet);
    if (!$ret) {
        die "scsi ioctl SG_IO failed - $!\n" if !$noerr;
        return;
    }

    my @res = unpack($sg_io_hdr_t, $packet);
    # fields 17 and 18 are the two 'S' (unsigned short) entries of the template
    if ($res[17] || $res[18]) {
        die "scsi ioctl SG_IO status error - $!\n" if !$noerr;
        return;
    }

    my $res = {};
    $res->@{qw(type removable vendor product revision)} = unpack("C C x6 A8 A16 A4", $buf);

    $res->{removable} = $res->{removable} & 128 ? 1 : 0;
    $res->{type} &= 0x1F;

    return $res;
}
1392
# Open $path read/write and run scsi_inquiry() on it without raising errors.
# Returns the inquiry result, or nothing if the path cannot be opened or the
# inquiry fails.
sub path_is_scsi {
    my ($path) = @_;

    # pass the mode separately so the path is never parsed as part of a
    # two-argument open specification
    my $fh = IO::File->new($path, '+<') || return;
    my $res = scsi_inquiry($fh, 1);
    close($fh);

    return $res;
}
1402
# Build the -device argument for the USB tablet. q35 machines and aarch64
# guests attach it to the EHCI bus, everything else to UHCI.
sub print_tabletdevice_full {
    my ($conf, $arch) = @_;

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    # we use uhci for old VMs because tablet driver was buggy in older qemu
    my $usbbus = ($is_q35 || $arch eq 'aarch64') ? 'ehci' : 'uhci';

    return "usb-tablet,id=tablet,bus=$usbbus.0,port=1";
}
1418
# Build the -device argument for a USB keyboard. Only aarch64 guests get one;
# for every other architecture nothing is returned.
sub print_keyboarddevice_full {
    my ($conf, $arch) = @_;

    if ($arch eq 'aarch64') {
        return "usb-kbd,id=keyboard,bus=ehci.0,port=2";
    }

    return;
}
1426
# Drive identifier as used in QEMU ids: interface name directly followed by
# the index, e.g. 'scsi0'.
my sub get_drive_id {
    my ($drive) = @_;
    return join('', $drive->{interface}, $drive->{index});
}
1431
# Build the QEMU -device argument for a drive.
#
# The device model depends on $drive->{interface}:
#   virtio   -> virtio-blk-pci on the PCI address from print_pci_addr()
#   scsi     -> scsi-hd/cd/block/generic on the controller from scsihw_infos()
#   ide/sata -> ide-hd/ide-cd on an ide.N or ahciN bus
# 'usb' and unknown interfaces die. bootindex and serial are appended for all
# interfaces when set. Returns the device string.
sub print_drivedevice_full {
    my ($storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type) = @_;

    my $device = '';

    my $drive_id = get_drive_id($drive);
    if ($drive->{interface} eq 'virtio') {
        my $pciaddr = print_pci_addr($drive_id, $bridges, $arch, $machine_type);
        $device = "virtio-blk-pci,drive=drive-$drive_id,id=${drive_id}${pciaddr}";
        $device .= ",iothread=iothread-$drive_id" if $drive->{iothread};
    } elsif ($drive->{interface} eq 'scsi') {

        my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
        my $unit = $drive->{index} % $maxdev;
        my $devicetype = 'hd';
        my $path = '';
        if (drive_is_cdrom($drive)) {
            $devicetype = 'cd';
        } else {
            if ($drive->{file} =~ m|^/|) {
                $path = $drive->{file};
                # real SCSI devices may be passed through as block or generic
                if (my $info = path_is_scsi($path)) {
                    if ($info->{type} == 0 && $drive->{scsiblock}) {
                        $devicetype = 'block';
                    } elsif ($info->{type} == 1) { # tape
                        $devicetype = 'generic';
                    }
                }
            } else {
                $path = PVE::Storage::path($storecfg, $drive->{file});
            }

            # for compatibility only, we prefer scsi-hd (#2408, #2355, #2380)
            my $version = extract_version($machine_type, kvm_user_version());
            if ($path =~ m/^iscsi\:\/\// &&
                !min_version($version, 4, 1)) {
                $devicetype = 'generic';
            }
        }

        # lsi* and pvscsi controllers address disks by scsi-id, the others by lun
        if (!$conf->{scsihw} || $conf->{scsihw} =~ m/^lsi/ || $conf->{scsihw} eq 'pvscsi') {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,scsi-id=$unit";
        } else {
            $device = "scsi-$devicetype,bus=$controller_prefix$controller.0,channel=0,scsi-id=0"
                .",lun=$drive->{index}";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($drive->{ssd} && ($devicetype eq 'block' || $devicetype eq 'hd')) {
            $device .= ",rotation_rate=1";
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};

    } elsif ($drive->{interface} eq 'ide' || $drive->{interface} eq 'sata') {
        my $maxdev = ($drive->{interface} eq 'sata') ? $PVE::QemuServer::Drive::MAX_SATA_DISKS : 2;
        my $controller = int($drive->{index} / $maxdev);
        my $unit = $drive->{index} % $maxdev;

        # machine type q35 only supports unit=0 for IDE rather than 2 units. This wasn't handled
        # correctly before, so e.g. index=2 was mapped to controller=1,unit=0 rather than
        # controller=2,unit=0. Note that odd indices never worked, as they would be mapped to
        # unit=1, so to keep backwards compat for migration, it suffices to keep even ones as they
        # were before. Move odd ones up by 2 where they don't clash.
        if (PVE::QemuServer::Machine::machine_type_is_q35($conf) && $drive->{interface} eq 'ide') {
            $controller += 2 * ($unit % 2);
            $unit = 0;
        }

        my $devicetype = ($drive->{media} && $drive->{media} eq 'cdrom') ? "cd" : "hd";

        $device = "ide-$devicetype";
        if ($drive->{interface} eq 'ide') {
            $device .= ",bus=ide.$controller,unit=$unit";
        } else {
            $device .= ",bus=ahci$controller.$unit";
        }
        $device .= ",drive=drive-$drive_id,id=$drive_id";

        if ($devicetype eq 'hd') {
            if (my $model = $drive->{model}) {
                # the config stores the model URI-escaped
                $model = URI::Escape::uri_unescape($model);
                $device .= ",model=$model";
            }
            if ($drive->{ssd}) {
                $device .= ",rotation_rate=1";
            }
        }
        $device .= ",wwn=$drive->{wwn}" if $drive->{wwn};
    } elsif ($drive->{interface} eq 'usb') {
        die "implement me";
        # -device ide-drive,bus=ide.1,unit=0,drive=drive-ide0-1-0,id=ide0-1-0
    } else {
        die "unsupported interface type";
    }

    $device .= ",bootindex=$drive->{bootindex}" if $drive->{bootindex};

    if (my $serial = $drive->{serial}) {
        # the config stores the serial URI-escaped
        $serial = URI::Escape::uri_unescape($serial);
        $device .= ",serial=$serial";
    }

    return $device;
}
1538
# Read the iSCSI initiator name from /etc/iscsi/initiatorname.iscsi.
# Returns the value of the first 'InitiatorName=' line, undef if the file has
# no such line, and nothing if the file cannot be opened.
sub get_initiator_name {
    my $fh = IO::File->new('/etc/iscsi/initiatorname.iscsi') || return;

    my $initiator;
    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\s*InitiatorName\s*=\s*([\.\-:\w]+)/) {
            $initiator = $1;
            last;
        }
    }
    $fh->close();

    return $initiator;
}
1552
# Decide whether io_uring may be used as default aio mode for a drive on the
# storage described by $scfg (may be undef). Returns 1 if allowed, nothing
# otherwise.
my sub storage_allows_io_uring_default {
    my ($scfg, $cache_direct) = @_;

    if ($scfg) {
        my $type = $scfg->{type};

        # io_uring with cache mode writeback or writethrough on krbd will hang...
        return if $type eq 'rbd' && $scfg->{krbd} && !$cache_direct;

        # io_uring with cache mode writeback or writethrough on LVM will hang, without cache only
        # sometimes, just plain disable...
        return if $type eq 'lvm';

        # io_uring causes problems when used with CIFS since kernel 5.15
        # Some discussion: https://www.spinics.net/lists/linux-cifs/msg26734.html
        return if $type eq 'cifs';
    }

    return 1;
}
1569
# Tell whether a drive is opened with a direct (O_DIRECT style) cache mode.
#
# With an explicit cache setting this is the case for off, none and
# directsync. Without one, it is the case for every non-CD-ROM drive unless
# the storage is btrfs without the 'nocow' flag.
my sub drive_uses_cache_direct {
    my ($drive, $scfg) = @_;

    if (my $cache = $drive->{cache}) {
        return scalar($cache =~ /^(?:off|none|directsync)$/);
    }

    return 0 if drive_is_cdrom($drive);
    return 0 if $scfg && $scfg->{type} eq 'btrfs' && !$scfg->{nocow};

    return 1;
}
1583
# Build the QEMU -drive argument for a drive.
#
# Parameters:
#   $storecfg - storage configuration, used to resolve volume IDs
#   $vmid     - VM ID, passed on to get_iso_path() for CD-ROM drives
#   $drive    - parsed drive hash (file, format, cache, aio, throttling, ...)
#   $pbs_name - if set, name of a PBS block node used as backing for the
#               drive; the drive is then wrapped in the alloc-track format
#   $io_uring - if true, io_uring may be chosen as default aio mode
#
# Returns the option string, starting with the file parameter (if there is a
# path) followed by 'if=none,id=drive-<interface><index>' and all options.
# Dies if a CD-ROM drive is combined with $pbs_name, or if no format is known
# for a PBS backed drive.
sub print_drive_commandline_full {
    my ($storecfg, $vmid, $drive, $pbs_name, $io_uring) = @_;

    my $path;
    my $volid = $drive->{file};
    my $format = $drive->{format};
    my $drive_id = get_drive_id($drive);

    # $storeid stays undefined when the file is not a storage volume ID
    my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);
    my $scfg = $storeid ? PVE::Storage::storage_config($storecfg, $storeid) : undef;

    if (drive_is_cdrom($drive)) {
        $path = get_iso_path($storecfg, $vmid, $volid);
        die "$drive_id: cannot back cdrom drive with PBS snapshot\n" if $pbs_name;
    } else {
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            $format //= qemu_img_format($scfg, $volname);
        } else {
            $path = $volid;
            $format //= "raw";
        }
    }

    my $is_rbd = $path =~ m/^rbd:/;

    my $opts = '';
    # options that are passed through to QEMU under the same name
    my @qemu_drive_options = qw(heads secs cyls trans media cache rerror werror aio discard);
    foreach my $o (@qemu_drive_options) {
        $opts .= ",$o=$drive->{$o}" if defined($drive->{$o});
    }

    # snapshot only accepts on|off
    if (defined($drive->{snapshot})) {
        my $v = $drive->{snapshot} ? 'on' : 'off';
        $opts .= ",snapshot=$v";
    }

    if (defined($drive->{ro})) { # ro maps to QEMUs `readonly`, which accepts `on` or `off` only
        $opts .= ",readonly=" . ($drive->{ro} ? 'on' : 'off');
    }

    # throttling limits: config key suffix => QEMU option suffix; the mbps
    # values are converted to bytes per second
    foreach my $type (['', '-total'], [_rd => '-read'], [_wr => '-write']) {
        my ($dir, $qmpname) = @$type;
        if (my $v = $drive->{"mbps$dir"}) {
            $opts .= ",throttling.bps$qmpname=".int($v*1024*1024);
        }
        if (my $v = $drive->{"mbps${dir}_max"}) {
            $opts .= ",throttling.bps$qmpname-max=".int($v*1024*1024);
        }
        if (my $v = $drive->{"bps${dir}_max_length"}) {
            $opts .= ",throttling.bps$qmpname-max-length=$v";
        }
        if (my $v = $drive->{"iops${dir}"}) {
            $opts .= ",throttling.iops$qmpname=$v";
        }
        if (my $v = $drive->{"iops${dir}_max"}) {
            $opts .= ",throttling.iops$qmpname-max=$v";
        }
        if (my $v = $drive->{"iops${dir}_max_length"}) {
            $opts .= ",throttling.iops$qmpname-max-length=$v";
        }
    }

    if ($pbs_name) {
        # with a PBS backing node the real format moves to file.driver
        $format = "rbd" if $is_rbd;
        die "$drive_id: Proxmox Backup Server backed drive cannot auto-detect the format\n"
            if !$format;
        $opts .= ",format=alloc-track,file.driver=$format";
    } elsif ($format) {
        $opts .= ",format=$format";
    }

    my $cache_direct = drive_uses_cache_direct($drive, $scfg);

    $opts .= ",cache=none" if !$drive->{cache} && $cache_direct;

    # pick a default aio mode only if none was configured explicitly
    if (!$drive->{aio}) {
        if ($io_uring && storage_allows_io_uring_default($scfg, $cache_direct)) {
            # io_uring supports all cache modes
            $opts .= ",aio=io_uring";
        } else {
            # aio native works only with O_DIRECT
            if($cache_direct) {
                $opts .= ",aio=native";
            } else {
                $opts .= ",aio=threads";
            }
        }
    }

    if (!drive_is_cdrom($drive)) {
        my $detectzeroes;
        if (defined($drive->{detect_zeroes}) && !$drive->{detect_zeroes}) {
            $detectzeroes = 'off';
        } elsif ($drive->{discard}) {
            $detectzeroes = $drive->{discard} eq 'on' ? 'unmap' : 'on';
        } else {
            # This used to be our default with discard not being specified:
            $detectzeroes = 'on';
        }

        # note: 'detect-zeroes' works per blockdev and we want it to persist
        # after the alloc-track is removed, so put it on 'file' directly
        my $dz_param = $pbs_name ? "file.detect-zeroes" : "detect-zeroes";
        $opts .= ",$dz_param=$detectzeroes" if $detectzeroes;
    }

    if ($pbs_name) {
        $opts .= ",backing=$pbs_name";
        $opts .= ",auto-remove=on";
    }

    # my $file_param = $pbs_name ? "file.file.filename" : "file";
    my $file_param = "file";
    if ($pbs_name) {
        # non-rbd drivers require the underlying file to be a separate block
        # node, so add a second .file indirection
        $file_param .= ".file" if !$is_rbd;
        $file_param .= ".filename";
    }
    # an empty path (e.g. CD-ROM drive with 'none') yields no file parameter
    my $pathinfo = $path ? "$file_param=$path," : '';

    return "${pathinfo}if=none,id=drive-$drive->{interface}$drive->{index}$opts";
}
1709
# Build the -blockdev argument for a read-only PBS block node named
# $pbs_name. repository, snapshot and archive are always emitted; namespace
# and keyfile only when set in $pbs_conf.
sub print_pbs_blockdev {
    my ($pbs_conf, $pbs_name) = @_;

    my $namespace = $pbs_conf->{namespace} ? ",namespace=$pbs_conf->{namespace}" : '';
    my $keyfile = $pbs_conf->{keyfile} ? ",keyfile=$pbs_conf->{keyfile}" : '';

    return "driver=pbs,node-name=$pbs_name,read-only=on"
        . ",repository=$pbs_conf->{repository}"
        . $namespace
        . ",snapshot=$pbs_conf->{snapshot}"
        . ",archive=$pbs_conf->{archive}"
        . $keyfile;
}
1720
# Build the QEMU -device argument for a guest NIC.
#
# The 'virtio' model maps to virtio-net-pci; other models are used as device
# name directly. Multiqueue, queue sizes and host_mtu are only emitted for
# virtio. With $use_old_bios_files a matching PXE rom file is selected.
# Dies if a configured MTU is below 576 or above the bridge MTU.
sub print_netdevice_full {
    my ($vmid, $conf, $net, $netid, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version) = @_;

    my $is_virtio = $net->{model} eq 'virtio';
    my $device = $is_virtio ? 'virtio-net-pci' : $net->{model};

    my $pciaddr = print_pci_addr("$netid", $bridges, $arch, $machine_type);
    my $tmpstr = "$device,mac=$net->{macaddr},netdev=$netid$pciaddr,id=$netid";

    my $virtio_7_1 = $is_virtio && min_version($machine_version, 7, 1);

    if ($is_virtio && $net->{queues} && $net->{queues} > 1) {
        # Consider we have N queues, the number of vectors needed is 2 * N + 2, i.e., one per in
        # and out of each queue plus one config interrupt and control vector queue
        my $vectors = $net->{queues} * 2 + 2;
        $tmpstr .= ",vectors=$vectors,mq=on";
        $tmpstr .= ",packed=on" if $virtio_7_1;
    }

    $tmpstr .= ",rx_queue_size=1024,tx_queue_size=256" if $virtio_7_1;

    if ($net->{bootindex}) {
        $tmpstr .= ",bootindex=$net->{bootindex}";
    }

    if (my $mtu = $net->{mtu}) {
        if (!$is_virtio || !$net->{bridge}) {
            warn "WARN: netdev $netid: ignoring MTU '$mtu', not using VirtIO or no bridge configured.\n";
        } else {
            my $bridge_mtu = PVE::Network::read_bridge_mtu($net->{bridge});
            # an MTU of 1 means: inherit the MTU of the bridge
            if ($mtu == 1) {
                $mtu = $bridge_mtu;
            } elsif ($mtu < 576) {
                die "netdev $netid: MTU '$mtu' is smaller than the IP minimum MTU '576'\n";
            } elsif ($mtu > $bridge_mtu) {
                die "netdev $netid: MTU '$mtu' is bigger than the bridge MTU '$bridge_mtu'\n";
            }
            $tmpstr .= ",host_mtu=$mtu";
        }
    }

    if ($use_old_bios_files) {
        my $romfile_for = {
            'virtio-net-pci' => 'pxe-virtio.rom',
            'e1000' => 'pxe-e1000.rom',
            'e1000e' => 'pxe-e1000e.rom',
            'ne2k' => 'pxe-ne2k_pci.rom',
            'pcnet' => 'pxe-pcnet.rom',
            'rtl8139' => 'pxe-rtl8139.rom',
        };
        my $romfile = $romfile_for->{$device};
        $tmpstr .= ",romfile=$romfile" if $romfile;
    }

    return $tmpstr;
}
1783
# Build the QEMU -netdev argument for the host side of a guest NIC.
#
# $netid must have the form 'netN' with N below $MAX_NETS. With a bridge
# configured a tap device named tap<vmid>i<N> is used together with the
# pve-bridge (or pve-bridge-hotplug when $hotplug is set) script; without a
# bridge user-mode networking is used. vhost is enabled for virtio NICs when
# the architecture is native and the vhost-net device exists.
#
# Dies on an invalid $netid or when the tap interface name gets too long.
sub print_netdev_full {
    my ($vmid, $conf, $arch, $net, $netid, $hotplug) = @_;

    # reject ids that are not netN right away; otherwise the index would stay
    # empty and a malformed tap name would be generated
    my ($i) = $netid =~ m/^net(\d+)$/
        or die "got strange net id '$netid'\n";
    $i = int($i);

    die "got strange net id '$netid'\n" if $i >= $MAX_NETS;

    my $ifname = "tap${vmid}i$i";

    # kvm uses TUNSETIFF ioctl, and that limits ifname length
    die "interface name '$ifname' is too long (max 15 character)\n"
        if length($ifname) >= 16;

    my $vhostparam = '';
    if (is_native($arch)) {
        $vhostparam = ',vhost=on' if kernel_has_vhost_net() && $net->{model} eq 'virtio';
    }

    my $vmname = $conf->{name} || "vm$vmid";

    my $netdev = "";
    my $script = $hotplug ? "pve-bridge-hotplug" : "pve-bridge";

    if ($net->{bridge}) {
        $netdev = "type=tap,id=$netid,ifname=${ifname},script=/var/lib/qemu-server/$script"
            .",downscript=/var/lib/qemu-server/pve-bridgedown$vhostparam";
    } else {
        $netdev = "type=user,id=$netid,hostname=$vmname";
    }

    $netdev .= ",queues=$net->{queues}" if ($net->{queues} && $net->{model} eq 'virtio');

    return $netdev;
}
1821
# maps the configured vga type to the QEMU device name
my $vga_map = {
    'cirrus' => 'cirrus-vga',
    'std' => 'VGA',
    'vmware' => 'vmware-svga',
    'virtio' => 'virtio-vga',
    'virtio-gl' => 'virtio-vga-gl',
};

# Build the QEMU -device argument for a display adapter.
#
# The device type comes from $vga_map, except that aarch64 uses virtio-gpu
# instead of virtio-vga and that a true $qxlnum selects qxl-vga (first
# display, no $id) or qxl (additional displays). $id is appended to the
# device id 'vga'. Dies if no device type can be determined, or if the
# libraries or render node needed for 'virtio-gl' are missing.
sub print_vga_device {
    my ($conf, $vga, $arch, $machine_version, $machine, $id, $qxlnum, $bridges) = @_;

    my $type = $vga_map->{$vga->{type}};
    if ($arch eq 'aarch64' && defined($type) && $type eq 'virtio-vga') {
        $type = 'virtio-gpu';
    }
    my $vgamem_mb = $vga->{memory};

    my $max_outputs = '';
    if ($qxlnum) {
        $type = $id ? 'qxl' : 'qxl-vga';

        # the alternation is grouped so that both anchors apply to each branch
        if (!$conf->{ostype} || $conf->{ostype} =~ m/^(?:l\d\d|other)$/) {
            # set max outputs so linux can have up to 4 qxl displays with one device
            if (min_version($machine_version, 4, 1)) {
                $max_outputs = ",max_outputs=4";
            }
        }
    }

    die "no devicetype for $vga->{type}\n" if !$type;

    my $memory = "";
    if ($vgamem_mb) {
        if ($vga->{type} =~ /^virtio/) {
            my $bytes = PVE::Tools::convert_size($vgamem_mb, "mb" => "b");
            $memory = ",max_hostmem=$bytes";
        } elsif ($qxlnum) {
            # from https://www.spice-space.org/multiple-monitors.html
            $memory = ",vgamem_mb=$vga->{memory}";
            my $ram = $vgamem_mb * 4;
            my $vram = $vgamem_mb * 2;
            $memory .= ",ram_size_mb=$ram,vram_size_mb=$vram";
        } else {
            $memory = ",vgamem_mb=$vga->{memory}";
        }
    } elsif ($qxlnum && $id) {
        $memory = ",ram_size=67108864,vram_size=33554432";
    }

    # EDID is switched off for the std VGA on Windows guests unless OVMF is used
    my $edidoff = "";
    if ($type eq 'VGA' && windows_version($conf->{ostype})) {
        $edidoff=",edid=off" if (!defined($conf->{bios}) || $conf->{bios} ne 'ovmf');
    }

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my $vgaid = "vga" . ($id // '');
    my $pciaddr;
    if ($q35 && $vgaid eq 'vga') {
        # the first display uses pcie.0 bus on q35 machines
        $pciaddr = print_pcie_addr($vgaid, $bridges, $arch, $machine);
    } else {
        $pciaddr = print_pci_addr($vgaid, $bridges, $arch, $machine);
    }

    if ($vga->{type} eq 'virtio-gl') {
        my $base = '/usr/lib/x86_64-linux-gnu/lib';
        die "missing libraries for '$vga->{type}' detected! Please install 'libgl1' and 'libegl1'\n"
            if !-e "${base}EGL.so.1" || !-e "${base}GL.so.1";

        die "no DRM render node detected (/dev/dri/renderD*), no GPU? - needed for '$vga->{type}' display\n"
            if !PVE::Tools::dir_glob_regex('/dev/dri/', "renderD.*");
    }

    return "$type,id=${vgaid}${memory}${max_outputs}${pciaddr}${edidoff}";
}
1897
# netX: e1000=XX:XX:XX:XX:XX:XX,bridge=vmbr0,rate=<mbps>
#
# Parse a netN property string. On a parse error the error is printed as a
# warning and nothing is returned. If no MAC address is given, a random one is
# generated using the mac_prefix from datacenter.cfg, unless
# $disable_mac_autogen is set.
sub parse_net {
    my ($data, $disable_mac_autogen) = @_;

    my $net = eval { parse_property_string($net_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    return $net if defined($net->{macaddr}) || $disable_mac_autogen;

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');
    $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix});

    return $net;
}
1913
# ipconfigX ip=cidr,gw=ip,ip6=cidr,gw6=ip
#
# Parse an ipconfigN property string. Parse errors and inconsistent
# address/gateway combinations are printed as a warning and nothing is
# returned. If neither an IPv4 nor an IPv6 address is set, a hash selecting
# dhcp for both families is returned instead of the parsed data.
sub parse_ipconfig {
    my ($data) = @_;

    my $res = eval { parse_property_string($ipconfig_fmt, $data) };
    if (my $err = $@) {
        warn $err;
        return;
    }

    # the checks are ordered, only the first problem found is reported
    my $problem;
    if ($res->{gw} && !$res->{ip}) {
        $problem = 'gateway specified without specifying an IP address';
    } elsif ($res->{gw6} && !$res->{ip6}) {
        $problem = 'IPv6 gateway specified without specifying an IPv6 address';
    } elsif ($res->{gw} && $res->{ip} eq 'dhcp') {
        $problem = 'gateway specified together with DHCP';
    } elsif ($res->{gw6} && $res->{ip6} !~ /^$IPV6RE/) {
        # gw6 + auto/dhcp
        $problem = "IPv6 gateway specified together with $res->{ip6} address";
    }

    if (defined($problem)) {
        warn $problem;
        return;
    }

    return { ip => 'dhcp', ip6 => 'dhcp' } if !$res->{ip} && !$res->{ip6};

    return $res;
}
1948
# Render a parsed net hash back into its netN property string.
sub print_net {
    my ($net) = @_;

    return PVE::JSONSchema::print_property_string($net, $net_fmt);
}
1954
# Re-serialize every netN entry of $settings through parse_net()/print_net(),
# which fills in a generated MAC address where none was given. Entries that
# fail to parse are left untouched.
sub add_random_macs {
    my ($settings) = @_;

    for my $opt (grep { m/^net(\d+)$/ } keys %$settings) {
        my $net = parse_net($settings->{$opt}) or next;
        $settings->{$opt} = print_net($net);
    }
}
1965
# Return 1 if the storage layer reports $vmid as owner of volume $volid.
# Absolute paths are never owned; errors while resolving the volume are
# ignored and treated as "not owned" (returns nothing).
sub vm_is_volid_owner {
    my ($storecfg, $vmid, $volid) = @_;

    return if $volid =~ m|^/|;

    my ($path, $owner) = eval { PVE::Storage::path($storecfg, $volid) };

    return 1 if $owner && ($owner == $vmid);
    return;
}
1979
# Handle a drive that is no longer referenced by the VM config.
#
# A cloud-init drive is freed right away (errors only produce a warning) and
# the 'cloudinit' key is removed from $conf. Any other non-CD-ROM volume that
# is owned by the VM is recorded as unused volume in $conf.
sub vmconfig_register_unused_drive {
    my ($storecfg, $vmid, $conf, $drive) = @_;

    if (drive_is_cloudinit($drive)) {
        eval { PVE::Storage::vdisk_free($storecfg, $drive->{file}) };
        if (my $err = $@) {
            warn $err;
        }
        delete $conf->{cloudinit};
    } elsif (!drive_is_cdrom($drive)) {
        my $volid = $drive->{file};
        my $owned = vm_is_volid_owner($storecfg, $vmid, $volid);
        if ($owned) {
            PVE::QemuConfig->add_unused_volume($conf, $volid, $vmid);
        }
    }
}
1994
# smbios: [manufacturer=str][,product=str][,version=str][,serial=str][,uuid=uuid][,sku=str][,family=str][,base64=bool]
#
# Property-string format of the 'smbios1' option. All string fields except
# uuid share one pattern that only admits the base64 alphabet with up to two
# trailing '=' characters; 'base64' flags whether the values are encoded.
my $smbios1_fmt = {
    uuid => {
        type => 'string',
        pattern => '[a-fA-F0-9]{8}(?:-[a-fA-F0-9]{4}){3}-[a-fA-F0-9]{12}',
        format_description => 'UUID',
        description => "Set SMBIOS1 UUID.",
        optional => 1,
    },
    version => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 version.",
        optional => 1,
    },
    serial => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 serial number.",
        optional => 1,
    },
    manufacturer => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 manufacturer.",
        optional => 1,
    },
    product => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 product ID.",
        optional => 1,
    },
    sku => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 SKU string.",
        optional => 1,
    },
    family => {
        type => 'string',
        pattern => '[A-Za-z0-9+\/]+={0,2}',
        format_description => 'Base64 encoded string',
        description => "Set SMBIOS1 family string.",
        optional => 1,
    },
    base64 => {
        type => 'boolean',
        description => 'Flag to indicate that the SMBIOS values are base64 encoded',
        optional => 1,
    },
};
2052
# Parse an smbios1 property string according to $smbios1_fmt.
# Parse errors are downgraded to a warning; undef is returned in that case.
sub parse_smbios1 {
    my ($data) = @_;

    my $parsed = eval { parse_property_string($smbios1_fmt, $data) };
    if ($@) {
        warn $@;
    }

    return $parsed;
}
2060
# Serialize a parsed smbios1 hash into its property-string form,
# using the $smbios1_fmt schema.
sub print_smbios1 {
    my ($smbios1) = @_;

    my $serialized = PVE::JSONSchema::print_property_string($smbios1, $smbios1_fmt);
    return $serialized;
}
2065
# make the smbios1 schema available under the format name 'pve-qm-smbios1'
PVE::JSONSchema::register_format('pve-qm-smbios1' => $smbios1_fmt);
2067
# Parse a watchdog property string according to $watchdog_fmt.
# Returns nothing for a false/empty value and undef (after warning) when the
# string does not parse.
sub parse_watchdog {
    my ($value) = @_;

    return unless $value;

    my $watchdog = eval { parse_property_string($watchdog_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $watchdog;
}
2077
# Parse the 'agent' option of a VM config according to $agent_fmt.
# Always returns a hash reference: an empty one when no agent is configured,
# when parsing failed (a warning is emitted) or when the agent is disabled.
sub parse_guest_agent {
    my ($conf) = @_;

    my $raw = $conf->{agent};
    return {} if !defined($raw);

    my $agent = eval { parse_property_string($agent_fmt, $raw) };
    warn $@ if $@;

    # if the agent is disabled ignore the other potentially set properties
    return $agent->{enabled} ? $agent : {};
}
2090
# Look up a single property ($key) of the parsed guest agent configuration.
# Returns an explicit undef when the VM has no 'agent' option at all.
sub get_qga_key {
    my ($conf, $key) = @_;

    # explicit undef (not an empty list) on purpose, callers may rely on it
    return undef if !defined($conf->{agent});

    return parse_guest_agent($conf)->{$key};
}
2098
# Parse a vga property string according to $vga_fmt.
# A false/empty value yields an empty hash; a parse failure warns and
# yields undef.
sub parse_vga {
    my ($value) = @_;

    return {} unless $value;

    my $vga = eval { parse_property_string($vga_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $vga;
}
2107
# Parse an rng property string according to $rng_fmt.
# Returns nothing for a false/empty value and undef (after warning) when the
# string does not parse.
sub parse_rng {
    my ($value) = @_;

    return unless $value;

    my $rng = eval { parse_property_string($rng_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $rng;
}
2117
# Parse a 'meta' property string according to $meta_info_fmt.
# Returns nothing for a false/empty value and undef (after warning) when the
# string does not parse.
sub parse_meta_info {
    my ($value) = @_;

    return unless $value;

    my $meta = eval { parse_property_string($meta_info_fmt, $value) };
    if ($@) {
        warn $@;
    }

    return $meta;
}
2127
# Build the 'meta' property string for a newly created VM: the QEMU version
# reported by kvm_user_version() and the current time as creation time.
sub new_meta_info_string {
    my () = @_; # for now do not allow to override any value

    my $meta = {
        'creation-qemu' => kvm_user_version(),
        ctime => "" . int(time()), # stringified epoch seconds
    };

    return PVE::JSONSchema::print_property_string($meta, $meta_info_fmt);
}
2139
# Returns extra QEMU command line arguments (array ref) needed to keep the
# guest-visible hardware stable for VMs that always run with the latest
# machine version but were created before a machine version transition that
# affected HW such that, e.g., an OS config change would be required (we do
# not want to pin the machine version for non-windows OS types).
# Returns nothing if no fixup applies.
sub qemu_created_version_fixups {
    my ($conf, $forcemachine, $kvmver) = @_;

    my $meta = parse_meta_info($conf->{meta}) // {};
    my $forced_vers = PVE::QemuServer::Machine::extract_version($forcemachine);

    my $machine = $conf->{machine};
    my $created_with = $meta->{'creation-qemu'};

    # only relevant for non-versioned machine types
    return if defined($machine) && $machine !~ m/^(?:pc|q35|virt)$/;
    # VM was already created with 6.1 or newer
    return if defined($created_with) && min_version($created_with, 6, 1);
    # handle snapshot-rollback/migrations forcing an older machine version
    return if $forced_vers && !min_version($forced_vers, 6, 1);
    # only need to apply the change since 6.1
    return if !min_version($kvmver, 6, 1);

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    if ($q35 && $conf->{ostype} && $conf->{ostype} eq 'l26') {
        # this changed to default-on in QEMU 6.1 for q35 machines, it will mess with the PCI
        # slot view and thus with the predictable interface naming of systemd
        return ['-global', 'ICH9-LPC.acpi-pci-hotplug-with-bridge-support=off'];
    }

    return;
}
2164
# Add the JSON schema properties for the create and set API calls to $prop.
# Internal-only options are skipped. With $with_disk_alloc set, drive options
# use the schema variant from $PVE::QemuServer::Drive::drivedesc_hash_with_alloc.
# Returns $prop.
sub json_config_properties {
    my ($prop, $with_disk_alloc) = @_;

    # options that are managed internally and never set through the API
    my %skip = map { $_ => 1 } qw(parent snaptime vmstate runningmachine runningcpu meta);

    for my $opt (grep { !$skip{$_} } keys %$confdesc) {
        $prop->{$opt} = ($with_disk_alloc && is_valid_drivename($opt))
            ? $PVE::QemuServer::Drive::drivedesc_hash_with_alloc->{$opt}
            : $confdesc->{$opt};
    }

    return $prop;
}
2190
# Properties that we can read from an OVF file: one optional disk image entry
# per valid drive name plus 'cores', 'memory' and 'name'.
sub json_ovf_properties {
    my %prop;

    for my $device (PVE::QemuServer::Drive::valid_drive_names()) {
        my $disk_schema = {
            type => 'string',
            format => 'pve-volume-id-or-absolute-path',
            description => "Disk image that gets imported to $device",
            optional => 1,
        };
        $prop{$device} = $disk_schema;
    }

    @prop{qw(cores memory name)} = (
        {
            type => 'integer',
            description => "The number of CPU cores.",
            optional => 1,
        },
        {
            type => 'integer',
            description => "Amount of RAM for the VM in MB.",
            optional => 1,
        },
        {
            type => 'string',
            description => "Name of the VM.",
            optional => 1,
        },
    );

    return \%prop;
}
2222
# Return a deep copy of $confdesc_cloudinit to generate documentation, so
# callers cannot modify the shared schema.
sub cloudinit_config_properties {
    my $copy = dclone($confdesc_cloudinit);
    return $copy;
}
2228
# Returns a hash reference used as a set (every value is 1) containing all
# option names from $confdesc_cloudinit, plus 'name' and 'net0' ..
# 'net<$MAX_NETS-1>'.
sub cloudinit_pending_properties {
    my $p = {
        # The parentheses are required: 'keys' is a named unary operator, so
        # without them the trailing "name => 1" pair would become part of
        # map's input list and add a bogus key '1' to the result.
        (map { $_ => 1 } keys $confdesc_cloudinit->%*),
        name => 1,
    };
    $p->{"net$_"} = 1 for 0 .. ($MAX_NETS - 1);
    return $p;
}
2237
# Validate $value against the type of option $key as declared in $confdesc
# and return the normalized value:
#  - boolean: 1 or 0 (accepts 1/on/yes/true and 0/off/no/false)
#  - integer: int() of a non-negative decimal number
#  - number:  the value itself if it is a non-negative decimal
#  - string:  the value; validated through PVE::JSONSchema::check_format() if
#             the option declares a format, otherwise one pair of enclosing
#             double quotes is stripped
# Dies on unknown options, undefined values, embedded line breaks and failed
# type checks.
sub check_type {
    my ($key, $value) = @_;

    my $schema = $confdesc->{$key};
    die "unknown setting '$key'\n" if !$schema;

    my $type = $schema->{type};

    die "got undefined value\n" if !defined($value);
    die "property contains a line feed\n" if $value =~ m/[\n\r]/;

    if ($type eq 'boolean') {
        return 1 if ($value eq '1') || ($value =~ m/^(on|yes|true)$/i);
        return 0 if ($value eq '0') || ($value =~ m/^(off|no|false)$/i);
        die "type check ('boolean') failed - got '$value'\n";
    }

    if ($type eq 'integer') {
        return int($1) if $value =~ m/^(\d+)$/;
        die "type check ('integer') failed - got '$value'\n";
    }

    if ($type eq 'number') {
        return $value if $value =~ m/^(\d+)(\.\d+)?$/;
        die "type check ('number') failed - got '$value'\n";
    }

    if ($type eq 'string') {
        if (my $fmt = $schema->{format}) {
            PVE::JSONSchema::check_format($fmt, $value);
            return $value;
        }
        # strip one pair of enclosing double quotes, if present
        return $value =~ s/^\"(.*)\"$/$1/r;
    }

    die "internal error"
}
2274
# Destroy a VM: free all volumes owned by it and remove (or replace) its config.
#
# $skiplock            - skip the config lock check
# $replacement_conf    - if defined, written as new config instead of
#                        destroying the config file
# $purge_unreferenced  - also free volumes listed for this VMID on the
#                        storages which are not referenced in the config
#
# Dies if the config is locked (a 'suspended' lock is tolerated) or if the VM
# is a template whose base volumes are still used by linked clones.
sub destroy_vm {
    my ($storecfg, $vmid, $skiplock, $replacement_conf, $purge_unreferenced) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    PVE::QemuConfig->check_lock($conf)
        if !$skiplock && !PVE::QemuConfig->has_lock($conf, 'suspended');

    if ($conf->{template}) {
        # check if any base image is still used by a linked clone
        my $assert_base_unused = sub {
            my ($ds, $drive) = @_;
            return if drive_is_cdrom($drive);

            my $volid = $drive->{file};
            return if !$volid || $volid =~ m|^/|;

            die "base volume '$volid' is still in use by linked cloned\n"
                if PVE::Storage::volume_is_base_and_used($storecfg, $volid);
        };
        PVE::QemuConfig->foreach_volume_full($conf, { include_unused => 1 }, $assert_base_unused);
    }

    my $handled = {}; # volume IDs that were already freed (or attempted)
    my $free_if_owned = sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return if !$volid || $volid =~ m|^/|;
        return if $handled->{$volid};

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return if !$path || !$owner || ($owner != $vmid);

        $handled->{$volid} = 1;
        eval { PVE::Storage::vdisk_free($storecfg, $volid) };
        warn "Could not remove disk '$volid', check manually: $@" if $@;
    };

    # only remove disks owned by this VM (referenced in the config)
    my $volume_opts = {
        include_unused => 1,
        extra_keys => ['vmstate'],
    };
    PVE::QemuConfig->foreach_volume_full($conf, $volume_opts, $free_if_owned);

    # saved VM state volumes of snapshots
    for my $snapshot (values %{$conf->{snapshots}}) {
        my $vmstate = $snapshot->{vmstate};
        next if !defined($vmstate);

        my $drive = PVE::QemuConfig->parse_volume('vmstate', $vmstate, 1);
        next if !defined($drive);

        $free_if_owned->('vmstate', $drive);
    }

    PVE::QemuConfig->foreach_volume_full($conf->{pending}, $volume_opts, $free_if_owned);

    if ($purge_unreferenced) { # also remove unreferenced disk
        my $vmdisks = PVE::Storage::vdisk_list($storecfg, undef, $vmid, undef, 'images');
        PVE::Storage::foreach_volid($vmdisks, sub {
            my ($volid, $sid, $volname, $d) = @_;
            eval { PVE::Storage::vdisk_free($storecfg, $volid) };
            warn $@ if $@;
        });
    }

    if (defined $replacement_conf) {
        PVE::QemuConfig->write_config($vmid, $replacement_conf);
    } else {
        PVE::QemuConfig->destroy_config($vmid);
    }
}
2347
# Parse the raw content of a VM configuration file.
#
# $filename - must end in '/qemu-server/<vmid>.conf'; the VMID used in
#             messages is taken from it (dies otherwise)
# $raw      - file content; nothing is returned if it is undefined
# $strict   - if true, parse problems are fatal instead of warnings
#
# Returns a hash reference with the options of the main section plus:
#   digest    - SHA1 hex digest of $raw
#   pending   - options from the [PENDING] section
#   cloudinit - options from the [special:cloudinit] section (not validated)
#   snapshots - one hash per [<snapshot name>] section
# Comment lines ('#...') and 'description:' lines are collected into the
# 'description' of the section they appear in.
sub parse_vm_config {
    my ($filename, $raw, $strict) = @_;

    return if !defined($raw);

    my $res = {
        digest => Digest::SHA::sha1_hex($raw),
        snapshots => {},
        pending => {},
        cloudinit => {},
    };

    my $handle_error = sub {
        my ($msg) = @_;

        if ($strict) {
            die $msg;
        } else {
            warn $msg;
        }
    };

    $filename =~ m|/qemu-server/(\d+)\.conf$|
        || die "got strange filename '$filename'";

    my $vmid = $1;

    my $conf = $res; # hash of the section that is currently being filled
    my $descr;
    # store the description collected so far in the current section
    my $finish_description = sub {
        if (defined($descr)) {
            $descr =~ s/\s+$//;
            $conf->{description} = $descr;
        }
        $descr = undef;
    };
    my $section = '';

    my @lines = split(/\n/, $raw);
    foreach my $line (@lines) {
        next if $line =~ m/^\s*$/;

        # section headers: finish the previous section and switch $conf
        if ($line =~ m/^\[PENDING\]\s*$/i) {
            $section = 'pending';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;
        } elsif ($line =~ m/^\[special:cloudinit\]\s*$/i) {
            $section = 'cloudinit';
            $finish_description->();
            $conf = $res->{$section} = {};
            next;

        } elsif ($line =~ m/^\[([a-z][a-z0-9_\-]+)\]\s*$/i) {
            $section = $1;
            $finish_description->();
            $conf = $res->{snapshots}->{$section} = {};
            next;
        }

        if ($line =~ m/^\#(.*)$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($1) . "\n";
            next;
        }

        if ($line =~ m/^(description):\s*(.*\S)\s*$/) {
            $descr = '' if !defined($descr);
            $descr .= PVE::Tools::decode_text($2);
        } elsif ($line =~ m/^snapstate:\s*(prepare|delete)\s*$/) {
            # anchored at line start, otherwise any line merely ending in
            # "snapstate: <state>" (e.g. an 'args' line) would be taken as snapstate
            $conf->{snapstate} = $1;
        } elsif ($line =~ m/^(args):\s*(.*\S)\s*$/) {
            # 'args' is stored verbatim, without type checking
            my $key = $1;
            my $value = $2;
            $conf->{$key} = $value;
        } elsif ($line =~ m/^delete:\s*(.*\S)\s*$/) {
            my $value = $1;
            if ($section eq 'pending') {
                $conf->{delete} = $value; # we parse this later
            } else {
                $handle_error->("vm $vmid - property 'delete' is only allowed in [PENDING]\n");
            }
        } elsif ($line =~ m/^([a-z][a-z_]*\d*):\s*(.+?)\s*$/) {
            my $key = $1;
            my $value = $2;
            if ($section eq 'cloudinit') {
                # ignore validation only used for informative purpose
                $conf->{$key} = $value;
                next;
            }
            eval { $value = check_type($key, $value); };
            if ($@) {
                $handle_error->("vm $vmid - unable to parse value of '$key' - $@");
            } else {
                # legacy alias: 'cdrom' is stored as 'ide2'
                $key = 'ide2' if $key eq 'cdrom';
                my $fmt = $confdesc->{$key}->{format};
                if ($fmt && $fmt =~ /^pve-qm-(?:ide|scsi|virtio|sata)$/) {
                    # store drives with their volume ID instead of a plain file name
                    my $v = parse_drive($key, $value);
                    if (my $volid = filename_to_volume_id($vmid, $v->{file}, $v->{media})) {
                        $v->{file} = $volid;
                        $value = print_drive($v);
                    } else {
                        $handle_error->("vm $vmid - unable to parse value of '$key'\n");
                        next;
                    }
                }

                $conf->{$key} = $value;
            }
        } else {
            $handle_error->("vm $vmid - unable to parse config: $line\n");
        }
    }

    $finish_description->();
    delete $res->{snapstate}; # just to be sure

    return $res;
}
2467
# Serialize a VM config hash into the raw text of its configuration file.
#
# $filename is not used here. $conf is normalized in place: legacy 'cdrom'
# and 'smp' options are converted, all values are passed through
# check_type() and 'unusedN' entries whose volume is used by a drive again
# are dropped.
#
# Returns the raw config text: main section, then [PENDING] (if not empty),
# [special:cloudinit] (if not empty and the VM has a cloud-init drive) and
# one section per snapshot, sorted by name.
# Dies on invalid values, on 'delete' outside of [PENDING] and on a snapshot
# named 'pending'.
sub write_vm_config {
    my ($filename, $conf) = @_;

    delete $conf->{snapstate}; # just to be sure

    if ($conf->{cdrom}) {
        die "option ide2 conflicts with cdrom\n" if $conf->{ide2};
        $conf->{ide2} = $conf->{cdrom};
        delete $conf->{cdrom};
    }

    # we do not use 'smp' any longer
    if ($conf->{sockets}) {
        delete $conf->{smp};
    } elsif ($conf->{smp}) {
        $conf->{sockets} = $conf->{smp};
        delete $conf->{cores};
        delete $conf->{smp};
    }

    my $used_volids = {};

    # type-check (and normalize) all options of one section in place and
    # record the volumes used by drives of non-snapshot sections
    my $cleanup_config = sub {
        my ($cref, $pending, $snapname) = @_;

        foreach my $key (keys %$cref) {
            next if $key eq 'digest' || $key eq 'description' || $key eq 'snapshots' ||
                $key eq 'snapstate' || $key eq 'pending' || $key eq 'cloudinit';
            my $value = $cref->{$key};
            if ($key eq 'delete') {
                die "property 'delete' is only allowed in [PENDING]\n"
                    if !$pending;
                # fixme: check syntax?
                next;
            }
            eval { $value = check_type($key, $value); };
            die "unable to parse value of '$key' - $@" if $@;

            $cref->{$key} = $value;

            if (!$snapname && is_valid_drivename($key)) {
                my $drive = parse_drive($key, $value);
                $used_volids->{$drive->{file}} = 1 if $drive && $drive->{file};
            }
        }
    };

    $cleanup_config->($conf);

    $cleanup_config->($conf->{pending}, 1);

    foreach my $snapname (keys %{$conf->{snapshots}}) {
        die "internal error: snapshot name '$snapname' is forbidden" if lc($snapname) eq 'pending';
        $cleanup_config->($conf->{snapshots}->{$snapname}, undef, $snapname);
    }

    # remove 'unusedX' settings if we re-add a volume
    foreach my $key (keys %$conf) {
        my $value = $conf->{$key};
        if ($key =~ m/^unused/ && $used_volids->{$value}) {
            delete $conf->{$key};
        }
    }

    # render one section: description as leading comment lines, followed by
    # the "key: value" lines sorted by key
    my $generate_raw_config = sub {
        my ($section_conf, $pending) = @_;

        my $raw = '';

        # add description as comment to top of file
        if (defined(my $descr = $section_conf->{description})) {
            if ($descr) {
                foreach my $cl (split(/\n/, $descr)) {
                    $raw .= '#' . PVE::Tools::encode_text($cl) . "\n";
                }
            } else {
                # an empty description is only written for the pending section
                $raw .= "#\n" if $pending;
            }
        }

        foreach my $key (sort keys %$section_conf) {
            next if $key =~ /^(digest|description|pending|cloudinit|snapshots)$/;
            $raw .= "$key: $section_conf->{$key}\n";
        }
        return $raw;
    };

    my $raw = $generate_raw_config->($conf);

    if (scalar(keys %{$conf->{pending}})) {
        $raw .= "\n[PENDING]\n";
        $raw .= $generate_raw_config->($conf->{pending}, 1);
    }

    if (scalar(keys %{$conf->{cloudinit}}) && PVE::QemuConfig->has_cloudinit($conf)) {
        $raw .= "\n[special:cloudinit]\n";
        $raw .= $generate_raw_config->($conf->{cloudinit});
    }

    foreach my $snapname (sort keys %{$conf->{snapshots}}) {
        $raw .= "\n[$snapname]\n";
        $raw .= $generate_raw_config->($conf->{snapshots}->{$snapname});
    }

    return $raw;
}
2574
# Returns a hash reference mapping every option that declares a default in
# $confdesc to that default value.
sub load_defaults {
    # we use static defaults from our JSON schema configuration
    my %defaults;
    for my $key (keys %$confdesc) {
        my $default = $confdesc->{$key}->{default};
        next if !defined($default);
        $defaults{$key} = $default;
    }

    return \%defaults;
}
2588
# Returns { $vmid => { exists => 1 } } for every guest of type 'qemu' that the
# cluster VM list assigns to the local node.
sub config_list {
    my $vmlist = PVE::Cluster::get_vmlist();
    my $res = {};

    my $ids = $vmlist && $vmlist->{ids};
    return $res if !$ids;

    my $nodename = nodename();

    for my $vmid (keys %$ids) {
        my $entry = $ids->{$vmid};
        my $is_local = $entry->{node} && $entry->{node} eq $nodename;
        next if !$is_local;
        my $is_qemu = $entry->{type} && $entry->{type} eq 'qemu';
        next if !$is_qemu;

        $res->{$vmid}->{exists} = 1;
    }

    return $res;
}
2604
# test if VM uses local resources (to prevent migration)
#
# Dies with "VM uses local resources" if any are found, unless $noerr is set.
#
# Returns, in list context:
#   - array ref of config keys that are local resources
#   - array ref of config keys (usbN/hostpciN) that use a resource mapping
#   - hash ref { $node => [ keys whose mapping has no entry for $node ] }
# In scalar context only the first array ref is returned.
sub check_local_resources {
    my ($conf, $noerr) = @_;

    my @loc_res = ();
    my $mapped_res = [];

    my $nodelist = PVE::Cluster::get_nodelist();
    my $pci_map = PVE::Mapping::PCI::config();
    my $usb_map = PVE::Mapping::USB::config();

    my $missing_mappings_by_node = { map { $_ => [] } @$nodelist };

    # record $key for every node that has no entry for the mapping $id
    my $add_missing_mapping = sub {
        my ($type, $key, $id) = @_;
        for my $node (@$nodelist) {
            my $entry;
            if ($type eq 'pci') {
                $entry = PVE::Mapping::PCI::get_node_mapping($pci_map, $id, $node);
            } elsif ($type eq 'usb') {
                $entry = PVE::Mapping::USB::get_node_mapping($usb_map, $id, $node);
            }
            if (!scalar($entry->@*)) {
                push @{$missing_mappings_by_node->{$node}}, $key;
            }
        }
    };

    push @loc_res, "hostusb" if $conf->{hostusb}; # old syntax
    push @loc_res, "hostpci" if $conf->{hostpci}; # old syntax

    push @loc_res, "ivshmem" if $conf->{ivshmem};

    foreach my $k (keys %$conf) {
        if ($k =~ m/^usb/) {
            my $entry = parse_property_string('pve-qm-usb', $conf->{$k});
            # 'host' is not set for mapped devices, so guard against matching on undef;
            # spice USB entries are not treated as local resources
            next if defined($entry->{host}) && $entry->{host} =~ m/^spice$/i;
            if ($entry->{mapping}) {
                $add_missing_mapping->('usb', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        if ($k =~ m/^hostpci/) {
            my $entry = parse_property_string('pve-qm-hostpci', $conf->{$k});
            if ($entry->{mapping}) {
                $add_missing_mapping->('pci', $k, $entry->{mapping});
                push @$mapped_res, $k;
            }
        }
        # sockets are safe: they will be recreated on the target side post-migrate
        next if $k =~ m/^serial/ && ($conf->{$k} eq 'socket');
        push @loc_res, $k if $k =~ m/^(usb|hostpci|serial|parallel)\d+$/;
    }

    die "VM uses local resources\n" if scalar @loc_res && !$noerr;

    return wantarray ? (\@loc_res, $mapped_res, $missing_mappings_by_node) : \@loc_res;
}
2663
# check if used storages are available on all nodes (used by migrate)
#
# For every storage-backed volume of $conf this dies unless the storage is
# enabled both locally and on $node and its locally checked storage config
# allows the volume's content type.
sub check_storage_availability {
    my ($storecfg, $conf, $node) = @_;

    my $check_volume = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        # volumes that do not parse as a volume ID have no storage to check
        my ($sid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$sid;

        # check if storage is available on both nodes
        my $scfg = PVE::Storage::storage_check_enabled($storecfg, $sid);
        PVE::Storage::storage_check_enabled($storecfg, $sid, $node);

        my ($vtype) = PVE::Storage::parse_volname($storecfg, $volid);

        die "$volid: content type '$vtype' is not available on storage '$sid'\n"
            if !$scfg->{content}->{$vtype};
    };

    PVE::QemuConfig->foreach_volume($conf, $check_volume);
}
2687
# list nodes where all VM images are available (used by has_feature API)
#
# Returns a hash reference { $node => 1 }. Starting from all cluster nodes,
# each storage-backed volume narrows the set: a disabled storage empties it, a
# storage with a node restriction keeps only those nodes, and a non-shared
# storage keeps only the local node.
sub shared_nodes {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => 1 } @$nodelist };
    my $nodename = nodename();

    my $restrict_nodes = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        if ($scfg->{disable}) {
            $nodehash = {};
        } elsif (my $avail = $scfg->{nodes}) {
            delete @{$nodehash}{ grep { !$avail->{$_} } keys %$nodehash };
        } elsif (!$scfg->{shared}) {
            delete @{$nodehash}{ grep { $_ ne $nodename } keys %$nodehash };
        }
    };

    PVE::QemuConfig->foreach_volume($conf, $restrict_nodes);

    return $nodehash
}
2721
# For every cluster node, determine which storages used by the VM's volumes
# are not available there.
#
# Returns { $node => {} } for nodes without problems and
# { $node => { unavailable_storages => [ sorted storage IDs ] } } otherwise.
# A disabled storage counts as unavailable on all nodes, a storage with a node
# restriction as unavailable on every node that is not listed.
sub check_local_storage_availability {
    my ($conf, $storecfg) = @_;

    my $nodelist = PVE::Cluster::get_nodelist();
    my $nodehash = { map { $_ => {} } @$nodelist };

    my $collect_unavailable = sub {
        my ($ds, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        my @affected;
        if ($scfg->{disable}) {
            @affected = keys %$nodehash;
        } elsif (my $avail = $scfg->{nodes}) {
            @affected = grep { !$avail->{$_} } keys %$nodehash;
        }

        $nodehash->{$_}->{unavailable_storages}->{$storeid} = 1 for @affected;
    };

    PVE::QemuConfig->foreach_volume($conf, $collect_unavailable);

    # turn the per-node storage sets into sorted lists
    for my $info (values %$nodehash) {
        my $unavail = $info->{unavailable_storages};
        next if !$unavail;
        $info->{unavailable_storages} = [ sort keys %$unavail ];
    }

    return $nodehash
}
2760
# Compat only, use assert_config_exists_on_node and vm_running_locally where possible
#
# Returns the result of PVE::QemuServer::Helpers::vm_running_locally($vmid).
# Unless $nocheck is set, it is first asserted that the config exists on $node.
sub check_running {
    my ($vmid, $nocheck, $node) = @_;

    # $nocheck is set when called during a migration, in which case the config
    # file might still or already reside on the *other* node
    # - because rename has already happened, and current node is source
    # - because rename hasn't happened yet, and current node is target
    # - because rename has happened, current node is target, but hasn't yet
    #   processed it yet
    if (!$nocheck) {
        PVE::QemuConfig::assert_config_exists_on_node($vmid, $node);
    }

    return PVE::QemuServer::Helpers::vm_running_locally($vmid);
}
2774
# Returns the VM list from config_list(), with the 'pid' added for every VM
# that has a '<vmid>.pid' file in the run directory and is reported as running
# by check_running(). If the run directory cannot be opened, the plain list is
# returned.
sub vzlist {
    my $vzlist = config_list();

    my $rundir = IO::Dir->new($PVE::QemuServer::Helpers::var_run_tmpdir);
    return $vzlist if !$rundir;

    while (defined(my $entry = $rundir->read)) {
        my ($vmid) = $entry =~ m/^(\d+)\.pid$/ or next;
        # ignore PID files of VMs that are not configured on this node
        next if !defined($vzlist->{$vmid});

        my $pid = check_running($vmid);
        $vzlist->{$vmid}->{pid} = $pid if $pid;
    }

    return $vzlist;
}
2792
# JSON schema for the per-VM status properties.
our $vmstatus_return_properties = {
    vmid => get_standard_option('pve-vmid'),
    status => {
        type => 'string',
        enum => ['stopped', 'running'],
        description => "QEMU process status.",
    },
    maxmem => {
        type => 'integer',
        renderer => 'bytes',
        description => "Maximum memory in bytes.",
        optional => 1,
    },
    maxdisk => {
        type => 'integer',
        renderer => 'bytes',
        description => "Root disk size in bytes.",
        optional => 1,
    },
    name => {
        type => 'string',
        description => "VM name.",
        optional => 1,
    },
    qmpstatus => {
        type => 'string',
        description => "VM run state from the 'query-status' QMP monitor command.",
        optional => 1,
    },
    pid => {
        type => 'integer',
        description => "PID of running qemu process.",
        optional => 1,
    },
    uptime => {
        type => 'integer',
        renderer => 'duration',
        description => "Uptime.",
        optional => 1,
    },
    cpus => {
        type => 'number',
        description => "Maximum usable CPUs.",
        optional => 1,
    },
    lock => {
        type => 'string',
        description => "The current config lock, if any.",
        optional => 1,
    },
    tags => {
        type => 'string',
        description => "The current configured tags, if any",
        optional => 1,
    },
    'running-machine' => {
        type => 'string',
        description => "The currently running machine type (if running).",
        optional => 1,
    },
    'running-qemu' => {
        type => 'string',
        description => "The currently running QEMU version (if running).",
        optional => 1,
    },
};
2859
# CPU accounting sample of the previous vmstatus() call, keyed by PID
my $last_proc_pid_stat;

# get VM status information
# This must be fast and should not block ($full == false)
# We only query KVM using QMP if $full == true (this can be slow)
#
# Returns a hash reference { $vmid => $status_hash } for all local VMs, or only
# for $opt_vmid if given.
sub vmstatus {
    my ($opt_vmid, $full) = @_;

    my $res = {};

    my $storecfg = PVE::Storage::config();

    my $list = vzlist();
    my $defaults = load_defaults();

    my ($uptime) = PVE::ProcFSTools::read_proc_uptime(1);

    my $cpucount = $cpuinfo->{cpus} || 1;
    my $mib = 1024 * 1024;

    # the VMs a status was requested for
    my @selected = grep { !$opt_vmid || ($_ eq $opt_vmid) } keys %$list;

    # static information from the VM configuration
    for my $vmid (@selected) {
        my $conf = PVE::QemuConfig->load_config($vmid);
        my $pid = $list->{$vmid}->{pid};

        my $d = {
            vmid => int($vmid),
            # fixme: better status?
            status => $pid ? 'running' : 'stopped',
            disk => 0, # no info available
            uptime => 0,
            cpu => 0,
            mem => 0,
            netout => 0,
            netin => 0,
            diskread => 0,
            diskwrite => 0,
        };
        $d->{pid} = int($pid) if $pid;

        my $size = PVE::QemuServer::Drive::bootdisk_size($storecfg, $conf);
        $d->{maxdisk} = $size // 0;

        my $cpus = ($conf->{sockets} || $defaults->{sockets})
            * ($conf->{cores} || $defaults->{cores});
        $cpus = $cpucount if $cpus > $cpucount;
        $cpus = $conf->{vcpus} if $conf->{vcpus};
        $d->{cpus} = $cpus;

        $d->{name} = $conf->{name} || "VM $vmid";
        $d->{maxmem} = ($conf->{memory} || $defaults->{memory}) * $mib;

        if ($conf->{balloon}) {
            $d->{balloon_min} = $conf->{balloon} * $mib;
            $d->{shares} = $conf->{shares} // $defaults->{shares};
        }

        $d->{template} = 1 if PVE::QemuConfig->is_template($conf);

        $d->{serial} = 1 if conf_has_serial($conf);
        $d->{lock} = $conf->{lock} if $conf->{lock};
        $d->{tags} = $conf->{tags} if defined($conf->{tags});

        $res->{$vmid} = $d;
    }

    # network traffic, summed up over the tap<vmid>i<N> devices of each VM
    my $netdev = PVE::ProcFSTools::read_proc_net_dev();
    for my $dev (keys %$netdev) {
        my ($vmid) = $dev =~ m/^tap([1-9]\d*)i/ or next;
        my $d = $res->{$vmid};
        next if !$d;

        # the counters are swapped on purpose: what the host receives on the
        # tap device is reported as the guest's outgoing traffic
        my $counters = $netdev->{$dev};
        $d->{netout} += $counters->{receive};
        $d->{netin} += $counters->{transmit};

        if ($full) {
            $d->{nics}->{$dev}->{netout} = int($counters->{receive});
            $d->{nics}->{$dev}->{netin} = int($counters->{transmit});
        }
    }

    my $ctime = gettimeofday;

    # uptime, memory and CPU usage from /proc/<pid>/stat
    for my $vmid (keys %$list) {
        my $d = $res->{$vmid};
        my $pid = $d->{pid};
        next if !$pid;

        my $pstat = PVE::ProcFSTools::read_proc_pid_stat($pid);
        next if !$pstat; # not running

        my $used = $pstat->{utime} + $pstat->{stime};

        $d->{uptime} = int(($uptime - $pstat->{starttime}) / $cpuinfo->{user_hz});

        if ($pstat->{vsize}) {
            $d->{mem} = int(($pstat->{rss} / $pstat->{vsize}) * $d->{maxmem});
        }

        my $old = $last_proc_pid_stat->{$pid};
        if (!$old) {
            # first sample for this PID, CPU usage can be computed on the next call
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => 0,
            };
            next;
        }

        my $dtime = ($ctime - $old->{time}) * $cpucount * $cpuinfo->{user_hz};

        if ($dtime > 1000) {
            my $dutime = $used - $old->{used};

            $d->{cpu} = (($dutime / $dtime) * $cpucount) / $d->{cpus};
            $last_proc_pid_stat->{$pid} = {
                time => $ctime,
                used => $used,
                cpu => $d->{cpu},
            };
        } else {
            # too little time passed for a useful measurement, reuse the last value
            $d->{cpu} = $old->{cpu};
        }
    }

    return $res if !$full;

    my $qmpclient = PVE::QMPClient->new();

    my $ballooncb = sub {
        my ($vmid, $resp) = @_;

        my $info = $resp->{'return'};
        return if !$info->{max_mem};

        my $d = $res->{$vmid};

        # use memory assigned to VM
        $d->{maxmem} = $info->{max_mem};
        $d->{balloon} = $info->{actual};

        if (defined($info->{total_mem}) && defined($info->{free_mem})) {
            $d->{mem} = $info->{total_mem} - $info->{free_mem};
            $d->{freemem} = $info->{free_mem};
        }

        $d->{ballooninfo} = $info;
    };

    my $blockstatscb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        my ($rd_total, $wr_total) = (0, 0);
        for my $blockstat (@$data) {
            my $stats = $blockstat->{stats};
            $rd_total += $stats->{rd_bytes};
            $wr_total += $stats->{wr_bytes};

            $blockstat->{device} =~ s/drive-//;
            $res->{$vmid}->{blockstat}->{$blockstat->{device}} = $stats;
        }
        $res->{$vmid}->{diskread} = $rd_total;
        $res->{$vmid}->{diskwrite} = $wr_total;
    };

    my $machinecb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} || [];

        $res->{$vmid}->{'running-machine'} =
            PVE::QemuServer::Machine::current_from_query_machines($data);
    };

    my $versioncb = sub {
        my ($vmid, $resp) = @_;
        my $data = $resp->{'return'} // {};

        my $version = 'unknown';
        if (my $v = $data->{qemu}) {
            $version = "$v->{major}.$v->{minor}.$v->{micro}";
        }

        $res->{$vmid}->{'running-qemu'} = $version;
    };

    my $statuscb = sub {
        my ($vmid, $resp) = @_;

        $qmpclient->queue_cmd($vmid, $blockstatscb, 'query-blockstats');
        $qmpclient->queue_cmd($vmid, $machinecb, 'query-machines');
        $qmpclient->queue_cmd($vmid, $versioncb, 'query-version');
        # this fails if the balloon driver is not loaded, so this must be
        # the last command (following commands are aborted if this fails).
        $qmpclient->queue_cmd($vmid, $ballooncb, 'query-balloon');

        my $status = $resp->{'return'}->{status};
        if (!defined($status)) {
            warn "unable to get VM status\n";
            return;
        }

        $res->{$vmid}->{qmpstatus} = $status;
    };

    my @running = grep { $res->{$_}->{pid} } @selected;

    $qmpclient->queue_cmd($_, $statuscb, 'query-status') for @running;

    $qmpclient->queue_execute(undef, 2);

    for my $vmid (@running) {
        # we can't use the $qmpclient since it might have already aborted on
        # 'query-balloon', but this might also fail for older versions...
        my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };
        $res->{$vmid}->{'proxmox-support'} = $qemu_support // {};
    }

    # fall back to the process status if QMP did not provide a run state
    for my $vmid (@selected) {
        my $d = $res->{$vmid};
        $d->{qmpstatus} = $d->{status} if !$d->{qmpstatus};
    }

    return $res;
}
3100
# Returns 1 if any 'serialN' option (N below $MAX_SERIAL_PORTS) is set in
# $conf, 0 otherwise.
sub conf_has_serial {
    my ($conf) = @_;

    for my $i (0 .. $MAX_SERIAL_PORTS - 1) {
        return 1 if $conf->{"serial$i"};
    }

    return 0;
}
3112
# Describe the audio device 'audio$id' ($id defaults to 0) of $conf.
# Returns nothing if it is not configured, otherwise a hash reference with the
# device model ('dev'), the backend driver ('backend', defaults to 'spice')
# and the IDs used for the device and backend ('dev_id', 'backend_id').
sub conf_has_audio {
    my ($conf, $id) = @_;

    $id //= 0;

    my $audio = $conf->{"audio$id"};
    return if !defined($audio);

    my $props = parse_property_string($audio_fmt, $audio);
    my $driver = $props->{driver} // 'spice';

    my $info = {
        dev => $props->{device},
        dev_id => "audiodev$id",
        backend => $driver,
        backend_id => "$driver-backend${id}",
    };
    return $info;
}
3130
# Builds the QEMU command line arguments for an audio device.
#
# Parameters:
#   $audio           - hash with the keys dev, dev_id, backend and backend_id
#                      (the shape returned by conf_has_audio)
#   $audiopciaddr    - string appended verbatim to the device that sits on the
#                      PCI bus
#   $machine_version - machine version; from 4.2 on the devices reference the
#                      backend via ",audiodev=<backend_id>"
#
# Returns an array reference of '-device'/'-audiodev' arguments; the
# '-audiodev' backend definition always comes last. Dies for a device that is
# neither 'AC97' nor ends in 'intel-hda'.
sub audio_devs {
    my ($audio, $audiopciaddr, $machine_version) = @_;

    my $dev_id = $audio->{dev_id};
    my $backend_ref = min_version($machine_version, 4, 2) ? ",audiodev=$audio->{backend_id}" : "";

    my @args;
    if ($audio->{dev} eq 'AC97') {
        @args = ('-device', "AC97,id=${dev_id}${audiopciaddr}$backend_ref");
    } elsif ($audio->{dev} =~ /intel\-hda$/) {
        # the HDA controller itself takes the PCI address, the two codecs are
        # attached to its bus and carry the backend reference
        @args = (
            '-device', "$audio->{dev},id=${dev_id}${audiopciaddr}",
            '-device', "hda-micro,id=${dev_id}-codec0,bus=${dev_id}.0,cad=0$backend_ref",
            '-device', "hda-duplex,id=${dev_id}-codec1,bus=${dev_id}.0,cad=1$backend_ref",
        );
    } else {
        die "unkown audio device '$audio->{dev}', implement me!";
    }

    return [ @args, '-audiodev', "$audio->{backend},id=$audio->{backend_id}" ];
}
3156
# Returns a hash reference with the runtime paths of the swtpm instance that
# belongs to $vmid:
#   socket - control socket path
#   pid    - pid file path
sub get_tpm_paths {
    my ($vmid) = @_;

    my $base = "/var/run/qemu-server/$vmid.swtpm";

    return {
        socket => $base,
        pid => "$base.pid",
    };
}
3164
# Appends the QEMU arguments for an emulated TPM to the array referenced by
# $devices: a chardev connected to the swtpm socket of $vmid, the tpmdev
# using that chardev, and the tpm-tis device itself.
#
# Does nothing if $conf has no (true) 'tpmstate0' entry.
sub add_tpm_device {
    my ($vmid, $devices, $conf) = @_;

    if (!$conf->{tpmstate0}) {
        return;
    }

    my $socket = get_tpm_paths($vmid)->{socket};

    push @$devices,
        "-chardev", "socket,id=tpmchar,path=$socket",
        "-tpmdev", "emulator,id=tpmdev,chardev=tpmchar",
        "-device", "tpm-tis,tpmdev=tpmdev";
}
3176
# Prepares the TPM state (unless migrating) and starts a daemonized swtpm
# emulator for a VM.
#
# Parameters:
#   $storecfg  - storage configuration, used to map storage-managed volumes
#   $vmid      - VM id; determines the socket, pid file and log file paths
#   $tpmdrive  - raw 'tpmstate0' property string; nothing is done if false
#   $migration - if true, swtpm_setup is skipped
#
# Returns the untainted PID of the swtpm daemon so it can be killed on error,
# or undef if the pid file did not contain a number. Returns nothing if
# $tpmdrive is false. Dies if swtpm does not create its pid file in time.
sub start_swtpm {
    my ($storecfg, $vmid, $tpmdrive, $migration) = @_;

    return if !$tpmdrive;

    my $tpm = parse_drive("tpmstate0", $tpmdrive);
    my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);

    # storage-managed volumes must be mapped first, anything else is used as-is
    my $state;
    if ($storeid) {
        $state = PVE::Storage::map_volume($storecfg, $tpm->{file});
    } else {
        $state = $tpm->{file};
    }

    my $paths = get_tpm_paths($vmid);
    my $is_tpm2 = $tpm->{version} eq 'v2.0';

    # during migration, we will get state from remote
    if (!$migration) {
        # run swtpm_setup to create a new TPM state if it doesn't exist yet
        my $setup_cmd = [
            "swtpm_setup",
            "--tpmstate",
            "file://$state",
            "--createek",
            "--create-ek-cert",
            "--create-platform-cert",
            "--lock-nvram",
            "--config",
            "/etc/swtpm_setup.conf", # do not use XDG configs
            "--runas",
            "0", # force creation as root, error if not possible
            "--not-overwrite", # ignore existing state, do not modify
        ];

        # TPM 2.0 supports ECC crypto, use if possible
        push @$setup_cmd, "--tpm2", "--ecc" if $is_tpm2;

        # The output line is passed to the callback as its argument; do not
        # rely on a capture variable left over from run_command's internals.
        run_command($setup_cmd, outfunc => sub {
            my ($line) = @_;
            print "swtpm_setup: $line\n";
        });
    }

    # Used to distinguish different invocations in the log.
    my $log_prefix = "[id=" . int(time()) . "] ";

    my $emulator_cmd = [
        "swtpm",
        "socket",
        "--tpmstate",
        "backend-uri=file://$state,mode=0600",
        "--ctrl",
        "type=unixio,path=$paths->{socket},mode=0600",
        "--pid",
        "file=$paths->{pid}",
        "--terminate", # terminate on QEMU disconnect
        "--daemon",
        "--log",
        "file=/run/qemu-server/$vmid-swtpm.log,level=1,prefix=$log_prefix",
    ];
    push @$emulator_cmd, "--tpm2" if $is_tpm2;
    run_command($emulator_cmd, outfunc => sub {
        my ($line) = @_;
        print $line;
    });

    my $tries = 100; # swtpm may take a bit to start before daemonizing, wait up to 5s for pid
    while (! -e $paths->{pid}) {
        die "failed to start swtpm: pid file '$paths->{pid}' wasn't created.\n" if --$tries == 0;
        usleep(50_000);
    }

    # Return the untainted PID of the swtpm daemon so it can be killed on
    # error. Capture into a lexical: after a failed match $1 would still hold
    # a stale value from an unrelated earlier match, which a caller could
    # mistake for the PID.
    my $pid_line = file_read_firstline($paths->{pid}) // '';
    my ($pid) = $pid_line =~ m/(\d+)/;
    return $pid;
}
3251
# Checks whether the given 'vga' property string selects a qxl display.
#
# Returns 0 if the parsed type is not qxl/qxl2/qxl3/qxl4. Otherwise returns
# the digit following 'qxl' (2-4), or 1 for plain 'qxl'.
sub vga_conf_has_spice {
    my ($vga) = @_;

    my $parsed = parse_vga($vga);
    my $type = $parsed->{type};

    if ($type && $type =~ m/^qxl([234])?$/) {
        return $1 || 1;
    }

    return 0;
}
3261
# Returns true if $arch equals the architecture reported by get_host_arch().
sub is_native($) {
    my ($arch) = @_;

    my $host_arch = get_host_arch();
    return $host_arch eq $arch;
}
3266
# Returns the 'arch' setting of the VM configuration, falling back to the
# host architecture if it is not defined.
sub get_vm_arch {
    my ($conf) = @_;

    my $configured = $conf->{arch};
    return defined($configured) ? $configured : get_host_arch();
}
3271
# Machine type to fall back to when none is configured, keyed by architecture.
my $default_machines = {
    'x86_64' => 'pc',
    'aarch64' => 'virt',
};
3276
# Extracts the leading "<major>.<minor>" part of a QEMU version string.
#
# Parameters:
#   $kvmversion - version string; if undefined, kvm_user_version() is queried
#
# Returns the "<major>.<minor>" string, or undef if the version string does
# not start with such a pattern.
sub get_installed_machine_version {
    my ($kvmversion) = @_;

    $kvmversion //= kvm_user_version();

    # Capture into a lexical instead of returning $1: after a failed match $1
    # still holds the result of an earlier, unrelated match.
    my ($major_minor) = ($kvmversion // '') =~ m/^(\d+\.\d+)/;

    return $major_minor;
}
3283
# Turns a generic machine type into a version-pinned one.
#
# Parameters:
#   $machine      - generic machine type ('pc', 'q35', 'virt'); a false value
#                   is treated like 'pc'
#   $base_version - preferred version to pin to
#   $kvmversion   - QEMU version, passed on to the version helpers
#
# $base_version is used if it is defined and can_run_pve_machine_version()
# accepts it; otherwise the installed machine version is used. Unknown machine
# types trigger a warning and are returned unchanged.
sub windows_get_pinned_machine_version {
    my ($machine, $base_version, $kvmversion) = @_;

    my $base_usable = defined($base_version)
        && PVE::QemuServer::Machine::can_run_pve_machine_version($base_version, $kvmversion);
    my $pin_version = $base_usable ? $base_version : get_installed_machine_version($kvmversion);

    my %versioned_prefix = (
        pc => 'pc-i440fx',
        q35 => 'pc-q35',
        virt => 'virt',
    );

    my $prefix = $versioned_prefix{ $machine || 'pc' };
    if (defined($prefix)) {
        return "$prefix-$pin_version";
    }

    warn "unknown machine type '$machine', not touching that!\n";
    return $machine;
}
3305
# Determines the QEMU machine type string for a VM.
#
# Parameters:
#   $conf            - VM configuration ('machine' and 'ostype' are read)
#   $forcemachine    - if true, takes precedence over $conf->{machine}
#   $arch            - architecture used to pick the default machine; defaults
#                      to 'x86_64'
#   $add_pve_version - if true, make sure the result carries a '+pveN' suffix
#   $kvmversion      - QEMU version; queried via kvm_user_version() if needed
#
# Returns the machine type string.
sub get_vm_machine {
    my ($conf, $forcemachine, $arch, $add_pve_version, $kvmversion) = @_;

    my $machine = $forcemachine || $conf->{machine};

    my $is_generic = !$machine || $machine =~ m/^(?:pc|q35|virt)$/;
    if ($is_generic) {
        $kvmversion = kvm_user_version() if !defined($kvmversion);

        # we must pin Windows VMs without a specific version to 5.1, as 5.2 fixed a bug in ACPI
        # layout which confuses windows quite a bit and may result in various regressions..
        # see: https://lists.gnu.org/archive/html/qemu-devel/2021-02/msg08484.html
        $machine = windows_get_pinned_machine_version($machine, '5.1', $kvmversion)
            if windows_version($conf->{ostype});

        $arch = 'x86_64' if !defined($arch);
        $machine = $default_machines->{$arch} if !$machine;

        $machine .= '+pve' . PVE::QemuServer::Machine::get_pve_version($kvmversion)
            if $add_pve_version;
    }

    return $machine if !$add_pve_version;
    return $machine if $machine =~ m/\+pve\d+?(?:\.pxe)?$/;

    # for version-pinned machines that do not include a pve-version (e.g.
    # pc-q35-4.1), we assume 0 to keep them stable in case we bump
    my $pxe_suffix = '';
    if ($machine =~ m/^(.*?)\.pxe$/) {
        # the pve version goes in front of a trailing '.pxe'
        $machine = $1;
        $pxe_suffix = '.pxe';
    }

    return "$machine+pve0$pxe_suffix";
}
3340
# Selects the OVMF firmware images for a VM.
#
# Parameters:
#   $arch    - architecture, used as key into $OVMF
#   $efidisk - parsed efidisk (may be undef); 'efitype' and
#              'pre-enrolled-keys' are read
#   $smm     - if true, the SMM-enabled 4m image variant is selected
#
# The 'default' image set is used unless $arch is not aarch64 and the efidisk
# has efitype '4m'; in that case '4m' or '4m-no-smm' is chosen, with '-ms'
# appended if pre-enrolled keys are requested.
#
# Returns the list ($ovmf_code, $ovmf_vars). Dies if the architecture or the
# selected image type is unknown, or if one of the image files does not exist.
sub get_ovmf_files($$$) {
    my ($arch, $efidisk, $smm) = @_;

    my $types = $OVMF->{$arch}
        or die "no OVMF images known for architecture '$arch'\n";

    my $type = 'default';
    if ($arch ne "aarch64" && defined($efidisk->{efitype}) && $efidisk->{efitype} eq '4m') {
        $type = $smm ? "4m" : "4m-no-smm";
        $type .= '-ms' if $efidisk->{'pre-enrolled-keys'};
    }

    # fail with a clear message instead of dereferencing undef for a type
    # that has no entry for this architecture
    my $images = $types->{$type}
        or die "no OVMF images known for type '$type' on architecture '$arch'\n";

    my ($ovmf_code, $ovmf_vars) = $images->@*;
    die "EFI base image '$ovmf_code' not found\n" if ! -f $ovmf_code;
    die "EFI vars image '$ovmf_vars' not found\n" if ! -f $ovmf_vars;

    return ($ovmf_code, $ovmf_vars);
}
3359
# QEMU system emulator binary per architecture, used for non-native guests.
my $Arch2Qemu = {
    'aarch64' => '/usr/bin/qemu-system-aarch64',
    'x86_64' => '/usr/bin/qemu-system-x86_64',
};

# Returns the binary used to run a VM of the given architecture:
# '/usr/bin/kvm' if $arch is the native one, otherwise the matching entry of
# $Arch2Qemu. Dies if there is no such entry.
sub get_command_for_arch($) {
    my ($arch) = @_;

    if (is_native($arch)) {
        return '/usr/bin/kvm';
    }

    my $binary = $Arch2Qemu->{$arch};
    die "don't know how to emulate architecture '$arch'\n" if !$binary;

    return $binary;
}
3372
3373 # To use query_supported_cpu_flags and query_understood_cpu_flags to get flags
3374 # to use in a QEMU command line (-cpu element), first array_intersect the result
3375 # of query_supported_ with query_understood_. This is necessary because:
3376 #
3377 # a) query_understood_ returns flags the host cannot use and
3378 # b) query_supported_ (rather the QMP call) doesn't actually return CPU
3379 # flags, but CPU settings - with most of them being flags. Those settings
3380 # (and some flags, curiously) cannot be specified as a "-cpu" argument.
3381 #
3382 # query_supported_ needs to start up to 2 temporary VMs and is therefore rather
3383 # expensive. If you need the value returned from this, you can get it much
3384 # cheaper from pmxcfs using PVE::Cluster::get_node_kv('cpuflags-$accel') with
3385 # $accel being 'kvm' or 'tcg'.
3386 #
3387 # pvestatd calls this function on startup and whenever the QEMU/KVM version
3388 # changes, automatically populating pmxcfs.
3389 #
3390 # Returns: { kvm => [ flagX, flagY, ... ], tcg => [ flag1, flag2, ... ] }
3391 # since kvm and tcg machines support different flags
3392 #
sub query_supported_cpu_flags {
    my ($arch) = @_;

    $arch = get_host_arch() if !defined($arch);
    my $machine = $default_machines->{$arch};

    # FIXME: Once this is merged, the code below should work for ARM as well:
    # https://lists.nongnu.org/archive/html/qemu-devel/2019-06/msg04947.html
    if ($arch eq "aarch64") {
        die "QEMU/KVM cannot detect CPU flags on ARM (aarch64)\n";
    }

    my $has_kvm = defined(kvm_version());
    my $binary = get_command_for_arch($arch);
    my $fakevmid = -1;
    my $pidfile = PVE::QemuServer::Helpers::pidfile_name($fakevmid);
    my $qmp_chardev = "socket,id=qmp,path=/var/run/qemu-server/$fakevmid.qmp,server=on,wait=off";

    # Starts a temporary (frozen) VM with vmid -1, asks it via QMP for the
    # full expansion of the 'host' CPU model and returns the sorted list of
    # enabled properties. The VM is always stopped again, even if QMP failed.
    my $run_query = sub {
        my ($use_kvm) = @_;

        my @qemu_cmd = (
            $binary,
            '-machine', $machine,
            '-display', 'none',
            '-chardev', $qmp_chardev,
            '-mon', 'chardev=qmp,mode=control',
            '-pidfile', $pidfile,
            '-S', '-daemonize',
        );
        push @qemu_cmd, '-accel', 'tcg' if !$use_kvm;

        my $exit_code = run_command(\@qemu_cmd, noerr => 1, quiet => 0);
        die "QEMU flag querying VM exited with code " . $exit_code if $exit_code;

        my %enabled;
        eval {
            my $expansion = mon_cmd(
                $fakevmid,
                'query-cpu-model-expansion',
                type => 'full',
                model => { name => 'host' },
            );

            my $props = $expansion->{model}->{props};
            for my $name (keys %$props) {
                next if $props->{$name} ne '1';
                # QEMU returns some flags multiple times, with '_', '.' or '-'
                # (e.g. lahf_lm and lahf-lm; sse4.2, sse4-2 and sse4_2; ...).
                # We only keep those with underscores, to match /proc/cpuinfo
                (my $flag = $name) =~ s/\.|-/_/g;
                $enabled{$flag} = 1;
            }
        };
        my $err = $@;

        # force stop with 10 sec timeout and 'nocheck', always stop, even if QMP failed
        vm_stop(undef, $fakevmid, 1, 1, 10, 0, 1);

        die $err if $err;

        return [ sort keys %enabled ];
    };

    my $result = {};

    # We need to query QEMU twice, since KVM and TCG have different supported flags
    PVE::QemuConfig->lock_config($fakevmid, sub {
        $result->{tcg} = eval { $run_query->(0) };
        warn "warning: failed querying supported tcg flags: $@\n" if $@;

        if ($has_kvm) {
            $result->{kvm} = eval { $run_query->(1) };
            warn "warning: failed querying supported kvm flags: $@\n" if $@;
        }
    });

    return $result;
}
3474
# Understood CPU flags are written to a file at 'pve-qemu' compile time
my $understood_cpu_flag_dir = "/usr/share/kvm";

# Reads the whitespace-separated list of recognized CPUID flags for the host
# architecture and returns it as an array reference. Dies if the flag file
# for that architecture does not exist.
sub query_understood_cpu_flags {
    my $host_arch = get_host_arch();
    my $flag_file = "$understood_cpu_flag_dir/recognized-CPUID-flags-$host_arch";

    if (! -e $flag_file) {
        die "Cannot query understood QEMU CPU flags for architecture: $host_arch (file not found)\n";
    }

    my $content = file_get_contents($flag_file);

    # splitting on ' ' ignores leading whitespace and drops trailing empty
    # fields, so no separate trimming is needed
    my @flags = split(' ', $content);

    return \@flags;
}
3490
# Since commit 277d33454f77ec1d1e0bc04e37621e4dd2424b67 in pve-qemu, smm is not off by default
# anymore. But smm=off seems to be required when using SeaBIOS and serial display.
#
# Returns a true value if the BIOS is SeaBIOS (explicitly or because none is
# configured) and the display type is a serial port or 'none'. Returns
# nothing for 'virt' machines.
my sub should_disable_smm {
    my ($conf, $vga, $machine) = @_;

    return if $machine =~ m/^virt/; # there is no smm flag that could be disabled

    my $bios = $conf->{bios};
    my $uses_seabios = !defined($bios) || $bios eq 'seabios';

    return $uses_seabios && $vga->{type} && $vga->{type} =~ m/^(serial\d+|none)$/;
}
3501
# Builds the two '-drive' option strings needed for booting with OVMF.
#
# Parameters:
#   $conf          - VM configuration ('efidisk0' is read)
#   $storecfg      - storage configuration
#   $vmid          - VM id, used for the temporary efivars file name
#   $arch          - architecture, passed to get_ovmf_files
#   $q35           - passed to get_ovmf_files as its SMM selector
#   $version_guard - code reference called as $version_guard->(4, 1, 2) to
#                    decide whether a 'size=' option is added
#
# Returns a list of two strings: the read-only pflash drive for the firmware
# code image and the pflash drive for the EFI vars. Without a configured
# efidisk, the vars template is copied to a file below /tmp and that copy is
# used. Dies if the efidisk is not a storage volume and has no format.
my sub print_ovmf_drive_commandlines {
    my ($conf, $storecfg, $vmid, $arch, $q35, $version_guard) = @_;

    my $efidisk;
    $efidisk = parse_drive('efidisk0', $conf->{efidisk0}) if $conf->{efidisk0};

    my ($ovmf_code, $ovmf_vars) = get_ovmf_files($arch, $efidisk, $q35);

    my @vars_opts = ('if=pflash', 'unit=1', 'id=drive-efidisk0');

    if ($efidisk) {
        my $volid = $efidisk->{file};
        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid, 1);

        my $path = $volid;
        my $format = $efidisk->{format};
        if ($storeid) {
            $path = PVE::Storage::path($storecfg, $volid);
            if (!defined($format)) {
                my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
                $format = qemu_img_format($scfg, $volname);
            }
        } elsif (!defined($format)) {
            die "efidisk format must be specified\n";
        }

        # SPI flash does lots of read-modify-write OPs, without writeback this gets really slow #3329
        if ($path =~ m/^rbd:/) {
            push @vars_opts, 'cache=writeback';
            $path .= ':rbd_cache_policy=writeback'; # avoid write-around, we *need* to cache writes too
        }
        push @vars_opts, "format=$format", "file=$path";

        if ($format eq 'raw' && $version_guard->(4, 1, 2)) {
            push @vars_opts, "size=" . (-s $ovmf_vars);
        }
        push @vars_opts, 'readonly=on' if drive_is_read_only($conf, $efidisk);
    } else {
        log_warn("no efidisk configured! Using temporary efivars disk.");

        my $tmp_path = "/tmp/$vmid-ovmf.fd";
        my $vars_size = -s $ovmf_vars;
        PVE::Tools::file_copy($ovmf_vars, $tmp_path, $vars_size);

        push @vars_opts, 'format=raw', "file=$tmp_path";
        push @vars_opts, "size=$vars_size" if $version_guard->(4, 1, 2);
    }

    my $code_drive_str = "if=pflash,unit=0,format=raw,readonly=on,file=$ovmf_code";

    return ($code_drive_str, join(',', @vars_opts));
}
3541
3542 sub config_to_command {
3543 my ($storecfg, $vmid, $conf, $defaults, $forcemachine, $forcecpu,
3544 $pbs_backing) = @_;
3545
3546 my ($globalFlags, $machineFlags, $rtcFlags) = ([], [], []);
3547 my $devices = [];
3548 my $bridges = {};
3549 my $ostype = $conf->{ostype};
3550 my $winversion = windows_version($ostype);
3551 my $kvm = $conf->{kvm};
3552 my $nodename = nodename();
3553
3554 my $arch = get_vm_arch($conf);
3555 my $kvm_binary = get_command_for_arch($arch);
3556 my $kvmver = kvm_user_version($kvm_binary);
3557
3558 if (!$kvmver || $kvmver !~ m/^(\d+)\.(\d+)/ || $1 < 3) {
3559 $kvmver //= "undefined";
3560 die "Detected old QEMU binary ('$kvmver', at least 3.0 is required)\n";
3561 }
3562
3563 my $add_pve_version = min_version($kvmver, 4, 1);
3564
3565 my $machine_type = get_vm_machine($conf, $forcemachine, $arch, $add_pve_version);
3566 my $machine_version = extract_version($machine_type, $kvmver);
3567 $kvm //= 1 if is_native($arch);
3568
3569 $machine_version =~ m/(\d+)\.(\d+)/;
3570 my ($machine_major, $machine_minor) = ($1, $2);
3571
3572 if ($kvmver =~ m/^\d+\.\d+\.(\d+)/ && $1 >= 90) {
3573 warn "warning: Installed QEMU version ($kvmver) is a release candidate, ignoring version checks\n";
3574 } elsif (!min_version($kvmver, $machine_major, $machine_minor)) {
3575 die "Installed QEMU version '$kvmver' is too old to run machine type '$machine_type',"
3576 ." please upgrade node '$nodename'\n"
3577 } elsif (!PVE::QemuServer::Machine::can_run_pve_machine_version($machine_version, $kvmver)) {
3578 my $max_pve_version = PVE::QemuServer::Machine::get_pve_version($machine_version);
3579 die "Installed qemu-server (max feature level for $machine_major.$machine_minor is"
3580 ." pve$max_pve_version) is too old to run machine type '$machine_type', please upgrade"
3581 ." node '$nodename'\n";
3582 }
3583
3584 # if a specific +pve version is required for a feature, use $version_guard
3585 # instead of min_version to allow machines to be run with the minimum
3586 # required version
3587 my $required_pve_version = 0;
3588 my $version_guard = sub {
3589 my ($major, $minor, $pve) = @_;
3590 return 0 if !min_version($machine_version, $major, $minor, $pve);
3591 my $max_pve = PVE::QemuServer::Machine::get_pve_version("$major.$minor");
3592 return 1 if min_version($machine_version, $major, $minor, $max_pve+1);
3593 $required_pve_version = $pve if $pve && $pve > $required_pve_version;
3594 return 1;
3595 };
3596
3597 if ($kvm && !defined kvm_version()) {
3598 die "KVM virtualisation configured, but not available. Either disable in VM configuration"
3599 ." or enable in BIOS.\n";
3600 }
3601
3602 my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
3603 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
3604 my $use_old_bios_files = undef;
3605 ($use_old_bios_files, $machine_type) = qemu_use_old_bios_files($machine_type);
3606
3607 my $cmd = [];
3608 if ($conf->{affinity}) {
3609 push @$cmd, '/usr/bin/taskset', '--cpu-list', '--all-tasks', $conf->{affinity};
3610 }
3611
3612 push @$cmd, $kvm_binary;
3613
3614 push @$cmd, '-id', $vmid;
3615
3616 my $vmname = $conf->{name} || "vm$vmid";
3617
3618 push @$cmd, '-name', "$vmname,debug-threads=on";
3619
3620 push @$cmd, '-no-shutdown';
3621
3622 my $use_virtio = 0;
3623
3624 my $qmpsocket = PVE::QemuServer::Helpers::qmp_socket($vmid);
3625 push @$cmd, '-chardev', "socket,id=qmp,path=$qmpsocket,server=on,wait=off";
3626 push @$cmd, '-mon', "chardev=qmp,mode=control";
3627
3628 if (min_version($machine_version, 2, 12)) {
3629 push @$cmd, '-chardev', "socket,id=qmp-event,path=/var/run/qmeventd.sock,reconnect=5";
3630 push @$cmd, '-mon', "chardev=qmp-event,mode=control";
3631 }
3632
3633 push @$cmd, '-pidfile' , PVE::QemuServer::Helpers::pidfile_name($vmid);
3634
3635 push @$cmd, '-daemonize';
3636
3637 if ($conf->{smbios1}) {
3638 my $smbios_conf = parse_smbios1($conf->{smbios1});
3639 if ($smbios_conf->{base64}) {
3640 # Do not pass base64 flag to qemu
3641 delete $smbios_conf->{base64};
3642 my $smbios_string = "";
3643 foreach my $key (keys %$smbios_conf) {
3644 my $value;
3645 if ($key eq "uuid") {
3646 $value = $smbios_conf->{uuid}
3647 } else {
3648 $value = decode_base64($smbios_conf->{$key});
3649 }
3650 # qemu accepts any binary data, only commas need escaping by double comma
3651 $value =~ s/,/,,/g;
3652 $smbios_string .= "," . $key . "=" . $value if $value;
3653 }
3654 push @$cmd, '-smbios', "type=1" . $smbios_string;
3655 } else {
3656 push @$cmd, '-smbios', "type=1,$conf->{smbios1}";
3657 }
3658 }
3659
3660 if ($conf->{bios} && $conf->{bios} eq 'ovmf') {
3661 my ($code_drive_str, $var_drive_str) =
3662 print_ovmf_drive_commandlines($conf, $storecfg, $vmid, $arch, $q35, $version_guard);
3663 push $cmd->@*, '-drive', $code_drive_str;
3664 push $cmd->@*, '-drive', $var_drive_str;
3665 }
3666
3667 if ($q35) { # tell QEMU to load q35 config early
3668 # we use different pcie-port hardware for qemu >= 4.0 for passthrough
3669 if (min_version($machine_version, 4, 0)) {
3670 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35-4.0.cfg';
3671 } else {
3672 push @$devices, '-readconfig', '/usr/share/qemu-server/pve-q35.cfg';
3673 }
3674 }
3675
3676 if (defined(my $fixups = qemu_created_version_fixups($conf, $forcemachine, $kvmver))) {
3677 push @$cmd, $fixups->@*;
3678 }
3679
3680 if ($conf->{vmgenid}) {
3681 push @$devices, '-device', 'vmgenid,guid='.$conf->{vmgenid};
3682 }
3683
3684 # add usb controllers
3685 my @usbcontrollers = PVE::QemuServer::USB::get_usb_controllers(
3686 $conf, $bridges, $arch, $machine_type, $machine_version);
3687 push @$devices, @usbcontrollers if @usbcontrollers;
3688 my $vga = parse_vga($conf->{vga});
3689
3690 my $qxlnum = vga_conf_has_spice($conf->{vga});
3691 $vga->{type} = 'qxl' if $qxlnum;
3692
3693 if (!$vga->{type}) {
3694 if ($arch eq 'aarch64') {
3695 $vga->{type} = 'virtio';
3696 } elsif (min_version($machine_version, 2, 9)) {
3697 $vga->{type} = (!$winversion || $winversion >= 6) ? 'std' : 'cirrus';
3698 } else {
3699 $vga->{type} = ($winversion >= 6) ? 'std' : 'cirrus';
3700 }
3701 }
3702
3703 # enable absolute mouse coordinates (needed by vnc)
3704 my $tablet = $conf->{tablet};
3705 if (!defined($tablet)) {
3706 $tablet = $defaults->{tablet};
3707 $tablet = 0 if $qxlnum; # disable for spice because it is not needed
3708 $tablet = 0 if $vga->{type} =~ m/^serial\d+$/; # disable if we use serial terminal (no vga card)
3709 }
3710
3711 if ($tablet) {
3712 push @$devices, '-device', print_tabletdevice_full($conf, $arch) if $tablet;
3713 my $kbd = print_keyboarddevice_full($conf, $arch);
3714 push @$devices, '-device', $kbd if defined($kbd);
3715 }
3716
3717 my $bootorder = device_bootorder($conf);
3718
3719 # host pci device passthrough
3720 my ($kvm_off, $gpu_passthrough, $legacy_igd, $pci_devices) = PVE::QemuServer::PCI::print_hostpci_devices(
3721 $vmid, $conf, $devices, $vga, $winversion, $bridges, $arch, $machine_type, $bootorder);
3722
3723 # usb devices
3724 my $usb_dev_features = {};
3725 $usb_dev_features->{spice_usb3} = 1 if min_version($machine_version, 4, 0);
3726
3727 my @usbdevices = PVE::QemuServer::USB::get_usb_devices(
3728 $conf, $usb_dev_features, $bootorder, $machine_version);
3729 push @$devices, @usbdevices if @usbdevices;
3730
3731 # serial devices
3732 for (my $i = 0; $i < $MAX_SERIAL_PORTS; $i++) {
3733 my $path = $conf->{"serial$i"} or next;
3734 if ($path eq 'socket') {
3735 my $socket = "/var/run/qemu-server/${vmid}.serial$i";
3736 push @$devices, '-chardev', "socket,id=serial$i,path=$socket,server=on,wait=off";
3737 # On aarch64, serial0 is the UART device. QEMU only allows
3738 # connecting UART devices via the '-serial' command line, as
3739 # the device has a fixed slot on the hardware...
3740 if ($arch eq 'aarch64' && $i == 0) {
3741 push @$devices, '-serial', "chardev:serial$i";
3742 } else {
3743 push @$devices, '-device', "isa-serial,chardev=serial$i";
3744 }
3745 } else {
3746 die "no such serial device\n" if ! -c $path;
3747 push @$devices, '-chardev', "serial,id=serial$i,path=$path";
3748 push @$devices, '-device', "isa-serial,chardev=serial$i";
3749 }
3750 }
3751
3752 # parallel devices
3753 for (my $i = 0; $i < $MAX_PARALLEL_PORTS; $i++) {
3754 if (my $path = $conf->{"parallel$i"}) {
3755 die "no such parallel device\n" if ! -c $path;
3756 my $devtype = $path =~ m!^/dev/usb/lp! ? 'serial' : 'parallel';
3757 push @$devices, '-chardev', "$devtype,id=parallel$i,path=$path";
3758 push @$devices, '-device', "isa-parallel,chardev=parallel$i";
3759 }
3760 }
3761
3762 if (min_version($machine_version, 4, 0) && (my $audio = conf_has_audio($conf))) {
3763 my $audiopciaddr = print_pci_addr("audio0", $bridges, $arch, $machine_type);
3764 my $audio_devs = audio_devs($audio, $audiopciaddr, $machine_version);
3765 push @$devices, @$audio_devs;
3766 }
3767
3768 # Add a TPM only if the VM is not a template,
3769 # to support backing up template VMs even if the TPM disk is write-protected.
3770 add_tpm_device($vmid, $devices, $conf) if (!PVE::QemuConfig->is_template($conf));
3771
3772 my $sockets = 1;
3773 $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer iused
3774 $sockets = $conf->{sockets} if $conf->{sockets};
3775
3776 my $cores = $conf->{cores} || 1;
3777
3778 my $maxcpus = $sockets * $cores;
3779
3780 my $vcpus = $conf->{vcpus} ? $conf->{vcpus} : $maxcpus;
3781
3782 my $allowed_vcpus = $cpuinfo->{cpus};
3783
3784 die "MAX $allowed_vcpus vcpus allowed per VM on this node\n" if ($allowed_vcpus < $maxcpus);
3785
3786 if ($hotplug_features->{cpu} && min_version($machine_version, 2, 7)) {
3787 push @$cmd, '-smp', "1,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3788 for (my $i = 2; $i <= $vcpus; $i++) {
3789 my $cpustr = print_cpu_device($conf,$i);
3790 push @$cmd, '-device', $cpustr;
3791 }
3792
3793 } else {
3794
3795 push @$cmd, '-smp', "$vcpus,sockets=$sockets,cores=$cores,maxcpus=$maxcpus";
3796 }
3797 push @$cmd, '-nodefaults';
3798
3799 push @$cmd, '-boot', "menu=on,strict=on,reboot-timeout=1000,splash=/usr/share/qemu-server/bootsplash.jpg";
3800
3801 push $machineFlags->@*, 'acpi=off' if defined($conf->{acpi}) && $conf->{acpi} == 0;
3802
3803 push @$cmd, '-no-reboot' if defined($conf->{reboot}) && $conf->{reboot} == 0;
3804
3805 if ($vga->{type} && $vga->{type} !~ m/^serial\d+$/ && $vga->{type} ne 'none'){
3806 push @$devices, '-device', print_vga_device(
3807 $conf, $vga, $arch, $machine_version, $machine_type, undef, $qxlnum, $bridges);
3808
3809 push @$cmd, '-display', 'egl-headless,gl=core' if $vga->{type} eq 'virtio-gl'; # VIRGL
3810
3811 my $socket = PVE::QemuServer::Helpers::vnc_socket($vmid);
3812 push @$cmd, '-vnc', "unix:$socket,password=on";
3813 } else {
3814 push @$cmd, '-vga', 'none' if $vga->{type} eq 'none';
3815 push @$cmd, '-nographic';
3816 }
3817
3818 # time drift fix
3819 my $tdf = defined($conf->{tdf}) ? $conf->{tdf} : $defaults->{tdf};
3820 my $useLocaltime = $conf->{localtime};
3821
3822 if ($winversion >= 5) { # windows
3823 $useLocaltime = 1 if !defined($conf->{localtime});
3824
3825 # use time drift fix when acpi is enabled
3826 if (!(defined($conf->{acpi}) && $conf->{acpi} == 0)) {
3827 $tdf = 1 if !defined($conf->{tdf});
3828 }
3829 }
3830
3831 if ($winversion >= 6) {
3832 push @$globalFlags, 'kvm-pit.lost_tick_policy=discard';
3833 push @$machineFlags, 'hpet=off';
3834 }
3835
3836 push @$rtcFlags, 'driftfix=slew' if $tdf;
3837
3838 if ($conf->{startdate} && $conf->{startdate} ne 'now') {
3839 push @$rtcFlags, "base=$conf->{startdate}";
3840 } elsif ($useLocaltime) {
3841 push @$rtcFlags, 'base=localtime';
3842 }
3843
3844 if ($forcecpu) {
3845 push @$cmd, '-cpu', $forcecpu;
3846 } else {
3847 push @$cmd, get_cpu_options($conf, $arch, $kvm, $kvm_off, $machine_version, $winversion, $gpu_passthrough);
3848 }
3849
3850 PVE::QemuServer::Memory::config(
3851 $conf, $vmid, $sockets, $cores, $defaults, $hotplug_features->{memory}, $cmd);
3852
3853 push @$cmd, '-S' if $conf->{freeze};
3854
3855 push @$cmd, '-k', $conf->{keyboard} if defined($conf->{keyboard});
3856
3857 my $guest_agent = parse_guest_agent($conf);
3858
3859 if ($guest_agent->{enabled}) {
3860 my $qgasocket = PVE::QemuServer::Helpers::qmp_socket($vmid, 1);
3861 push @$devices, '-chardev', "socket,path=$qgasocket,server=on,wait=off,id=qga0";
3862
3863 if (!$guest_agent->{type} || $guest_agent->{type} eq 'virtio') {
3864 my $pciaddr = print_pci_addr("qga0", $bridges, $arch, $machine_type);
3865 push @$devices, '-device', "virtio-serial,id=qga0$pciaddr";
3866 push @$devices, '-device', 'virtserialport,chardev=qga0,name=org.qemu.guest_agent.0';
3867 } elsif ($guest_agent->{type} eq 'isa') {
3868 push @$devices, '-device', "isa-serial,chardev=qga0";
3869 }
3870 }
3871
3872 my $rng = $conf->{rng0} ? parse_rng($conf->{rng0}) : undef;
3873 if ($rng && $version_guard->(4, 1, 2)) {
3874 check_rng_source($rng->{source});
3875
3876 my $max_bytes = $rng->{max_bytes} // $rng_fmt->{max_bytes}->{default};
3877 my $period = $rng->{period} // $rng_fmt->{period}->{default};
3878 my $limiter_str = "";
3879 if ($max_bytes) {
3880 $limiter_str = ",max-bytes=$max_bytes,period=$period";
3881 }
3882
3883 my $rng_addr = print_pci_addr("rng0", $bridges, $arch, $machine_type);
3884 push @$devices, '-object', "rng-random,filename=$rng->{source},id=rng0";
3885 push @$devices, '-device', "virtio-rng-pci,rng=rng0$limiter_str$rng_addr";
3886 }
3887
3888 my $spice_port;
3889
3890 if ($qxlnum || $vga->{type} =~ /^virtio/) {
3891 if ($qxlnum > 1) {
3892 if ($winversion){
3893 for (my $i = 1; $i < $qxlnum; $i++){
3894 push @$devices, '-device', print_vga_device(
3895 $conf, $vga, $arch, $machine_version, $machine_type, $i, $qxlnum, $bridges);
3896 }
3897 } else {
3898 # assume other OS works like Linux
3899 my ($ram, $vram) = ("134217728", "67108864");
3900 if ($vga->{memory}) {
3901 $ram = PVE::Tools::convert_size($qxlnum*4*$vga->{memory}, 'mb' => 'b');
3902 $vram = PVE::Tools::convert_size($qxlnum*2*$vga->{memory}, 'mb' => 'b');
3903 }
3904 push @$cmd, '-global', "qxl-vga.ram_size=$ram";
3905 push @$cmd, '-global', "qxl-vga.vram_size=$vram";
3906 }
3907 }
3908
3909 my $pciaddr = print_pci_addr("spice", $bridges, $arch, $machine_type);
3910
3911 my $pfamily = PVE::Tools::get_host_address_family($nodename);
3912 my @nodeaddrs = PVE::Tools::getaddrinfo_all('localhost', family => $pfamily);
3913 die "failed to get an ip address of type $pfamily for 'localhost'\n" if !@nodeaddrs;
3914
3915 push @$devices, '-device', "virtio-serial,id=spice$pciaddr";
3916 push @$devices, '-chardev', "spicevmc,id=vdagent,name=vdagent";
3917 push @$devices, '-device', "virtserialport,chardev=vdagent,name=com.redhat.spice.0";
3918
3919 my $localhost = PVE::Network::addr_to_ip($nodeaddrs[0]->{addr});
3920 $spice_port = PVE::Tools::next_spice_port($pfamily, $localhost);
3921
3922 my $spice_enhancement_str = $conf->{spice_enhancements} // '';
3923 my $spice_enhancement = parse_property_string($spice_enhancements_fmt, $spice_enhancement_str);
3924 if ($spice_enhancement->{foldersharing}) {
3925 push @$devices, '-chardev', "spiceport,id=foldershare,name=org.spice-space.webdav.0";
3926 push @$devices, '-device', "virtserialport,chardev=foldershare,name=org.spice-space.webdav.0";
3927 }
3928
3929 my $spice_opts = "tls-port=${spice_port},addr=$localhost,tls-ciphers=HIGH,seamless-migration=on";
3930 $spice_opts .= ",streaming-video=$spice_enhancement->{videostreaming}"
3931 if $spice_enhancement->{videostreaming};
3932
3933 push @$devices, '-spice', "$spice_opts";
3934 }
3935
3936 # enable balloon by default, unless explicitly disabled
3937 if (!defined($conf->{balloon}) || $conf->{balloon}) {
3938 my $pciaddr = print_pci_addr("balloon0", $bridges, $arch, $machine_type);
3939 my $ballooncmd = "virtio-balloon-pci,id=balloon0$pciaddr";
3940 $ballooncmd .= ",free-page-reporting=on" if min_version($machine_version, 6, 2);
3941 push @$devices, '-device', $ballooncmd;
3942 }
3943
3944 if ($conf->{watchdog}) {
3945 my $wdopts = parse_watchdog($conf->{watchdog});
3946 my $pciaddr = print_pci_addr("watchdog", $bridges, $arch, $machine_type);
3947 my $watchdog = $wdopts->{model} || 'i6300esb';
3948 push @$devices, '-device', "$watchdog$pciaddr";
3949 push @$devices, '-watchdog-action', $wdopts->{action} if $wdopts->{action};
3950 }
3951
3952 my $vollist = [];
3953 my $scsicontroller = {};
3954 my $ahcicontroller = {};
3955 my $scsihw = defined($conf->{scsihw}) ? $conf->{scsihw} : $defaults->{scsihw};
3956
3957 # Add iscsi initiator name if available
3958 if (my $initiator = get_initiator_name()) {
3959 push @$devices, '-iscsi', "initiator-name=$initiator";
3960 }
3961
3962 PVE::QemuConfig->foreach_volume($conf, sub {
3963 my ($ds, $drive) = @_;
3964
3965 if (PVE::Storage::parse_volume_id($drive->{file}, 1)) {
3966 check_volume_storage_type($storecfg, $drive->{file});
3967 push @$vollist, $drive->{file};
3968 }
3969
3970 # ignore efidisk here, already added in bios/fw handling code above
3971 return if $drive->{interface} eq 'efidisk';
3972 # similar for TPM
3973 return if $drive->{interface} eq 'tpmstate';
3974
3975 $use_virtio = 1 if $ds =~ m/^virtio/;
3976
3977 $drive->{bootindex} = $bootorder->{$ds} if $bootorder->{$ds};
3978
3979 if ($drive->{interface} eq 'virtio'){
3980 push @$cmd, '-object', "iothread,id=iothread-$ds" if $drive->{iothread};
3981 }
3982
3983 if ($drive->{interface} eq 'scsi') {
3984
3985 my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $drive);
3986
3987 die "scsi$drive->{index}: machine version 4.1~pve2 or higher is required to use more than 14 SCSI disks\n"
3988 if $drive->{index} > 13 && !&$version_guard(4, 1, 2);
3989
3990 my $pciaddr = print_pci_addr("$controller_prefix$controller", $bridges, $arch, $machine_type);
3991 my $scsihw_type = $scsihw =~ m/^virtio-scsi-single/ ? "virtio-scsi-pci" : $scsihw;
3992
3993 my $iothread = '';
3994 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{iothread}){
3995 $iothread .= ",iothread=iothread-$controller_prefix$controller";
3996 push @$cmd, '-object', "iothread,id=iothread-$controller_prefix$controller";
3997 } elsif ($drive->{iothread}) {
3998 log_warn(
3999 "iothread is only valid with virtio disk or virtio-scsi-single controller, ignoring\n"
4000 );
4001 }
4002
4003 my $queues = '';
4004 if($conf->{scsihw} && $conf->{scsihw} eq "virtio-scsi-single" && $drive->{queues}){
4005 $queues = ",num_queues=$drive->{queues}";
4006 }
4007
4008 push @$devices, '-device', "$scsihw_type,id=$controller_prefix$controller$pciaddr$iothread$queues"
4009 if !$scsicontroller->{$controller};
4010 $scsicontroller->{$controller}=1;
4011 }
4012
4013 if ($drive->{interface} eq 'sata') {
4014 my $controller = int($drive->{index} / $PVE::QemuServer::Drive::MAX_SATA_DISKS);
4015 my $pciaddr = print_pci_addr("ahci$controller", $bridges, $arch, $machine_type);
4016 push @$devices, '-device', "ahci,id=ahci$controller,multifunction=on$pciaddr"
4017 if !$ahcicontroller->{$controller};
4018 $ahcicontroller->{$controller}=1;
4019 }
4020
4021 my $pbs_conf = $pbs_backing->{$ds};
4022 my $pbs_name = undef;
4023 if ($pbs_conf) {
4024 $pbs_name = "drive-$ds-pbs";
4025 push @$devices, '-blockdev', print_pbs_blockdev($pbs_conf, $pbs_name);
4026 }
4027
4028 my $drive_cmd = print_drive_commandline_full(
4029 $storecfg, $vmid, $drive, $pbs_name, min_version($kvmver, 6, 0));
4030
4031 # extra protection for templates, but SATA and IDE don't support it..
4032 $drive_cmd .= ',readonly=on' if drive_is_read_only($conf, $drive);
4033
4034 push @$devices, '-drive',$drive_cmd;
4035 push @$devices, '-device', print_drivedevice_full(
4036 $storecfg, $conf, $vmid, $drive, $bridges, $arch, $machine_type);
4037 });
4038
4039 for (my $i = 0; $i < $MAX_NETS; $i++) {
4040 my $netname = "net$i";
4041
4042 next if !$conf->{$netname};
4043 my $d = parse_net($conf->{$netname});
4044 next if !$d;
4045 # save the MAC addr here (could be auto-gen. in some odd setups) for FDB registering later?
4046
4047 $use_virtio = 1 if $d->{model} eq 'virtio';
4048
4049 $d->{bootindex} = $bootorder->{$netname} if $bootorder->{$netname};
4050
4051 my $netdevfull = print_netdev_full($vmid, $conf, $arch, $d, $netname);
4052 push @$devices, '-netdev', $netdevfull;
4053
4054 my $netdevicefull = print_netdevice_full(
4055 $vmid, $conf, $d, $netname, $bridges, $use_old_bios_files, $arch, $machine_type, $machine_version);
4056
4057 push @$devices, '-device', $netdevicefull;
4058 }
4059
4060 if ($conf->{ivshmem}) {
4061 my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
4062
4063 my $bus;
4064 if ($q35) {
4065 $bus = print_pcie_addr("ivshmem");
4066 } else {
4067 $bus = print_pci_addr("ivshmem", $bridges, $arch, $machine_type);
4068 }
4069
4070 my $ivshmem_name = $ivshmem->{name} // $vmid;
4071 my $path = '/dev/shm/pve-shm-' . $ivshmem_name;
4072
4073 push @$devices, '-device', "ivshmem-plain,memdev=ivshmem$bus,";
4074 push @$devices, '-object', "memory-backend-file,id=ivshmem,share=on,mem-path=$path"
4075 .",size=$ivshmem->{size}M";
4076 }
4077
4078 # pci.4 is nested in pci.1
4079 $bridges->{1} = 1 if $bridges->{4};
4080
4081 if (!$q35) { # add pci bridges
4082 if (min_version($machine_version, 2, 3)) {
4083 $bridges->{1} = 1;
4084 $bridges->{2} = 1;
4085 }
4086 $bridges->{3} = 1 if $scsihw =~ m/^virtio-scsi-single/;
4087 }
4088
4089 for my $k (sort {$b cmp $a} keys %$bridges) {
4090 next if $q35 && $k < 4; # q35.cfg already includes bridges up to 3
4091
4092 my $k_name = $k;
4093 if ($k == 2 && $legacy_igd) {
4094 $k_name = "$k-igd";
4095 }
4096 my $pciaddr = print_pci_addr("pci.$k_name", undef, $arch, $machine_type);
4097 my $devstr = "pci-bridge,id=pci.$k,chassis_nr=$k$pciaddr";
4098
4099 if ($q35) { # add after -readconfig pve-q35.cfg
4100 splice @$devices, 2, 0, '-device', $devstr;
4101 } else {
4102 unshift @$devices, '-device', $devstr if $k > 0;
4103 }
4104 }
4105
4106 if (!$kvm) {
4107 push @$machineFlags, 'accel=tcg';
4108 }
4109
4110 push @$machineFlags, 'smm=off' if should_disable_smm($conf, $vga, $machine_type);
4111
4112 my $machine_type_min = $machine_type;
4113 if ($add_pve_version) {
4114 $machine_type_min =~ s/\+pve\d+$//;
4115 $machine_type_min .= "+pve$required_pve_version";
4116 }
4117 push @$machineFlags, "type=${machine_type_min}";
4118
4119 push @$cmd, @$devices;
4120 push @$cmd, '-rtc', join(',', @$rtcFlags) if scalar(@$rtcFlags);
4121 push @$cmd, '-machine', join(',', @$machineFlags) if scalar(@$machineFlags);
4122 push @$cmd, '-global', join(',', @$globalFlags) if scalar(@$globalFlags);
4123
4124 if (my $vmstate = $conf->{vmstate}) {
4125 my $statepath = PVE::Storage::path($storecfg, $vmstate);
4126 push @$vollist, $vmstate;
4127 push @$cmd, '-loadstate', $statepath;
4128 print "activating and using '$vmstate' as vmstate\n";
4129 }
4130
4131 if (PVE::QemuConfig->is_template($conf)) {
4132 # needed to workaround base volumes being read-only
4133 push @$cmd, '-snapshot';
4134 }
4135
4136 # add custom args
4137 if ($conf->{args}) {
4138 my $aa = PVE::Tools::split_args($conf->{args});
4139 push @$cmd, @$aa;
4140 }
4141
4142 return wantarray ? ($cmd, $vollist, $spice_port, $pci_devices) : $cmd;
4143 }
4144
# Sanity-check the host file $source before it is used as backend for a VirtIO
# RNG device. Dies with a descriptive message if the source cannot be used.
sub check_rng_source {
    my ($source) = @_;

    # primarily matters for /dev/hwrng, but verifying every source is harmless
    if (!-e $source) {
        die "cannot create VirtIO RNG device: source file '$source' doesn't exist\n";
    }

    my $rng_current = '/sys/devices/virtual/misc/hw_random/rng_current';
    my $hwrng_without_backend =
        $source eq '/dev/hwrng' && file_read_firstline($rng_current) eq 'none';

    if ($hwrng_without_backend) {
        # Abort here, QEMU would crash on the first RNG access otherwise. rng_current
        # cannot be switched to 'none' by hand, so this is only a concern at start.
        die "Cannot start VM with passed-through RNG device: '/dev/hwrng' exists, but"
            ." '$rng_current' is set to 'none'. Ensure that a compatible hardware-RNG is attached"
            ." to the host.\n";
    }
}
4161
# Query the running VM $vmid for its SPICE port. The TLS port is preferred over
# the plain port; dies if neither is reported.
sub spice_port {
    my ($vmid) = @_;

    my $spice_info = mon_cmd($vmid, 'query-spice');

    my $port = $spice_info->{'tls-port'} || $spice_info->{'port'};
    die "no spice port\n" if !$port;

    return $port;
}
4169
# Collect the IDs of the devices currently present in the running VM $vmid.
#
# Returns a hash reference keyed by device ID. Sources are:
#  - 'query-pci': every PCI device with a qdev_id, recursing into PCI bridges.
#    A bridge entry holds 1 plus the number of devices directly behind it.
#  - 'query-block': block devices named 'drive-<id>' are recorded as '<id>'.
#  - 'query-mice': records 'tablet' if a 'QEMU HID Tablet' is present.
#  - 'qom-list' on /machine/peripheral: entries named usb<N> / usbredirdev<N>.
sub vm_devices_list {
    my ($vmid) = @_;

    my $devices = {};

    my $pci_buses = mon_cmd($vmid, 'query-pci');
    my $devices_to_check = [];
    for my $pcibus (@$pci_buses) {
        push @$devices_to_check, @{$pcibus->{devices}};
    }

    # breadth-first walk: each round handles one nesting level of bridges
    while (@$devices_to_check) {
        my $to_check = [];
        for my $d (@$devices_to_check) {
            my $id = $d->{'qdev_id'};
            $devices->{$id} = 1 if $id;

            next if !$d->{'pci_bridge'} || !$d->{'pci_bridge'}->{devices};
            my $children = $d->{'pci_bridge'}->{devices};

            # only count children for bridges that have an ID, otherwise a bogus
            # entry keyed by the empty string would end up in the result
            $devices->{$id} += scalar(@$children) if defined($id) && $id ne '';
            push @$to_check, @$children;
        }
        $devices_to_check = $to_check;
    }

    my $resblock = mon_cmd($vmid, 'query-block');
    for my $block (@$resblock) {
        if ($block->{device} =~ m/^drive-(\S+)/) {
            $devices->{$1} = 1;
        }
    }

    my $resmice = mon_cmd($vmid, 'query-mice');
    for my $mice (@$resmice) {
        if ($mice->{name} eq 'QEMU HID Tablet') {
            $devices->{tablet} = 1;
            last;
        }
    }

    # there is no 'query-usb', but USB devices show up as entries of
    # 'qom-list path=/machine/peripheral'
    my $resperipheral = mon_cmd($vmid, 'qom-list', path => '/machine/peripheral');
    for my $per (@$resperipheral) {
        if ($per->{name} =~ m/^usb(?:redirdev)?\d+$/) {
            $devices->{$per->{name}} = 1;
        }
    }

    return $devices;
}
4219
# Hot-plug the device $deviceid into the running VM $vmid. $device is the parsed
# configuration of that device where the device type needs one.
#
# Returns 1 if the device was plugged or was already present, returns nothing if
# adding the netdev backend of a network device did not succeed, and dies on
# errors or for device IDs that cannot be hot-plugged.
sub vm_deviceplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    my $q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$deviceid});

    # make sure the PCI bridge the device needs is there
    qemu_add_pci_bridge($storecfg, $conf, $vmid, $deviceid, $arch, $machine_type);

    # remove the already added drive again after a failed plug and re-throw $err
    my $drop_drive_and_die = sub {
        my ($err) = @_;
        eval { qemu_drivedel($vmid, $deviceid); };
        warn $@ if $@;
        die $err;
    };

    if ($deviceid eq 'tablet') {
        qemu_deviceadd($vmid, print_tabletdevice_full($conf, $arch));
    } elsif ($deviceid eq 'keyboard') {
        qemu_deviceadd($vmid, print_keyboarddevice_full($conf, $arch));
    } elsif ($deviceid =~ m/^usbredirdev(\d+)$/) {
        my $index = $1;
        qemu_spice_usbredir_chardev_add($vmid, "usbredirchardev$index");
        qemu_deviceadd(
            $vmid, PVE::QemuServer::USB::print_spice_usbdevice($index, "xhci", $index + 1));
    } elsif ($deviceid =~ m/^usb(\d+)$/) {
        qemu_deviceadd(
            $vmid,
            PVE::QemuServer::USB::print_usbdevice_full($conf, $deviceid, $device, {}, $1 + 1),
        );
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        qemu_iothread_add($vmid, $deviceid, $device);
        qemu_driveadd($storecfg, $vmid, $device);

        my $drive_device = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);
        qemu_deviceadd($vmid, $drive_device);

        eval { qemu_deviceaddverify($vmid, $deviceid); };
        $drop_drive_and_die->($@) if $@;
    } elsif ($deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/) {
        my $scsihw = $conf->{scsihw} // "lsi";
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);

        my $controller_type = $scsihw;
        $controller_type = "virtio-scsi-pci" if $scsihw eq 'virtio-scsi-single';

        my $controller = "$controller_type,id=$deviceid$pciaddr";

        # iothread and queue settings only apply to dedicated virtio-scsi controllers
        my $is_virtioscsi = $deviceid =~ m/^virtioscsi(\d+)$/;

        if ($is_virtioscsi && $device->{iothread}) {
            qemu_iothread_add($vmid, $deviceid, $device);
            $controller .= ",iothread=iothread-$deviceid";
        }

        $controller .= ",num_queues=$device->{queues}" if $is_virtioscsi && $device->{queues};

        qemu_deviceadd($vmid, $controller);
        qemu_deviceaddverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        qemu_findorcreatescsihw($storecfg, $conf, $vmid, $device, $arch, $machine_type);
        qemu_driveadd($storecfg, $vmid, $device);

        my $drive_device = print_drivedevice_full(
            $storecfg, $conf, $vmid, $device, undef, $arch, $machine_type);

        eval { qemu_deviceadd($vmid, $drive_device); };
        $drop_drive_and_die->($@) if $@;
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        return if !qemu_netdevadd($vmid, $conf, $arch, $device, $deviceid);

        # the network device string is built from the machine type of the running VM
        my $net_machine_type = PVE::QemuServer::Machine::qemu_machine_pxe($vmid, $conf);
        my $net_machine_version = PVE::QemuServer::Machine::extract_version($net_machine_type);
        my $use_old_bios_files = undef;
        ($use_old_bios_files, $net_machine_type) = qemu_use_old_bios_files($net_machine_type);

        my $net_device = print_netdevice_full(
            $vmid,
            $conf,
            $device,
            $deviceid,
            undef,
            $use_old_bios_files,
            $arch,
            $net_machine_type,
            $net_machine_version,
        );
        qemu_deviceadd($vmid, $net_device);

        eval {
            qemu_deviceaddverify($vmid, $deviceid);
            qemu_set_link_status($vmid, $deviceid, !$device->{link_down});
        };
        if (my $err = $@) {
            # drop the netdev backend again if the device did not show up
            eval { qemu_netdevdel($vmid, $deviceid); };
            warn $@ if $@;
            die $err;
        }
    } elsif (!$q35 && $deviceid =~ m/^(pci\.)(\d+)$/) {
        my $bridge_nr = $2;
        my $pciaddr = print_pci_addr($deviceid, undef, $arch, $machine_type);

        qemu_deviceadd($vmid, "pci-bridge,id=pci.$bridge_nr,chassis_nr=$bridge_nr$pciaddr");
        qemu_deviceaddverify($vmid, $deviceid);
    } else {
        die "can't hotplug device '$deviceid'\n";
    }

    return 1;
}
4316
# Hot-unplug the device $deviceid from the running VM $vmid.
#
# Returns 1 if the device was removed or was not present in the first place.
# Dies when asked to unplug a boot disk or an unsupported device ID.
#
# FIXME: errors should be raised as exceptions here!
sub vm_deviceunplug {
    my ($vmid, $conf, $deviceid) = @_;

    my $present = vm_devices_list($vmid);
    return 1 if !defined($present->{$deviceid});

    my $bootdisks = PVE::QemuServer::Drive::get_bootdisks($conf);
    if (grep { $_ eq $deviceid } @$bootdisks) {
        die "can't unplug bootdisk '$deviceid'\n";
    }

    if ($deviceid eq 'tablet' || $deviceid eq 'keyboard' || $deviceid eq 'xhci') {
        # no removal verification for these
        qemu_devicedel($vmid, $deviceid);
    } elsif (
        $deviceid =~ m/^usbredirdev\d+$/
        || $deviceid =~ m/^usb\d+$/
        || $deviceid =~ m/^(virtioscsi|scsihw)(\d+)$/
    ) {
        # plain devices: delete and wait until the device is gone
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
    } elsif ($deviceid =~ m/^(virtio)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_iothread_del($vmid, $deviceid, $drive);
    } elsif ($deviceid =~ m/^(scsi)(\d+)$/) {
        my $drive = parse_drive($deviceid, $conf->{$deviceid});

        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_drivedel($vmid, $deviceid);
        qemu_deletescsihw($conf, $vmid, $deviceid);

        # with virtio-scsi-single the iothread belongs to the per-disk controller
        my $single_controller = $conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single');
        qemu_iothread_del($vmid, "virtioscsi$drive->{index}", $drive) if $single_controller;
    } elsif ($deviceid =~ m/^(net)(\d+)$/) {
        qemu_devicedel($vmid, $deviceid);
        qemu_devicedelverify($vmid, $deviceid);
        qemu_netdevdel($vmid, $deviceid);
    } else {
        die "can't unplug device '$deviceid'\n";
    }

    return 1;
}
4365
# Add a 'spicevmc' character device of type 'usbredir' with ID $id to the
# running VM $vmid.
sub qemu_spice_usbredir_chardev_add {
    my ($vmid, $id) = @_;

    my $backend = {
        type => 'spicevmc',
        data => { type => "usbredir" },
    };

    mon_cmd($vmid, "chardev-add", id => $id, backend => $backend);
}
4379
# Issue 'device_add' on the running VM $vmid. $devicefull is a device string in
# '<driver>,key=value,...' form; it gets split at every '=' and ',' into the
# command's arguments, so neither character may occur inside a value.
sub qemu_deviceadd {
    my ($vmid, $devicefull) = @_;

    # the leading driver name has no key of its own, give it one
    my %arguments = split(/[=,]/, "driver=" . $devicefull);

    mon_cmd($vmid, "device_add", %arguments);
}
4388
# Issue 'device_del' for $deviceid on the running VM $vmid and hand back the
# monitor's answer.
sub qemu_devicedel {
    my ($vmid, $deviceid) = @_;

    my $answer = mon_cmd($vmid, "device_del", id => $deviceid);
    return $answer;
}
4394
# Create the iothread object 'iothread-$deviceid' in the running VM $vmid if
# $device has iothread enabled and the object does not exist yet.
sub qemu_iothread_add {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectadd($vmid, $object_id, "iothread") if !$existing->{$object_id};
}
4403
# Remove the iothread object 'iothread-$deviceid' from the running VM $vmid if
# $device has iothread enabled and the object currently exists.
sub qemu_iothread_del {
    my ($vmid, $deviceid, $device) = @_;

    return if !$device->{iothread};

    my $object_id = "iothread-$deviceid";
    my $existing = vm_iothreads_list($vmid);

    qemu_objectdel($vmid, $object_id) if $existing->{$object_id};
}
4412
# Create an object of QOM type $qomtype with ID $objectid in the running VM
# $vmid via 'object-add'. Returns 1.
sub qemu_objectadd {
    my ($vmid, $objectid, $qomtype) = @_;

    my @object_args = (id => $objectid, "qom-type" => $qomtype);
    mon_cmd($vmid, "object-add", @object_args);

    return 1;
}
4420
# Delete the object with ID $objectid from the running VM $vmid via
# 'object-del'. Returns 1.
sub qemu_objectdel {
    my ($vmid, $objectid) = @_;

    my @object_args = (id => $objectid);
    mon_cmd($vmid, "object-del", @object_args);

    return 1;
}
4428
# Add the drive described by $device to the running VM $vmid using the HMP
# 'drive_add' command. Returns 1 on success, dies with the monitor output
# otherwise.
sub qemu_driveadd {
    my ($storecfg, $vmid, $device) = @_;

    # the last argument of print_drive_commandline_full is true from QEMU 6.0 on
    my $running_version = get_running_qemu_version($vmid);
    my $io_uring = min_version($running_version, 6, 0);

    my $drive_opts = print_drive_commandline_full($storecfg, $vmid, $device, undef, $io_uring);
    # backslashes have to be doubled inside the quoted HMP argument
    $drive_opts =~ s/\\/\\\\/g;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_add auto \"$drive_opts\"");

    # QEMU answers with "OK" when the drive was added
    die "adding drive failed: $reply\n" if !($reply =~ m/OK/s);

    return 1;
}
4443
# Remove the drive 'drive-$deviceid' from the running VM $vmid using the HMP
# 'drive_del' command. Returns 1 on success, dies with the monitor output
# otherwise.
sub qemu_drivedel {
    my ($vmid, $deviceid) = @_;

    my $reply = PVE::QemuServer::Monitor::hmp_cmd($vmid, "drive_del drive-$deviceid");
    $reply =~ s/^\s+//;

    # Empty output means success. A "not found" error means the drive was
    # auto-deleted already, which is just as good.
    my $deleted = $reply eq "" || $reply =~ m/Device \'.*?\' not found/s;

    die "deleting drive $deviceid failed : $reply\n" if !$deleted;

    return 1;
}
4457
# Wait until $deviceid shows up in the device list of the running VM $vmid,
# checking up to six times with a one second pause after each miss. Returns 1
# once the device is found, dies otherwise.
sub qemu_deviceaddverify {
    my ($vmid, $deviceid) = @_;

    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hotplug device '$deviceid'\n";
}
4469
4470
# Wait until $deviceid has vanished from the device list of the running VM
# $vmid, checking up to six times with a one second pause after each miss.
# Returns 1 once the device is gone, dies otherwise.
sub qemu_devicedelverify {
    my ($vmid, $deviceid) = @_;

    # device_del works asynchronously and its empty reply is no reliable sign of
    # success, so the removal has to be confirmed by polling
    for my $attempt (0 .. 5) {
        my $present = vm_devices_list($vmid);
        return 1 if !defined($present->{$deviceid});
        sleep 1;
    }

    die "error on hot-unplugging device '$deviceid'\n";
}
4485
# Make sure the SCSI controller that the drive $device attaches to exists in
# the running VM $vmid, hot-plugging it when it is missing. Returns 1.
sub qemu_findorcreatescsihw {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    my ($maxdev, $controller, $controller_prefix) = scsihw_infos($conf, $device);
    my $controller_id = "$controller_prefix$controller";

    my $present = vm_devices_list($vmid);
    return 1 if defined($present->{$controller_id});

    vm_deviceplug($storecfg, $conf, $vmid, $controller_id, $device, $arch, $machine_type);

    return 1;
}
4500
# Unplug the SCSI controller used by the drive $opt from the running VM $vmid,
# unless another plugged SCSI drive still matches the index check below.
# Returns 1.
sub qemu_deletescsihw {
    my ($conf, $vmid, $opt) = @_;

    my $device = parse_drive($opt, $conf->{$opt});

    # virtio-scsi-single uses one controller per disk, so it can always go
    if ($conf->{scsihw} && ($conf->{scsihw} eq 'virtio-scsi-single')) {
        vm_deviceunplug($vmid, $conf, "virtioscsi$device->{index}");
        return 1;
    }

    my ($maxdev, $controller) = scsihw_infos($conf, $device);
    my $index_limit = ($maxdev - 1) * ($controller + 1);

    my $present = vm_devices_list($vmid);
    for my $name (keys %{$present}) {
        next if !is_valid_drivename($name);

        my $drive = parse_drive($name, $conf->{$name});
        # keep the controller while a plugged SCSI drive is below the index limit
        return 1 if $drive->{interface} eq 'scsi' && $drive->{index} < $index_limit;
    }

    vm_deviceunplug($vmid, $conf, "scsihw$controller");

    return 1;
}
4529
# Make sure the PCI bridge required by the device ID $device exists in the
# running VM $vmid, hot-plugging the bridge when it is missing.
#
# Nothing is done when no bridge is needed or the bridge number is below 1.
# Always returns 1; dies if plugging the bridge fails.
sub qemu_add_pci_bridge {
    my ($storecfg, $conf, $vmid, $device, $arch, $machine_type) = @_;

    # print_pci_addr() is called only to fill $bridges with the bridge number
    # the device needs, the address it returns is not used here
    my $bridges = {};
    print_pci_addr($device, $bridges, $arch, $machine_type);

    my $bridgeid;
    while (my ($k, $v) = each %$bridges) {
        $bridgeid = $k;
    }
    return 1 if !defined($bridgeid) || $bridgeid < 1;

    my $bridge = "pci.$bridgeid";
    my $devices_list = vm_devices_list($vmid);

    if (!defined($devices_list->{$bridge})) {
        # vm_deviceplug() expects a device config before $arch and $machine_type.
        # A bridge has none, so pass an empty one to keep the remaining arguments
        # in their proper positions.
        vm_deviceplug($storecfg, $conf, $vmid, $bridge, {}, $arch, $machine_type);
    }

    return 1;
}
4553
# Set the link state of the network device $device in the running VM $vmid:
# up if $up is true, down otherwise.
sub qemu_set_link_status {
    my ($vmid, $device, $up) = @_;

    my $link_state = $up ? JSON::true : JSON::false;

    mon_cmd($vmid, "set_link", name => $device, up => $link_state);
}
4560
# Add the network backend for $deviceid to the running VM $vmid via
# 'netdev_add'. Returns 1.
sub qemu_netdevadd {
    my ($vmid, $conf, $arch, $device, $deviceid) = @_;

    my $netdev = print_netdev_full($vmid, $conf, $arch, $device, $deviceid, 1);
    my %arguments = split(/[=,]/, $netdev);

    # 'vhost' and 'queues' are converted from their string form to a JSON
    # boolean and a number respectively
    $arguments{vhost} = JSON::boolean(PVE::JSONSchema::parse_boolean($arguments{vhost}))
        if defined($arguments{vhost});
    $arguments{queues} = $arguments{queues} + 0 if defined($arguments{queues});

    mon_cmd($vmid, "netdev_add", %arguments);

    return 1;
}
4578
# Remove the network backend with ID $deviceid from the running VM $vmid via
# 'netdev_del'.
sub qemu_netdevdel {
    my ($vmid, $deviceid) = @_;

    my @netdev_args = (id => $deviceid);

    mon_cmd($vmid, "netdev_del", @netdev_args);
}
4584
# Replace the USB device $deviceid of the running VM $vmid with the one
# described by $device: the old device is unplugged, an 'xhci' controller is
# added if the VM has none, then the new device is plugged. Does nothing if
# $device is false.
sub qemu_usb_hotplug {
    my ($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type) = @_;

    return if !$device;

    # get rid of the previous device first
    vm_deviceunplug($vmid, $conf, $deviceid);

    # the new device needs an xhci controller, add one if there is none yet
    my $present = vm_devices_list($vmid);
    if (!$present->{xhci}) {
        my $xhci_addr = print_pci_addr("xhci", undef, $arch, $machine_type);
        qemu_deviceadd($vmid, PVE::QemuServer::USB::print_qemu_xhci_controller($xhci_addr));
    }

    # finally plug the new device
    vm_deviceplug($storecfg, $conf, $vmid, $deviceid, $device, $arch, $machine_type);
}
4604
# Change the number of vCPUs of the running VM $vmid to $vcpus by hot-plugging
# or hot-unplugging CPUs one at a time. A false $vcpus means "all CPUs"
# (sockets * cores).
#
# On machine versions 2.7 and up, $conf->{vcpus} is updated and written to the
# VM configuration after every CPU that got plugged or unplugged. Dies if
# $vcpus exceeds the maximum, if unplugging is not supported or if the running
# VM does not match its configuration; raises a parameter exception for 'vcpus'
# if a CPU does not appear or disappear in time.
sub qemu_cpu_hotplug {
    my ($vmid, $conf, $vcpus) = @_;

    my $machine_type = PVE::QemuServer::Machine::get_current_qemu_machine($vmid);

    my $sockets = 1;
    $sockets = $conf->{smp} if $conf->{smp}; # old style - no longer used
    $sockets = $conf->{sockets} if $conf->{sockets};
    my $cores = $conf->{cores} || 1;
    my $maxcpus = $sockets * $cores;

    $vcpus = $maxcpus if !$vcpus;

    if ($vcpus > $maxcpus) {
        die "you can't add more vcpus than maxcpus\n";
    }

    my $currentvcpus = $conf->{vcpus} || $maxcpus;

    if ($vcpus < $currentvcpus) {
        die "cpu hot-unplugging requires qemu version 2.7 or higher\n"
            if !PVE::QemuServer::Machine::machine_version($machine_type, 2, 7);

        # remove the CPUs with the highest numbers first
        for (my $cpu = $currentvcpus; $cpu > $vcpus; $cpu--) {
            qemu_devicedel($vmid, "cpu$cpu");

            # poll until the guest reports one CPU less
            my $attempts = 0;
            my $running_cpus;
            until (scalar(@{ $running_cpus = mon_cmd($vmid, "query-cpus-fast") }) == $cpu - 1) {
                raise_param_exc({ vcpus => "error unplugging cpu$cpu" }) if $attempts > 5;
                $attempts++;
                sleep 1;
            }

            # persist the new count after each successful unplug
            $conf->{vcpus} = scalar(@{$running_cpus});
            PVE::QemuConfig->write_config($vmid, $conf);
        }

        return;
    }

    my $running_at_start = mon_cmd($vmid, "query-cpus-fast");
    if (scalar(@{$running_at_start}) != $currentvcpus) {
        die "vcpus in running vm does not match its configuration\n";
    }

    if (!PVE::QemuServer::Machine::machine_version($machine_type, 2, 7)) {
        # machine versions before 2.7 use the 'cpu-add' command
        for (my $cpu = $currentvcpus; $cpu < $vcpus; $cpu++) {
            mon_cmd($vmid, "cpu-add", id => int($cpu));
        }
        return;
    }

    for (my $cpu = $currentvcpus + 1; $cpu <= $vcpus; $cpu++) {
        my $cpu_device = print_cpu_device($conf, $cpu);
        qemu_deviceadd($vmid, $cpu_device);

        # poll until the guest reports the additional CPU
        my $attempts = 0;
        my $running_cpus;
        until (scalar(@{ $running_cpus = mon_cmd($vmid, "query-cpus-fast") }) == $cpu) {
            raise_param_exc({ vcpus => "error hotplugging cpu$cpu" }) if $attempts > 10;
            sleep 1;
            $attempts++;
        }

        # persist the new count after each successful hotplug
        $conf->{vcpus} = scalar(@{$running_cpus});
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
4679
# Apply I/O throttling limits to the block device $deviceid of VM $vmid via
# 'block_set_io_throttle'. Does nothing if the VM is not running.
#
# After $vmid and $deviceid, the limits are expected positionally in this order:
#   bps, bps_rd, bps_wr, iops, iops_rd, iops_wr,
#   bps_max, bps_rd_max, bps_wr_max, iops_max, iops_rd_max, iops_wr_max,
#   bps_max_length, bps_rd_max_length, bps_wr_max_length,
#   iops_max_length, iops_rd_max_length, iops_wr_max_length
# Every limit is passed on as an integer.
sub qemu_block_set_io_throttle {
    my ($vmid, $deviceid, @limits) = @_;

    return if !check_running($vmid);

    # names of the command arguments, in the order the values are passed in
    my @limit_names = qw(
        bps bps_rd bps_wr iops iops_rd iops_wr
        bps_max bps_rd_max bps_wr_max iops_max iops_rd_max iops_wr_max
        bps_max_length bps_rd_max_length bps_wr_max_length
        iops_max_length iops_rd_max_length iops_wr_max_length
    );

    my @limit_args = map { $limit_names[$_] => int($limits[$_]) } 0 .. $#limit_names;

    mon_cmd($vmid, "block_set_io_throttle", device => $deviceid, @limit_args);
}
4711
# Resize the volume $volid to $size on the storage layer and, if VM $vmid is
# running, tell QEMU the new size of the block device $deviceid as well.
sub qemu_block_resize {
    my ($vmid, $deviceid, $storecfg, $volid, $size) = @_;

    my $vm_is_running = check_running($vmid);

    PVE::Storage::volume_resize($storecfg, $volid, $size, $vm_is_running);

    return if !$vm_is_running;

    # QEMU is given the size rounded up to the next multiple of 1024
    my $aligned_size = $size + (1024 - $size % 1024) % 1024;

    mon_cmd(
        $vmid,
        "block_resize",
        device => $deviceid,
        size => int($aligned_size),
        timeout => 60,
    );
}
4732
# Create the snapshot $snap of the volume $volid. QEMU takes an internal
# snapshot of $deviceid if VM $vmid is running and do_snapshots_with_qemu()
# agrees; in all other cases the storage layer creates the snapshot.
sub qemu_volume_snapshot {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    if (check_running($vmid) && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        return mon_cmd(
            $vmid, 'blockdev-snapshot-internal-sync', device => $deviceid, name => $snap);
    }

    return PVE::Storage::volume_snapshot($storecfg, $volid, $snap);
}
4744
# Delete the snapshot $snap of the volume $volid. QEMU deletes its internal
# snapshot of $deviceid if VM $vmid is running with that volume in its current
# configuration and do_snapshots_with_qemu() agrees; in all other cases the
# storage layer deletes the snapshot.
sub qemu_volume_snapshot_delete {
    my ($vmid, $deviceid, $storecfg, $volid, $snap) = @_;

    my $running = check_running($vmid);

    if ($running) {
        # a running VM only counts if the volume is part of its current config
        my $conf = PVE::QemuConfig->load_config($vmid);

        my $volume_in_use = 0;
        PVE::QemuConfig->foreach_volume($conf, sub {
            my ($ds, $drive) = @_;
            $volume_in_use = 1 if $drive->{file} eq $volid;
        });

        $running = $volume_in_use ? 1 : undef;
    }

    if ($running && do_snapshots_with_qemu($storecfg, $volid, $deviceid)) {
        return mon_cmd(
            $vmid,
            'blockdev-snapshot-delete-internal-sync',
            device => $deviceid,
            name => $snap,
        );
    }

    return PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snap, $running);
}
4766
# Set the migration capabilities of the running VM $vmid. Every capability
# QEMU reports is set explicitly: 'auto-converge' and 'xbzrle' are enabled,
# 'dirty-bitmaps' follows the 'query-proxmox-support' answer, everything else
# is disabled. $savevm selects which dirty-bitmap support flag is consulted.
sub set_migration_caps {
    my ($vmid, $savevm) = @_;

    # errors are ignored on purpose here, dirty bitmaps simply stay disabled then
    my $qemu_support = eval { mon_cmd($vmid, "query-proxmox-support") };

    my $bitmap_prop = 'pbs-dirty-bitmap-migration';
    $bitmap_prop = 'pbs-dirty-bitmap-savevm' if $savevm;

    my $wanted = {
        "auto-converge" => 1,
        "xbzrle" => 1,
        "x-rdma-pin-all" => 0,
        "zero-blocks" => 0,
        "compress" => 0,
        "dirty-bitmaps" => $qemu_support->{$bitmap_prop} ? 1 : 0,
    };

    my $supported = mon_cmd($vmid, "query-migrate-capabilities");

    # capabilities that are not listed in $wanted get switched off
    my $capabilities = [
        map {
            +{
                capability => $_->{capability},
                state => $wanted->{ $_->{capability} } ? JSON::true : JSON::false,
            }
        } @$supported
    ];

    mon_cmd($vmid, "migrate-set-capabilities", capabilities => $capabilities);
}
4797
# Call $func once for every distinct volume referenced by the VM config $conf,
# including its pending section and all of its snapshots.
#
# $func is invoked as $func->($volid, $attr, @param). $attr is a hash with the
# flags gathered over all references to the volume: cdrom, replicate, shared,
# is_unused, is_attached, is_vmstate, is_tpmstate, and where applicable
# referenced_in_snapshot, referenced_in_pending, size and drivename.
sub foreach_volid {
    my ($conf, $func, @param) = @_;

    my $volhash = {};

    my $record_volume = sub {
        my ($key, $drive, $snapname, $pending) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $attr = ($volhash->{$volid} //= {});

        # cdrom only stays set while every reference to the volume is a CD-ROM
        $attr->{cdrom} //= 1;
        $attr->{cdrom} = 0 if !drive_is_cdrom($drive);

        # replicate unless the drive explicitly disables it
        $attr->{replicate} //= 0;
        $attr->{replicate} = 1 if ($drive->{replicate} // 1);

        $attr->{shared} //= 0;
        $attr->{shared} = 1 if $drive->{shared};

        $attr->{is_unused} //= 0;
        $attr->{is_unused} = 1 if $key =~ /^unused\d+$/;

        # attached means: referenced by the current config, outside of pending
        # changes and snapshots, and not flagged as unused
        $attr->{is_attached} //= 0;
        $attr->{is_attached} = 1
            if !$attr->{is_unused} && !defined($snapname) && !$pending;

        $attr->{referenced_in_snapshot}->{$snapname} = 1 if defined($snapname);

        $attr->{referenced_in_pending} = 1 if $pending;

        # the first size seen for the volume is kept
        my $size = $drive->{size};
        $attr->{size} //= $size if $size;

        $attr->{is_vmstate} //= 0;
        $attr->{is_vmstate} = 1 if $key eq 'vmstate';

        $attr->{is_tpmstate} //= 0;
        $attr->{is_tpmstate} = 1 if $key eq 'tpmstate0';

        $attr->{drivename} = $key if is_valid_drivename($key);
    };

    my $include_opts = {
        extra_keys => ['vmstate'],
        include_unused => 1,
    };

    # current configuration
    PVE::QemuConfig->foreach_volume_full($conf, $include_opts, $record_volume);

    # pending changes, if there are any
    if (defined($conf->{pending}) && $conf->{pending}->%*) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{pending}, $include_opts, $record_volume, undef, 1);
    }

    # snapshots
    for my $snapname (keys %{$conf->{snapshots}}) {
        PVE::QemuConfig->foreach_volume_full(
            $conf->{snapshots}->{$snapname}, $include_opts, $record_volume, $snapname);
    }

    for my $volid (keys %$volhash) {
        $func->($volid, $volhash->{$volid}, @param);
    }
}
4862
# Options whose pending value can be moved straight into the active config.
my $fast_plug_option = {
    map { $_ => 1 } qw(
        description
        hookscript
        lock
        migrate_downtime
        migrate_speed
        name
        onboot
        protection
        shares
        startup
        tags
        vmstatestorage
    ),
};

# every cloud-init option is treated the same way
$fast_plug_option->{$_} = 1 for keys %$confdesc_cloudinit;
4881
4882 # hotplug changes in [PENDING]
4883 # $selection hash can be used to only apply specified options, for
4884 # example: { cores => 1 } (only apply changed 'cores')
4885 # $errors ref is used to return error messages
4886 sub vmconfig_hotplug_pending {
4887 my ($vmid, $conf, $storecfg, $selection, $errors) = @_;
4888
4889 my $defaults = load_defaults();
4890 my $arch = get_vm_arch($conf);
4891 my $machine_type = get_vm_machine($conf, undef, $arch);
4892
4893 # commit values which do not have any impact on running VM first
4894 # Note: those option cannot raise errors, we we do not care about
4895 # $selection and always apply them.
4896
4897 my $add_error = sub {
4898 my ($opt, $msg) = @_;
4899 $errors->{$opt} = "hotplug problem - $msg";
4900 };
4901
4902 my $cloudinit_pending_properties = PVE::QemuServer::cloudinit_pending_properties();
4903
4904 my $cloudinit_record_changed = sub {
4905 my ($conf, $opt, $old, $new) = @_;
4906 return if !$cloudinit_pending_properties->{$opt};
4907
4908 my $ci = ($conf->{cloudinit} //= {});
4909
4910 my $recorded = $ci->{$opt};
4911 my %added = map { $_ => 1 } PVE::Tools::split_list(delete($ci->{added}) // '');
4912
4913 if (defined($new)) {
4914 if (defined($old)) {
4915 # an existing value is being modified
4916 if (defined($recorded)) {
4917 # the value was already not in sync
4918 if ($new eq $recorded) {
4919 # a value is being reverted to the cloud-init state:
4920 delete $ci->{$opt};
4921 delete $added{$opt};
4922 } else {
4923 # the value was changed multiple times, do nothing
4924 }
4925 } elsif ($added{$opt}) {
4926 # the value had been marked as added and is being changed, do nothing
4927 } else {
4928 # the value is new, record it:
4929 $ci->{$opt} = $old;
4930 }
4931 } else {
4932 # a new value is being added
4933 if (defined($recorded)) {
4934 # it was already not in sync
4935 if ($new eq $recorded) {
4936 # a value is being reverted to the cloud-init state:
4937 delete $ci->{$opt};
4938 delete $added{$opt};
4939 } else {
4940 # the value had temporarily been removed, do nothing
4941 }
4942 } elsif ($added{$opt}) {
4943 # the value had been marked as added already, do nothing
4944 } else {
4945 # the value is new, add it
4946 $added{$opt} = 1;
4947 }
4948 }
4949 } elsif (!defined($old)) {
4950 # a non-existent value is being removed? ignore...
4951 } else {
4952 # a value is being deleted
4953 if (defined($recorded)) {
4954 # a value was already recorded, just keep it
4955 } elsif ($added{$opt}) {
4956 # the value was marked as added, remove it
4957 delete $added{$opt};
4958 } else {
4959 # a previously unrecorded value is being removed, record the old value:
4960 $ci->{$opt} = $old;
4961 }
4962 }
4963
4964 my $added = join(',', sort keys %added);
4965 $ci->{added} = $added if length($added);
4966 };
4967
4968 my $changes = 0;
4969 foreach my $opt (keys %{$conf->{pending}}) { # add/change
4970 if ($fast_plug_option->{$opt}) {
4971 my $new = delete $conf->{pending}->{$opt};
4972 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $new);
4973 $conf->{$opt} = $new;
4974 $changes = 1;
4975 }
4976 }
4977
4978 if ($changes) {
4979 PVE::QemuConfig->write_config($vmid, $conf);
4980 }
4981
4982 my $ostype = $conf->{ostype};
4983 my $version = extract_version($machine_type, get_running_qemu_version($vmid));
4984 my $hotplug_features = parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
4985 my $usb_hotplug = $hotplug_features->{usb}
4986 && min_version($version, 7, 1)
4987 && defined($ostype) && ($ostype eq 'l26' || windows_version($ostype) > 7);
4988
4989 my $cgroup = PVE::QemuServer::CGroup->new($vmid);
4990 my $pending_delete_hash = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
4991
4992 foreach my $opt (sort keys %$pending_delete_hash) {
4993 next if $selection && !$selection->{$opt};
4994 my $force = $pending_delete_hash->{$opt}->{force};
4995 eval {
4996 if ($opt eq 'hotplug') {
4997 die "skip\n" if ($conf->{hotplug} =~ /memory/);
4998 } elsif ($opt eq 'tablet') {
4999 die "skip\n" if !$hotplug_features->{usb};
5000 if ($defaults->{tablet}) {
5001 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5002 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5003 if $arch eq 'aarch64';
5004 } else {
5005 vm_deviceunplug($vmid, $conf, 'tablet');
5006 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5007 }
5008 } elsif ($opt =~ m/^usb(\d+)$/) {
5009 my $index = $1;
5010 die "skip\n" if !$usb_hotplug;
5011 vm_deviceunplug($vmid, $conf, "usbredirdev$index"); # if it's a spice port
5012 vm_deviceunplug($vmid, $conf, $opt);
5013 } elsif ($opt eq 'vcpus') {
5014 die "skip\n" if !$hotplug_features->{cpu};
5015 qemu_cpu_hotplug($vmid, $conf, undef);
5016 } elsif ($opt eq 'balloon') {
5017 # enable balloon device is not hotpluggable
5018 die "skip\n" if defined($conf->{balloon}) && $conf->{balloon} == 0;
5019 # here we reset the ballooning value to memory
5020 my $balloon = $conf->{memory} || $defaults->{memory};
5021 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5022 } elsif ($fast_plug_option->{$opt}) {
5023 # do nothing
5024 } elsif ($opt =~ m/^net(\d+)$/) {
5025 die "skip\n" if !$hotplug_features->{network};
5026 vm_deviceunplug($vmid, $conf, $opt);
5027 } elsif (is_valid_drivename($opt)) {
5028 die "skip\n" if !$hotplug_features->{disk} || $opt =~ m/(ide|sata)(\d+)/;
5029 vm_deviceunplug($vmid, $conf, $opt);
5030 vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force);
5031 } elsif ($opt =~ m/^memory$/) {
5032 die "skip\n" if !$hotplug_features->{memory};
5033 PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults);
5034 } elsif ($opt eq 'cpuunits') {
5035 $cgroup->change_cpu_shares(undef);
5036 } elsif ($opt eq 'cpulimit') {
5037 $cgroup->change_cpu_quota(undef, undef); # reset, cgroup module can better decide values
5038 } else {
5039 die "skip\n";
5040 }
5041 };
5042 if (my $err = $@) {
5043 &$add_error($opt, $err) if $err ne "skip\n";
5044 } else {
5045 my $old = delete $conf->{$opt};
5046 $cloudinit_record_changed->($conf, $opt, $old, undef);
5047 PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
5048 }
5049 }
5050
5051 my $cloudinit_opt;
5052 foreach my $opt (keys %{$conf->{pending}}) {
5053 next if $selection && !$selection->{$opt};
5054 my $value = $conf->{pending}->{$opt};
5055 eval {
5056 if ($opt eq 'hotplug') {
5057 die "skip\n" if ($value =~ /memory/) || ($value !~ /memory/ && $conf->{hotplug} =~ /memory/);
5058 } elsif ($opt eq 'tablet') {
5059 die "skip\n" if !$hotplug_features->{usb};
5060 if ($value == 1) {
5061 vm_deviceplug($storecfg, $conf, $vmid, 'tablet', $arch, $machine_type);
5062 vm_deviceplug($storecfg, $conf, $vmid, 'keyboard', $arch, $machine_type)
5063 if $arch eq 'aarch64';
5064 } elsif ($value == 0) {
5065 vm_deviceunplug($vmid, $conf, 'tablet');
5066 vm_deviceunplug($vmid, $conf, 'keyboard') if $arch eq 'aarch64';
5067 }
5068 } elsif ($opt =~ m/^usb(\d+)$/) {
5069 my $index = $1;
5070 die "skip\n" if !$usb_hotplug;
5071 my $d = eval { parse_property_string('pve-qm-usb', $value) };
5072 my $id = $opt;
5073 if ($d->{host} =~ m/^spice$/i) {
5074 $id = "usbredirdev$index";
5075 }
5076 qemu_usb_hotplug($storecfg, $conf, $vmid, $id, $d, $arch, $machine_type);
5077 } elsif ($opt eq 'vcpus') {
5078 die "skip\n" if !$hotplug_features->{cpu};
5079 qemu_cpu_hotplug($vmid, $conf, $value);
5080 } elsif ($opt eq 'balloon') {
5081 # enabling/disabling the ballooning device is not hotpluggable
5082 my $old_balloon_enabled = !!(!defined($conf->{balloon}) || $conf->{balloon});
5083 my $new_balloon_enabled = !!(!defined($conf->{pending}->{balloon}) || $conf->{pending}->{balloon});
5084 die "skip\n" if $old_balloon_enabled != $new_balloon_enabled;
5085
5086 # allow manual ballooning if shares is set to zero
5087 if ((defined($conf->{shares}) && ($conf->{shares} == 0))) {
5088 my $balloon = $conf->{pending}->{balloon} || $conf->{memory} || $defaults->{memory};
5089 mon_cmd($vmid, "balloon", value => $balloon*1024*1024);
5090 }
5091 } elsif ($opt =~ m/^net(\d+)$/) {
5092 # some changes can be done without hotplug
5093 vmconfig_update_net($storecfg, $conf, $hotplug_features->{network},
5094 $vmid, $opt, $value, $arch, $machine_type);
5095 } elsif (is_valid_drivename($opt)) {
5096 die "skip\n" if $opt eq 'efidisk0' || $opt eq 'tpmstate0';
5097 # some changes can be done without hotplug
5098 my $drive = parse_drive($opt, $value);
5099 if (drive_is_cloudinit($drive)) {
5100 $cloudinit_opt = [$opt, $drive];
5101 # apply all the other changes first, then generate the cloudinit disk
5102 die "skip\n";
5103 }
5104 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5105 $vmid, $opt, $value, $arch, $machine_type);
5106 } elsif ($opt =~ m/^memory$/) { #dimms
5107 die "skip\n" if !$hotplug_features->{memory};
5108 $value = PVE::QemuServer::Memory::qemu_memory_hotplug($vmid, $conf, $defaults, $value);
5109 } elsif ($opt eq 'cpuunits') {
5110 my $new_cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{pending}->{$opt}); #clamp
5111 $cgroup->change_cpu_shares($new_cpuunits);
5112 } elsif ($opt eq 'cpulimit') {
5113 my $cpulimit = $conf->{pending}->{$opt} == 0 ? -1 : int($conf->{pending}->{$opt} * 100000);
5114 $cgroup->change_cpu_quota($cpulimit, 100000);
5115 } elsif ($opt eq 'agent') {
5116 vmconfig_update_agent($conf, $opt, $value);
5117 } else {
5118 die "skip\n"; # skip non-hot-pluggable options
5119 }
5120 };
5121 if (my $err = $@) {
5122 &$add_error($opt, $err) if $err ne "skip\n";
5123 } else {
5124 $cloudinit_record_changed->($conf, $opt, $conf->{$opt}, $value);
5125 $conf->{$opt} = $value;
5126 delete $conf->{pending}->{$opt};
5127 }
5128 }
5129
5130 if (defined($cloudinit_opt)) {
5131 my ($opt, $drive) = @$cloudinit_opt;
5132 my $value = $conf->{pending}->{$opt};
5133 eval {
5134 my $temp = {%$conf, $opt => $value};
5135 PVE::QemuServer::Cloudinit::apply_cloudinit_config($temp, $vmid);
5136 vmconfig_update_disk($storecfg, $conf, $hotplug_features->{disk},
5137 $vmid, $opt, $value, $arch, $machine_type);
5138 };
5139 if (my $err = $@) {
5140 &$add_error($opt, $err) if $err ne "skip\n";
5141 } else {
5142 $conf->{$opt} = $value;
5143 delete $conf->{pending}->{$opt};
5144 }
5145 }
5146
5147 # unplug xhci controller if no usb device is left
5148 if ($usb_hotplug) {
5149 my $has_usb = 0;
5150 for (my $i = 0; $i < $PVE::QemuServer::USB::MAX_USB_DEVICES; $i++) {
5151 next if !defined($conf->{"usb$i"});
5152 $has_usb = 1;
5153 last;
5154 }
5155 if (!$has_usb) {
5156 vm_deviceunplug($vmid, $conf, 'xhci');
5157 }
5158 }
5159
5160 PVE::QemuConfig->write_config($vmid, $conf);
5161
5162 if ($hotplug_features->{cloudinit} && PVE::QemuServer::Cloudinit::has_changes($conf)) {
5163 PVE::QemuServer::vmconfig_update_cloudinit_drive($storecfg, $conf, $vmid);
5164 }
5165 }
5166
# Try to free the volume referenced by the drive entry $key.
#
# Nothing is done (and nothing is returned) unless $force is set or $key is an 'unused' entry,
# and drive_is_cdrom($drive, 1) is false for the drive.
#
# If the VM owns the volume, the 'Datastore.AllocateSpace' privilege on its storage is checked
# through $rpcenv, and the volume is freed. Dies if the volume is still in use.
# If the VM does not own the volume, nothing is freed.
#
# Returns 1 in both of these cases, i.e. whenever the entry can simply be dropped from the config.
sub try_deallocate_drive {
    my ($storecfg, $vmid, $conf, $key, $drive, $rpcenv, $authuser, $force) = @_;

    # only forced removals and 'unused' entries are candidates for deallocation
    return if !($force || $key =~ /^unused/);
    return if drive_is_cdrom($drive, 1);

    my $volid = $drive->{file};

    # If vm is not owner of this disk remove from config
    return 1 if !vm_is_volid_owner($storecfg, $vmid, $volid);

    my $sid = PVE::Storage::parse_volume_id($volid);
    $rpcenv->check($authuser, "/storage/$sid", ['Datastore.AllocateSpace']);

    # check if the disk is really unused
    if (PVE::QemuServer::Drive::is_volume_in_use($storecfg, $conf, $key, $volid)) {
        die "unable to delete '$volid' - volume is still in use (snapshot?)\n";
    }

    PVE::Storage::vdisk_free($storecfg, $volid);

    return 1;
}
5189
# Handle the removal of the drive option $opt from $conf.
#
# With $force set, the current user needs 'VM.Config.Disk' on the VM and the volume is handed to
# try_deallocate_drive(). Without $force, the drive is only detached, i.e. passed to
# vmconfig_register_unused_drive().
sub vmconfig_delete_or_detach_drive {
    my ($vmid, $storecfg, $conf, $opt, $force) = @_;

    my $drive = parse_drive($opt, $conf->{$opt});

    my $rpcenv = PVE::RPCEnvironment::get();
    my $user = $rpcenv->get_user();

    # plain detach: keep the volume around
    if (!$force) {
        return vmconfig_register_unused_drive($storecfg, $vmid, $conf, $drive);
    }

    $rpcenv->check_vm_perm($user, $vmid, undef, ['VM.Config.Disk']);
    return try_deallocate_drive($storecfg, $vmid, $conf, $opt, $drive, $rpcenv, $user, $force);
}
5205
5206
5207
# Apply all pending changes of $conf directly to the config ("cold plug") and write it out.
#
# Parameters:
#   $vmid            - ID of the VM
#   $conf            - VM config; modified in place
#   $storecfg        - storage configuration
#   $errors          - optional hash reference; receives one message per option that failed
#   $skip_cloud_init - if true, the cloud-init config is never applied here
#
# Returns early, without writing anything, if there are no pending changes.
# Failures of single options are recorded in $errors and emitted as warnings; they do not abort
# processing of the remaining options.
sub vmconfig_apply_pending {
    my ($vmid, $conf, $storecfg, $errors, $skip_cloud_init) = @_;

    return if !scalar(keys %{$conf->{pending}});

    my $record_failure = sub {
        my ($opt, $msg) = @_;
        my $text = "unable to apply pending change $opt : $msg";
        $errors->{$opt} = $text;
        warn $text;
    };

    # pending deletions first

    my $to_delete = PVE::QemuConfig->parse_pending_delete($conf->{pending}->{delete});
    for my $opt (sort keys %$to_delete) {
        my $force = $to_delete->{$opt}->{force};

        eval {
            die "internal error" if $opt =~ m/^unused/;

            vmconfig_delete_or_detach_drive($vmid, $storecfg, $conf, $opt, $force)
                if defined($conf->{$opt}) && is_valid_drivename($opt);
        };
        my $err = $@;
        if ($err) {
            $record_failure->($opt, $err);
            next;
        }

        PVE::QemuConfig->remove_from_pending_delete($conf, $opt);
        delete $conf->{$opt};
    }

    PVE::QemuConfig->cleanup_pending($conf);

    # undef means "not decided yet", 0 means "explicitly disabled by the caller"
    my $generate_cloudinit = $skip_cloud_init ? 0 : undef;

    # pending additions and modifications
    for my $opt (keys %{$conf->{pending}}) {
        next if $opt eq 'delete'; # just to be sure

        eval {
            # a drive that gets replaced is kept as unused volume
            if (defined($conf->{$opt}) && is_valid_drivename($opt)) {
                my $current = parse_drive($opt, $conf->{$opt});
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $current);
            }
        };
        my $err = $@;
        if ($err) {
            $record_failure->($opt, $err);
            next;
        }

        if (is_valid_drivename($opt)) {
            my $pending_drive = parse_drive($opt, $conf->{pending}->{$opt});
            $generate_cloudinit //= 1 if drive_is_cloudinit($pending_drive);
        }

        $conf->{$opt} = delete $conf->{pending}->{$opt};
    }

    # write all changes at once to avoid unnecessary i/o
    PVE::QemuConfig->write_config($vmid, $conf);

    # After successful generation and if there were changes to be applied, update the
    # config to drop the {cloudinit} entry.
    if ($generate_cloudinit && PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
        PVE::QemuConfig->write_config($vmid, $conf);
    }
}
5274
# Apply the new value $value of the network option $opt to a running VM.
#
# If the option already exists and only bridge, tag, trunks, firewall, rate or link state differ,
# the change is applied to the existing tap interface and 1 is returned.
#
# If model, MAC address, queues or MTU differ, or either the old or the new config has no bridge,
# the existing device is unplugged and the new one is plugged. A device that did not exist before
# is simply plugged. Both need $hotplug to be true; otherwise the function dies with "skip\n".
sub vmconfig_update_net {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $new = parse_net($value);

    if ($conf->{$opt}) {
        my $old = parse_net($conf->{$opt});

        # these properties cannot be changed on an existing device
        my $needs_replug =
            safe_string_ne($old->{model}, $new->{model})
            || safe_string_ne($old->{macaddr}, $new->{macaddr})
            || safe_num_ne($old->{queues}, $new->{queues})
            || safe_num_ne($old->{mtu}, $new->{mtu})
            || !$new->{bridge} # bridge/nat mode change
            || !$old->{bridge};

        if (!$needs_replug) {
            my ($index) = $opt =~ m/net(\d+)/
                or die "internal error";
            my $iface = "tap${vmid}i${index}";

            my $replug_tap =
                safe_string_ne($old->{bridge}, $new->{bridge})
                || safe_num_ne($old->{tag}, $new->{tag})
                || safe_string_ne($old->{trunks}, $new->{trunks})
                || safe_num_ne($old->{firewall}, $new->{firewall});

            if ($replug_tap) {
                PVE::Network::tap_unplug($iface);

                my @plug_args = (
                    $iface,
                    $new->{bridge},
                    $new->{tag},
                    $new->{firewall},
                    $new->{trunks},
                    $new->{rate},
                );
                if ($have_sdn) {
                    PVE::Network::SDN::Zones::tap_plug(@plug_args);
                } else {
                    PVE::Network::tap_plug(@plug_args);
                }
            } elsif (safe_num_ne($old->{rate}, $new->{rate})) {
                # Rate can be applied on its own but any change above needs to
                # include the rate in tap_plug since OVS resets everything.
                PVE::Network::tap_rate_limit($iface, $new->{rate});
            }

            if (safe_string_ne($old->{link_down}, $new->{link_down})) {
                qemu_set_link_status($vmid, $opt, !$new->{link_down});
            }

            return 1;
        }

        # for non online change, we try to hot-unplug
        die "skip\n" if !$hotplug;
        vm_deviceunplug($vmid, $conf, $opt);
    }

    die "skip\n" if !$hotplug;

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $new, $arch, $machine_type);
}
5328
# Check whether changing the option $opt to $value can be accepted for a running VM.
#
# Dies with "skip\n" if the option is currently not set (or false) in $conf, or if any
# sub-option other than the ones listed as hot-pluggable (currently 'fstrim_cloned_disks')
# differs between the old and the new value. Returns nothing otherwise; the function itself does
# not modify the config.
sub vmconfig_update_agent {
    my ($conf, $opt, $value) = @_;

    die "skip\n" if !$conf->{$opt};

    my %hotpluggable = (fstrim_cloned_disks => 1);

    my $current = parse_guest_agent($conf);
    my $requested = parse_guest_agent({$opt => $value});

    # first pass catches added/changed options, second pass catches removed options
    for my $direction ([$requested, $current], [$current, $requested]) {
        my ($from, $to) = @$direction;

        for my $option (keys %$from) {
            next if defined($hotpluggable{$option});
            die "skip\n" if safe_string_ne($from->{$option}, $to->{$option});
        }
    }

    return; # either no actual change (e.g., format string reordered) or just hotpluggable changes
}
5351
# Apply the new value $value of the drive option $opt to a running VM.
#
# Existing CD-ROM drive: the medium is ejected and, unless the new file is 'none', replaced by
# the new one. Returns 1.
#
# Existing disk with unchanged volume: dies with "skip\n" if aio, discard, iothread, queues,
# cache, ssd or ro differ; otherwise changed I/O throttle limits are applied. Returns 1.
#
# Existing disk with a different volume: the old disk is unplugged and registered as unused, then
# the new one is hot-plugged like a newly added disk.
#
# Hot-plugging dies with "skip\n" if $hotplug is false or if $opt is an ide/sata drive.
# Dies with an error message if the media type would change.
sub vmconfig_update_disk {
    my ($storecfg, $conf, $hotplug, $vmid, $opt, $value, $arch, $machine_type) = @_;

    my $drive = parse_drive($opt, $value);
    my $old_drive = $conf->{$opt} ? parse_drive($opt, $conf->{$opt}) : undef;

    if ($old_drive) {
        my $media = $drive->{media} || 'disk';
        my $old_media = $old_drive->{media} || 'disk';
        die "unable to change media type\n" if $media ne $old_media;

        if (drive_is_cdrom($old_drive)) {
            if ($drive->{file} eq 'none') {
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");
                vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive)
                    if drive_is_cloudinit($old_drive);
            } else {
                my $path = get_iso_path($storecfg, $vmid, $drive->{file});

                # force eject if locked
                mon_cmd($vmid, "eject", force => JSON::true, id => "$opt");

                mon_cmd($vmid, "blockdev-change-medium", id => "$opt", filename => "$path")
                    if $path;
            }

            return 1;
        }

        if ($drive->{file} eq $old_drive->{file}) {
            # update existing disk

            # skip non hotpluggable value
            for my $prop (qw(aio discard iothread queues cache ssd ro)) {
                die "skip\n" if safe_string_ne($drive->{$prop}, $old_drive->{$prop});
            }

            # throttle properties, grouped in the order qemu_block_set_io_throttle is called with
            my @mbps_props = qw(mbps mbps_rd mbps_wr);
            my @iops_props = qw(iops iops_rd iops_wr);
            my @mbps_max_props = qw(mbps_max mbps_rd_max mbps_wr_max);
            my @iops_max_props = qw(iops_max iops_rd_max iops_wr_max);
            my @length_props = qw(
                bps_max_length bps_rd_max_length bps_wr_max_length
                iops_max_length iops_rd_max_length iops_wr_max_length
            );

            my $changed_prop = first {
                safe_num_ne($drive->{$_}, $old_drive->{$_})
            } (@mbps_props, @iops_props, @mbps_max_props, @iops_max_props, @length_props);

            # apply throttle
            if (defined($changed_prop)) {
                qemu_block_set_io_throttle(
                    $vmid,
                    "drive-$opt",
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @mbps_props),
                    (map { ($drive->{$_} || 0) } @iops_props),
                    (map { ($drive->{$_} || 0) * 1024 * 1024 } @mbps_max_props),
                    (map { ($drive->{$_} || 0) } @iops_max_props),
                    (map { ($drive->{$_} || 1) } @length_props),
                );
            }

            return 1;
        }

        # the volume changed: unplug and register as unused, then plug the new one below
        die "skip\n" if !$hotplug;

        vm_deviceunplug($vmid, $conf, $opt);
        vmconfig_register_unused_drive($storecfg, $vmid, $conf, $old_drive);
    }

    die "skip\n" if !$hotplug || $opt =~ m/(ide|sata)(\d+)/;

    # hotplug new disks
    if ($drive->{file} !~ m|^/dev/.+|) {
        PVE::Storage::activate_volumes($storecfg, [$drive->{file}]);
    }

    return vm_deviceplug($storecfg, $conf, $vmid, $opt, $drive, $arch, $machine_type);
}
5461
# Regenerate the cloud-init config of a VM and, if the VM is running, swap the medium of its
# cloud-init drive for the regenerated one.
#
# Does nothing if the config contains no cloud-init drive. If there are several, the last one
# reported by foreach_volume() is used. The config is written out when
# apply_cloudinit_config() returns a true value.
sub vmconfig_update_cloudinit_drive {
    my ($storecfg, $conf, $vmid) = @_;

    my ($ci_key, $ci_drive);

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($key, $drive) = @_;
        ($ci_key, $ci_drive) = ($key, $drive) if PVE::QemuServer::drive_is_cloudinit($drive);
    });

    return if !$ci_drive;

    PVE::QemuConfig->write_config($vmid, $conf)
        if PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid);

    if (PVE::QemuServer::check_running($vmid)) {
        my $path = PVE::Storage::path($storecfg, $ci_drive->{file});
        if ($path) {
            # replace the medium of the running VM
            mon_cmd($vmid, "eject", force => JSON::true, id => "$ci_key");
            mon_cmd($vmid, "blockdev-change-medium", id => "$ci_key", filename => "$path");
        }
    }
}
5492
# called in locked context by incoming migration
#
# Collect the volumes of $conf that live on non-shared storage.
#
# CD-ROM drives, 'tpmstate0' and drives without a volume are skipped. Returns a hash reference
# mapping the drive key to [$volid, $storeid, $drive, $use_existing, $format], where
# $use_existing is 1 for volumes listed in $replicated_volumes and 0 otherwise.
sub vm_migrate_get_nbd_disks {
    my ($storecfg, $conf, $replicated_volumes) = @_;

    my $result = {};

    my $collect = sub {
        my ($key, $drive) = @_;

        return if drive_is_cdrom($drive) || $key eq 'tpmstate0';

        my $volid = $drive->{file};
        return if !$volid;

        my ($storeid, $volname) = PVE::Storage::parse_volume_id($volid);

        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        return if $scfg->{shared};

        my $format = qemu_img_format($scfg, $volname);

        # replicated disks re-use existing state via bitmap
        my $use_existing = $replicated_volumes->{$volid} ? 1 : 0;

        $result->{$key} = [$volid, $storeid, $drive, $use_existing, $format];
    };

    PVE::QemuConfig->foreach_volume($conf, $collect);

    return $result;
}
5521
# called in locked context by incoming migration
#
# Prepare the target volumes for the drives in $source_volumes.
#
# $source_volumes maps a drive key to [$volid, $storeid, $drive, $use_existing, $format], i.e.
# the structure built by vm_migrate_get_nbd_disks(). For entries with $use_existing set, the
# existing volume is re-used. For all others a new volume is allocated on the storage that
# $storagemap maps the source storage to.
#
# Returns a hash reference mapping the drive key to { drivestr, volid } plus 'replicated => 1'
# for re-used volumes. The drive hashes inside $source_volumes are not modified.
sub vm_migrate_alloc_nbd_disks {
    my ($storecfg, $vmid, $source_volumes, $storagemap) = @_;

    my $nbd = {};
    foreach my $opt (sort keys %$source_volumes) {
        my ($volid, $storeid, $drive, $use_existing, $format) = @{$source_volumes->{$opt}};

        if ($use_existing) {
            $nbd->{$opt}->{drivestr} = print_drive($drive);
            $nbd->{$opt}->{volid} = $volid;
            $nbd->{$opt}->{replicated} = 1;
            next;
        }

        $storeid = PVE::JSONSchema::map_id($storagemap, $storeid);

        # order of precedence, filtered by whether storage supports it:
        # 1. explicit requested format
        # 2. default format of storage
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        $format = $defFormat if !$format || !grep { $format eq $_ } $validFormats->@*;

        # NOTE(review): presumably the drive size is in bytes and vdisk_alloc expects KiB - confirm
        my $size = $drive->{size} / 1024;
        my $newvolid = PVE::Storage::vdisk_alloc($storecfg, $storeid, $vmid, $format, undef, $size);

        # Work on a shallow copy: assigning the reference would alias the caller's drive hash
        # and overwrite its 'format' and 'file' entries.
        my $newdrive = { %$drive };
        $newdrive->{format} = $format;
        $newdrive->{file} = $newvolid;

        $nbd->{$opt}->{drivestr} = print_drive($newdrive);
        $nbd->{$opt}->{volid} = $newvolid;
    }

    return $nbd;
}
5557
# Start a VM while holding its config lock.
#
# see vm_start_nolock for parameters, additionally:
# migrate_opts:
#   storagemap = parsed storage map for allocating NBD disks
#
# Note that $params->{resume} is set by this function. Returns whatever lock_config() returns for
# the locked section below.
sub vm_start {
    my ($storecfg, $vmid, $params, $migrate_opts) = @_;

    my $start_locked = sub {
        my $migratedfrom = $migrate_opts->{migratedfrom};
        my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);

        if (!$params->{skiptemplate} && PVE::QemuConfig->is_template($conf)) {
            die "you can't start a vm if it's a template\n";
        }

        my $is_suspended = PVE::QemuConfig->has_lock($conf, 'suspended');
        my $in_backup = PVE::QemuConfig->has_lock($conf, 'backup');

        my $running = check_running($vmid, undef, $migratedfrom);

        # a backup is currently running, attempt to start the guest in the
        # existing QEMU instance
        return vm_resume($vmid) if $in_backup && $running;

        if (!$params->{skiplock} && !$is_suspended) {
            PVE::QemuConfig->check_lock($conf);
        }

        $params->{resume} = $is_suspended || defined($conf->{vmstate});

        die "VM $vmid already running\n" if $running;

        if (my $storagemap = $migrate_opts->{storagemap}) {
            my $replicated = $migrate_opts->{replicated_volumes};
            my $disks = vm_migrate_get_nbd_disks($storecfg, $conf, $replicated);

            my $nbd = vm_migrate_alloc_nbd_disks($storecfg, $vmid, $disks, $storagemap);
            $migrate_opts->{nbd} = $nbd;

            # start with the drive strings of the prepared target volumes
            for my $opt (keys %$nbd) {
                $conf->{$opt} = $nbd->{$opt}->{drivestr};
            }
        }

        return vm_start_nolock($storecfg, $vmid, $conf, $params, $migrate_opts);
    };

    return PVE::QemuConfig->lock_config($vmid, $start_locked);
}
5601
5602
5603 # params:
5604 # statefile => 'tcp', 'unix' for migration or path/volid for RAM state
5605 # skiplock => 0/1, skip checking for config lock
5606 # skiptemplate => 0/1, skip checking whether VM is template
5607 # forcemachine => to force QEMU machine (rollback/migration)
5608 # forcecpu => a QEMU '-cpu' argument string to override get_cpu_options
5609 # timeout => in seconds
5610 # paused => start VM in paused state (backup)
5611 # resume => resume from hibernation
5612 # pbs-backing => {
5613 # sata0 => {
5614 # repository
5615 # snapshot
5616 # keyfile
5617 # archive
5618 # },
5619 # virtio2 => ...
5620 # }
5621 # migrate_opts:
5622 # nbd => volumes for NBD exports (vm_migrate_alloc_nbd_disks)
5623 # migratedfrom => source node
5624 # spice_ticket => used for spice migration, passed via tunnel/stdin
5625 # network => CIDR of migration network
5626 # type => secure/insecure - tunnel over encrypted connection or plain-text
5627 # nbd_proto_version => int, 0 for TCP, 1 for UNIX
5628 # replicated_volumes => which volids should be re-used with bitmaps for nbd migration
5629 # offline_volumes => new volids of offline migrated disks like tpmstate and cloudinit, not yet
5630 # contained in config
5631 sub vm_start_nolock {
5632 my ($storecfg, $vmid, $conf, $params, $migrate_opts) = @_;
5633
5634 my $statefile = $params->{statefile};
5635 my $resume = $params->{resume};
5636
5637 my $migratedfrom = $migrate_opts->{migratedfrom};
5638 my $migration_type = $migrate_opts->{type};
5639
5640 my $res = {};
5641
5642 # clean up leftover reboot request files
5643 eval { clear_reboot_request($vmid); };
5644 warn $@ if $@;
5645
5646 if (!$statefile && scalar(keys %{$conf->{pending}})) {
5647 vmconfig_apply_pending($vmid, $conf, $storecfg);
5648 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5649 }
5650
5651 # don't regenerate the ISO if the VM is started as part of a live migration
5652 # this way we can reuse the old ISO with the correct config
5653 if (!$migratedfrom) {
5654 if (PVE::QemuServer::Cloudinit::apply_cloudinit_config($conf, $vmid)) {
5655 # FIXME: apply_cloudinit_config updates $conf in this case, and it would only drop
5656 # $conf->{cloudinit}, so we could just not do this?
5657 # But we do it above, so for now let's be consistent.
5658 $conf = PVE::QemuConfig->load_config($vmid); # update/reload
5659 }
5660 }
5661
5662 # override offline migrated volumes, conf is out of date still
5663 if (my $offline_volumes = $migrate_opts->{offline_volumes}) {
5664 for my $key (sort keys $offline_volumes->%*) {
5665 my $parsed = parse_drive($key, $conf->{$key});
5666 $parsed->{file} = $offline_volumes->{$key};
5667 $conf->{$key} = print_drive($parsed);
5668 }
5669 }
5670
5671 my $defaults = load_defaults();
5672
5673 # set environment variable useful inside network script
5674 # for remote migration the config is available on the target node!
5675 if (!$migrate_opts->{remote_node}) {
5676 $ENV{PVE_MIGRATED_FROM} = $migratedfrom;
5677 }
5678
5679 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-start', 1);
5680
5681 my $forcemachine = $params->{forcemachine};
5682 my $forcecpu = $params->{forcecpu};
5683 if ($resume) {
5684 # enforce machine and CPU type on suspended vm to ensure HW compatibility
5685 $forcemachine = $conf->{runningmachine};
5686 $forcecpu = $conf->{runningcpu};
5687 print "Resuming suspended VM\n";
5688 }
5689
5690 my ($cmd, $vollist, $spice_port, $pci_devices) = config_to_command($storecfg, $vmid,
5691 $conf, $defaults, $forcemachine, $forcecpu, $params->{'pbs-backing'});
5692
5693 my $migration_ip;
5694 my $get_migration_ip = sub {
5695 my ($nodename) = @_;
5696
5697 return $migration_ip if defined($migration_ip);
5698
5699 my $cidr = $migrate_opts->{network};
5700
5701 if (!defined($cidr)) {
5702 my $dc_conf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5703 $cidr = $dc_conf->{migration}->{network};
5704 }
5705
5706 if (defined($cidr)) {
5707 my $ips = PVE::Network::get_local_ip_from_cidr($cidr);
5708
5709 die "could not get IP: no address configured on local " .
5710 "node for network '$cidr'\n" if scalar(@$ips) == 0;
5711
5712 die "could not get IP: multiple addresses configured on local " .
5713 "node for network '$cidr'\n" if scalar(@$ips) > 1;
5714
5715 $migration_ip = @$ips[0];
5716 }
5717
5718 $migration_ip = PVE::Cluster::remote_node_ip($nodename, 1)
5719 if !defined($migration_ip);
5720
5721 return $migration_ip;
5722 };
5723
5724 if ($statefile) {
5725 if ($statefile eq 'tcp') {
5726 my $migrate = $res->{migrate} = { proto => 'tcp' };
5727 $migrate->{addr} = "localhost";
5728 my $datacenterconf = PVE::Cluster::cfs_read_file('datacenter.cfg');
5729 my $nodename = nodename();
5730
5731 if (!defined($migration_type)) {
5732 if (defined($datacenterconf->{migration}->{type})) {
5733 $migration_type = $datacenterconf->{migration}->{type};
5734 } else {
5735 $migration_type = 'secure';
5736 }
5737 }
5738
5739 if ($migration_type eq 'insecure') {
5740 $migrate->{addr} = $get_migration_ip->($nodename);
5741 $migrate->{addr} = "[$migrate->{addr}]" if Net::IP::ip_is_ipv6($migrate->{addr});
5742 }
5743
5744 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5745 $migrate->{port} = PVE::Tools::next_migrate_port($pfamily);
5746 $migrate->{uri} = "tcp:$migrate->{addr}:$migrate->{port}";
5747 push @$cmd, '-incoming', $migrate->{uri};
5748 push @$cmd, '-S';
5749
5750 } elsif ($statefile eq 'unix') {
5751 # should be default for secure migrations as a ssh TCP forward
5752 # tunnel is not reliably ready in a deterministic time and regularly fails
5753 # to set up in time, so use UNIX socket forwards
5754 my $migrate = $res->{migrate} = { proto => 'unix' };
5755 $migrate->{addr} = "/run/qemu-server/$vmid.migrate";
5756 unlink $migrate->{addr};
5757
5758 $migrate->{uri} = "unix:$migrate->{addr}";
5759 push @$cmd, '-incoming', $migrate->{uri};
5760 push @$cmd, '-S';
5761
5762 } elsif (-e $statefile) {
5763 push @$cmd, '-loadstate', $statefile;
5764 } else {
5765 my $statepath = PVE::Storage::path($storecfg, $statefile);
5766 push @$vollist, $statefile;
5767 push @$cmd, '-loadstate', $statepath;
5768 }
5769 } elsif ($params->{paused}) {
5770 push @$cmd, '-S';
5771 }
5772
5773 my $start_timeout = $params->{timeout} // config_aware_timeout($conf, $resume);
5774
5775 my $pci_reserve_list = [];
5776 for my $device (values $pci_devices->%*) {
5777 next if $device->{mdev}; # we don't reserve for mdev devices
5778 push $pci_reserve_list->@*, map { $_->{id} } $device->{ids}->@*;
5779 }
5780
5781 # reserve all PCI IDs before actually doing anything with them
5782 PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, $start_timeout);
5783
5784 eval {
5785 my $uuid;
5786 for my $id (sort keys %$pci_devices) {
5787 my $d = $pci_devices->{$id};
5788 my ($index) = ($id =~ m/^hostpci(\d+)$/);
5789
5790 my $chosen_mdev;
5791 for my $dev ($d->{ids}->@*) {
5792 my $info = eval { PVE::QemuServer::PCI::prepare_pci_device($vmid, $dev->{id}, $index, $d->{mdev}) };
5793 if ($d->{mdev}) {
5794 warn $@ if $@;
5795 $chosen_mdev = $info;
5796 last if $chosen_mdev; # if successful, we're done
5797 } else {
5798 die $@ if $@;
5799 }
5800 }
5801
5802 next if !$d->{mdev};
5803 die "could not create mediated device\n" if !defined($chosen_mdev);
5804
5805 # nvidia grid needs the uuid of the mdev as qemu parameter
5806 if (!defined($uuid) && $chosen_mdev->{vendor} =~ m/^(0x)?10de$/) {
5807 if (defined($conf->{smbios1})) {
5808 my $smbios_conf = parse_smbios1($conf->{smbios1});
5809 $uuid = $smbios_conf->{uuid} if defined($smbios_conf->{uuid});
5810 }
5811 $uuid = PVE::QemuServer::PCI::generate_mdev_uuid($vmid, $index) if !defined($uuid);
5812 }
5813 }
5814 push @$cmd, '-uuid', $uuid if defined($uuid);
5815 };
5816 if (my $err = $@) {
5817 eval { cleanup_pci_devices($vmid, $conf) };
5818 warn $@ if $@;
5819 die $err;
5820 }
5821
5822 PVE::Storage::activate_volumes($storecfg, $vollist);
5823
5824
5825 my %silence_std_outs = (outfunc => sub {}, errfunc => sub {});
5826 eval { run_command(['/bin/systemctl', 'reset-failed', "$vmid.scope"], %silence_std_outs) };
5827 eval { run_command(['/bin/systemctl', 'stop', "$vmid.scope"], %silence_std_outs) };
5828 # Issues with the above 'stop' not being fully completed are extremely rare, a very low
5829 # timeout should be more than enough here...
5830 PVE::Systemd::wait_for_unit_removed("$vmid.scope", 20);
5831
5832 my $cpuunits = PVE::CGroup::clamp_cpu_shares($conf->{cpuunits});
5833
5834 my %run_params = (
5835 timeout => $statefile ? undef : $start_timeout,
5836 umask => 0077,
5837 noerr => 1,
5838 );
5839
5840 # when migrating, prefix QEMU output so other side can pick up any
5841 # errors that might occur and show the user
5842 if ($migratedfrom) {
5843 $run_params{quiet} = 1;
5844 $run_params{logfunc} = sub { print "QEMU: $_[0]\n" };
5845 }
5846
5847 my %systemd_properties = (
5848 Slice => 'qemu.slice',
5849 KillMode => 'process',
5850 SendSIGKILL => 0,
5851 TimeoutStopUSec => ULONG_MAX, # infinity
5852 );
5853
5854 if (PVE::CGroup::cgroup_mode() == 2) {
5855 $systemd_properties{CPUWeight} = $cpuunits;
5856 } else {
5857 $systemd_properties{CPUShares} = $cpuunits;
5858 }
5859
5860 if (my $cpulimit = $conf->{cpulimit}) {
5861 $systemd_properties{CPUQuota} = int($cpulimit * 100);
5862 }
5863 $systemd_properties{timeout} = 10 if $statefile; # setting up the scope shoul be quick
5864
5865 my $run_qemu = sub {
5866 PVE::Tools::run_fork sub {
5867 PVE::Systemd::enter_systemd_scope($vmid, "Proxmox VE VM $vmid", %systemd_properties);
5868
5869 my $tpmpid;
5870 if ((my $tpm = $conf->{tpmstate0}) && !PVE::QemuConfig->is_template($conf)) {
5871 # start the TPM emulator so QEMU can connect on start
5872 $tpmpid = start_swtpm($storecfg, $vmid, $tpm, $migratedfrom);
5873 }
5874
5875 my $exitcode = run_command($cmd, %run_params);
5876 if ($exitcode) {
5877 if ($tpmpid) {
5878 warn "stopping swtpm instance (pid $tpmpid) due to QEMU startup error\n";
5879 kill 'TERM', $tpmpid;
5880 }
5881 die "QEMU exited with code $exitcode\n";
5882 }
5883 };
5884 };
5885
5886 if ($conf->{hugepages}) {
5887
5888 my $code = sub {
5889 my $hotplug_features =
5890 parse_hotplug_features(defined($conf->{hotplug}) ? $conf->{hotplug} : '1');
5891 my $hugepages_topology =
5892 PVE::QemuServer::Memory::hugepages_topology($conf, $hotplug_features->{memory});
5893
5894 my $hugepages_host_topology = PVE::QemuServer::Memory::hugepages_host_topology();
5895
5896 PVE::QemuServer::Memory::hugepages_mount();
5897 PVE::QemuServer::Memory::hugepages_allocate($hugepages_topology, $hugepages_host_topology);
5898
5899 eval { $run_qemu->() };
5900 if (my $err = $@) {
5901 PVE::QemuServer::Memory::hugepages_reset($hugepages_host_topology)
5902 if !$conf->{keephugepages};
5903 die $err;
5904 }
5905
5906 PVE::QemuServer::Memory::hugepages_pre_deallocate($hugepages_topology)
5907 if !$conf->{keephugepages};
5908 };
5909 eval { PVE::QemuServer::Memory::hugepages_update_locked($code); };
5910
5911 } else {
5912 eval { $run_qemu->() };
5913 }
5914
5915 if (my $err = $@) {
5916 # deactivate volumes if start fails
5917 eval { PVE::Storage::deactivate_volumes($storecfg, $vollist); };
5918 warn $@ if $@;
5919 eval { cleanup_pci_devices($vmid, $conf) };
5920 warn $@ if $@;
5921
5922 die "start failed: $err";
5923 }
5924
5925 # re-reserve all PCI IDs now that we can know the actual VM PID
5926 my $pid = PVE::QemuServer::Helpers::vm_running_locally($vmid);
5927 eval { PVE::QemuServer::PCI::reserve_pci_usage($pci_reserve_list, $vmid, undef, $pid) };
5928 warn $@ if $@;
5929
5930 if (defined($res->{migrate})) {
5931 print "migration listens on $res->{migrate}->{uri}\n";
5932 } elsif ($statefile) {
5933 eval { mon_cmd($vmid, "cont"); };
5934 warn $@ if $@;
5935 }
5936
5937 #start nbd server for storage migration
5938 if (my $nbd = $migrate_opts->{nbd}) {
5939 my $nbd_protocol_version = $migrate_opts->{nbd_proto_version} // 0;
5940
5941 my $migrate_storage_uri;
5942 # nbd_protocol_version > 0 for unix socket support
5943 if ($nbd_protocol_version > 0 && ($migration_type eq 'secure' || $migration_type eq 'websocket')) {
5944 my $socket_path = "/run/qemu-server/$vmid\_nbd.migrate";
5945 mon_cmd($vmid, "nbd-server-start", addr => { type => 'unix', data => { path => $socket_path } } );
5946 $migrate_storage_uri = "nbd:unix:$socket_path";
5947 $res->{migrate}->{unix_sockets} = [$socket_path];
5948 } else {
5949 my $nodename = nodename();
5950 my $localip = $get_migration_ip->($nodename);
5951 my $pfamily = PVE::Tools::get_host_address_family($nodename);
5952 my $storage_migrate_port = PVE::Tools::next_migrate_port($pfamily);
5953
5954 mon_cmd($vmid, "nbd-server-start", addr => {
5955 type => 'inet',
5956 data => {
5957 host => "${localip}",
5958 port => "${storage_migrate_port}",
5959 },
5960 });
5961 $localip = "[$localip]" if Net::IP::ip_is_ipv6($localip);
5962 $migrate_storage_uri = "nbd:${localip}:${storage_migrate_port}";
5963 }
5964
5965 my $block_info = mon_cmd($vmid, "query-block");
5966 $block_info = { map { $_->{device} => $_ } $block_info->@* };
5967
5968 foreach my $opt (sort keys %$nbd) {
5969 my $drivestr = $nbd->{$opt}->{drivestr};
5970 my $volid = $nbd->{$opt}->{volid};
5971
5972 my $block_node = $block_info->{"drive-$opt"}->{inserted}->{'node-name'};
5973
5974 mon_cmd(
5975 $vmid,
5976 "block-export-add",
5977 id => "drive-$opt",
5978 'node-name' => $block_node,
5979 writable => JSON::true,
5980 type => "nbd",
5981 name => "drive-$opt", # NBD export name
5982 );
5983
5984 my $nbd_uri = "$migrate_storage_uri:exportname=drive-$opt";
5985 print "storage migration listens on $nbd_uri volume:$drivestr\n";
5986 print "re-using replicated volume: $opt - $volid\n"
5987 if $nbd->{$opt}->{replicated};
5988
5989 $res->{drives}->{$opt} = $nbd->{$opt};
5990 $res->{drives}->{$opt}->{nbd_uri} = $nbd_uri;
5991 }
5992 }
5993
5994 if ($migratedfrom) {
5995 eval {
5996 set_migration_caps($vmid);
5997 };
5998 warn $@ if $@;
5999
6000 if ($spice_port) {
6001 print "spice listens on port $spice_port\n";
6002 $res->{spice_port} = $spice_port;
6003 if ($migrate_opts->{spice_ticket}) {
6004 mon_cmd($vmid, "set_password", protocol => 'spice', password =>
6005 $migrate_opts->{spice_ticket});
6006 mon_cmd($vmid, "expire_password", protocol => 'spice', time => "+30");
6007 }
6008 }
6009
6010 } else {
6011 mon_cmd($vmid, "balloon", value => $conf->{balloon}*1024*1024)
6012 if !$statefile && $conf->{balloon};
6013
6014 foreach my $opt (keys %$conf) {
6015 next if $opt !~ m/^net\d+$/;
6016 my $nicconf = parse_net($conf->{$opt});
6017 qemu_set_link_status($vmid, $opt, 0) if $nicconf->{link_down};
6018 }
6019 add_nets_bridge_fdb($conf, $vmid);
6020 }
6021
6022 if (!defined($conf->{balloon}) || $conf->{balloon}) {
6023 eval {
6024 mon_cmd(
6025 $vmid,
6026 'qom-set',
6027 path => "machine/peripheral/balloon0",
6028 property => "guest-stats-polling-interval",
6029 value => 2
6030 );
6031 };
6032 log_warn("could not set polling interval for ballooning - $@") if $@;
6033 }
6034
6035 if ($resume) {
6036 print "Resumed VM, removing state\n";
6037 if (my $vmstate = $conf->{vmstate}) {
6038 PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
6039 PVE::Storage::vdisk_free($storecfg, $vmstate);
6040 }
6041 delete $conf->@{qw(lock vmstate runningmachine runningcpu)};
6042 PVE::QemuConfig->write_config($vmid, $conf);
6043 }
6044
6045 PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'post-start');
6046
6047 return $res;
6048 }
6049
# Build the QEMU command for a VM and return it as a string.
#
# $storecfg: storage configuration, passed through to config_to_command()
# $vmid:     ID of the VM whose configuration is loaded
# $snapname: optional snapshot name; when set, the command is generated from that
#            snapshot's configuration instead of the current one
#
# Dies if $snapname is given but no snapshot of that name exists.
sub vm_commandline {
    my ($storecfg, $vmid, $snapname) = @_;

    my $conf = PVE::QemuConfig->load_config($vmid);

    my $forcemachine;
    my $forcecpu;

    if ($snapname) {
        my $snap_conf = $conf->{snapshots}->{$snapname}
            // die "snapshot '$snapname' does not exist\n";

        # a snapshot may pin the machine type and CPU it was taken with
        ($forcemachine, $forcecpu) = $snap_conf->@{qw(runningmachine runningcpu)};

        # keep file digest for API
        $snap_conf->{digest} = $conf->{digest};

        $conf = $snap_conf;
    }

    my $cmd = config_to_command(
        $storecfg, $vmid, $conf, load_defaults(), $forcemachine, $forcecpu);

    return PVE::Tools::cmd2string($cmd);
}
6075
# Send a 'system_reset' monitor command to a VM while holding its config lock.
#
# $vmid:     ID of the VM
# $skiplock: if true, the config lock check is skipped
sub vm_reset {
    my ($vmid, $skiplock) = @_;

    my $reset_locked = sub {
        my $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) unless $skiplock;

        mon_cmd($vmid, "system_reset");
    };

    PVE::QemuConfig->lock_config($vmid, $reset_locked);
}
6088
# Collect the volume IDs referenced by a VM configuration.
#
# Volumes given as absolute paths and volumes whose ID cannot be parsed into a storage ID
# are left out.
#
# Returns: array reference of volume IDs
sub get_vm_volumes {
    my ($conf) = @_;

    my @volids;
    foreach_volid($conf, sub {
        my ($volid) = @_;

        # absolute paths are skipped
        return if $volid =~ m|^/|;

        # second argument = noerr, so an unparsable ID yields no storage ID instead of dying
        my ($storeid) = PVE::Storage::parse_volume_id($volid, 1);
        return if !$storeid;

        push @volids, $volid;
    });

    return \@volids;
}
6106
# Remove mediated devices belonging to the VM's hostpciN entries and drop its PCI reservation.
#
# $vmid: ID of the VM
# $conf: VM configuration hash; only keys matching hostpciN are looked at
sub cleanup_pci_devices {
    my ($vmid, $conf) = @_;

    for my $opt (keys $conf->%*) {
        my ($index) = $opt =~ m/^hostpci(\d+)$/ or next;

        my $mdev_uuid = PVE::SysFSTools::generate_mdev_uuid($vmid, $index);
        my $device = parse_hostpci($conf->{$opt});
        next if !$device->{mdev};

        # NOTE: avoid PVE::SysFSTools::pci_cleanup_mdev_device as it requires the PCI ID and
        # we don't want to break ABI just for these two lines
        my $sysfs_dir = "/sys/bus/mdev/devices/$mdev_uuid";

        # some nvidia vgpu driver versions want to clean up the mdevs themselves and error
        # out when we do it first, so wait 10 seconds before trying
        sleep 10 if $device->{ids}->[0]->[0]->{vendor} =~ m/^(0x)?10de$/;

        PVE::SysFSTools::file_write("$sysfs_dir/remove", "1") if -e $sysfs_dir;
    }

    PVE::QemuServer::PCI::remove_pci_reservation($vmid);
}
6131
# Best-effort cleanup after a VM has stopped. Any error is only warned about.
#
# $storecfg:              storage configuration
# $vmid:                  ID of the VM
# $conf:                  VM configuration
# $keepActive:            if true, volumes are neither deactivated nor unmapped
# $apply_pending_changes: if true, pending config changes are applied at the end
sub vm_stop_cleanup {
    my ($storecfg, $vmid, $conf, $keepActive, $apply_pending_changes) = @_;

    eval {
        unless ($keepActive) {
            my $volids = get_vm_volumes($conf);
            PVE::Storage::deactivate_volumes($storecfg, $volids);

            my $tpmdrive = $conf->{tpmstate0};
            if ($tpmdrive) {
                my $tpm = parse_drive("tpmstate0", $tpmdrive);
                my ($storeid) = PVE::Storage::parse_volume_id($tpm->{file}, 1);
                PVE::Storage::unmap_volume($storecfg, $tpm->{file}) if $storeid;
            }
        }

        # remove the per-VM runtime files
        unlink "/var/run/qemu-server/${vmid}.$_" for qw(mon qmp pid vnc qga);

        if ($conf->{ivshmem}) {
            my $ivshmem = parse_property_string($ivshmem_fmt, $conf->{ivshmem});
            # Just delete it for now. VMs which already have it open are not affected, but
            # new VMs will get a separate one. If this becomes an issue we either add some
            # sort of ref-counting or a "don't delete on stop" flag to the ivshmem format.
            my $shm_name = $ivshmem->{name} // $vmid;
            unlink '/dev/shm/pve-shm-' . $shm_name;
        }

        cleanup_pci_devices($vmid, $conf);

        vmconfig_apply_pending($vmid, $conf, $storecfg) if $apply_pending_changes;
    };
    warn $@ if $@; # avoid errors - just warn
}
6169
# Stop or shut down a running VM, escalating to SIGTERM and then SIGKILL when $force is set.
# Returns immediately if the VM is not running.
#
# call only in locked context
#
# $storecfg:   storage configuration (used for the cleanup afterwards)
# $vmid:       ID of the VM
# $skiplock:   skip the config lock check
# $nocheck:    do not load the VM config; no hookscript and no cleanup is run in that case
# $timeout:    seconds to wait for the VM to exit; falls back to the 'down' value of the
#              startup option (shutdown only), then to 60
# $shutdown:   ask the guest to shut down instead of sending 'quit' to QEMU
# $force:      terminate the process by signal when the command fails or times out
# $keepActive: passed on to vm_stop_cleanup()
sub _do_vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive) = @_;

    my $pid = check_running($vmid, $nocheck);
    return if !$pid;

    my $conf;
    if (!$nocheck) {
        $conf = PVE::QemuConfig->load_config($vmid);
        PVE::QemuConfig->check_lock($conf) if !$skiplock;
        if (!defined($timeout) && $shutdown && $conf->{startup}) {
            my $startup = PVE::JSONSchema::pve_parse_startup_order($conf->{startup});
            $timeout = $startup->{down} if $startup->{down};
        }
        PVE::GuestHelpers::exec_hookscript($conf, $vmid, 'pre-stop');
    }

    # Poll once per second until the VM is gone or $limit seconds have passed.
    # Returns true if the limit was reached.
    my $limit_reached = sub {
        my ($limit) = @_;

        my $elapsed = 0;
        while (($elapsed < $limit) && check_running($vmid, $nocheck)) {
            $elapsed++;
            sleep 1;
        }
        return $elapsed >= $limit;
    };

    # cleanup needs the config, so it is skipped when none was loaded
    my $cleanup = sub {
        vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 1) if $conf;
    };

    eval {
        my @stop_cmd = ("quit");
        if ($shutdown) {
            # prefer the guest agent when it is enabled in the config
            @stop_cmd = defined($conf) && get_qga_key($conf, 'enabled')
                ? ("guest-shutdown", timeout => $timeout)
                : ("system_powerdown");
        }
        mon_cmd($vmid, @stop_cmd);
    };
    my $err = $@;

    if ($err) {
        if (!check_running($vmid, $nocheck)) {
            warn "Unexpected: VM shutdown command failed, but VM not running anymore..\n";
            return;
        }
        die "VM quit/powerdown failed\n" if !$force;

        warn "VM quit/powerdown failed - terminating now with SIGTERM\n";
        kill 15, $pid;
    } else {
        $timeout = 60 if !defined($timeout);

        if (!$limit_reached->($timeout)) {
            $cleanup->();
            return;
        }
        die "VM quit/powerdown failed - got timeout\n" if !$force;

        warn "VM still running - terminating now with SIGTERM\n";
        kill 15, $pid;
    }

    # wait again, this time for the SIGTERM to take effect
    if ($limit_reached->(10)) {
        warn "VM still running - terminating now with SIGKILL\n";
        kill 9, $pid;
        sleep 1;
    }

    $cleanup->();
}
6251
# Stop a VM.
#
# Note: use $nocheck to skip tests if the VM configuration file exists.
# We need that when migrating VMs to other nodes (files already moved).
# Note: we set $keepActive in vzdump stop mode - volumes need to stay active.
#
# When $migratedfrom is set, the process is sent SIGTERM right away and the cleanup runs with
# the configuration loaded for that node, without taking the config lock. Otherwise the stop
# is carried out by _do_vm_stop() under the config lock.
sub vm_stop {
    my ($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive, $migratedfrom) = @_;

    # a plain stop (no shutdown) is forced unless the caller said otherwise
    $force = 1 if !defined($force) && !$shutdown;

    if (!$migratedfrom) {
        return PVE::QemuConfig->lock_config($vmid, sub {
            _do_vm_stop($storecfg, $vmid, $skiplock, $nocheck, $timeout, $shutdown, $force, $keepActive);
        });
    }

    my $pid = check_running($vmid, $nocheck, $migratedfrom);
    kill 15, $pid if $pid;

    my $conf = PVE::QemuConfig->load_config($vmid, $migratedfrom);
    vm_stop_cleanup($storecfg, $vmid, $conf, $keepActive, 0);

    return;
}
6272
# Reboot a running VM: record a reboot request, then shut the VM down under the config lock.
# If anything fails, the reboot request is cleared again and the error is re-thrown.
#
# $vmid:    ID of the VM
# $timeout: shutdown timeout, passed on to _do_vm_stop()
sub vm_reboot {
    my ($vmid, $timeout) = @_;

    my $reboot_locked = sub {
        eval {
            # only reboot if running, as qmeventd starts it again on a stop event
            return if !check_running($vmid);

            create_reboot_request($vmid);

            my $storecfg = PVE::Storage::config();
            _do_vm_stop($storecfg, $vmid, undef, undef, $timeout, 1);
        };
        my $err = $@;
        if ($err) {
            # avoid that the next normal shutdown will be confused for a reboot
            clear_reboot_request($vmid);
            die $err;
        }
    };

    PVE::QemuConfig->lock_config($vmid, $reboot_locked);
}
6295
# Suspend a VM.
#
# Without $includestate only the 'stop' monitor command is sent. With $includestate the VM
# state is written to a newly allocated state volume, QEMU is told to quit, and the config
# lock goes from 'suspending' to 'suspended'.
#
# $vmid:         ID of the VM
# $skiplock:     skip the config lock check
# $includestate: save the VM state to a volume
# $statestorage: storage for the state volume; when unset one is picked via
#                find_vmstate_storage() and, outside the CLI, checked for
#                'Datastore.AllocateSpace'
#
# note: if using the statestorage parameter, the caller has to check privileges
sub vm_suspend {
    my ($vmid, $skiplock, $includestate, $statestorage) = @_;

    my ($conf, $path, $storecfg, $vmstate);

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        my $is_backing_up = PVE::QemuConfig->has_lock($conf, 'backup');
        PVE::QemuConfig->check_lock($conf) unless $skiplock || $is_backing_up;

        die "cannot suspend to disk during backup\n" if $is_backing_up && $includestate;

        if (!$includestate) {
            mon_cmd($vmid, "stop");
            return;
        }

        $conf->{lock} = 'suspending';
        my $date = strftime("%Y-%m-%d", localtime(time()));
        $storecfg = PVE::Storage::config();

        if (!$statestorage) {
            $statestorage = find_vmstate_storage($conf, $storecfg);
            # check permissions for the storage
            my $rpcenv = PVE::RPCEnvironment::get();
            if ($rpcenv->{type} ne 'cli') {
                my $authuser = $rpcenv->get_user();
                $rpcenv->check($authuser, "/storage/$statestorage", ['Datastore.AllocateSpace']);
            }
        }

        $vmstate = PVE::QemuConfig->__snapshot_save_vmstate(
            $vmid, $conf, "suspend-$date", $storecfg, $statestorage, 1);
        $path = PVE::Storage::path($storecfg, $vmstate);
        PVE::QemuConfig->write_config($vmid, $conf);
    });

    return if !$includestate;

    # save vm state
    PVE::Storage::activate_volumes($storecfg, [$vmstate]);

    eval {
        set_migration_caps($vmid, 1);
        mon_cmd($vmid, "savevm-start", statefile => $path);

        # poll once per second until the save is reported as completed or failed
        while (1) {
            my $state = mon_cmd($vmid, "query-savevm");
            my $status = $state->{status};

            die "savevm not active\n" if !$status;

            if ($status eq 'active') {
                sleep(1);
                next;
            }
            if ($status eq 'completed') {
                print "State saved, quitting\n";
                last;
            }

            die "query-savevm failed with error '$state->{error}'\n"
                if $status eq 'failed' && $state->{error};
            die "query-savevm returned status '$status'\n";
        }
    };
    my $err = $@;

    PVE::QemuConfig->lock_config($vmid, sub {
        $conf = PVE::QemuConfig->load_config($vmid);

        if ($err) {
            # cleanup, but leave suspending lock, to indicate something went wrong
            eval {
                mon_cmd($vmid, "savevm-end");
                PVE::Storage::deactivate_volumes($storecfg, [$vmstate]);
                PVE::Storage::vdisk_free($storecfg, $vmstate);
                delete $conf->@{qw(vmstate runningmachine runningcpu)};
                PVE::QemuConfig->write_config($vmid, $conf);
            };
            warn $@ if $@;
            die $err;
        }

        die "lock changed unexpectedly\n"
            unless PVE::QemuConfig->has_lock($conf, 'suspending');

        mon_cmd($vmid, "quit");
        $conf->{lock} = 'suspended';
        PVE::QemuConfig->write_config($vmid, $conf);
    });
}
6390
# Resume a VM under its config lock.
#
# The monitor command depends on the status QEMU reports: nothing is done for 'running',
# 'system_wakeup' is used for 'suspended', 'cont' otherwise. A VM reported as 'shutdown'
# gets a 'system_reset' first.
#
# $nocheck is set when called as part of a migration - in this context the
# location of the config file (source or target node) is not deterministic,
# since migration cannot wait for pmxcfs to process the rename
sub vm_resume {
    my ($vmid, $skiplock, $nocheck) = @_;

    PVE::QemuConfig->lock_config($vmid, sub {
        my $res = mon_cmd($vmid, 'query-status');

        my $conf;
        if (!$nocheck) {
            $conf = PVE::QemuConfig->load_config($vmid);
        } else {
            $conf = eval { PVE::QemuConfig->load_config($vmid) }; # try on target node
            if ($@) {
                my $vmlist = PVE::Cluster::get_vmlist();
                if (exists($vmlist->{ids}->{$vmid})) {
                    my $node = $vmlist->{ids}->{$vmid}->{node};
                    $conf = eval { PVE::QemuConfig->load_config($vmid, $node) }; # try on source node
                }
                if (!$conf) {
                    PVE::Cluster::cfs_update(); # vmlist was wrong, invalidate cache
                    $conf = PVE::QemuConfig->load_config($vmid); # last try on target node again
                }
            }
        }

        my $resume_cmd = 'cont';
        my $reset = 0;

        my $status = $res->{status};
        if ($status) {
            return if $status eq 'running'; # job done, go home
            $resume_cmd = 'system_wakeup' if $status eq 'suspended';
            $reset = 1 if $status eq 'shutdown';
        }

        if (!$nocheck) {
            my $lock_ok = $skiplock || PVE::QemuConfig->has_lock($conf, 'backup');
            PVE::QemuConfig->check_lock($conf) if !$lock_ok;
        }

        # required if a VM shuts down during a backup and we get a resume
        # request before the backup finishes for example
        mon_cmd($vmid, "system_reset") if $reset;

        add_nets_bridge_fdb($conf, $vmid) if $resume_cmd eq 'cont';

        mon_cmd($vmid, $resume_cmd);
    });
}
6441
# Send a key to a VM via the human monitor 'sendkey' command, under the config lock.
#
# $vmid:     ID of the VM
# $skiplock: accepted, but not used in this function
# $key:      key specification appended to the 'sendkey' command
#
# Dies with the monitor's output if that output is not empty.
sub vm_sendkey {
    my ($vmid, $skiplock, $key) = @_;

    my $send_locked = sub {
        # the configuration is loaded, but not otherwise used here
        my $conf = PVE::QemuConfig->load_config($vmid);

        # there is no qmp command, so we use the human monitor command
        my $output = PVE::QemuServer::Monitor::hmp_cmd($vmid, "sendkey $key");
        die $output if $output ne '';
    };

    PVE::QemuConfig->lock_config($vmid, $send_locked);
}
6454
# Check that a user may use the bridges referenced by the netN entries of a configuration.
#
# $rpcenv:   RPC environment, passed to PVE::GuestHelpers::check_vnet_access()
# $authuser: user ID; 'root@pam' passes without any check
# $conf:     VM configuration hash
#
# Returns 1. Failures are expected to surface as exceptions from check_vnet_access().
sub check_bridge_access {
    my ($rpcenv, $authuser, $conf) = @_;

    return 1 if $authuser eq 'root@pam';

    my @net_opts = grep { m/^net\d+$/ } sort keys $conf->%*;
    for my $opt (@net_opts) {
        my $net = parse_net($conf->{$opt});
        PVE::GuestHelpers::check_vnet_access(
            $rpcenv, $authuser, $net->@{'bridge', 'tag', 'trunks'});
    }

    return 1;
}
6468
# Check that a user may use the USB and PCI devices referenced by a configuration.
#
# For usbN entries, a 'host' value other than 'spice' (case-insensitive) is root-only.
# For hostpciN entries, any 'host' value is root-only. Entries using 'mapping' require
# 'Mapping.Use' on the respective /mapping/usb or /mapping/pci path. An entry with neither
# 'host' nor 'mapping' is an error.
#
# $rpcenv: RPC environment used for the permission checks
# $user:   user ID
# $conf:   VM configuration hash
sub check_mapping_access {
    my ($rpcenv, $user, $conf) = @_;

    my $is_root = $user eq 'root@pam';

    for my $opt (keys $conf->%*) {
        if ($opt =~ m/^usb\d+$/) {
            my $device = PVE::JSONSchema::parse_property_string('pve-qm-usb', $conf->{$opt});
            my $host = $device->{host};
            if ($host) {
                my $is_spice = $host =~ m/^spice$/i;
                die "only root can set '$opt' config for real devices\n"
                    unless $is_spice || $is_root;
            } elsif ($device->{mapping}) {
                $rpcenv->check_full($user, "/mapping/usb/$device->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        } elsif ($opt =~ m/^hostpci\d+$/) {
            my $device = PVE::JSONSchema::parse_property_string('pve-qm-hostpci', $conf->{$opt});
            if ($device->{host}) {
                die "only root can set '$opt' config for non-mapped devices\n" unless $is_root;
            } elsif ($device->{mapping}) {
                $rpcenv->check_full($user, "/mapping/pci/$device->{mapping}", ['Mapping.Use']);
            } else {
                die "either 'host' or 'mapping' must be set.\n";
            }
        }
    }
}
6495
# Run the bridge and device-mapping permission checks for a configuration that is about to
# be restored. Returns whatever check_mapping_access() returns.
sub check_restore_permissions {
    my ($rpcenv, $user, $conf) = @_;

    my @check_args = ($rpcenv, $user, $conf);

    check_bridge_access(@check_args);
    return check_mapping_access(@check_args);
}
# vzdump restore implementation
6503
# Return the first entry name listed by `tar tf` for an archive.
#
# Dies if $archive is not a plain file, if tar cannot be started, or if the listing yields
# no first line.
sub tar_archive_read_firstfile {
    my ($archive) = @_;

    die "ERROR: file '$archive' does not exist\n" unless -f $archive;

    # try to detect archive type first
    my $pid = open(my $listing, '-|', 'tar', 'tf', $archive);
    die "unable to open file '$archive'\n" if !$pid;

    # only the first line is needed, so tar is terminated right after reading it
    my $firstfile = <$listing>;
    kill 15, $pid;
    close $listing;

    die "ERROR: archive contaions no data\n" if !$firstfile;
    chomp $firstfile;

    return $firstfile;
}
6521
# Remove the temporary volumes recorded in a restore stat file.
#
# Every line of $statfile is expected to end in "vzdump:<something>:<volid>". A volid that
# is an absolute path is unlinked, anything else is freed via PVE::Storage::vdisk_free().
# Progress and failures are reported on STDERR; a failure for one volume does not stop the
# cleanup of the others. A stat file that cannot be opened is silently ignored.
#
# $storecfg: storage configuration
# $statfile: path of the stat file to read
sub tar_restore_cleanup {
    my ($storecfg, $statfile) = @_;

    print STDERR "starting cleanup\n";

    if (my $fd = IO::File->new($statfile, "r")) {
        while (defined(my $line = <$fd>)) {
            if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                my $volid = $2;
                eval {
                    if ($volid =~ m|^/|) {
                        # parenthesized + low-precedence 'or', so that a failing unlink is
                        # actually detected ('unlink $volid || die' never dies)
                        unlink($volid) or die "unlink failed - $!\n";
                    } else {
                        PVE::Storage::vdisk_free($storecfg, $volid);
                    }
                    print STDERR "temporary volume '$volid' sucessfuly removed\n";
                };
                print STDERR "unable to cleanup '$volid' - $@" if $@;
            } else {
                print STDERR "unable to parse line in statfile - $line";
            }
        }
        $fd->close();
    }
}
6547
# Restore a VM from a backup archive, dispatching on the archive format.
#
# An $archive of '-' goes straight to restore_vma_archive() without being inspected.
# Otherwise the format is taken from $opts->{format} or, if that is undefined, from
# PVE::Storage::archive_info(): 'tar' is handled by restore_tar_archive(), everything else
# by restore_vma_archive(), which additionally receives the detected compression.
sub restore_file_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    return restore_vma_archive($archive, $vmid, $user, $opts) if $archive eq '-';

    my $info = PVE::Storage::archive_info($archive);
    my $format = $opts->{format} // $info->{format};

    return restore_tar_archive($archive, $vmid, $user, $opts) if $format eq 'tar';

    return restore_vma_archive($archive, $vmid, $user, $opts, $info->{compression});
}
6565
# Helper to remove disks that will not be used after a restore.
#
# $storecfg:     storage configuration
# $vmid:         ID of the VM; only volumes owned by this ID are touched
# $oldconf:      configuration that is about to be replaced
# $virtdev_hash: devices contained in the backup, keyed by drive name
#
# Volumes of drives that are part of the backup are freed. All other owned volumes are kept,
# but lose their snapshots. State volumes of all snapshots are freed as well. Errors from
# the storage layer are only warned about.
my $restore_cleanup_oldconf = sub {
    my ($storecfg, $vmid, $oldconf, $virtdev_hash) = @_;

    my %kept_volids;

    PVE::QemuConfig->foreach_volume($oldconf, sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive, 1);

        my $volid = $drive->{file};
        return unless $volid && $volid !~ m|^/|;

        my ($path, $owner) = PVE::Storage::path($storecfg, $volid);
        return unless $path && $owner && ($owner == $vmid);

        # Note: only delete disks we want to restore,
        # other volumes will become unused
        if (!$virtdev_hash->{$ds}) {
            $kept_volids{$volid} = 1;
            return;
        }

        eval { PVE::Storage::vdisk_free($storecfg, $volid); };
        warn $@ if $@;
    });

    # after the restore we have no snapshots anymore
    for my $snapname (keys $oldconf->{snapshots}->%*) {
        my $vmstate = $oldconf->{snapshots}->{$snapname}->{vmstate};
        if ($vmstate) {
            eval { PVE::Storage::vdisk_free($storecfg, $vmstate); };
            warn $@ if $@;
        }

        for my $volid (keys %kept_volids) {
            eval { PVE::Storage::volume_snapshot_delete($storecfg, $volid, $snapname); };
            warn $@ if $@;
        }
    }
};
6611
# Helper to parse vzdump backup device hints
#
# $rpcenv: Environment, used to check storage permissions
# $user: User ID, to check storage permissions
# $storecfg: Storage configuration
# $fh: the file handle for reading the configuration
# $devinfo: should contain device sizes for all backed-up devices
# $options: backup options (pool, default storage)
#
# Return: $virtdev_hash, updates $devinfo (add devname, virtdev, format, storeid)
my $parse_backup_hints = sub {
    my ($rpcenv, $user, $storecfg, $fh, $devinfo, $options) = @_;

    # assert that an image can be allocated on the storage (content type and permission)
    my $assert_image_storage = sub {
        my ($storeid, $scfg) = @_;

        die "Content type 'images' is not available on storage '$storeid'\n"
            unless $scfg->{content}->{images};

        return if $user eq 'root@pam';
        $rpcenv->check($user, "/storage/$storeid", ['Datastore.AllocateSpace']);
    };

    my $virtdev_hash = {};

    while (defined(my $line = <$fh>)) {
        if ($line =~ m/^\#qmdump\#map:(\S+):(\S+):(\S*):(\S*):$/) {
            my ($virtdev, $devname, $storeid, $format) = ($1, $2, $3, $4);

            my $info = $devinfo->{$devname}
                or die "archive does not contain data for drive '$virtdev'\n";

            # an explicitly requested storage wins; 'local' is the last resort
            $storeid = $options->{storage} if defined($options->{storage});
            $storeid ||= 'local';
            $format ||= 'raw';

            $info->@{qw(devname virtdev format storeid)} =
                ($devname, $virtdev, $format, $storeid);

            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            $assert_image_storage->($storeid, $scfg);

            $virtdev_hash->{$virtdev} = $info;
        } elsif ($line =~ m/^((?:ide|sata|scsi)\d+):\s*(.*)\s*$/) {
            my $virtdev = $1;
            my $drive = parse_drive($virtdev, $2);

            # of the plain config lines, only cloud-init drives are of interest here
            next if !drive_is_cloudinit($drive);

            my ($storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
            $storeid = $options->{storage} if defined ($options->{storage});
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            my $format = qemu_img_format($scfg, $volname); # has 'raw' fallback

            $assert_image_storage->($storeid, $scfg);

            $virtdev_hash->{$virtdev} = {
                format => $format,
                storeid => $storeid,
                size => PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE,
                is_cloudinit => 1,
            };
        }
    }

    return $virtdev_hash;
};
6679
# Helper to allocate and activate all volumes required for a restore
#
# $storecfg: Storage configuration
# $virtdev_hash: as returned by parse_backup_hints()
# $vmid: ID of the VM the volumes are allocated for
#
# Each entry of $virtdev_hash is updated in place: 'format' falls back to the storage's
# default format when the requested one is not valid there, and 'volid' is set to the newly
# allocated volume.
#
# Returns: { $virtdev => $volid }
my $restore_allocate_devices = sub {
    my ($storecfg, $virtdev_hash, $vmid) = @_;

    my $map = {};
    foreach my $virtdev (sort keys %$virtdev_hash) {
        my $d = $virtdev_hash->{$virtdev};
        # size in KiB, rounded up
        my $alloc_size = int(($d->{size} + 1024 - 1)/1024);
        my $storeid = $d->{storeid};
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

        # test if requested format is supported
        my ($defFormat, $validFormats) = PVE::Storage::storage_default_format($storecfg, $storeid);
        my $supported = grep { $_ eq $d->{format} } @$validFormats;
        $d->{format} = $defFormat if !$supported;

        my $name;
        if ($d->{is_cloudinit}) {
            $name = "vm-$vmid-cloudinit";
            # storages with a 'path' get the format appended as file extension
            $name .= ".$d->{format}" if $scfg->{path};
        }

        my $volid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $vmid, $d->{format}, $name, $alloc_size);

        print STDERR "new volume ID is '$volid'\n";
        $d->{volid} = $volid;

        PVE::Storage::activate_volumes($storecfg, [$volid]);

        $map->{$virtdev} = $volid;
    }

    return $map;
};
6723
# Transform a single line of a backed-up VM configuration for the restored VM.
#
# $cookie: state shared between calls; {netcount} is the index used for the next network
#          device generated from an old 'vlanN' line, and is incremented per device
# $map:    { $virtdev => $volid } of newly allocated volumes
# $line:   the configuration line, including its trailing newline
# $unique: if true, MAC addresses and the SMBIOS UUID are regenerated
#
# Returns the text to write to the new configuration. This is the empty string for lines
# that are dropped: qmdump/vzdump hints, lock, unusedN and parent.
sub restore_update_config_line {
    my ($cookie, $map, $line, $unique) = @_;

    for my $dropped (qr/^\#qmdump\#/, qr/^\#vzdump\#/, qr/^lock:/, qr/^unused\d+:/, qr/^parent:/) {
        return '' if $line =~ $dropped;
    }

    my $dc = PVE::Cluster::cfs_read_file('datacenter.cfg');

    if ($line =~ m/^(vlan(\d+)):\s*(\S+)\s*$/) {
        # try to convert old 1.X settings
        my ($bridge_index, $ethcfg) = ($2, $3);

        my $converted = '';
        for my $devconfig (PVE::Tools::split_list($ethcfg)) {
            my ($model, $macaddr) = split(/\=/, $devconfig);
            $macaddr = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if !$macaddr || $unique;

            my $netstr = print_net({
                model => $model,
                bridge => "vmbr$bridge_index",
                macaddr => $macaddr,
            });

            $converted .= "net$cookie->{netcount}: $netstr\n";
            $cookie->{netcount}++;
        }
        return $converted;
    }

    if (($line =~ m/^(net\d+):\s*(\S+)\s*$/) && $unique) {
        my ($id, $netstr) = ($1, $2);

        my $net = parse_net($netstr);
        # only replace a MAC address that was actually set
        $net->{macaddr} = PVE::Tools::random_ether_addr($dc->{mac_prefix}) if $net->{macaddr};

        return "$id: " . print_net($net) . "\n";
    }

    if ($line =~ m/^((ide|scsi|virtio|sata|efidisk|tpmstate)\d+):\s*(\S+)\s*$/) {
        my ($virtdev, $value) = ($1, $3);

        my $di = parse_drive($virtdev, $value);

        # drives explicitly excluded from backup are kept as a comment
        return "#$line" if defined($di->{backup}) && !$di->{backup};

        # drives that were not restored to a new volume stay untouched
        return $line if !$map->{$virtdev};

        delete $di->{format}; # format can change on restore
        $di->{file} = $map->{$virtdev};
        return "$virtdev: " . print_drive($di) . "\n";
    }

    if ($line =~ m/^vmgenid: (.*)/) {
        my $vmgenid = $1;
        # always generate a new vmgenid if there was a valid one setup
        $vmgenid = generate_uuid() if $vmgenid ne '0';
        return "vmgenid: $vmgenid\n";
    }

    if (($line =~ m/^(smbios1: )(.*)/) && $unique) {
        my ($key_prefix, $smbios_str) = ($1, $2);

        my ($uuid, $uuid_str);
        UUID::generate($uuid);
        UUID::unparse($uuid, $uuid_str);

        my $smbios1 = parse_smbios1($smbios_str);
        $smbios1->{uuid} = $uuid_str;
        return $key_prefix . print_smbios1($smbios1) . "\n";
    }

    return '' . $line;
}
6792
# Deactivate all volumes that were allocated for a restore.
#
# $storecfg:     storage configuration
# $virtdev_hash: device hash; entries without a 'volid' are ignored
#
# Errors are printed to STDERR and not propagated.
my $restore_deactivate_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    eval { PVE::Storage::deactivate_volumes($storecfg, \@volids); };
    print STDERR $@ if $@;
};
6804
# Free all volumes that were allocated for a restore.
#
# $storecfg:     storage configuration
# $virtdev_hash: device hash; entries without a 'volid' are ignored
#
# Each volume is handled on its own: success and failure are reported on STDERR and a
# failure does not stop the remaining volumes from being freed.
my $restore_destroy_volumes = sub {
    my ($storecfg, $virtdev_hash) = @_;

    my @volids = grep { $_ } map { $_->{volid} } values $virtdev_hash->%*;

    for my $volid (@volids) {
        eval {
            PVE::Storage::vdisk_free($storecfg, $volid);
            print STDERR "temporary volume '$volid' sucessfuly removed\n";
        };
        print STDERR "unable to cleanup '$volid' - $@" if $@;
    }
};
6817
# Parse a backed-up configuration and apply overrides on top of it.
#
# $filename:        file name handed to parse_vm_config()
# $backup_conf_raw: raw configuration text from the backup
# $override_conf:   hash of top-level keys that replace those of the parsed configuration
#
# Returns the merged configuration hash.
sub restore_merge_config {
    my ($filename, $backup_conf_raw, $override_conf) = @_;

    my $merged = parse_vm_config($filename, $backup_conf_raw);

    # keys() and values() of an unmodified hash are returned in matching order
    $merged->@{ keys $override_conf->%* } = values $override_conf->%*;

    return $merged;
}
6828
# List the image volumes PVE::Storage::vdisk_list() reports for a VM, keyed by volume ID.
#
# $cfg:  storage configuration
# $vmid: ID of the VM
#
# Entries lacking a volid or a (non-zero) size are skipped. Every returned entry has its
# 'path' set from PVE::Storage::path().
#
# Returns: { $volid => $volume_info }
sub scan_volids {
    my ($cfg, $vmid) = @_;

    my $info = PVE::Storage::vdisk_list($cfg, undef, $vmid, undef, 'images');

    my %volume_by_id;
    for my $storeid (keys $info->%*) {
        for my $item ($info->{$storeid}->@*) {
            next unless $item->{volid} && $item->{size};

            $item->{path} = PVE::Storage::path($cfg, $item->{volid});
            $volume_by_id{ $item->{volid} } = $item;
        }
    }

    return \%volume_by_id;
}
6845
# Bring the disk entries of a VM configuration in line with the volumes found on storage.
#
# $vmid:       ID of the VM (used for messages and to recognize state volumes)
# $conf:       VM configuration, modified in place
# $volid_hash: { $volid => $volume_info } as built by scan_volids()
#
# Three passes are made:
#   1. the drive entries get their size updated,
#   2. 'unusedN' entries whose volume is referenced elsewhere are removed,
#   3. volumes that nothing references are added as new unused entries.
#
# Returns 1 if $conf was changed, undef otherwise.
sub update_disk_config {
    my ($vmid, $conf, $volid_hash) = @_;

    my $changes;
    my $prefix = "VM $vmid";

    # Note: it is allowed to define multiple storages with the same path (alias), so we
    # need to track both the volid and the real path (two different volids can point to
    # the same path).
    my %seen_volid; # used and unused disks
    my %seen_path;

    # update size info
    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $volume = $volid_hash->{$volid};

        # mark volid as "in-use" for next step
        $seen_volid{$volid} = 1;
        if ($volume && (my $path = $volume->{path})) {
            $seen_path{$path} = 1;
        }

        return if drive_is_cdrom($drive);
        return if !$volume;

        my ($updated, $msg) = PVE::QemuServer::Drive::update_disksize($drive, $volume->{size});
        return if !defined($updated);

        $changes = 1;
        $conf->{$opt} = print_drive($updated);
        print "$prefix ($opt): $msg\n";
    });

    # remove 'unusedX' entry if volume is used
    PVE::QemuConfig->foreach_unused_volume($conf, sub {
        my ($opt, $drive) = @_;

        my $volid = $drive->{file};
        return if !$volid;

        my $known = $volid_hash->{$volid};
        my $path = $known ? $known->{path} : undef;

        if ($seen_volid{$volid} || ($path && $seen_path{$path})) {
            print "$prefix remove entry '$opt', its volume '$volid' is in use\n";
            $changes = 1;
            delete $conf->{$opt};
        }

        $seen_volid{$volid} = 1;
        $seen_path{$path} = 1 if $path;
    });

    # add volumes that nothing references yet
    for my $volid (sort keys $volid_hash->%*) {
        next if $volid =~ m/vm-$vmid-state-/;
        next if $seen_volid{$volid};

        my $path = $volid_hash->{$volid}->{path};
        next if !$path; # just to be sure
        next if $seen_path{$path};

        $changes = 1;
        my $key = PVE::QemuConfig->add_unused_volume($conf, $volid);
        print "$prefix add unreferenced volume '$volid' as '$key' to config\n";
        $seen_path{$path} = 1; # avoid to add more than once (aliases)
    }

    return $changes;
}
6919
# Rescan the storages and update the disk entries of one or all VM configs.
#
# $vmid   - guest to update; if undef, every guest returned by config_list()
# $nolock - if true, the config is updated without taking the config lock
# $dryrun - if true, changes are computed (and printed) but not written
sub rescan {
    my ($vmid, $nolock, $dryrun) = @_;

    my $cfg = PVE::Storage::config();

    print "rescan volumes...\n";
    my $volid_hash = scan_volids($cfg, $vmid);

    my $update_one = sub {
        my ($vmid) = @_;

        my $conf = PVE::QemuConfig->load_config($vmid);

        PVE::QemuConfig->check_lock($conf);

        # only hand over the volumes that belong to this guest
        my $vm_volids = {};
        for my $volid (keys $volid_hash->%*) {
            my $info = $volid_hash->{$volid};
            my $owner = $info->{vmid};
            $vm_volids->{$volid} = $info if $owner && $owner == $vmid;
        }

        my $changes = update_disk_config($vmid, $conf, $vm_volids);

        PVE::QemuConfig->write_config($vmid, $conf) if $changes && !$dryrun;
    };

    # run the update for a single guest, with or without the config lock
    my $run = sub {
        my ($id) = @_;
        return $update_one->($id) if $nolock;
        return PVE::QemuConfig->lock_config($id, $update_one, $id);
    };

    if (defined($vmid)) {
        $run->($vmid);
    } else {
        my $vmlist = config_list();
        for my $id (keys $vmlist->%*) {
            $run->($id);
        }
    }
}
6963
# Restore a VM from a snapshot stored on a Proxmox Backup Server storage.
#
# $archive - volume ID of the backup; must parse as vtype 'backup' with format 'pbs-vm'
# $vmid    - ID of the guest that is created (or overwritten)
# $user    - handed to the backup-hint parser and to check_restore_permissions()
# $options - hash ref; keys read here: live, unique, override_conf, pool
#            (the whole hash is also handed to the backup-hint parser)
#
# The config, the optional firewall config and the index are fetched into a
# temporary directory, volumes get allocated and filled with `pbs-restore`, and
# finally the new config is written. With $options->{live} only 'efidisk0' and
# 'tpmstate0' are restored up front, the rest is streamed by pbs_live_restore()
# while the 'create' lock is kept.
#
# On failure the allocated volumes are freed again and the error is re-thrown.
sub restore_proxmox_backup_archive {
    my ($archive, $vmid, $user, $options) = @_;

    my $storecfg = PVE::Storage::config();

    my ($storeid, $volname) = PVE::Storage::parse_volume_id($archive);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);

    my $fingerprint = $scfg->{fingerprint};
    my $keyfile = PVE::Storage::PBSPlugin::pbs_encryption_key_file_name($storecfg, $storeid);

    my $repo = PVE::PBSClient::get_repository($scfg);
    my $namespace = $scfg->{namespace};

    # This is only used for `pbs-restore` and the QEMU PBS driver (live-restore)
    my $password = PVE::Storage::PBSPlugin::pbs_get_password($scfg, $storeid);
    local $ENV{PBS_PASSWORD} = $password;
    local $ENV{PBS_FINGERPRINT} = $fingerprint if defined($fingerprint);

    my ($vtype, $pbs_backup_name, undef, undef, undef, undef, $format) =
        PVE::Storage::parse_volname($storecfg, $archive);

    die "got unexpected vtype '$vtype'\n" if $vtype ne 'backup';

    die "got unexpected backup format '$format'\n" if $format ne 'pbs-vm';

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;
    mkpath $tmpdir;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my $rpcenv = PVE::RPCEnvironment::get();
    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        my $cfgfn = "$tmpdir/qemu-server.conf";
        my $firewall_config_fn = "$tmpdir/fw.conf";
        my $index_fn = "$tmpdir/index.json";

        my $cmd = "restore";

        my $param = [$pbs_backup_name, "index.json", $index_fn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);
        my $index = PVE::Tools::file_get_contents($index_fn);
        $index = decode_json($index);

        foreach my $info (@{$index->{files}}) {
            # the dots are literal parts of the archive name, so they must be escaped
            if ($info->{filename} =~ m/^(drive-\S+)\.img\.fidx$/) {
                my $devname = $1;
                if ($info->{size} =~ m/^(\d+)$/) { # untaint size
                    $devinfo->{$devname}->{size} = $1;
                } else {
                    die "unable to parse file size in 'index.json' - got '$info->{size}'\n";
                }
            }
        }

        my $is_qemu_server_backup = scalar(
            grep { $_->{filename} eq 'qemu-server.conf.blob' } @{$index->{files}}
        );
        if (!$is_qemu_server_backup) {
            die "backup does not look like a qemu-server backup (missing 'qemu-server.conf' file)\n";
        }
        my $has_firewall_config = scalar(grep { $_->{filename} eq 'fw.conf.blob' } @{$index->{files}});

        $param = [$pbs_backup_name, "qemu-server.conf", $cfgfn];
        PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

        if ($has_firewall_config) {
            $param = [$pbs_backup_name, "fw.conf", $firewall_config_fn];
            PVE::Storage::PBSPlugin::run_raw_client_cmd($scfg, $storeid, $cmd, $param);

            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($firewall_config_fn, "${pve_firewall_dir}/$vmid.fw");
        }

        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $storecfg, $fh, $devinfo, $options);

        # fixme: rate limit?

        # create empty/temp config
        PVE::Tools::file_set_contents($conffile, "memory: 128\nlock: create");

        $restore_cleanup_oldconf->($storecfg, $vmid, $oldconf, $virtdev_hash) if $oldconf;

        # allocate volumes
        my $map = $restore_allocate_devices->($storecfg, $virtdev_hash, $vmid);

        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            # this fails if storage is unavailable
            my $volid = $d->{volid};
            my $path = PVE::Storage::path($storecfg, $volid);

            # for live-restore we only want to preload the efidisk and TPM state
            next if $options->{live} && $virtdev ne 'efidisk0' && $virtdev ne 'tpmstate0';

            my @ns_arg;
            if (defined(my $ns = $scfg->{namespace})) {
                @ns_arg = ('--ns', $ns);
            }

            my $pbs_restore_cmd = [
                '/usr/bin/pbs-restore',
                '--repository', $repo,
                @ns_arg,
                $pbs_backup_name,
                "$d->{devname}.img.fidx",
                $path,
                '--verbose',
            ];

            push @$pbs_restore_cmd, '--format', $d->{format} if $d->{format};
            push @$pbs_restore_cmd, '--keyfile', $keyfile if -e $keyfile;

            if (PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $volid)) {
                push @$pbs_restore_cmd, '--skip-zero';
            }

            my $dbg_cmdstring = PVE::Tools::cmd2string($pbs_restore_cmd);
            print "restore proxmox backup image: $dbg_cmdstring\n";
            run_command($pbs_restore_cmd);
        }

        # second pass over the backed up config: rewrite it for the new volumes
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $options->{unique},
            );
        }

        $fh->close();
    };
    my $err = $@;

    # for a successful live-restore the volumes stay active, the VM gets started below
    if ($err || !$options->{live}) {
        $restore_deactivate_volumes->($storecfg, $virtdev_hash);
    }

    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($storecfg, $virtdev_hash);
        die $err;
    }

    if ($options->{live}) {
        # keep lock during live-restore
        $new_conf_raw .= "\nlock: create";
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $options->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $options->{pool}) if $options->{pool};

    if ($options->{live}) {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "got signal ($!) - abort\n"; };

        my $conf = PVE::QemuConfig->load_config($vmid);
        die "cannot do live-restore for template\n" if PVE::QemuConfig->is_template($conf);

        # these special drives are already restored before start
        delete $devinfo->{'drive-efidisk0'};
        delete $devinfo->{'drive-tpmstate0-backup'};

        my $pbs_opts = {
            repo => $repo,
            keyfile => $keyfile,
            snapshot => $pbs_backup_name,
            namespace => $namespace,
        };
        pbs_live_restore($vmid, $conf, $storecfg, $devinfo, $pbs_opts);

        PVE::QemuConfig->remove_lock($vmid, "create");
    }
}
7180
# Start a VM whose disks are backed by a PBS snapshot and stream the data onto
# the target volumes while the guest is already running.
#
# $vmid           - guest ID
# $conf           - VM config; the drive entries of the restored disks are read from it
# $storecfg       - storage configuration
# $restored_disks - hash ref keyed by device name ('drive-<config key>')
# $opts           - hash ref with 'repo', 'snapshot', 'keyfile' and optional 'namespace'
#
# Dies with "live-restore failed" (after stopping the VM) if anything inside the
# start/stream phase fails. Dies early if a device name has an unexpected form.
sub pbs_live_restore {
    my ($vmid, $conf, $storecfg, $restored_disks, $opts) = @_;

    print "starting VM for live-restore\n";
    print "repository: '$opts->{repo}', snapshot: '$opts->{snapshot}'\n";

    my $pbs_backing = {};
    for my $ds (keys %$restored_disks) {
        # never continue with a stale $1 from some earlier match
        my ($confname) = $ds =~ m/^drive-(.*)$/
            or die "unexpected device name '$ds' - cannot derive config key for live-restore\n";

        $pbs_backing->{$confname} = {
            repository => $opts->{repo},
            snapshot => $opts->{snapshot},
            archive => "$ds.img.fidx",
        };
        $pbs_backing->{$confname}->{keyfile} = $opts->{keyfile} if -e $opts->{keyfile};
        $pbs_backing->{$confname}->{namespace} = $opts->{namespace} if defined($opts->{namespace});

        my $drive = parse_drive($confname, $conf->{$confname});
        print "restoring '$ds' to '$drive->{file}'\n";
    }

    my $drives_streamed = 0;
    eval {
        # make sure HA doesn't interrupt our restore by stopping the VM
        if (PVE::HA::Config::vm_is_ha_managed($vmid)) {
            run_command(['ha-manager', 'set', "vm:$vmid", '--state', 'started']);
        }

        # start VM with backing chain pointing to PBS backup, environment vars for PBS driver
        # in QEMU (PBS_PASSWORD and PBS_FINGERPRINT) are already set by our caller
        vm_start_nolock($storecfg, $vmid, $conf, {paused => 1, 'pbs-backing' => $pbs_backing}, {});

        my $qmeventd_fd = register_qmeventd_handle($vmid);

        # begin streaming, i.e. data copy from PBS to target disk for every vol,
        # this will effectively collapse the backing image chain consisting of
        # [target <- alloc-track -> PBS snapshot] to just [target] (alloc-track
        # removes itself once all backing images vanish with 'auto-remove=on')
        my $jobs = {};
        for my $ds (sort keys %$restored_disks) {
            my $job_id = "restore-$ds";
            mon_cmd($vmid, 'block-stream',
                'job-id' => $job_id,
                device => "$ds",
            );
            $jobs->{$job_id} = {};
        }

        mon_cmd($vmid, 'cont');
        qemu_drive_mirror_monitor($vmid, undef, $jobs, 'auto', 0, 'stream');

        print "restore-drive jobs finished successfully, removing all tracking block devices"
            ." to disconnect from Proxmox Backup Server\n";

        for my $ds (sort keys %$restored_disks) {
            mon_cmd($vmid, 'blockdev-del', 'node-name' => "$ds-pbs");
        }

        close($qmeventd_fd);
    };

    my $err = $@;

    if ($err) {
        warn "An error occurred during live-restore: $err\n";
        _do_vm_stop($storecfg, $vmid, 1, 1, 10, 0, 1);
        die "live-restore failed\n";
    }
}
7251
# Restore a VM from a VMA archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $vmid    - ID of the guest that is created (or overwritten)
# $user    - handed to the backup-hint parser and to check_restore_permissions()
# $opts    - hash ref; keys read here: bwlimit, unique, override_conf, pool
#            (the whole hash is also handed to the backup-hint parser)
# $comp    - compression of the archive; if set, the matching decompressor
#            from PVE::Storage::decompressor_info() is put into the pipeline
#
# The archive is piped (optionally through `cstream` for rate limiting and a
# decompressor) into `vma extract`. Once `vma` has printed the header, the
# target volumes are allocated and the device map is written to a fifo that
# `vma` reads from.
#
# On failure the allocated volumes are freed again and the error is re-thrown.
sub restore_vma_archive {
    my ($archive, $vmid, $user, $opts, $comp) = @_;

    my $readfrom = $archive;

    my $cfg = PVE::Storage::config();
    my $commands = [];
    my $bwlimit = $opts->{bwlimit};

    my $dbg_cmdstring = '';
    # append a command to the pipeline; everything after it reads from STDIN
    my $add_pipe = sub {
        my ($cmd) = @_;
        push @$commands, $cmd;
        $dbg_cmdstring .= ' | ' if length($dbg_cmdstring);
        $dbg_cmdstring .= PVE::Tools::cmd2string($cmd);
        $readfrom = '-';
    };

    my $input = undef;
    if ($archive eq '-') {
        $input = '<&STDIN';
    } else {
        # If we use a backup from a PVE defined storage we also consider that
        # storage's rate limit:
        my (undef, $volid) = PVE::Storage::path_to_volume_id($cfg, $archive);
        if (defined($volid)) {
            my ($sid, undef) = PVE::Storage::parse_volume_id($volid);
            my $readlimit = PVE::Storage::get_bandwidth_limit('restore', [$sid], $bwlimit);
            if ($readlimit) {
                print STDERR "applying read rate limit: $readlimit\n";
                my $cstream = ['cstream', '-t', $readlimit*1024, '--', $readfrom];
                $add_pipe->($cstream);
            }
        }
    }

    if ($comp) {
        my $info = PVE::Storage::decompressor_info('vma', $comp);
        my $cmd = $info->{decompressor};
        push @$cmd, $readfrom;
        $add_pipe->($cmd);
    }

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    rmtree $tmpdir;

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { warn "got interrupt - ignored\n"; };

    my $mapfifo = "/var/tmp/vzdumptmp$$.fifo";
    POSIX::mkfifo($mapfifo, 0600);
    my $fifofh;
    my $openfifo = sub { open($fifofh, '>', $mapfifo) or die $! };

    $add_pipe->(['vma', 'extract', '-v', '-r', $mapfifo, $readfrom, $tmpdir]);

    my $devinfo = {}; # info about drives included in backup
    my $virtdev_hash = {}; # info about allocated drives

    my $rpcenv = PVE::RPCEnvironment::get();

    my $conffile = PVE::QemuConfig->config_file($vmid);

    # Note: $oldconf is undef if VM does not exist
    my $cfs_path = PVE::QemuConfig->cfs_config_path($vmid);
    my $oldconf = PVE::Cluster::cfs_read_file($cfs_path);
    my $new_conf_raw = '';

    my %storage_limits;

    # allocate the target volumes and tell `vma` (via the fifo) where to write them
    my $print_devmap = sub {
        my $cfgfn = "$tmpdir/qemu-server.conf";

        # we can read the config - that is already extracted
        my $fh = IO::File->new($cfgfn, "r") ||
            die "unable to read qemu-server.conf - $!\n";

        my $fwcfgfn = "$tmpdir/qemu-server.fw";
        if (-f $fwcfgfn) {
            my $pve_firewall_dir = '/etc/pve/firewall';
            mkdir $pve_firewall_dir; # make sure the dir exists
            PVE::Tools::file_copy($fwcfgfn, "${pve_firewall_dir}/$vmid.fw");
        }

        $virtdev_hash = $parse_backup_hints->($rpcenv, $user, $cfg, $fh, $devinfo, $opts);

        foreach my $info (values %{$virtdev_hash}) {
            my $storeid = $info->{storeid};
            next if defined($storage_limits{$storeid});

            my $limit = PVE::Storage::get_bandwidth_limit('restore', [$storeid], $bwlimit) // 0;
            print STDERR "rate limit for storage $storeid: $limit KiB/s\n" if $limit;
            $storage_limits{$storeid} = $limit * 1024;
        }

        foreach my $devname (keys %$devinfo) {
            die "found no device mapping information for device '$devname'\n"
                if !$devinfo->{$devname}->{virtdev};
        }

        # create empty/temp config
        if ($oldconf) {
            PVE::Tools::file_set_contents($conffile, "memory: 128\n");
            $restore_cleanup_oldconf->($cfg, $vmid, $oldconf, $virtdev_hash);
        }

        # allocate volumes
        my $map = $restore_allocate_devices->($cfg, $virtdev_hash, $vmid);

        # print restore information to $fifofh
        foreach my $virtdev (sort keys %$virtdev_hash) {
            my $d = $virtdev_hash->{$virtdev};
            next if $d->{is_cloudinit}; # no need to restore cloudinit

            my $storeid = $d->{storeid};
            my $volid = $d->{volid};

            my $map_opts = '';
            if (my $limit = $storage_limits{$storeid}) {
                $map_opts .= "throttling.bps=$limit:throttling.group=$storeid:";
            }

            my $write_zeros = 1;
            if (PVE::Storage::volume_has_feature($cfg, 'sparseinit', $volid)) {
                $write_zeros = 0;
            }

            my $path = PVE::Storage::path($cfg, $volid);

            print $fifofh "${map_opts}format=$d->{format}:${write_zeros}:$d->{devname}=$path\n";

            print "map '$d->{devname}' to '$path' (write zeros = ${write_zeros})\n";
        }

        # second pass over the backed up config: rewrite it for the new volumes
        $fh->seek(0, 0) || die "seek failed - $!\n";

        my $cookie = { netcount => 0 };
        while (defined(my $line = <$fh>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $fh->close();
    };

    # remaining seconds of a timer set by our caller (0 if none) while our own
    # header timeout is armed; undef once the previous timer got restored
    my $oldtimeout;

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };
        local $SIG{ALRM} = sub { die "got timeout\n"; };

        $oldtimeout = alarm(5); # for reading the VMA header - might hang with a corrupted one

        my $parser = sub {
            my $line = shift;

            print "$line\n";

            if ($line =~ m/^DEV:\sdev_id=(\d+)\ssize:\s(\d+)\sdevname:\s(\S+)$/) {
                my ($dev_id, $size, $devname) = ($1, $2, $3);
                $devinfo->{$devname} = { size => $size, dev_id => $dev_id };
            } elsif ($line =~ m/^CTIME: /) {
                # we correctly received the vma config, so we can disable
                # the timeout now for disk allocation
                alarm($oldtimeout || 0);
                $oldtimeout = undef;
                $print_devmap->();
                print $fifofh "done\n";
                close($fifofh);
                $fifofh = undef;
            }
        };

        print "restore vma archive: $dbg_cmdstring\n";
        run_command($commands, input => $input, outfunc => $parser, afterfork => $openfifo);
    };
    my $err = $@;

    # If we failed before the header was parsed, our 5 second timer may still be
    # armed. It must be disarmed even when there was no previous timer
    # ($oldtimeout == 0), else SIGALRM could hit the cleanup below after the
    # local ALRM handler is gone.
    alarm($oldtimeout) if defined($oldtimeout);

    $restore_deactivate_volumes->($cfg, $virtdev_hash);

    close($fifofh) if $fifofh;
    unlink $mapfifo;
    rmtree $tmpdir;

    if ($err) {
        $restore_destroy_volumes->($cfg, $virtdev_hash);
        die $err;
    }

    my $new_conf = restore_merge_config($conffile, $new_conf_raw, $opts->{override_conf});
    check_restore_permissions($rpcenv, $user, $new_conf);
    PVE::QemuConfig->write_config($vmid, $new_conf);

    eval { rescan($vmid, 1); };
    warn $@ if $@;

    PVE::AccessControl::add_vm_to_pool($vmid, $opts->{pool}) if $opts->{pool};
}
7464
# Restore a VM from a (legacy) tar based vzdump archive.
#
# $archive - path of the archive, or '-' to read it from STDIN
# $vmid    - ID of the guest that is created (or overwritten)
# $user    - exported to the extraction helper as VZDUMP_USER
# $opts    - hash ref; keys read here: override_conf (must be empty), storage,
#            pool, prealloc, info, unique
#
# The archive is unpacked with `tar --to-command=qmextract`; the helper records
# the volumes it created in "$tmpdir/qmrestore.stat", which is used to build the
# volume map for the new config, or to clean up on failure.
sub restore_tar_archive {
    my ($archive, $vmid, $user, $opts) = @_;

    if (scalar(keys $opts->{override_conf}->%*) > 0) {
        my $keystring = join(' ', keys $opts->{override_conf}->%*);
        die "cannot pass along options ($keystring) when restoring from tar archive\n";
    }

    if ($archive ne '-') {
        my $firstfile = tar_archive_read_firstfile($archive);
        die "ERROR: file '$archive' does not look like a QemuServer vzdump backup\n"
            if $firstfile ne 'qemu-server.conf';
    }

    my $storecfg = PVE::Storage::config();

    # avoid zombie disks when restoring over an existing VM -> cleanup first
    # pass keep_empty_config=1 to keep the config (thus VMID) reserved for us
    # skiplock=1 because qmrestore has set the 'create' lock itself already
    my $vmcfgfn = PVE::QemuConfig->config_file($vmid);
    destroy_vm($storecfg, $vmid, 1, { lock => 'restore' }) if -f $vmcfgfn;

    my $tocmd = "/usr/lib/qemu-server/qmextract";

    $tocmd .= " --storage " . PVE::Tools::shellquote($opts->{storage}) if $opts->{storage};
    $tocmd .= " --pool " . PVE::Tools::shellquote($opts->{pool}) if $opts->{pool};
    $tocmd .= ' --prealloc' if $opts->{prealloc};
    $tocmd .= ' --info' if $opts->{info};

    # tar option "xf" does not autodetect compression when read from STDIN,
    # so we pipe to zcat
    my $cmd = "zcat -f|tar xf " . PVE::Tools::shellquote($archive) . " " .
        PVE::Tools::shellquote("--to-command=$tocmd");

    my $tmpdir = "/var/tmp/vzdumptmp$$";
    mkpath $tmpdir;

    local $ENV{VZDUMP_TMPDIR} = $tmpdir;
    local $ENV{VZDUMP_VMID} = $vmid;
    local $ENV{VZDUMP_USER} = $user;

    my $conffile = PVE::QemuConfig->config_file($vmid);
    my $new_conf_raw = '';

    # disable interrupts (always do cleanups)
    local $SIG{INT} =
        local $SIG{TERM} =
        local $SIG{QUIT} =
        local $SIG{HUP} = sub { print STDERR "got interrupt - ignored\n"; };

    eval {
        # enable interrupts
        local $SIG{INT} =
            local $SIG{TERM} =
            local $SIG{QUIT} =
            local $SIG{HUP} =
            local $SIG{PIPE} = sub { die "interrupted by signal\n"; };

        if ($archive eq '-') {
            print "extracting archive from STDIN\n";
            run_command($cmd, input => "<&STDIN");
        } else {
            print "extracting archive '$archive'\n";
            run_command($cmd);
        }

        # NOTE(review): this only leaves the eval block, so with 'info' set the
        # (empty) $new_conf_raw is still written to the config file below -
        # confirm that this is intended.
        return if $opts->{info};

        # read new mapping
        my $map = {};
        my $statfile = "$tmpdir/qmrestore.stat";
        if (my $fd = IO::File->new($statfile, "r")) {
            while (defined (my $line = <$fd>)) {
                if ($line =~ m/vzdump:([^\s:]*):(\S+)$/) {
                    $map->{$1} = $2 if $1;
                } else {
                    print STDERR "unable to parse line in statfile - $line\n";
                }
            }
            $fd->close();
        }

        my $confsrc = "$tmpdir/qemu-server.conf";

        my $srcfd = IO::File->new($confsrc, "r") || die "unable to open file '$confsrc'\n";

        my $cookie = { netcount => 0 };
        while (defined (my $line = <$srcfd>)) {
            $new_conf_raw .= restore_update_config_line(
                $cookie,
                $map,
                $line,
                $opts->{unique},
            );
        }

        $srcfd->close();
    };
    if (my $err = $@) {
        tar_restore_cleanup($storecfg, "$tmpdir/qmrestore.stat") if !$opts->{info};
        # the stat file has been consumed above - do not leave the temporary
        # directory behind on failure either
        rmtree $tmpdir;
        die $err;
    }

    rmtree $tmpdir;

    PVE::Tools::file_set_contents($conffile, $new_conf_raw);

    PVE::Cluster::cfs_update(); # make sure we read new file

    eval { rescan($vmid, 1); };
    warn $@ if $@;
}
7577
# Call $func once for every storage that is referenced by a non-CDROM volume
# of the given config.
#
# $conf - VM config
# $func - code ref, invoked as $func->($storeid) in sorted order of storage IDs
#
# Volumes whose 'file' cannot be parsed as a volume ID are skipped.
sub foreach_storage_used_by_vm {
    my ($conf, $func) = @_;

    my %used_storage;

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;
        return if drive_is_cdrom($drive);

        # noerr=1: yields nothing usable instead of dying for plain paths
        my ($sid) = PVE::Storage::parse_volume_id($drive->{file}, 1);
        $used_storage{$sid} = $sid if $sid;
    });

    for my $sid (sort keys %used_storage) {
        $func->($sid);
    }
}
7597
# storage types for which QEMU does the snapshots (unless 'krbd' is set)
my $qemu_snap_storage = {
    rbd => 1,
};

# Decide whether snapshots of a volume have to be done by QEMU.
#
# $storecfg - storage configuration (its 'ids' hash is consulted)
# $volid    - volume ID
# $deviceid - device name; anything matching 'tpmstate0' is never handled by QEMU
#
# Returns 1 for storages listed in $qemu_snap_storage that do not have 'krbd'
# set and for volumes ending in '.qcow2' or '.qed'; an empty return otherwise.
# Dies if the storage of the volume is not part of $storecfg.
sub do_snapshots_with_qemu {
    my ($storecfg, $volid, $deviceid) = @_;

    return if $deviceid =~ m/tpmstate0/;

    my $storage_name = PVE::Storage::parse_volume_id($volid);
    my $scfg = $storecfg->{ids}->{$storage_name}
        // die "could not find storage '$storage_name'\n";

    my $handled_by_type = $qemu_snap_storage->{$scfg->{type}} && !$scfg->{krbd};
    return 1 if $handled_by_type;

    return 1 if $volid =~ m/\.(qcow2|qed)$/;

    return;
}
7620
# Check whether the guest agent answers a 'guest-ping' within the 3 second
# timeout handed to mon_cmd().
#
# $vmid   - guest ID
# $nowarn - if true, no warning is emitted when the ping fails
#
# Returns 1 if the ping succeeded, 0 otherwise.
sub qga_check_running {
    my ($vmid, $nowarn) = @_;

    eval { mon_cmd($vmid, "guest-ping", timeout => 3); };
    my $err = $@ or return 1;

    warn "QEMU Guest Agent is not running - $err" if !$nowarn;
    return 0;
}
7631
# Convert the volumes of a VM into base volumes (for use as a template).
#
# $vmid - guest ID
# $conf - VM config, updated in place and written after each converted volume
# $disk - optional config key; if given, only that drive is converted
#
# CD-ROM drives and volumes whose storage lacks the 'template' feature are
# left untouched.
sub template_create {
    my ($vmid, $conf, $disk) = @_;

    my $storecfg = PVE::Storage::config();

    my $convert_volume = sub {
        my ($ds, $drive) = @_;

        return if drive_is_cdrom($drive);
        return if $disk && $ds ne $disk;

        my $volid = $drive->{file};
        return if !PVE::Storage::volume_has_feature($storecfg, 'template', $volid);

        $drive->{file} = PVE::Storage::vdisk_create_base($storecfg, $volid);
        $conf->{$ds} = print_drive($drive);

        # persist right away, so already converted volumes are recorded
        # even if a later one fails
        PVE::QemuConfig->write_config($vmid, $conf);
    };

    PVE::QemuConfig->foreach_volume($conf, $convert_volume);
}
7652
# Translate an 'iscsi://<portal>/<target>/<lun>' path into the option string
# form ('file.driver=iscsi,...') that is used together with qemu-img's
# '--image-opts' / '--target-image-opts'.
#
# $path - iSCSI URL
#
# Returns the option string; dies if $path does not have the expected form.
sub convert_iscsi_path {
    my ($path) = @_;

    if ($path =~ m|^iscsi://([^/]+)/([^/]+)/(.+)$|) {
        my $portal = $1;
        my $target = $2;
        my $lun = $3;

        my $initiator_name = get_initiator_name();

        return "file.driver=iscsi,file.transport=tcp,file.initiator-name=$initiator_name,".
            "file.portal=$portal,file.target=$target,file.lun=$lun,driver=raw";
    }

    die "cannot convert iscsi path '$path', unknown format\n";
}
7669
# Copy a disk image with `qemu-img convert`.
#
# $src_volid           - source volume ID, or a path to an existing file/block device
# $dst_volid           - destination volume ID (must be a valid volume ID)
# $size                - image size in bytes, only used to render the progress output
# $snapname            - optional snapshot to read from
# $is_zero_initialized - if true (and the target is not iSCSI), the target is
#                        written through the 'zeroinit:' filter
# $bwlimit             - optional rate limit, passed as '-r <bwlimit>K'
#
# Dies with "copy failed: ..." if qemu-img fails.
sub qemu_img_convert {
    my ($src_volid, $dst_volid, $size, $snapname, $is_zero_initialized, $bwlimit) = @_;

    my $storecfg = PVE::Storage::config();
    my ($src_storeid, $src_volname) = PVE::Storage::parse_volume_id($src_volid, 1);
    my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid, 1);

    die "destination '$dst_volid' is not a valid volid for qemu-img convert\n" if !$dst_storeid;

    my $cachemode;
    my $src_path;
    my $src_is_iscsi = 0;
    my $src_format;

    if ($src_storeid) {
        PVE::Storage::activate_volumes($storecfg, [$src_volid], $snapname);
        my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
        $src_format = qemu_img_format($src_scfg, $src_volname);
        $src_path = PVE::Storage::path($storecfg, $src_volid, $snapname);
        $src_is_iscsi = ($src_path =~ m|^iscsi://|);
        $cachemode = 'none' if $src_scfg->{type} eq 'zfspool';
    } elsif (-f $src_volid || -b $src_volid) {
        # plain file or block device: guess the format from the extension, if any
        $src_path = $src_volid;
        if ($src_path =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/) {
            $src_format = $1;
        }
    }

    die "source '$src_volid' is not a valid volid nor path for qemu-img convert\n" if !$src_path;

    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);
    my $dst_format = qemu_img_format($dst_scfg, $dst_volname);
    my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
    my $dst_is_iscsi = ($dst_path =~ m|^iscsi://|);

    my $cmd = [];
    push @$cmd, '/usr/bin/qemu-img', 'convert', '-p', '-n';
    push @$cmd, '-l', "snapshot.name=$snapname"
        if $snapname && $src_format && $src_format eq "qcow2";
    push @$cmd, '-t', 'none' if $dst_scfg->{type} eq 'zfspool';
    push @$cmd, '-T', $cachemode if defined($cachemode);
    push @$cmd, '-r', "${bwlimit}K" if defined($bwlimit);

    if ($src_is_iscsi) {
        push @$cmd, '--image-opts';
        $src_path = convert_iscsi_path($src_path);
    } elsif ($src_format) {
        push @$cmd, '-f', $src_format;
    }

    if ($dst_is_iscsi) {
        push @$cmd, '--target-image-opts';
        $dst_path = convert_iscsi_path($dst_path);
    } else {
        push @$cmd, '-O', $dst_format;
    }

    push @$cmd, $src_path;

    if (!$dst_is_iscsi && $is_zero_initialized) {
        push @$cmd, "zeroinit:$dst_path";
    } else {
        push @$cmd, $dst_path;
    }

    # turn the '(NN.NN/100%)' progress lines of qemu-img into readable output
    my $parser = sub {
        my $line = shift;
        if ($line =~ m/\((\S+)\/100\%\)/) {
            my $percent = $1;
            my $transferred = int($size * $percent / 100);
            my $total_h = render_bytes($size, 1);
            my $transferred_h = render_bytes($transferred, 1);

            print "transferred $transferred_h of $total_h ($percent%)\n";
        }

    };

    eval { run_command($cmd, timeout => undef, outfunc => $parser); };
    my $err = $@;
    die "copy failed: $err" if $err;
}
7752
# Determine the image format of a volume for use with QEMU tools.
#
# $scfg    - storage config of the volume's storage
# $volname - volume name
#
# The file extension is only consulted for storages that have a 'path'
# configured; in every other case "raw" is returned.
sub qemu_img_format {
    my ($scfg, $volname) = @_;

    return "raw" if !$scfg->{path};

    return $1 if $volname =~ m/\.($PVE::QemuServer::Drive::QEMU_FORMAT_RE)$/;

    return "raw";
}
7762
# Start a 'drive-mirror' block job for one drive and monitor it.
#
# $vmid                - guest the drive belongs to
# $drive               - config key of the drive (the QEMU device is "drive-$drive")
# $dst_volid           - target volume ID, or an 'nbd:' URI
# $vmiddst             - passed on to qemu_drive_mirror_monitor()
# $is_zero_initialized - if true, a volume target is written through 'zeroinit:'
# $jobs                - hash ref of already running jobs; the new job is added to it
# $completion, $qga    - passed on to qemu_drive_mirror_monitor()
# $bwlimit             - optional limit in KB/s
# $src_bitmap          - optional dirty bitmap; switches to an incremental sync
#
# If the job cannot be started, all jobs in $jobs are cancelled and the sub
# dies with "mirroring error: ...".
sub qemu_drive_mirror {
    my ($vmid, $drive, $dst_volid, $vmiddst, $is_zero_initialized, $jobs, $completion, $qga, $bwlimit, $src_bitmap) = @_;

    $jobs = {} if !$jobs;
    $jobs->{"drive-$drive"} = {};

    my ($qemu_target, $format);
    if ($dst_volid =~ /^nbd:/) {
        ($qemu_target, $format) = ($dst_volid, "nbd");
    } else {
        my $storecfg = PVE::Storage::config();
        my ($dst_storeid, $dst_volname) = PVE::Storage::parse_volume_id($dst_volid);
        my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

        $format = qemu_img_format($dst_scfg, $dst_volname);

        my $dst_path = PVE::Storage::path($storecfg, $dst_volid);
        $qemu_target = $dst_path;
        $qemu_target = "zeroinit:$dst_path" if $is_zero_initialized;
    }

    my %mirror_opts = (
        timeout => 10,
        device => "drive-$drive",
        mode => "existing",
        sync => "full",
        target => $qemu_target,
    );
    $mirror_opts{format} = $format if $format;

    if (defined($src_bitmap)) {
        $mirror_opts{sync} = 'incremental';
        $mirror_opts{bitmap} = $src_bitmap;
        print "drive mirror re-using dirty bitmap '$src_bitmap'\n";
    }

    if (!defined($bwlimit)) {
        print "drive mirror is starting for drive-$drive\n";
    } else {
        $mirror_opts{speed} = $bwlimit * 1024;
        print "drive mirror is starting for drive-$drive with bandwidth limit: ${bwlimit} KB/s\n";
    }

    # if a job already runs for this device we get an error, catch it for cleanup
    eval { mon_cmd($vmid, "drive-mirror", %mirror_opts); };
    if (my $err = $@) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        warn "$@\n" if $@;
        die "mirroring error: $err\n";
    }

    return qemu_drive_mirror_monitor($vmid, $vmiddst, $jobs, $completion, $qga);
}
7814
# Monitor the block jobs listed in $jobs (hash keyed by the job's device id) until they are done
# and finish them according to $completion.
#
# $completion can be either
# 'complete': wait until all jobs are ready, block-job-complete them (default)
# 'cancel': wait until all jobs are ready, block-job-cancel them
# 'skip': wait until all jobs are ready, return with block jobs in ready state
# 'auto': wait until all jobs disappear, only use for jobs which complete automatically
#
# $op is the job type to look for in the 'query-block-jobs' result (default 'mirror').
#
# If $vmiddst is set and differs from $vmid, the jobs are cancelled (not completed) once all of
# them are ready. While cancelling, the guest file systems are frozen if $qga is set and the
# guest agent is running, otherwise the VM is suspended.
#
# Finished jobs are removed from $jobs. On any error the remaining jobs are cancelled and the
# error is re-raised, prefixed with "block job ($op) error: ".
sub qemu_drive_mirror_monitor {
    my ($vmid, $vmiddst, $jobs, $completion, $qga, $op) = @_;

    $completion //= 'complete';
    $op //= "mirror";

    eval {
        # number of failed completion attempts, see the 'timed out' check below
        my $err_complete = 0;

        my $starttime = time();
        while (1) {
            die "block job ('$op') timed out\n" if $err_complete > 300;

            my $stats = mon_cmd($vmid, "query-block-jobs");
            my $ctime = time();

            my $running_jobs = {};
            for my $stat (@$stats) {
                next if $stat->{type} ne $op;
                $running_jobs->{$stat->{device}} = $stat;
            }

            my $readycounter = 0;

            for my $job_id (sort keys %$jobs) {
                my $job = $running_jobs->{$job_id};

                my $vanished = !defined($job);
                my $complete = defined($jobs->{$job_id}->{complete}) && $vanished;
                if ($complete || ($vanished && $completion eq 'auto')) {
                    print "$job_id: $op-job finished\n";
                    delete $jobs->{$job_id};
                    next;
                }

                # a job that is gone without us having completed it did not finish regularly
                die "$job_id: '$op' has been cancelled\n" if !defined($job);

                my $busy = $job->{busy};
                my $ready = $job->{ready};
                if (my $total = $job->{len}) {
                    my $transferred = $job->{offset} || 0;
                    my $percent = sprintf "%.2f", ($transferred * 100 / $total);

                    my $duration = $ctime - $starttime;
                    my $total_h = render_bytes($total, 1);
                    my $transferred_h = render_bytes($transferred, 1);

                    my $status = sprintf(
                        "transferred $transferred_h of $total_h ($percent%%) in %s",
                        render_duration($duration),
                    );

                    if ($ready) {
                        if ($busy) {
                            $status .= ", still busy"; # shouldn't even happen? but mirror is weird
                        } else {
                            $status .= ", ready";
                        }
                    }
                    # only log progress until the job was seen ready once
                    print "$job_id: $status\n" if !$jobs->{$job_id}->{ready};
                    $jobs->{$job_id}->{ready} = $ready;
                }

                $readycounter++ if $job->{ready};
            }

            last if scalar(keys %$jobs) == 0;

            if ($readycounter == scalar(keys %$jobs)) {
                print "all '$op' jobs are ready\n";

                # do the complete later (or has already been done)
                last if $completion eq 'skip' || $completion eq 'auto';

                if ($vmiddst && $vmiddst != $vmid) {
                    my $agent_running = $qga && qga_check_running($vmid);
                    if ($agent_running) {
                        print "freeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-freeze"); };
                        warn $@ if $@;
                    } else {
                        print "suspend vm\n";
                        eval { PVE::QemuServer::vm_suspend($vmid, 1); };
                        warn $@ if $@;
                    }

                    # if we clone a disk for a new target vm, we don't switch the disk
                    PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs);

                    if ($agent_running) {
                        print "unfreeze filesystem\n";
                        eval { mon_cmd($vmid, "guest-fsfreeze-thaw"); };
                        warn $@ if $@;
                    } else {
                        print "resume vm\n";
                        eval { PVE::QemuServer::vm_resume($vmid, 1, 1); };
                        warn $@ if $@;
                    }

                    last;
                } else {

                    for my $job_id (sort keys %$jobs) {
                        # try to switch the disk if source and destination are on the same guest
                        print "$job_id: Completing block job_id...\n";

                        # separate name, so the job type in $op stays intact for messages
                        my $completion_command;
                        if ($completion eq 'complete') {
                            $completion_command = 'block-job-complete';
                        } elsif ($completion eq 'cancel') {
                            $completion_command = 'block-job-cancel';
                        } else {
                            die "invalid completion value: $completion\n";
                        }

                        eval { mon_cmd($vmid, $completion_command, device => $job_id) };
                        my $err = $@;
                        if ($err && $err =~ m/cannot be completed/) {
                            print "$job_id: block job cannot be completed, trying again.\n";
                            $err_complete++;
                        } elsif ($err) {
                            # Never report a failed command as success. Count it as a failed
                            # attempt instead, so a persistent error runs into the timeout while
                            # a job that just vanished is sorted out by the next status query.
                            warn "$job_id: unable to complete block job - $err";
                            $err_complete++;
                        } else {
                            print "$job_id: Completed successfully.\n";
                            $jobs->{$job_id}->{complete} = 1;
                        }
                    }
                }
            }
            sleep 1;
        }
    };
    my $err = $@;

    if ($err) {
        eval { PVE::QemuServer::qemu_blockjobs_cancel($vmid, $jobs) };
        die "block job ($op) error: $err";
    }
}
7956
# Request cancellation of every block job in $jobs (hash keyed by device id) and poll
# 'query-block-jobs' once per second until none of them is reported anymore. Jobs that are gone
# get removed from $jobs, so the hash is empty on return.
sub qemu_blockjobs_cancel {
    my ($vmid, $jobs) = @_;

    for my $job_id (keys %$jobs) {
        print "$job_id: Cancelling block job\n";
        # a failing cancel request is ignored, the polling below decides when we are done
        eval { mon_cmd($vmid, "block-job-cancel", device => $job_id); };
        $jobs->{$job_id}->{cancel} = 1;
    }

    while (1) {
        my $stats = mon_cmd($vmid, "query-block-jobs");

        my %still_running;
        for my $stat (@$stats) {
            $still_running{ $stat->{device} } = $stat;
        }

        for my $job_id (keys %$jobs) {
            next if !defined($jobs->{$job_id}->{cancel});
            next if defined($still_running{$job_id});

            print "$job_id: Done.\n";
            delete $jobs->{$job_id};
        }

        last if scalar(keys %$jobs) == 0;

        sleep 1;
    }
}
7987
# Check for bug #4525: drive-mirror will open the target drive with the same aio setting as the
# source, but some storages have problems with io_uring, sometimes even leading to crashes.
#
# Dies if the source drive uses io_uring (explicitly via its 'aio' setting or as the default of
# its storage) while io_uring is not allowed as default on the target storage.
my sub clone_disk_check_io_uring {
    my ($src_drive, $storecfg, $src_storeid, $dst_storeid, $use_drive_mirror) = @_;

    # Only drive-mirror is affected. Also don't complain when not changing storage: assume if it
    # works for the source, it'll work for the target too.
    return if !$use_drive_mirror || $src_storeid eq $dst_storeid;

    my $src_scfg = PVE::Storage::storage_config($storecfg, $src_storeid);
    my $dst_scfg = PVE::Storage::storage_config($storecfg, $dst_storeid);

    my $cache_direct = drive_uses_cache_direct($src_drive);

    my $src_uses_io_uring = $src_drive->{aio}
        ? $src_drive->{aio} eq 'io_uring'
        : storage_allows_io_uring_default($src_scfg, $cache_direct);

    if ($src_uses_io_uring && !storage_allows_io_uring_default($dst_scfg, $cache_direct)) {
        die "target storage is known to cause issues with aio=io_uring (used by current drive)\n";
    }
}
8014
# Create a linked ($full false) or full clone of a single drive.
#
# $source is a hash with the keys vmid, running, drivename, drive (parsed drive) and snapname,
# $dest a hash with the keys vmid, drivename, efisize, storage and format. Newly created volumes
# are pushed onto $newvollist. $jobs, $completion, $qga and $bwlimit are handed to the
# drive-mirror helpers when a running VM's drive gets mirrored.
#
# Returns a copy of the source drive hash pointing to the new volume, without 'format' and with
# 'size' set if it could be determined.
sub clone_disk {
    my ($storecfg, $source, $dest, $full, $newvollist, $jobs, $completion, $qga, $bwlimit) = @_;

    my ($vmid, $running) = $source->@{qw(vmid running)};
    my ($src_drivename, $drive, $snapname) = $source->@{qw(drivename drive snapname)};

    my ($newvmid, $dst_drivename, $efisize) = $dest->@{qw(vmid drivename efisize)};
    my ($storage, $format) = $dest->@{qw(storage format)};

    my $use_drive_mirror = $full && $running && $src_drivename && !$snapname;

    if ($src_drivename && $dst_drivename && $src_drivename ne $dst_drivename) {
        die "cloning from/to EFI disk requires EFI disk\n"
            if $src_drivename eq 'efidisk0' || $dst_drivename eq 'efidisk0';
        die "cloning from/to TPM state requires TPM state\n"
            if $src_drivename eq 'tpmstate0' || $dst_drivename eq 'tpmstate0';

        # This would lead to two device nodes in QEMU pointing to the same backing image!
        die "cannot change drive name when cloning disk from/to the same VM\n"
            if $use_drive_mirror && $vmid == $newvmid;
    }

    die "cannot move TPM state while VM is running\n"
        if $use_drive_mirror && $src_drivename eq 'tpmstate0';

    # The destination drive name is treated as optional above, so compare against a defined
    # value below instead of triggering 'uninitialized' warnings.
    my $dst_name = $dst_drivename // '';

    my $newvolid;

    print "create " . ($full ? 'full' : 'linked') . " clone of drive ";
    print "$src_drivename " if $src_drivename;
    print "($drive->{file})\n";

    if (!$full) {
        $newvolid = PVE::Storage::vdisk_clone($storecfg, $drive->{file}, $newvmid, $snapname);
        push @$newvollist, $newvolid;
    } else {
        my ($src_storeid, $volname) = PVE::Storage::parse_volume_id($drive->{file});
        my $storeid = $storage || $src_storeid;

        my $dst_format = resolve_dst_disk_format($storecfg, $storeid, $volname, $format);

        my $is_cloudinit = drive_is_cloudinit($drive);

        my $name = undef;
        my $size = undef;
        if ($is_cloudinit) {
            $name = "vm-$newvmid-cloudinit";
            my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
            if ($scfg->{path}) {
                $name .= ".$dst_format";
            }
            $snapname = undef;
            $size = PVE::QemuServer::Cloudinit::CLOUDINIT_DISK_SIZE;
        } elsif ($dst_name eq 'efidisk0') {
            $size = $efisize or die "internal error - need to specify EFI disk size\n";
        } elsif ($dst_name eq 'tpmstate0') {
            $dst_format = 'raw';
            $size = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
        } else {
            clone_disk_check_io_uring($drive, $storecfg, $src_storeid, $storeid, $use_drive_mirror);

            $size = PVE::Storage::volume_size_info($storecfg, $drive->{file}, 10);
        }
        $newvolid = PVE::Storage::vdisk_alloc(
            $storecfg, $storeid, $newvmid, $dst_format, $name, ($size/1024)
        );
        push @$newvollist, $newvolid;

        PVE::Storage::activate_volumes($storecfg, [$newvolid]);

        if ($is_cloudinit) {
            # No data is copied for cloudinit drives, only the volume gets allocated.
            #
            # when cloning multiple disks (e.g. during clone_vm) it might be the last disk
            # if this is the case, we have to complete any block-jobs still there from
            # previous drive-mirrors
            if ((($completion // '') eq 'complete') && (scalar(keys %$jobs) > 0)) {
                qemu_drive_mirror_monitor($vmid, $newvmid, $jobs, $completion, $qga);
            }
        } else {
            my $sparseinit = PVE::Storage::volume_has_feature($storecfg, 'sparseinit', $newvolid);
            if ($use_drive_mirror) {
                qemu_drive_mirror($vmid, $src_drivename, $newvolid, $newvmid, $sparseinit, $jobs,
                    $completion, $qga, $bwlimit);
            } elsif ($dst_name eq 'efidisk0') {
                # the relevant data on the efidisk may be smaller than the source
                # e.g. on RBD/ZFS, so we use dd to copy only the amount
                # that is given by the OVMF_VARS.fd
                my $src_path = PVE::Storage::path($storecfg, $drive->{file}, $snapname);
                my $dst_path = PVE::Storage::path($storecfg, $newvolid);

                my $src_format = (PVE::Storage::parse_volname($storecfg, $drive->{file}))[6];

                # better for Ceph if block size is not too small, see bug #3324
                my $bs = 1024*1024;

                my $cmd = ['qemu-img', 'dd', '-n', '-O', $dst_format];

                if ($src_format eq 'qcow2' && $snapname) {
                    die "cannot clone qcow2 EFI disk snapshot - requires QEMU >= 6.2\n"
                        if !min_version(kvm_user_version(), 6, 2);
                    push $cmd->@*, '-l', $snapname;
                }
                push $cmd->@*, "bs=$bs", "osize=$size", "if=$src_path", "of=$dst_path";
                run_command($cmd);
            } else {
                qemu_img_convert($drive->{file}, $newvolid, $size, $snapname, $sparseinit, $bwlimit);
            }
        }
    }

    # best effort, the size simply stays as it was in the copied drive hash if this fails
    my $new_size = eval { PVE::Storage::volume_size_info($storecfg, $newvolid, 10) };

    my $disk = dclone($drive);
    delete $disk->{format};
    $disk->{file} = $newvolid;
    $disk->{size} = $new_size if defined($new_size);

    return $disk;
}
8134
# Ask the running VM for its QEMU version via QMP and return it as "major.minor".
sub get_running_qemu_version {
    my ($vmid) = @_;

    my $version = mon_cmd($vmid, "query-version");
    my $major = $version->{qemu}->{major};
    my $minor = $version->{qemu}->{minor};

    return "$major.$minor";
}
8140
# Decide whether the old (non-EFI) PXE BIOS files have to be used for the given machine type.
#
# Returns nothing if no machine type is given, else the list ($use_old_bios_files, $machine_type)
# where a trailing '.pxe' marker has been stripped from the machine type.
sub qemu_use_old_bios_files {
    my ($machine_type) = @_;

    return if !$machine_type;

    # an explicit '.pxe' suffix requests the old files
    if ($machine_type =~ m/^(\S+)\.pxe$/) {
        my $stripped_type = $1;
        return (1, $stripped_type);
    }

    my $version = extract_version($machine_type, kvm_user_version());
    # Note: kvm version < 2.4 use non-efi pxe files, and have problems when we
    # load new efi bios files on migration. So this hack is required to allow
    # live migration from qemu-2.2 to qemu-2.4, which is sometimes used when
    # upgrading from proxmox-ve-3.X to proxmox-ve 4.0
    my $use_old_bios_files = !min_version($version, 2, 4);

    return ($use_old_bios_files, $machine_type);
}
8162
# Return the size in bytes (as reported by -s) of the OVMF vars file that get_ovmf_files selects
# for this VM. If no parsed $efidisk is passed, the VM's 'efidisk0' entry is parsed when present.
sub get_efivars_size {
    my ($conf, $efidisk) = @_;

    my $arch = get_vm_arch($conf);

    if (!defined($efidisk) && $conf->{efidisk0}) {
        $efidisk = parse_drive('efidisk0', $conf->{efidisk0});
    }

    my $is_q35 = PVE::QemuServer::Machine::machine_type_is_q35($conf);
    my (undef, $vars_file) = get_ovmf_files($arch, $efidisk, $is_q35);

    return -s $vars_file;
}
8172
# Rewrite the 'efidisk0' entry of $conf with its size set to the current EFI vars size.
# Does nothing if the VM has no EFI disk.
sub update_efidisk_size {
    my ($conf) = @_;

    my $efidisk_string = $conf->{efidisk0};
    return if !defined($efidisk_string);

    my $efidisk = PVE::QemuServer::parse_drive('efidisk0', $efidisk_string);
    $efidisk->{size} = get_efivars_size($conf);
    $conf->{efidisk0} = print_drive($efidisk);

    return;
}
8184
# Rewrite the 'tpmstate0' entry of $conf with its size set to the fixed TPM state disk size.
# Does nothing if the VM has no TPM state, mirroring update_efidisk_size, so that no bogus
# 'tpmstate0' entry gets created from an undefined value.
sub update_tpmstate_size {
    my ($conf) = @_;

    return if !defined($conf->{tpmstate0});

    my $disk = PVE::QemuServer::parse_drive('tpmstate0', $conf->{tpmstate0});
    $disk->{size} = PVE::QemuServer::Drive::TPMSTATE_DISK_SIZE;
    $conf->{tpmstate0} = print_drive($disk);
}
8192
# Allocate a new EFI disk volume on $storeid and fill it with the OVMF vars file that
# get_ovmf_files selects for the given arch/efidisk/smm settings.
#
# Returns the list ($volid, $size), with $size being the volume size info divided by 1024.
sub create_efidisk($$$$$$$) {
    my ($storecfg, $storeid, $vmid, $fmt, $arch, $efidisk, $smm) = @_;

    my (undef, $vars_template) = get_ovmf_files($arch, $efidisk, $smm);

    # the volume gets allocated in kb, the conversion works with the size in bytes
    my $template_bytes = -s $vars_template;
    my $template_kb = PVE::Tools::convert_size($template_bytes, 'b' => 'kb');

    my $volid = PVE::Storage::vdisk_alloc(
        $storecfg, $storeid, $vmid, $fmt, undef, $template_kb,
    );
    PVE::Storage::activate_volumes($storecfg, [$volid]);

    qemu_img_convert($vars_template, $volid, $template_bytes, undef, 0);

    my $allocated_bytes = PVE::Storage::volume_size_info($storecfg, $volid, 3);

    return ($volid, $allocated_bytes / 1024);
}
8208
# Query the IO threads of a running VM and return a hash reference mapping each iothread's 'id'
# to its 'thread-id'.
sub vm_iothreads_list {
    my ($vmid) = @_;

    my $reported = mon_cmd($vmid, 'query-iothreads');

    my %thread_id_of;
    for my $entry (@$reported) {
        $thread_id_of{ $entry->{id} } = $entry->{"thread-id"};
    }

    return \%thread_id_of;
}
8221
# Compute SCSI controller information for $drive, based on the VM's 'scsihw' setting.
#
# Returns the list ($maxdev, $controller, $controller_prefix): the number of devices per
# controller, the controller index derived from the drive index, and the controller id prefix.
sub scsihw_infos {
    my ($conf, $drive) = @_;

    my $scsihw = $conf->{scsihw};
    my $is_single = $scsihw && $scsihw eq 'virtio-scsi-single';

    my $maxdev = 256;
    if (!$scsihw || $scsihw =~ m/^lsi/) {
        $maxdev = 7; # also used when no controller type is configured
    } elsif ($is_single) {
        $maxdev = 1; # one controller per drive
    }

    my $controller = int($drive->{index} / $maxdev);
    my $controller_prefix = $is_single ? "virtioscsi" : "scsihw";

    return ($maxdev, $controller, $controller_prefix);
}
8242
# Determine the image format for a new volume on $storeid.
#
# Uses $format if given, else a format derived from $src_volname as hint. Falls back to the
# storage's default format if neither is available or the result is not among the formats the
# storage reports as valid.
sub resolve_dst_disk_format {
    my ($storecfg, $storeid, $src_volname, $format) = @_;

    my ($default_format, $valid_formats) =
        PVE::Storage::storage_default_format($storecfg, $storeid);

    if (!$format) {
        # no requested format and no source volume to take a hint from
        return $default_format if !$src_volname;

        # if no target format is specified, use the source disk format as hint
        my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
        $format = qemu_img_format($scfg, $src_volname);
    }

    # test if requested format is supported - else use default
    my $is_supported = grep { $_ eq $format } @$valid_formats;

    return $is_supported ? $format : $default_format;
}
8262
# NOTE: if this logic changes, please update docs & possibly gui logic
#
# Pick the storage to use for the VM state. Order of preference:
# 1. the 'vmstatestorage' set in the config
# 2. a shared storage where the VM has at least one disk
# 3. a local storage where the VM has at least one disk
# 4. the storage named 'local'
sub find_vmstate_storage {
    my ($conf, $storecfg) = @_;

    return $conf->{vmstatestorage} if $conf->{vmstatestorage};

    # best candidate found so far, separately for 'shared' and 'local' storages
    my %candidate;

    foreach_storage_used_by_vm($conf, sub {
        my ($sid) = @_;
        my $scfg = PVE::Storage::storage_config($storecfg, $sid);
        my $kind = $scfg->{shared} ? 'shared' : 'local';
        # prefer file based storage
        $candidate{$kind} = $sid if !$candidate{$kind} || $scfg->{path};
    });

    return $candidate{shared} // $candidate{local} // 'local';
}
8286
# Generate a new UUID and return it in the string form produced by UUID::unparse.
sub generate_uuid {
    my $binary_uuid;
    UUID::generate($binary_uuid);

    my $uuid_string;
    UUID::unparse($binary_uuid, $uuid_string);

    return $uuid_string;
}
8293
# Return a freshly generated UUID as 'uuid=<value>' string.
sub generate_smbios1_uuid {
    my $uuid = generate_uuid();

    return "uuid=$uuid";
}
8297
# Send the 'nbd-server-stop' QMP command to the VM and return its result.
sub nbd_stop {
    my ($vmid) = @_;

    return mon_cmd($vmid, 'nbd-server-stop');
}
8303
# Create the (empty) reboot trigger file for the VM below /run/qemu-server.
# Dies if the file cannot be opened for writing.
sub create_reboot_request {
    my ($vmid) = @_;

    my $trigger_file = "/run/qemu-server/$vmid.reboot";

    open(my $fh, '>', $trigger_file)
        or die "failed to create reboot trigger file: $!\n";
    close($fh);
}
8310
# Remove the reboot trigger file of the VM.
#
# Returns the result of unlink, i.e. a true value if a trigger file was removed. A missing file
# is not an error, any other failure to remove it is fatal.
sub clear_reboot_request {
    my ($vmid) = @_;

    my $removed = unlink("/run/qemu-server/$vmid.reboot");

    if (!$removed && $! != POSIX::ENOENT) {
        die "could not remove reboot request for $vmid: $!";
    }

    return $removed;
}
8322
# Translate a legacy boot specification (a string of single-letter device classes, taken from
# $bootcfg->{legacy} or the schema default) into a hash mapping device names to boot indices.
#
# The n-th letter gets the base index n*100. The letters used here are 'd' (CD-ROM drives),
# 'c' (the disk configured as 'bootdisk') and 'n' (network devices).
sub bootorder_from_legacy {
    my ($conf, $bootcfg) = @_;

    my $legacy = $bootcfg->{legacy} || $boot_fmt->{legacy}->{default};

    my @letters = split(//, $legacy);
    my %next_index = map { ($letters[$_] => ($_ + 1) * 100) } 0 .. $#letters;

    my $bootorder = {};

    PVE::QemuConfig->foreach_volume($conf, sub {
        my ($ds, $drive) = @_;

        my $letter = drive_is_cdrom($drive, 1) ? 'd' : 'c';
        return if !$next_index{$letter};

        # every CD-ROM is bootable, but of the disks only the configured bootdisk
        my $is_bootdisk = $conf->{bootdisk} && $conf->{bootdisk} eq $ds;
        $bootorder->{$ds} = $next_index{$letter} if $letter eq 'd' || $is_bootdisk;

        # the index advances for every volume of the class, bootable or not
        $next_index{$letter} += 1;
    });

    if ($next_index{n}) {
        for my $i (0 .. $MAX_NETS - 1) {
            my $netname = "net$i";
            next if !$conf->{$netname};
            $bootorder->{$netname} = $next_index{n};
            $next_index{n} += 1;
        }
    }

    return $bootorder;
}
8362
# Generate default device list for 'boot: order=' property. Matches legacy
# default boot order, but with explicit device names. This is important, since
# the fallback for when neither 'order' nor the old format is specified relies
# on 'bootorder_from_legacy' above, and it would be confusing if this diverges.
#
# Returns an array reference with, in this order and as far as present, the first hard disk,
# the first CD-ROM drive and the first configured network device.
sub get_default_bootdevices {
    my ($conf) = @_;

    my @devices;

    # harddisk (0) first, then cdrom (1)
    for my $want_cdrom (0, 1) {
        my $drive_name = PVE::QemuServer::Drive::resolve_first_disk($conf, $want_cdrom);
        push @devices, $drive_name if $drive_name;
    }

    # network
    my $netname = first { $conf->{$_} } map { "net$_" } (0 .. $MAX_NETS - 1);
    push @devices, $netname if defined($netname);

    return \@devices;
}
8390
# Return a hash mapping device names to their boot index for the VM config.
#
# Without a 'boot' property, or with one using the legacy format, the result comes from
# bootorder_from_legacy. With 'order', devices are numbered in the listed order.
sub device_bootorder {
    my ($conf) = @_;

    return bootorder_from_legacy($conf) if !defined($conf->{boot});

    my $boot = parse_property_string($boot_fmt, $conf->{boot});

    if (!defined($boot) || $boot->{legacy}) {
        return bootorder_from_legacy($conf, $boot);
    }

    my $bootorder = {};
    if ($boot->{order}) {
        my $index = 100; # start at 100 to allow user to insert devices before us with -args
        $bootorder->{$_} = $index++ for PVE::Tools::split_list($boot->{order});
    }

    return $bootorder;
}
8410
# Connect to the qmeventd socket and send the vzdump handshake for $vmid.
#
# The connection is retried after 25ms when it fails with EINTR or EAGAIN, giving up after the
# fifth failed attempt. Any other connection error is fatal right away. Returns the socket
# handle, which the caller has to close when it is no longer needed.
sub register_qmeventd_handle {
    my ($vmid) = @_;

    my $peer = "/var/run/qmeventd.sock";

    my $fh;
    my $count = 0;
    while (!($fh = IO::Socket::UNIX->new(Peer => $peer, Blocking => 0, Timeout => 1))) {
        $count++;

        die "unable to connect to qmeventd socket (vmid: $vmid) - $!\n"
            if $! != EINTR && $! != EAGAIN;

        die "unable to connect to qmeventd socket (vmid: $vmid) - timeout "
            . "after $count retries\n"
            if $count > 4;

        usleep(25000);
    }

    # send handshake to mark VM as backing up
    print $fh to_json({vzdump => {vmid => "$vmid"}});

    # return handle to be closed later when inhibit is no longer required
    return $fh;
}
8438
8439 # bash completion helper
8440
# Completion helper returning the volume IDs of backup archives whose format is 'vma.' followed
# by one of the known compressor extensions. If the current value starts with '<storage>:', only
# that storage is listed.
sub complete_backup_archives {
    my ($cmdname, $pname, $cvalue) = @_;

    my $cfg = PVE::Storage::config();

    # stays undef if no storage prefix was typed yet
    my ($storeid) = $cvalue =~ m/^([^:]+):/;

    my $data = PVE::Storage::template_list($cfg, $storeid, 'backup');

    my $res = [];
    for my $items (values %$data) {
        for my $item (@$items) {
            next if $item->{format} !~ m/^vma\.(${\PVE::Storage::Plugin::COMPRESSOR_RE})$/;
            next if !defined($item->{volid});
            push @$res, $item->{volid};
        }
    }

    return $res;
}
8464
# Return the IDs of all VMs known to vmstatus() as array reference.
#
# With $running undefined every VM is listed. Otherwise templates are left out and only VMs
# that are running ($running true) or not running ($running false) are returned.
my $complete_vmid_full = sub {
    my ($running) = @_;

    my $idlist = vmstatus();

    my $wanted = sub {
        my ($info) = @_;

        return 1 if !defined($running);
        return 0 if $info->{template};

        my $is_running = $info->{status} eq 'running';
        return $running ? $is_running : !$is_running;
    };

    return [ grep { $wanted->($idlist->{$_}) } keys %$idlist ];
};
8484
# Completion helper: the IDs of all VMs, regardless of their state.
sub complete_vmid {
    return $complete_vmid_full->();
}
8488
# Completion helper: the IDs of all non-template VMs that are not running.
sub complete_vmid_stopped {
    return $complete_vmid_full->(0);
}
8492
# Completion helper: the IDs of all non-template VMs that are running.
sub complete_vmid_running {
    return $complete_vmid_full->(1);
}
8496
# Completion helper: the IDs of all storages that pass storage_check_enabled and have the
# 'images' content type configured.
sub complete_storage {
    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, undef, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8511
# Completion helper: the IDs of all storages that pass storage_check_enabled for the target
# node and have the 'images' content type configured. The target node is taken from the second
# entry of $all_args.
sub complete_migration_storage {
    my ($cmd, $param, $current_value, $all_args) = @_;

    my $targetnode = @$all_args[1];

    my $cfg = PVE::Storage::config();
    my $ids = $cfg->{ids};

    my @usable = grep {
        PVE::Storage::storage_check_enabled($cfg, $_, $targetnode, 1)
            && $ids->{$_}->{content}->{images}
    } keys %$ids;

    return \@usable;
}
8529
# Return a true value if the VM reports the QMP status 'paused'. If the config does not exist
# on this node or the status query fails, the error is only printed as a warning and a false
# value is returned.
sub vm_is_paused {
    my ($vmid) = @_;

    my $status = eval {
        PVE::QemuConfig::assert_config_exists_on_node($vmid);
        mon_cmd($vmid, "query-status");
    };
    if (my $err = $@) {
        warn "$err\n";
    }

    return $status && $status->{status} eq "paused";
}
8539
# Assert that the storage of volume $vol has the volume's content type configured.
# Returns 1 on success and dies otherwise.
sub check_volume_storage_type {
    my ($storecfg, $vol) = @_;

    my ($storeid) = PVE::Storage::parse_volume_id($vol);
    my $scfg = PVE::Storage::storage_config($storecfg, $storeid);
    my ($vtype) = PVE::Storage::parse_volname($storecfg, $vol);

    if (!$scfg->{content}->{$vtype}) {
        die "storage '$storeid' does not support content-type '$vtype'\n";
    }

    return 1;
}
8552
# Add the MAC address of each network device of the VM to the forwarding database of its
# bridge. Devices without MAC address or without bridge are skipped, with a warning where
# appropriate.
sub add_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        my ($net_index) = $opt =~ m/^net(\d+)$/;
        next if !defined($net_index);

        my $iface = "tap${vmid}i${net_index}";

        # NOTE: expect setups with learning off to *not* use auto-random-generation of MAC on start
        my $net = parse_net($conf->{$opt}, 1);
        next if !$net;

        my ($mac, $bridge) = $net->@{qw(macaddr bridge)};

        if (!$mac) {
            # only worth a warning if the bridge port does not learn MAC addresses itself
            log_warn("MAC learning disabled, but vNIC '$iface' has no static MAC to add to forwarding DB!")
                if !file_read_firstline("/sys/class/net/$iface/brport/learning");
            next;
        }

        if (!$bridge) {
            log_warn("Interface '$iface' not attached to any bridge.");
            next;
        }

        if ($have_sdn) {
            PVE::Network::SDN::Zones::add_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::add_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8581
# Remove the MAC address of each network device of the VM from the forwarding database of its
# bridge. Devices without MAC address or without bridge are skipped.
sub del_nets_bridge_fdb {
    my ($conf, $vmid) = @_;

    for my $opt (keys %$conf) {
        next if $opt !~ m/^net(\d+)$/;
        my $iface = "tap${vmid}i$1";

        my $net = parse_net($conf->{$opt}) or next;
        my $mac = $net->{macaddr} or next;

        # add_nets_bridge_fdb never adds an entry for a device without bridge, so there is
        # nothing to remove. This also keeps undef out of the sysfs path check below.
        my $bridge = $net->{bridge} or next;

        if ($have_sdn) {
            PVE::Network::SDN::Zones::del_bridge_fdb($iface, $mac, $bridge, $net->{firewall});
        } elsif (-d "/sys/class/net/$bridge/bridge") { # avoid fdb management with OVS for now
            PVE::Network::del_bridge_fdb($iface, $mac, $net->{firewall});
        }
    }
}
8600
8601 1;